import pandas as pd
from joblib import load
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import matthews_corrcoef
import numpy as np
import matplotlib.pyplot as plt

# Banner for this step of the hands-on session
print("  \nCOMPARING METRICS AND STABILITY\n  ")

# Load the dataframe and the classifiers that were trained earlier
#********************************************
print("Load dataframe and trained classifier...")

# The data set is read directly from the workshop web server
data_directory = 'http://hadron.physics.fsu.edu/~dlersch/GlueX_PANDA_EIC_ML_Workshop'
data_name = 'hands_on_data_033_033_033.csv'
dataFrame = pd.read_csv(f"{data_directory}/{data_name}")

# The classifiers are expected as joblib files in the current working directory.
# NOTE: joblib.load unpickles its input, so only load files from a trusted source.
my_rf = load('random_forest_classifier.joblib')
my_mlp = load('mlp_classifier.joblib')

print("...done!")
print("  ")
#********************************************

# Pre-process the data: build the feature matrix X and the label vector Y
#********************************************
print("Pre-process data...")

# --> Change the elements here, in order to use different features
used_features = ['var1', 'var2', 'var3', 'var4']

Y = dataFrame['label'].values
X = dataFrame[used_features].values

# Rescale every feature to the range [0, 1].
# NOTE(review): the scaler is re-fitted on this data set; this reproduces the
# training-time normalization only if training fitted a MinMaxScaler on the
# same data -- confirm against the training script.
scaler = MinMaxScaler()
X = scaler.fit_transform(X)  # Comment out this line to run without feature normalization

print("...done!")
print(" ")
#********************************************

# Scan performance metrics for different
# feature resolutions / smearings
#********************************************
def _append_scores(score_table, y_true, y_pred):
    """Compute all scanned metrics for one prediction and append them to score_table.

    score_table is a dict with the keys 'precision', 'f1', 'acc' and 'mcc',
    each mapping to a list that grows by one entry per call.
    """
    # --> There are different options for average that you might want to explore
    score_table['precision'].append(precision_score(y_true, y_pred, average='macro'))
    score_table['f1'].append(f1_score(y_true, y_pred, average='macro'))
    score_table['acc'].append(accuracy_score(y_true, y_pred))
    score_table['mcc'].append(matthews_corrcoef(y_true, y_pred))


scanned_mlp_scores = {
    'precision': [],
    'f1': [],
    'acc': [],
    'mcc': []
}

scanned_rf_scores = {
    'precision': [],
    'f1': [],
    'acc': [],
    'mcc': []
}

# Relative width of the Gaussian smearing factor applied to every feature value
smearing_values = [0.0, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5]
n_smearing_vals = len(smearing_values)

print(f"Scan performance metrics for {n_smearing_vals} different feature resolutions...")
#++++++++++++++++++++++++++++++++++++
for s in smearing_values:
    # Always smear the ORIGINAL features. Overwriting X inside the loop would
    # compound the smearing of all previous iterations, so the scores would no
    # longer correspond to the smearing value they are plotted against.
    if s != 0.0:
        # Multiplicative smearing: each value is scaled by a factor drawn from N(1, s)
        smearing = np.random.normal(1.0, s, X.shape)
        X_smeared = np.multiply(X, smearing)
    else:
        X_smeared = X

    mlp_prediction = my_mlp.predict(X_smeared)
    rf_prediction = my_rf.predict(X_smeared)

    _append_scores(scanned_mlp_scores, Y, mlp_prediction)
    _append_scores(scanned_rf_scores, Y, rf_prediction)
#++++++++++++++++++++++++++++++++++++

print("...done!")
print(" ")
#********************************************

# Plot and compare the results:
# one panel per classifier, each showing all metrics vs. the feature smearing
#********************************************
print("Plot and compare results from scan...")

# Set the global font size BEFORE the figure is created: rcParams changes only
# affect artists created afterwards, so updating it after plt.subplots() would
# leave text created together with the axes (e.g. axis labels) at the default size.
plt.rcParams.update({'font.size': 25})

fig, ax = plt.subplots(1, 2, sharex=True, sharey=True)
fig.set_size_inches(20, 8)

# (key in the score table, matplotlib format string, legend label)
metric_curves = (
    ('precision', 'k-o', 'Precision'),
    ('f1', 'm-d', 'F1-Score'),
    ('acc', 'r-s', 'Accuracy'),
    ('mcc', 'g-o', 'MCC'),
)

# (axes, panel title, score table filled during the scan)
panels = (
    (ax[0], 'MLP Classifier', scanned_mlp_scores),
    (ax[1], 'Random Forest Classifier', scanned_rf_scores),
)

for axis, title, scores in panels:
    axis.set_title(title)
    for key, fmt, label in metric_curves:
        axis.plot(smearing_values, scores[key], fmt, linewidth=2.0, markersize=10, label=label)

    axis.set_xlabel('Feature Smearing')
    axis.set_ylim(0.0, 1.0)
    axis.grid(True)
    axis.legend()

# The y-axis is shared between the panels, so only the left one gets a label
ax[0].set_ylabel('Score')

fig.savefig('metrics_resolution_scan.png')
plt.close(fig)

print("...done!")
print(" ")
#********************************************