"""Train an unsupervised k-means classifier on the workshop hands-on data.

The script downloads a CSV file, fits a KMeans model on the selected
feature columns, prints the cluster centers, stores the distance of every
data point to each cluster center in the dataframe and saves a histogram
of those distances to 'dist_to_cluster.png'.
"""
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Number of clusters to search for. This single value drives the KMeans
# setup, the 'dist_to_clu<N>' columns and the histograms below.
N_CLUSTERS = 3

# Histogram fill colours for the first clusters. If N_CLUSTERS is raised
# beyond this list, matplotlib's "CN" cycle colours are used instead.
HIST_COLORS = ['g', 'r', 'b']

print(" ")
print("TRAIN CLASSIFIER (UN-SUPERVISED)")
print(" ")

#Load the dataframe from the FSU cluster
#and prepare the data, i.e. select the proper features
#********************************************
print("Load dataframe and prepare data...")

data_directory = 'http://hadron.physics.fsu.edu/~dlersch/GlueX_PANDA_EIC_ML_Workshop'
data_name = 'hands_on_data_033_033_033.csv'
dataFrame = pd.read_csv(data_directory + '/' + data_name)

used_features = ['var3', 'var4']  #--> Change the elements here, in order to use different features
# Plain array with one row per data point and one column per used feature
X = dataFrame[used_features].values

print("...done!")
print(" ")
#********************************************

#Set clustering algorithm and fit data
#********************************************
print("Setup algorithm and fit data...")

# random_state is fixed so that repeated runs give the same clusters
kmeans = KMeans(n_clusters=N_CLUSTERS, random_state=42)
kmeans.fit(X)

print("...done!")
print(" ")
#********************************************

#Print the cluster centers:
#********************************************
print("Cluster centers:")
print(" ")

clu_centers = kmeans.cluster_centers_
print(clu_centers)
print(" ")
#********************************************

#Plot the distance between each data point
#and the found cluster centers
#********************************************
print("Determine distance to clusters and add them to the dataframe...")

# One column per cluster: column i holds the distance to cluster center i
distance_to_clu = kmeans.transform(X)
for clu_idx in range(N_CLUSTERS):
    # Column names are 1-based: dist_to_clu1, dist_to_clu2, ...
    dataFrame['dist_to_clu{}'.format(clu_idx + 1)] = distance_to_clu[:, clu_idx]

print("...done!")
print(" ")

print("Plot the distances...")

for clu_idx in range(N_CLUSTERS):
    if clu_idx < len(HIST_COLORS):
        face_color = HIST_COLORS[clu_idx]
    else:
        face_color = 'C{}'.format(clu_idx)
    plt.hist(
        distance_to_clu[:, clu_idx],
        bins=100,
        facecolor=face_color,
        alpha=0.5,
        log=True,
        label='Cluster{}'.format(clu_idx + 1),
    )

plt.xlabel('Distance to Cluster')
plt.ylabel('Entries [a.u.]')
plt.legend()
plt.savefig('dist_to_cluster.png')
plt.close()

print("...done!")
print(" ")
#********************************************