{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python_defaultSpec_1599912880157", "display_name": "Python 3.8.5 64-bit" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "# Unsupervised Learning with kMeans\n", "\n", "Unlike the MLP and random forest classifier used before, the kMeans clustering algorithm does not need labeled data. \n", "\n", "## Loading and preparing the data\n", "\n", "We load the dataframe as usual, but we will use only two input features (var3 and var4). You can try other pairings, of course. Also, we will not normalize the input features, as they all have the same value range already." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 6, "metadata": { "tags": [] }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": "\nLoad dataframe and prepare data...\n...done!\n \n" } ], "source": [ "import pandas as pd\n", "from joblib import load\n", "\n", "print(\" \")\n", "print(\"Load dataframe and prepare data...\")\n", "\n", "data_directory = 'http://hadron.physics.fsu.edu/~dlersch/GlueX_PANDA_EIC_ML_Workshop'\n", "data_name = 'hands_on_data_033_033_033.csv' #---> Change this name to analyze imbalanced data\n", "dataFrame = pd.read_csv(data_directory + '/' + data_name)\n", "\n", "used_features = ['var3','var4'] #--> Change the elements here, in order to use different features\n", "X = dataFrame[used_features].values\n", "\n", "print(\"...done!\")\n", "print(\" \")" ] }, { "source": [ "## Setting up the kMeans-Algorithm and fitting the Data\n", "\n", "scikit-learn offers several clustering algorithms, but here we will stick to the kMeans clustering algorithm. However, you are welcome to explore other options." 
], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 7, "metadata": { "tags": [] }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": "Setup algorithm and fit data...\n...done!\n \n" } ], "source": [ "from sklearn.cluster import KMeans\n", "\n", "print(\"Setup algorithm and fit data...\")\n", "\n", "kmeans = KMeans(n_clusters = 3,random_state=42)\n", "kmeans.fit(X)\n", "\n", "print(\"...done!\")\n", "print(\" \")" ] }, { "source": [ "## Obtaining the Results\n", "\n", "After fitting the data, we want to check the cluster centers (found by the algorithm) and calculate the distance between each point in our data and each of the cluster centers." ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": "array([[0.02006308, 0.00525589],\n [0.00403661, 0.00424066],\n [0.0146636 , 0.01820708]])" }, "metadata": {}, "execution_count": 8 } ], "source": [ "kmeans.cluster_centers_" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "tags": [] }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": "Determine distance to clusters and add the to the dataframe...\n...done!\n \nPlot the distances...\n" }, { "output_type": "display_data", "data": { "text/plain": "
", "image/svg+xml": "\n\n\n\n \n \n \n \n 2020-09-12T08:41:46.306721\n image/svg+xml\n \n \n Matplotlib v3.3.0, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAdx0lEQVR4nO3df5BcZb3n8feHEAhgSFiILpDEJCQiSYBgxh+IrEEvv5SICFwC1lUUiVwFjYgIq1UMW3iRq4tRg5tKCTvuahEw94KJIHApJChLSSYQQgJGIEBlAghEjcAlkB/f/aPPHDqT+dG/Tnef7s+rqivdp8/p/j4zmf728zzn+R5FBGZmZgC7NToAMzNrHk4KZmaWclIwM7OUk4KZmaWcFMzMLLV7owOoxgEHHBATJkxodBhmZrmycuXKlyNiTH/P5TIpSJoNzJ48eTLd3d2NDsfMLFckPTvQc7kcPoqIZRExd9SoUY0OxcyspeQyKZiZWTacFMzMLJX7OQUzaw9bt26lp6eHLVu2NDqU3BgxYgRjx45l+PDhJR+jPNc+6ujoCE80m7WHp59+mpEjR7L//vsjqdHhNL2IYNOmTbzyyitMnDhxp+ckrYyIjv6O8/CRmeXCli1bnBDKIIn999+/7J6Vk4KZ5YYTQnkq+Xk5KZiZWSqXE80toTiD53hex6xRdGVtew1xxdB/hy+88ALz5s1jxYoVjB49mne84x3Mnz+fT33qU6xZs6bs9+zq6uKEE07goIMOKuu4++67j3nz5rF69WoWL17MGWecUfZ7DySXPQVJsyUt2rx5c6NDMbM2ERGcdtppzJo1i6eeeoqVK1dy9dVX8+c//7ni1+zq6uK5554r65ht27Yxfvx4urq6OOeccyp+74HkMil4RbOZ1dtvf/tbhg8fzgUXXJBuO/LIIxk3blz6uKuriwsvvDB9fMopp3Dvvfeyfft2zj33XKZPn87hhx/OD37wA5YsWUJ3dzef/vSnmTFjBq+//jorV67kwx/+MDNnzuTEE0/k+eefB2DWrFnMmzePjo4OfvjDHzJhwgSOOOIIdtut9h/hHj6yllM8rFDKkIBZKdasWcPMmTMrOnbVqlVs3LgxHWL629/+xujRo1mwYAHf//736ejoYOvWrVx00UX86le/YsyYMdx0001861vf4oYbbgDgzTffrEutNycFM7OMTZo0ifXr13PRRRfx8Y9/nBNOOGGXfdatW8eaNWs4/vjjAdi+fTsHHnhg+vxZZ51Vl1idFMyKeP7fBjJt2jSWLFky6D677747O3bsSB/3rhHYb7/9eOSRR7jzzjtZuHAhN998c9oD6BURTJs2jQceeKDf195nn32qbEFpcjmnY
GZWbx/5yEd44403WLRoUbpt9erVbNiwIX08YcIEVq1axY4dO9iwYQMPPvggAC+//DI7duzg9NNP56qrruKhhx4CYOTIkbzyyisAHHroobz00ktpUti6dStr166tV/NS7ilYTZWyVsbfwK0W6j1fJIlbbrmFefPmcc011zBixAgmTJjA/Pnz032OOeYYJk6cyNSpUznssMN4z3veA8DGjRv53Oc+l/Yirr76agDOPfdcLrjgAvbaay8eeOABlixZwle+8hU2b97Mtm3bmDdvHtOmTdsllhUrVnDaaafx17/+lWXLlnHFFVfULIG49lGjtNA4RbmLJrNubjUTzS30a2k5jz/+OIcddlijw8id/n5uLVf7yOsU8k1662ZmzSWXScHrFBqvVh/sThBmzSWXScHMzLLhiWYrWdbf5j2eb9Z4TgqlGugT0Z9eZtZCPHxUraJB8VpXbWwGjRrz91yDWWM4KZhZPhV/c6jFrQQvvPACc+bM4ZBDDmHmzJl87GMf409/+hPTp0+vqAmVVEkFuPbaa5k6dSpHHHEEH/3oR3n22Wcrev/+OCkMpoKvq7pS6S3vWqENZrXSTKWzjzrqKLq7u1m9ejVnnHEGl156acUx9OWkUEPR+dYtr5oxqXkoyZpBM5XOPu6449h7770B+MAHPkBPT0/N2umJ5gzltoRzZxPHKnly3xqiWUtnX3/99Zx88smVN6wPJwVrql6BWSvKqnT2z3/+c7q7u1m+fHnNYm2qpCBpH2A50BkRv250PNUoHkJSZ457Dc3ICxqsAZqtdPbdd9/Nd77zHZYvX86ee+5ZSZP6lemcgqQbJL0oaU2f7SdJWifpSUmXFT31TeDmLGMaUhUD2CL6vdFZdGsSu8wdNGGM/dnp52pWR81UOvvhhx/mi1/8IkuXLuXtb397TduZdU+hC1gA/J/eDZKGAdcBxwM9wApJS4GDgceAERnHVFNlfzh1BuoECAIP21TNvYb2VeffdzOVzv7GN77Bq6++yplnngnA+PHjWbp0aW3amXXpbEkTgF9HxPTk8dEUhodOTB5fnuz6NmAfYCrwOnBaROzY9RXfkknp7DJ7CLX6xlqP/9+Dzh00eQ+hP7sk1eSH6NLZrcmlsytTbunsRswpHAxsKHrcA7w/Ii4EkHQu8PJACUHSXGAuFLJjI2QxdOEPoxpIfogBSW/MzMrVVBPNABHRNcTzi4BFUOgp1COmPBvyzKIc9hB6FSdnD8WZ1UYjFq9tBMYVPR6bbCtZq19kx4u1qpcuIvQP0awsjegprACmSJpIIRnMAc4p5wUiYhmwrKOj4/wM4utXns528boDM6tUpklB0o3ALOAAST3AFRFxvaQLgTuBYcANEVHWFaclzQZmT548udYhN51S5xrKSgQ5HjKqiCdszEqWaVKIiLMH2H47cHsVr1v3nkIz6PvZ5h6BmdWaC+JBLgfxnRB25kVt7acBlbObpnT2woULOfzww5kxYwYf+tCHeOyxxyp6//403dlHpajX8FFLfcC025DRQDyUZBXqLZ392c9+lsWLFwPwyCOPVF06e/r06Rx00EElH7Nt2zbOOeectFrr0qVLufjii7njjjsqjqNYLpNCuw4f7cQf8mZ1NVDp7GeeeSZ93NXVRXd3NwsWLAAKpbMvueQSjj32WM477zy6u7uRxOc//3nGjRuXls7uXdH82GOPcfHFF/Pqq69ywAEH0NXVxYEHHsisWbOYMWMGv//97zn77LP5+te/nr7na6+9hmo4ypHLpGBmVm/NVjr7uuuu49prr+XNN9/knnvuqb6BiVzOKbT6OgWrkxzOJVk+FZfOvuOOO9h333132ae4dPaMGTO46qqrdrp4Tt/S2V/+8pd56qmnuOaaa7jqqqtqFmsuk0JELIuIuaNGjWp0KGbWJqZNm8bKlSsH3Weo0tmzZs1i4cKFfOELX9jl2N7S2atWrWLVqlU8+uij3HXXXenzfUtn95ozZw633nprBS3qXy6Tgtlgf
CaSZaGZSmc/8cQT6f3bbruNKVOm1KydnlPowx8kbSodQvLvPy/qffJYM5XOXrBgAXfffTfDhw9nv/3242c/+1nt2pl16ewsFJ2Sen5xxqziBd+66w+FllJuobydiuz5v0JTcensypRbOjuXw0eeUzAzy0Yuk4KZmWXDScHMciOPw92NVMnPK5dJwesUrFQ+E6l1jBgxgk2bNjkxlCgi2LRpEyNGlHfZ+1xONPeq2TWaPdHcFkqZdB7wam45/jtpFVu3bqWnpyc999+GNmLECMaOHcvw4cN32t5s12g2Myvb8OHDmThxYqPDaHm5HD4yM7NsOCmYmVnKw0d4HsFK4OswWJvIZU/BZx+ZmWUjl0nBK5rNzLLh4SNrGwOebmpmqbZNCjtf+N5jxFYGzy9YC2vbpBCdb933d0Yzs4JczimYmVk22ranYFYTHkqyFuOegpmZpXKZFLxOwcwsG7lMCl6nYGaWDc8pmNWK5xesBTgpWFvyQjaz/uVy+MjMzLLhnoJZFjyUZDnlnoKZmaWcFMzMLOWkYGZmKScFMzNLeaLZLGuedLYcaZqegqTDJC2UtETSPzc6HmsfItKbWbvLNClIukHSi5LW9Nl+kqR1kp6UdBlARDweERcA/wgck2VcZmbWv0GTgqS/D3F7RdKfBnmJLuCkPq85DLgOOBmYCpwtaWry3CeA24Dbq2hTSfzt0MxsV0P1FJ6KiH0HuY0EXhvo4Ii4D/hLn83vA56MiPUR8SawGDg12X9pRJwMfLryJpk1Memtm1kTGmqi+fQSXqOUfYodDGwoetwDvF/SLOBTwJ4M0lOQNBeYCzB+/Pgy39rMzAYzaFKIiPVDvUAp+5QiIu4F7i1hv0XAIoCOjg6P/ZiZ1VBFE82S7pb0G0mnVHD4RmBc0eOxybZy3t8X2bH881CSNaFKzz76DPBt4J0VHLsCmCJpoqQ9gDnA0nJewBfZMTPLRkVJISKei4iVEXHdYPtJuhF4ADhUUo+k8yJiG3AhcCfwOHBzRKwt5/3dUzAzy4aihBWWkp6GXc/djIhJWQRVqo6Ojuju7q7oWPfYbSgNu/iOVz1bxiStjIiO/p4rtcxF8cEjgDOB/1JtYGZm1lxKGj6KiE1Ft40RMR/4eLahDczDR2Zm2SippyDpPUUPd6PQc2hYMb2IWAYs6+joOL9RMZiZtaJSP9j/Z9H9bcDTFGoUmZlZCykpKUTEcVkHUg5Js4HZkydPbnQo1sKK62LVddLZpbatgSquktpnSKmuvE7BzCwb1ZTO9jUPzMxaTMWTxRHhSV6zrHkoyeqs5KQgaT9gCoV1CkBaGrvuPKdgZpaNkoaPJH0BuI9CaYork387swtrcJ5TsHrzRZmsXZQ6p/BV4L3As8mZSEcBf8sqKDPrhyuqWh2UmhS2RMQWAEl7RsQfgUOzC8vMzBqh1DmFHkmjgVuB/5D0V+DZrIIaiucUrK158tkyVFKV1J0OkD4MjALuSK6x3DCukmqN0LDqqf1xUrAK1KJKaioillcfkll+NWyls1kdDDqnIOmhoV6glH3MLCO+pKfV2FA9hcMkrR7keVEYSjJrS+41WKsZKim8u4TX2F6LQMysSp6AthoYNClERMPOMBqMzz4yG4IThFWomoJ4DeMVzWZm2chlUjBrRi6DYa2g1NpH+0jaLbn/LkmfkDQ829DM8qm4TlIpNZPK3b/8gNT/zawfpfYU7gNGSDoYuAv4J6Arq6DMWlGjiuoN+L6lJAsnkbZTalJQRPwn8CngJxFxJjAtu7DMWltT9CDK5QTRFkpd0SxJRwOfBs5Ltg3LJiQzK1U1SWLAY3f6zPc6jHZTalKYB1wO3BIRayVNAn6bWVRmlglPhNtQSkoKSb2j5ZL2Th6vB76SZWBm1r+m+GDvHULyGoiWU+rZR0dLegz4Y/L4SEk/yTSyweOZLWnR5s2bGxWCmQG6UunNWkNJpbMl/QE4A1gaEUcl29ZExPSM4xuUS2ebNYHOXf+Y4oq3P
leKE0bxdmucwUpnl7x4LSI29NnkmkdmRnQWbgB0BnSGv3TlWKkTzRskfRCIZNHaV4HHswvLzPLurcRQ1Gvo9DREsyu1p3AB8GXgYGAjMCN5bGZtLl0/0elP+1ZQ6tlHL1NYo2BmVpXeOYa+8wsDTVZ7HqK+Bk0Kki6NiH+V9GPY9Ty4iPBpqWZmLWSonkLvvEFlp/iYmVmuDHWRnWWShgGHR8QldYrJzNqArtTO8xCdDQvFigw5pxAR2yUdU49gzKwNDDQhvVOCeGt+wesc6qvUU1JXSVoK/BJ4rXdjRPx7JlGZmVlDlJoURgCbgI8UbQugpklB0ieBjwP7AtdHxF21fH0zMxtcqUnhpxFxf/GGUoeUJN0AnAK8WFwWQ9JJwA8plOD+aUR8NyJuBW6VtB/wfQoX9DEz24WHlbJR6uK1H5e4rT9dwEnFG5LJ6+uAk4GpwNmSphbt8u3keTMzq6Oh1ikcDXwQGCPp4qKn9qXEi+xExH2SJvTZ/D7gyaQEN5IWA6dKehz4LvCbiHhogJjmAnMBxo8fX0oIZtYiXI01e0MNH+0BvC3Zb2TR9r9TqJpaqYOB4gJ7PcD7gYuAfwBGSZocEQv7HhgRi4BFUKiSWkUMZtases9E6qcCa388lFQ7Q61T6L24TldEPJt1MBHxI+BHQ+0naTYwe/LkyVmHZGbWVkqdU9hT0iJJd0m6p/dWxftuBMYVPR6bbCtJRCyLiLmjRo2qIgQzM+ur1LOPfgksBH5Kba6jsAKYImkihWQwBzin1IPdUzBrD+l1GuhzGdISh5WsfKX2FLZFxP+KiAcjYmXvrZQDJd0IPAAcKqlH0nkRsQ24ELiTQn2lmyNibalBu6dg1h56y3I3xXWp20SpPYVlkr4E3AK80bsxIv4y1IERcfYA228Hbi/x/c3MrA5KvUbz0/1sjoiYVPuQhlY0fHT+E088UeFr1DYmM2sCJQ4rtfsZSoNdo7mkpNCsOjo6oru7sqreTgpmLW6QBOGkMHBSGHROQdKlRffP7PPcv9QmPDOz+tKVSm+2s6EmmucU3b+8z3Mn0SCSZktatHnz5kaFYGbWkoaaaNYA9/t7XDcRsQxY1tHRcX6jYjCzJjfA9RkG45XRQyeFGOB+f4/NzHLHQ0g7GyopHCnp7xR6BXsl90kej8g0MjOzJtTqvYmhah+VVAm13ryi2czKUsFQUrsqdUVzU/GKZjOzbOQyKZiZWTZKLXNhZtYaPJQ0KPcUzMwslcuegieazSxrrX6W0UBy2VPwRLOZNYNWLJeRy56CmVlNeH5hF7nsKZiZWTbcUzAzG0IrDQ8NJZdJwRPNZpapzgEmlttgiCmXw0eeaDYzy0YuewpmZjU3UO+gRK1yCmsuewpmZpYNJwUzM0s5KZiZWcpJwczMUp5oNjMrVRusgHZPwczMUrlMCpJmS1q0efPmRodiZm0qOgu3VpPLpODFa2bWtDoDCZTT0aVcJgUzs2bUCj0HTzSbmWWkuLcQOVnk7J6CmZmlnBTMzCzlpGBmVgd9r8nQrJfydFIwM6uACERUXV212TgpmJlZymcfmZlVK+ktDDoQ1Bmos/d+cw0ZFXNPwczMUk2TFCRNknS9pCWNjsXMrF1lmhQk3SDpRUlr+mw/SdI6SU9KugwgItZHxHlZxmNm1oya6UykrHsKXcBJxRskDQOuA04GpgJnS5qacRxmZlaCTJNCRNwH/KXP5vcBTyY9gzeBxcCppb6mpLmSuiV1v/TSSzWM1szMGjGncDCwoehxD3CwpP0lLQSOknT5QAdHxKKI6IiIjjFjxmQdq5lZW2maU1IjYhNwQSn7SpoNzJ48eXK2QZmZtZlG9BQ2AuOKHo9NtpXM11MwM8tGI5LCCmCKpImS9gDmAEsbEIeZmfWR9SmpNwIPAIdK6pF0XkRsAy4E7gQeB26OiLVlvq4vx2lmudXMF+PJdE4hIs4eYPvtwO1VvO4yYFlHR8f5lb6GmZntqmlWNJfDPQUzy7O0u
moTVljNZVLwRLOZWTZymRTMzCwbTbNOoRxep2BmLaPPEFJvee1o0MhSLnsKHj4yM8tGLpOCmZllw0nBzMxSuUwKPiXVzFpdo66vkMuk4DkFM7Ns5DIpmJlZNpwUzMws5aRgZmYpL14zM2tiA002xxXZrG7LZU/BE81mZtnIZVIwM7NsOCmYmVnKScHMzFJOCmZmlsplUnCZCzOzbOQyKfjsIzOzbOQyKZiZWTacFMzMLOWkYGZmKScFMzNLOSmYmVnKScHMzFK5TApep2Bmlo1cJgWvUzAzy0Yuk4KZmWXDScHMzFJOCmZmlnJSMDOzlJOCmZmlnBTMzCzlpGBmZiknBTMzSykiGh1DxSS9BDxb4eEHAC/XMJxm5Xa2FreztTSqne+MiDH9PZHrpFANSd0R0dHoOLLmdrYWt7O1NGM7PXxkZmYpJwUzM0u1c1JY1OgA6sTtbC1uZ2tpuna27ZyCmZntqp17CmZm1oeTgpmZpVoyKUg6SdI6SU9Kuqyf5/eUdFPy/B8kTSh67vJk+zpJJ9Y18DJV2k5J+0v6raRXJS2oe+BlqqKdx0taKenR5N+P1D34MlTRzvdJWpXcHpF0Wt2DL0M1f5/J8+OT/7uX1C3oClTx+5wg6fWi3+nCugYeES11A4YBTwGTgD2AR4Cpffb5ErAwuT8HuCm5PzXZf09gYvI6wxrdpgzauQ/wIeACYEGj25JhO48CDkruTwc2Nro9GbVzb2D35P6BwIu9j5vtVk07i55fAvwSuKTR7cno9zkBWNOo2Fuxp/A+4MmIWB8RbwKLgVP77HMq8LPk/hLgo5KUbF8cEW9ExNPAk8nrNaOK2xkRr0XE74Et9Qu3YtW08+GIeC7ZvhbYS9KedYm6fNW08z8jYluyfQTQzGePVPP3iaRPAk9T+H02s6ra2UitmBQOBjYUPe5JtvW7T/LHtBnYv8Rjm0U17cyTWrXzdOChiHgjozirVVU7Jb1f0lrgUeCCoiTRbCpup6S3Ad8ErqxDnNWq9v/tREkPS1ou6disgy22ez3fzKwRJE0DrgFOaHQsWYmIPwDTJB0G/EzSbyIiDz3BcnQCP4iIV5vgC3WWngfGR8QmSTOBWyVNi4i/1+PNW7GnsBEYV/R4bLKt330k7Q6MAjaVeGyzqKadeVJVOyWNBW4BPhMRT2UebeVq8vuMiMeBVynMoTSjatr5fuBfJT0DzAP+u6QLM463UhW3Mxm+3gQQESspzE28K/OIE62YFFYAUyRNlLQHhQmcpX32WQp8Nrl/BnBPFGZ4lgJzkrMCJgJTgAfrFHe5qmlnnlTcTkmjgduAyyLi/noFXKFq2jkx+VBB0juBdwPP1CfsslXczog4NiImRMQEYD7wLxHRrGfPVfP7HCNpGICkSRQ+h9bXKe7WO/so+cz7GPAnChn2W8m2/wF8Irk/gsLZC09S+NCfVHTst5Lj1gEnN7otGbbzGeAvFL5V9tDnzIhmulXaTuDbwGvAqqLb2xvdngza+U8UJl5XAQ8Bn2x0W7L6f1v0Gp008dlHVf4+T+/z+5xdz7hd5sLMzFKtOHxkZmYVclIwM7OUk4KZmaWcFMzMLOWkYGZmKScFa2qStieVItcmFUC/Lmm35LkOST8a5NgJks6pX7Q7vfdoSV+q4Lj/KmmxpKeSyq63S3pX0pY1FcZyrqSDKjnW2o+TgjW71yNiRkRMA44HTgauAIiI7oj4yiDHTgAakhSA0RSqYJYsKYZ2C3BvRBwSETOBy4F3VBnLuUBZSaF3MZy1HycFy42IeBGYC1yoglmSfg0g6cNF9ecfljQS+C5wbLLta8m37d9Jeii5fTA5dpakeyUtkfRHSb8oqsr5Xkn/L+mlPChppKRhkr4naYWk1ZK+2E+43wUOSd77e0m835O0RoXrO5zVzzHHAVsjIq2fHxGPRMTvindKvvkvKHr866QNwyR1Fb3H1ySdAXQAv0hi2UvSTBUKra2UdKekA5PXuVfSfEndwFcr/kVZr
vnbgOVKRKxPSgC8vc9TlwBfjoj7VaimuQW4jMKq11MAJO0NHB8RWyRNAW6k8IEJhWsvTAOeA+4HjpH0IHATcFZErJC0L/A6cB6wOSLeq0Ip7vsl3RWFcuu9LgOmR8SM5L1PB2YARwIHACsk3RcRzxcdMx1YWcWPZwZwcERMT95zdET8TYX6QJdERLek4cCPgVMj4qUkOX0H+HzyGntEREd/L27twUnBWsX9wLWSfgH8e0T0aNdKmsOBBZJmANvZucjYgxHRAyBpFYWhp83A8xGxAiCSKpWSTgCOSL6FQ6GQ2RQKdf4H8iHgxojYDvxZ0nLgvexaD6ca64FJkn5MoebTXf3scyiF5PMfyc9nGIWqnL1uqmE8lkNOCpYrSYGw7RSuLnZY7/aI+K6k2yjUm7lf/V9K9WvAnyl8W9+NnS8yVHydhe0M/rch4KKIuLOiRgxsLYXCaEPZxs5DvyMAIuKvko4ETqRwVb1/5K0eQC8BayPi6AFe+7WyIraW4zkFyw1JY4CFFC4hGn2eOyQiHo2IayhUqHw38Aowsmi3URS++e+gUERu2BBvuQ44UNJ7k/cYmUzA3gn8czIUQ3J20D59ju373r8DzkrG/ccA/41dK/DeA+wpaW5Ru47QrhdZeQaYIWk3SeNIrg4o6QBgt4j4NwrFAN/TTyzrgDGSjk6OGa7C9SbMAPcUrPntlQznDKfwDfn/Atf2s988SccBOyh84/5Ncn+7pEeALuAnwL9J+gxwB0N8K46IN5Mx9x9L2ovCfMI/AD+lMLz0UDIh/RLwyT7HbpJ0vwqnkf4GuBQ4msK1egO4NCJe6HNMSDoNmC/pmxR6Ms9QuHZAsfspDFU9BjxOoZImFK7k9b+VnLJL4cwlkrYvlPR6EsMZwI8kjaLwGTCf5r+8pdWJq6SamVnKw0dmZpZyUjAzs5STgpmZpZwUzMws5aRgZmYpJwUzM0s5KZiZWer/AzzoBsCetTUSAAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" } }, { "output_type": "stream", "name": "stdout", "text": "...done!\n \n" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "print(\"Determine distance to clusters and add the to the dataframe...\")\n", "\n", "distance_to_clu = kmeans.transform(X)\n", "\n", "dataFrame['dist_to_clu1'] = distance_to_clu[:,0]\n", "dataFrame['dist_to_clu2'] = distance_to_clu[:,1]\n", "dataFrame['dist_to_clu3'] = distance_to_clu[:,2]\n", "\n", "print(\"...done!\")\n", "print(\" \")\n", "\n", "print(\"Plot the distances...\")\n", "\n", "plt.hist(distance_to_clu[:,0],bins=100,facecolor='g',alpha=0.5,log=True,label='Cluster1')\n", "plt.hist(distance_to_clu[:,1],bins=100,facecolor='r',alpha=0.5,log=True,label='Cluster2')\n", "plt.hist(distance_to_clu[:,2],bins=100,facecolor='b',alpha=0.5,log=True,label='Cluster3')\n", "plt.xlabel('Distance to Cluster')\n", "plt.ylabel('Entries [a.u,]')\n", "plt.legend()\n", "plt.show()\n", "\n", "print(\"...done!\")\n", 
"print(\" \")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }