---
title: clusters
keywords: fastai
sidebar: home_sidebar
nb_path: "04_utils_clusters.ipynb"
---
from sklearn import datasets
import pandas as pd
from simplebitk.utils.plots import scatter_plots_for_reduce_dimensional
# Load the iris data set and split it into features (X) and targets (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Wrap the feature matrix in a DataFrame with four short column names.
feature_columns = ['x1', 'x2', 'x3', 'x4']
df = pd.DataFrame(X, columns=feature_columns)

# Store the result of the DBSCAN annotation (eps=0.3) in a new 'dbscan'
# column; presumably one cluster label per row -- confirm against
# clusters_annotation.
dbscan_params = {'eps': 0.3}
df['dbscan'] = clusters_annotation(df, cluster_methods['DBSCAN'], dbscan_params)

# Plot x1 against x2, using the 'dbscan' column as the hue.
scatter_plots_for_reduce_dimensional(df, 'x1', 'x2', hue='dbscan')
The following example searches for the best number of clusters.
import matplotlib.pyplot as plt
# Synthetic data: a 100 x 4 array drawn from a normal distribution
# with mean 3 and standard deviation 4.
X = np.random.normal(3, 4, (100, 4))

# Run the same search once per scoring metric. For each metric, plot column 1
# of `records` against column 0, mark the entries selected by `peaks` with
# "x", and print the returned best value followed by the peak indices.
# NOTE(review): if DBSCAN is sklearn.cluster.DBSCAN it accepts no `n_clusters`
# argument -- presumably find_best_cluster_number consumes or overrides that
# key; confirm.
for i in (silhouette_score, calinski_harabasz_score, davies_bouldin_score):
    # A fresh parameter dict is built for every call, as in the original cells.
    a, records, peaks = find_best_cluster_number(
        X, DBSCAN, {'n_clusters': 3, 'eps': 0.3}, ass_method=i
    )
    plt.plot(records[:, 0], records[:, 1])
    plt.plot(records[peaks, 0], records[peaks, 1], "x")
    print(a)
    print(peaks)
import sys