---
title: clusters
keywords: fastai
sidebar: home_sidebar
nb_path: "04_utils_clusters.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %} {% raw %}

clusters_annotation[source]

clusters_annotation(df, method, params)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
from sklearn import datasets
import pandas as pd
from simplebitk.utils.plots import scatter_plots_for_reduce_dimensional

# Example: annotate the iris data via clusters_annotation and plot the result.
iris = datasets.load_iris()

X = iris.data
y = iris.target  # assigned but not used again in this cell
# Wrap the feature matrix in a DataFrame with columns named x1..x4.
df = pd.DataFrame(X,columns=['x1','x2','x3','x4'])
# Store what clusters_annotation returns (method: cluster_methods['DBSCAN'],
# params: eps=0.3) in a new 'dbscan' column.
df['dbscan']=clusters_annotation(df,cluster_methods['DBSCAN'],{'eps':0.3})
# NOTE(review): presumably draws x1 against x2 with points coloured by the
# 'dbscan' column -- confirm against scatter_plots_for_reduce_dimensional.
scatter_plots_for_reduce_dimensional(df,'x1',
                                     'x2',hue='dbscan')
{% endraw %} {% raw %}

find_peak_valley[source]

find_peak_valley(sequence, peak=True)

{% endraw %} {% raw %}

find_best_cluster_number[source]

find_best_cluster_number(df, cluster_method, params, ass_method=silhouette_score)

{% endraw %} {% raw %}
{% endraw %}

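The following examples use `find_best_cluster_number` to find the best cluster number, each with a different scoring function passed as `ass_method`.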

{% raw %}
import matplotlib.pyplot as plt
{% endraw %} {% raw %}
# Example: run find_best_cluster_number with silhouette_score as the scoring function.
# Input is a 100x4 array drawn from a normal distribution (mean 3, std 4).
# No random seed is set in this cell, so the printed values can differ between runs.
X=np.random.normal(3,4,(100,4))
i=silhouette_score
# NOTE(review): params contains 'n_clusters', which scikit-learn's DBSCAN does
# not accept (assuming DBSCAN here is sklearn.cluster.DBSCAN) -- confirm how
# find_best_cluster_number consumes this dict.
a,records,peaks = find_best_cluster_number(X,DBSCAN,{'n_clusters':3,'eps':0.3},ass_method=i)

# Plot column 1 of records against column 0, then mark the rows indexed by peaks with "x".
plt.plot(records[:,0],records[:,1])
plt.plot(records[peaks,0], records[peaks,1], "x")

# Print the first returned value and the peak indices.
print(a)
print(peaks)
[ 2.9        -0.01688904]
[14 18]
{% endraw %} {% raw %}
# Example: same call with calinski_harabasz_score as the scoring function.
# X is the array created in an earlier cell.
i=calinski_harabasz_score
# NOTE(review): params contains 'n_clusters', which scikit-learn's DBSCAN does
# not accept (assuming DBSCAN here is sklearn.cluster.DBSCAN) -- confirm how
# find_best_cluster_number consumes this dict.
a,records,peaks = find_best_cluster_number(X,DBSCAN,{'n_clusters':3,'eps':0.3},ass_method=i)

# Plot column 1 of records against column 0, then mark the rows indexed by peaks with "x".
plt.plot(records[:,0],records[:,1])
plt.plot(records[peaks,0], records[peaks,1], "x")

# Print the first returned value and the peak indices.
print(a)
print(peaks)
[2.9        5.05533082]
[14 18]
{% endraw %} {% raw %}
# Example: same call with davies_bouldin_score as the scoring function.
# X is the array created in an earlier cell.
# NOTE(review): in scikit-learn a lower Davies-Bouldin score indicates better
# clustering -- confirm whether find_best_cluster_number accounts for that
# when selecting its result for this scorer.
i=davies_bouldin_score
# NOTE(review): params contains 'n_clusters', which scikit-learn's DBSCAN does
# not accept (assuming DBSCAN here is sklearn.cluster.DBSCAN) -- confirm how
# find_best_cluster_number consumes this dict.
a,records,peaks = find_best_cluster_number(X,DBSCAN,{'n_clusters':3,'eps':0.3},ass_method=i)

# Plot column 1 of records against column 0, then mark the rows indexed by peaks with "x".
plt.plot(records[:,0],records[:,1])
plt.plot(records[peaks,0], records[peaks,1], "x")

# Print the first returned value and the peak indices.
print(a)
print(peaks)
[3.5       3.2772862]
[17]
{% endraw %} {% raw %}
import sys
{% endraw %}