3
3
import seaborn as sns
4
4
import sklearn .datasets as data
5
5
from scipy .cluster import hierarchy
6
+ from collections import deque
6
7
7
8
8
9
class Utils :
@@ -110,27 +111,47 @@ def plot_dendrogram(out):
110
111
return dendrogram
111
112
112
113
@staticmethod
113
- def condense_cluster_tree (hierarchy_tree , hierarchy_clusters , min_cluster_size ):
114
- points_lambda = dict ()
115
- clusters_lambda = dict ()
116
-
117
- for k , v in hierarchy_tree .items ():
118
- if v [0 ][1 ] < min_cluster_size :
119
- for i in hierarchy_clusters [v [0 ][0 ]]:
120
- points_lambda .update ({i : 1 / v [2 ]})
121
- else :
122
- clusters_lambda .update ({v [0 ][0 ]: 1 / v [2 ]})
123
- if v [1 ][1 ] < min_cluster_size :
124
- for j in hierarchy_clusters [v [1 ][0 ]]:
125
- points_lambda .update ({j : 1 / v [2 ]})
126
- else :
127
- clusters_lambda .update ({v [1 ][0 ]: 1 / v [2 ]})
128
-
129
- return points_lambda , clusters_lambda
114
def condense_cluster_tree(hierarchy_tree, hierarchy_clusters, min_cluster_size, num_points):
    """Walk the merge hierarchy top-down and score each cluster that survives.

    Starting from the root id ``2 * num_points - 2``, each cluster taken from
    the stack is followed downwards while at least one side of its split is
    smaller than ``min_cluster_size``.  Members of a too-small side are
    recorded as having left the cluster at ``1 / distance`` of that split, and
    the walk continues into the other side.  The walk stops at the first split
    whose two sides are both large enough (those sides are then scored as
    clusters of their own) or at a split whose two sides are both too small
    (the cluster simply ends there).

    Args:
        hierarchy_tree: mapping ``cluster_id -> (left, right, distance)`` where
            ``left`` and ``right`` are indexable as ``(child_id, child_size)``.
        hierarchy_clusters: mapping ``cluster_id -> sized collection of the
            points in that cluster``.
        min_cluster_size: a child with fewer points than this is treated as
            points dropping out of the parent rather than as a new cluster.
        num_points: number of data points; used only to derive the root id.

    Returns:
        dict mapping each visited cluster id to
        ``(birth - death) * remaining - sum(point_lambdas - birth)``, where
        ``birth``/``death`` are ``1 / distance`` at the first/last split
        visited for that cluster and ``remaining`` is the number of points
        that had not dropped out.  An empty dict is returned when the root id
        is not present in ``hierarchy_tree``.

    NOTE(review): with ``birth <= death`` this expression is never positive,
    i.e. it looks like the negation of the usual HDBSCAN stability
    ``sum(lambda_p - lambda_birth)``.  The sign is kept exactly as written;
    confirm what ``extract_clusters`` expects before changing it.
    NOTE(review): merge distances are assumed to be non-zero, since every
    lambda is computed as ``1 / distance``.
    """
    clusters_stabilities = dict()

    start = (num_points * 2) - 2
    if start not in hierarchy_tree:
        # Nothing to condense (e.g. fewer than two points).
        return clusters_stabilities

    # A plain list is enough here: only LIFO append/pop is needed.
    clusters_stack = [start]

    while clusters_stack:
        i = clusters_stack.pop()
        v = hierarchy_tree[i]
        cluster_birth = 1 / v[2]
        points_lambda = []
        exhausted = False

        while v[0][1] < min_cluster_size or v[1][1] < min_cluster_size:
            next_v = None
            split_lambda = 1 / v[2]
            for child in (v[0], v[1]):
                if child[1] < min_cluster_size:
                    # Every member of the small side leaves at this split.
                    points_lambda.extend(
                        [split_lambda] * len(hierarchy_clusters[child[0]])
                    )
                else:
                    next_v = child[0]

            if next_v is None:
                # Both sides were too small: the cluster ends at this split.
                # Previously this fell through to hierarchy_tree[None] and
                # raised KeyError.
                exhausted = True
                break
            v = hierarchy_tree[next_v]

        cluster_death_points_fall = len(hierarchy_clusters[i]) - len(points_lambda)
        cluster_death = 1 / v[2]
        sum_stabilities = (cluster_birth - cluster_death) * cluster_death_points_fall
        sum_stabilities += -np.sum(np.array(points_lambda) - cluster_birth)
        clusters_stabilities[i] = sum_stabilities

        if exhausted:
            continue

        # Same push order as before (left, then right) so the right child is
        # processed first.  Ids without a tree entry cannot be expanded.
        for child in (v[0], v[1]):
            if child[0] in hierarchy_tree:
                clusters_stack.append(child[0])

    return clusters_stabilities
130
152
131
153
@staticmethod
132
- def extract_clusters (hierarchy_clusters , points_lambda , clusters_lambda ):
133
- clusters_stabilities = dict ()
154
+ def extract_clusters (hierarchy_clusters , clusters_stabilities ):
134
155
135
156
for k , v in hierarchy_clusters .items ()[:- 1 ]:
136
157
pass
@@ -146,6 +167,6 @@ def extract_clusters(hierarchy_clusters, points_lambda, clusters_lambda):
146
167
e = Utils .prims_algorithm (transformed_dist )
147
168
plot = Utils .plot_data (data , e , transformed_dist )
148
169
Z , tree , clusters = Utils .cluster_hierarchy (e , num_points = transformed_dist .shape [0 ])
149
- dn = Utils .plot_dendrogram (Z )
150
- p_lambda , c_lambda = Utils .condense_cluster_tree (tree , clusters , min_cluster_size = 5 )
151
- clusters = Utils .extract_clusters (clusters , p_lambda , c_lambda )
170
+ # dn = Utils.plot_dendrogram(Z)
171
+ c_stabilities = Utils .condense_cluster_tree (tree , clusters , min_cluster_size = 5 , num_points = transformed_dist . shape [ 0 ] )
172
+ clusters = Utils .extract_clusters (clusters , c_stabilities )
0 commit comments