Skip to content

Commit 7940d11

Browse files
committed
modifying the condensed clusters method
1 parent 832056d commit 7940d11

File tree

1 file changed

+43
-22
lines changed

1 file changed

+43
-22
lines changed

hdbscan.py

+43-22
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import seaborn as sns
44
import sklearn.datasets as data
55
from scipy.cluster import hierarchy
6+
from collections import deque
67

78

89
class Utils:
@@ -110,27 +111,47 @@ def plot_dendrogram(out):
110111
return dendrogram
111112

112113
@staticmethod
113-
def condense_cluster_tree(hierarchy_tree, hierarchy_clusters, min_cluster_size):
    """Assign a lambda value (1 / merge distance) to points and to clusters.

    Every record in ``hierarchy_tree`` is read as ``(left, right, distance)``
    where ``left`` and ``right`` are indexed as ``(child_id, child_size)``.
    For each child of each record:

    * if the child's size is below ``min_cluster_size``, every member listed
      in ``hierarchy_clusters[child_id]`` is mapped to ``1 / distance``;
    * otherwise the child id itself is mapped to ``1 / distance``.

    Returns:
        A ``(points_lambda, clusters_lambda)`` pair of dicts.  Later records
        overwrite earlier entries that share the same key.
    """
    points_lambda = {}
    clusters_lambda = {}

    for node in hierarchy_tree.values():
        # Left child first, then right child, mirroring the record layout.
        for side in (0, 1):
            child = node[side]
            if child[1] < min_cluster_size:
                # Too small to be a cluster: its members fall out here.
                for member in hierarchy_clusters[child[0]]:
                    points_lambda[member] = 1/node[2]
            else:
                clusters_lambda[child[0]] = 1/node[2]

    return points_lambda, clusters_lambda
114+
def condense_cluster_tree(hierarchy_tree, hierarchy_clusters, min_cluster_size, num_points):
    """Walk the cluster hierarchy top-down and score each retained cluster.

    Starting at the root, a cluster is followed downwards for as long as at
    least one side of every split is smaller than ``min_cluster_size``; the
    members of such undersized sides are treated as points falling out of the
    cluster at lambda ``1 / distance``.  The walk stops either at a split
    whose two sides are both large enough (both sides are then scored as
    clusters of their own) or when both sides are undersized, in which case
    the cluster simply ends and has no descendants.

    Args:
        hierarchy_tree: mapping of cluster id to a record indexed as
            ``(left, right, distance)`` with ``left``/``right`` indexed as
            ``(child_id, child_size)``.
        hierarchy_clusters: mapping of cluster id to the collection of its
            members; only its length and iteration count are used here.
        min_cluster_size: children smaller than this are not clusters.
        num_points: number of original points; the traversal starts at id
            ``2 * num_points - 2``.

    Returns:
        dict mapping each visited cluster id to its stability score.

    Raises:
        KeyError: if the start id or a followed child id is missing from
            ``hierarchy_tree``.
    """
    clusters_stabilities = {}

    # presumably the root id under SciPy-linkage numbering (n leaves,
    # n - 1 merges) -- verify against cluster_hierarchy.
    start = (num_points * 2) - 2
    clusters_stack = deque([start])

    while clusters_stack:
        i = clusters_stack.pop()
        v = hierarchy_tree[i]
        cluster_birth = 1 / v[2]
        points_lambda = []
        has_true_split = True

        while v[0][1] < min_cluster_size or v[1][1] < min_cluster_size:
            next_v = None
            level_lambda = 1 / v[2]
            for child in (v[0], v[1]):
                if child[1] < min_cluster_size:
                    # One lambda entry per member that falls out at this level.
                    points_lambda.extend(
                        [level_lambda] * len(hierarchy_clusters[child[0]])
                    )
                else:
                    next_v = child[0]

            if next_v is None:
                # Both sides are undersized: nothing is left to follow.  The
                # previous code indexed hierarchy_tree[None] here and raised
                # KeyError, which every branch eventually hit.
                has_true_split = False
                break
            v = hierarchy_tree[next_v]

        # Members still in the cluster when the walk stopped.
        cluster_death_points_fall = len(hierarchy_clusters[i]) - len(points_lambda)
        cluster_death = 1 / v[2]
        # NOTE(review): birth is taken from this cluster's own first split and
        # the result is <= 0 whenever lambdas grow downwards; the usual HDBSCAN
        # stability is sum(lambda_point - lambda_birth) >= 0.  Formula kept
        # as-is -- confirm the intended sign/birth convention.
        sum_stabilities = (cluster_birth - cluster_death) * cluster_death_points_fall
        sum_stabilities += -np.sum(np.array(points_lambda) - cluster_birth)
        clusters_stabilities[i] = sum_stabilities

        if has_true_split:
            clusters_stack.append(v[0][0])
            clusters_stack.append(v[1][0])

    return clusters_stabilities
130152

131153
@staticmethod
132-
def extract_clusters(hierarchy_clusters, points_lambda, clusters_lambda):
133-
clusters_stabilities = dict()
154+
def extract_clusters(hierarchy_clusters, clusters_stabilities):
134155

135156
for k, v in hierarchy_clusters.items()[:-1]:
136157
pass
@@ -146,6 +167,6 @@ def extract_clusters(hierarchy_clusters, points_lambda, clusters_lambda):
146167
e = Utils.prims_algorithm(transformed_dist)
147168
plot = Utils.plot_data(data, e, transformed_dist)
148169
Z, tree, clusters = Utils.cluster_hierarchy(e, num_points=transformed_dist.shape[0])
149-
dn = Utils.plot_dendrogram(Z)
150-
p_lambda, c_lambda = Utils.condense_cluster_tree(tree, clusters, min_cluster_size=5)
151-
clusters = Utils.extract_clusters(clusters, p_lambda, c_lambda)
170+
# dn = Utils.plot_dendrogram(Z)
171+
c_stabilities = Utils.condense_cluster_tree(tree, clusters, min_cluster_size=5, num_points=transformed_dist.shape[0])
172+
clusters = Utils.extract_clusters(clusters, c_stabilities)

0 commit comments

Comments
 (0)