Skip to content

Commit ba0551d

Browse files
Select clusters
Co-authored-by: Ioannis Panagiotas <ioannis.panagiotas@neotechnology.com>
1 parent d1f30bf commit ba0551d

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

algo/src/main/java/org/neo4j/gds/hdbscan/StabilityStep.java

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
*/
2020
package org.neo4j.gds.hdbscan;
2121

22+
import com.carrotsearch.hppc.BitSet;
2223
import org.neo4j.gds.collections.ha.HugeDoubleArray;
2324

2425
class StabilityStep {
@@ -49,4 +50,35 @@ HugeDoubleArray computeStabilities(CondensedTree condensedTree, long nodeCount)
4950

5051
return result;
5152
}
53+
54+
BitSet selectedClusters(CondensedTree condensedTree, HugeDoubleArray stabilities, long nodeCount) {
55+
56+
var selectedClusters = new BitSet(nodeCount);
57+
58+
var condensedTreeRoot = condensedTree.root();
59+
var condensedTreeMaxClusterId = condensedTree.maximumClusterId();
60+
61+
var stabilitySums = HugeDoubleArray.newArray(nodeCount);
62+
for (var p = condensedTreeMaxClusterId; p >= condensedTreeRoot; p--) {
63+
var adaptedPIndex = p - nodeCount;
64+
var stabilityP = stabilities.get(adaptedPIndex);
65+
var childrenStabilitySum = stabilitySums.get(adaptedPIndex);
66+
double stabilityToAdd;
67+
if (childrenStabilitySum > stabilityP) {
68+
stabilityToAdd = childrenStabilitySum;
69+
selectedClusters.clear(adaptedPIndex);
70+
} else {
71+
stabilityToAdd = stabilityP;
72+
selectedClusters.set(adaptedPIndex);
73+
// Selected clusters below `p` are implicitly unselected - they will be ignored during- `labeling`
74+
}
75+
if (p == condensedTreeRoot) {
76+
continue;
77+
}
78+
var parent = condensedTree.parent(p);
79+
stabilitySums.addTo(parent - nodeCount, stabilityToAdd);
80+
}
81+
82+
return selectedClusters;
83+
}
5284
}

algo/src/test/java/org/neo4j/gds/hdbscan/StabilityStepTest.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,66 @@ void clusterStabilityBiggerTest() {
8484
0.0
8585
}, Offset.offset(1e-10));
8686
}
87+
88+
@Test
89+
void clusterSelectionOfChildClusters() {
90+
// 3
91+
// 4 5
92+
93+
var nodeCount = 3;
94+
var root = 3;
95+
96+
var parent = HugeLongArray.of(-1, -1, -1, 0, 3, 3);
97+
var lambda = HugeDoubleArray.of(-1, -1, -1, -1, -1, -1);
98+
var size = HugeLongArray.of(-1, -1, -1);
99+
var maximumClusterId = 5;
100+
101+
var stabilities = HugeDoubleArray.of(3., 4., 5.);
102+
103+
var condensedTree = new CondensedTree(root, parent, lambda, size, maximumClusterId, nodeCount);
104+
var stabilityStep = new StabilityStep();
105+
106+
var selectedClusters = stabilityStep.selectedClusters(condensedTree, stabilities, nodeCount);
107+
108+
assertThat(selectedClusters.get(0))
109+
.withFailMessage("Root should be unselected")
110+
.isFalse();
111+
assertThat(selectedClusters.get(1))
112+
.withFailMessage("First child should be selected cluster")
113+
.isTrue();
114+
assertThat(selectedClusters.get(2))
115+
.withFailMessage("Second child should be selected cluster")
116+
.isTrue();
117+
}
118+
119+
@Test
120+
void clusterSelectionOfParentCluster() {
121+
// 3
122+
// 4 5
123+
124+
var nodeCount = 3;
125+
var root = 3;
126+
127+
var parent = HugeLongArray.of(-1, -1, -1, 0, 3, 3);
128+
var lambda = HugeDoubleArray.of(-1, -1, -1, -1, -1, -1);
129+
var size = HugeLongArray.of(-1, -1, -1);
130+
var maximumClusterId = 5;
131+
132+
var stabilities = HugeDoubleArray.of(10., 4., 5.);
133+
134+
var condensedTree = new CondensedTree(root, parent, lambda, size, maximumClusterId, nodeCount);
135+
var stabilityStep = new StabilityStep();
136+
137+
var selectedClusters = stabilityStep.selectedClusters(condensedTree, stabilities, nodeCount);
138+
139+
assertThat(selectedClusters.get(0))
140+
.withFailMessage("Root should be selected")
141+
.isTrue();
142+
assertThat(selectedClusters.get(1))
143+
.withFailMessage("First child should be selected")
144+
.isTrue();
145+
assertThat(selectedClusters.get(2))
146+
.withFailMessage("Second child should be selected")
147+
.isTrue();
148+
}
87149
}

0 commit comments

Comments
 (0)