Skip to content

Commit d1f30bf

Browse files
Calculate cluster stabilities
Co-authored-by: Ioannis Panagiotas <ioannis.panagiotas@neotechnology.com>
1 parent b546a0a commit d1f30bf

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.hdbscan;
21+
22+
import org.neo4j.gds.collections.ha.HugeDoubleArray;
23+
24+
class StabilityStep {
25+
HugeDoubleArray computeStabilities(CondensedTree condensedTree, long nodeCount) {
26+
var result = HugeDoubleArray.newArray(nodeCount - 1);
27+
28+
var condensedTreeRoot = condensedTree.root();
29+
// process the leaves of the tree
30+
for (var p = 0; p < nodeCount; p++) {
31+
var lambdaP = 1. / condensedTree.lambda(p);
32+
var birthPoint = condensedTree.fellOutOf(p);
33+
var lambdaBirth = birthPoint == condensedTreeRoot
34+
? 0.
35+
: 1. / condensedTree.lambda(birthPoint);
36+
result.addTo(birthPoint - nodeCount, lambdaP - lambdaBirth);
37+
}
38+
39+
var condensedTreeMaxClusterId = condensedTree.maximumClusterId();
40+
for (var p = condensedTreeMaxClusterId; p > condensedTreeRoot; p--) {
41+
var lambdaP = 1. / condensedTree.lambda(p);
42+
var birthPoint = condensedTree.parent(p);
43+
var lambdaBirth = birthPoint == condensedTreeRoot
44+
? 0.
45+
: 1. / condensedTree.lambda(birthPoint);
46+
var sizeP = condensedTree.size(p);
47+
result.addTo(birthPoint - nodeCount, sizeP * (lambdaP - lambdaBirth));
48+
}
49+
50+
return result;
51+
}
52+
}
Lines changed: 87 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.hdbscan;
21+
22+
import org.assertj.core.data.Offset;
23+
import org.junit.jupiter.api.Test;
24+
import org.neo4j.gds.collections.ha.HugeDoubleArray;
25+
import org.neo4j.gds.collections.ha.HugeLongArray;
26+
27+
import static org.assertj.core.api.Assertions.assertThat;
28+
29+
class StabilityStepTest {
30+
31+
@Test
32+
void clusterStability() {
33+
var nodeCount = 4;
34+
var root = 4;
35+
36+
var parent = HugeLongArray.of(5, 5, 6, 6, 0, 4, 4);
37+
var lambda = HugeDoubleArray.of(10, 10, 11, 11, 0, 12, 12);
38+
var size = HugeLongArray.of(4, 2, 2);
39+
var maximumClusterId = 6;
40+
41+
var condensedTree = new CondensedTree(root, parent, lambda, size, maximumClusterId, nodeCount);
42+
var stabilityStep = new StabilityStep();
43+
44+
var stabilities = stabilityStep.computeStabilities(condensedTree, nodeCount);
45+
46+
47+
assertThat(stabilities.toArray()).containsExactly(
48+
// stability of 4
49+
(1 / 12. - 0) + (1 / 12. - 0) + (1 / 12. - 0) + (1 / 12. - 0),
50+
// stability of 5
51+
(1 / 10. - 1 / 12.) + (1 / 10. - 1 / 12.),
52+
// stability of 6
53+
(1 / 11. - 1 / 12.) + (1 / 11. - 1 / 12.)
54+
);
55+
}
56+
57+
@Test
58+
void clusterStabilityBiggerTest() {
59+
var parent = HugeLongArray.of(8, 8, 10, 10, 11, 11, 11, 0, 7, 7, 9, 9, 0);
60+
var lambda = HugeDoubleArray.of(11.0, 11.0, 9.0, 9.0, 8.0, 7.0, 7.0, 0.0, 12.0, 12.0, 10.0, 10.0, 0.0);
61+
var size = HugeLongArray.of(7, 2, 5, 2, 3, 0, 0);
62+
var maximumClusterId = 11;
63+
var nodeCount = 7;
64+
var root = 7;
65+
66+
var condensedTree = new CondensedTree(root, parent, lambda, size, maximumClusterId, nodeCount);
67+
68+
var stabilityStep = new StabilityStep();
69+
70+
var stabilities = stabilityStep.computeStabilities(condensedTree, nodeCount);
71+
72+
assertThat(stabilities.toArray()).containsExactly(
73+
new double[] {
74+
// stability of 7
75+
7 * 1. / 12,
76+
// stability of 8
77+
2 * (1. / 11 - 1. / 12),
78+
// stability of 9
79+
5 * (1. / 10 - 1. / 12),
80+
// stability of 10
81+
2 * (1. / 9 - 1. / 10),
82+
// stability of 11
83+
(1. / 8 - 1. / 10) + 2 * (1. / 7 - 1. / 10),
84+
0.0
85+
}, Offset.offset(1e-10));
86+
}
87+
}

0 commit comments

Comments
 (0)