Skip to content

Commit 34faa99

Browse files
hdbscan entire progress tracking
Co-authored-by: Veselin Nikolov <veselin.nikolov@neotechnology.com>
1 parent 13c6e1b commit 34faa99

File tree

3 files changed

+142
-3
lines changed

3 files changed

+142
-3
lines changed

algo/src/main/java/org/neo4j/gds/hdbscan/HDBScan.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ public HDBScan(
6262

6363
@Override
6464
public HugeLongArray compute() {
65-
65+
progressTracker.beginSubTask();
6666
var kdTree = buildKDTree();
6767

6868
var nodeCount = nodes.nodeCount();
@@ -78,20 +78,22 @@ public HugeLongArray compute() {
7878

7979
var labellingStep = new LabellingStep(condensedTree, nodeCount, progressTracker);
8080
var labels= labellingStep.labels();
81+
progressTracker.endSubTask();
8182

8283
return labels;
8384
}
8485

8586
/**
 * Collects the neighbours of every node from the given KD-tree and wraps them in a {@code CoreResult}.
 * <p>
 * The per-node work runs through {@code ParallelUtil.parallelForEachNode} with the configured
 * concurrency and termination flag. The loop is bracketed by {@code beginSubTask()} /
 * {@code endSubTask()} on the progress tracker, and {@code logProgress()} is called once per node.
 *
 * @param kdTree    tree queried via {@code kdTree.neighbours(nodeId, samples)}
 * @param nodeCount number of nodes to iterate over; also the size of the neighbours array
 * @return a {@code CoreResult} built from the filled neighbours array
 */
CoreResult computeCores(KdTree kdTree, long nodeCount) {
8687
HugeObjectArray<Neighbours> neighbours = HugeObjectArray.newArray(Neighbours.class, nodeCount);
87-
88+
progressTracker.beginSubTask();
8889
ParallelUtil.parallelForEachNode(
8990
nodeCount, concurrency, terminationFlag,
9091
(nodeId) -> {
9192
neighbours.set(nodeId, kdTree.neighbours(nodeId, samples));
93+
progressTracker.logProgress();
9294
}
9395
);
94-
96+
progressTracker.endSubTask();
9597
return new CoreResult(neighbours);
9698
}
9799

algo/src/main/java/org/neo4j/gds/hdbscan/HDBScanProgressTrackerCreator.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,19 @@ static Task boruvkaTask(String name, long nodeCount){
5353
return Tasks.leaf(name,nodeCount - 1);
5454
}
5555

56+
/**
 * Builds the parent progress task that groups all HDBSCAN phases.
 * <p>
 * The sub-tasks are listed in this order: KD-tree construction, nearest-neighbour search,
 * MST computation, dendrogram creation, condensed-tree creation and node labelling.
 *
 * @param name      passed as the first argument to {@code Tasks.task} for the parent task
 * @param nodeCount handed unchanged to every sub-task factory and to the nearest-neighbour leaf
 * @return the parent task wrapping the six sub-tasks
 */
// NOTE(review): the "Condensed Tree Creation " literal below ends with a trailing space,
// unlike the other sub-task names - confirm this is intended, as it is part of the task name.
static Task hdbscanTask(String name, long nodeCount){
57+
return Tasks.task(
58+
name,
59+
List.of(
60+
kdBuildingTask("KD-Tree Construction",nodeCount),
61+
Tasks.leaf("Nearest Neighbors Search", nodeCount),
62+
boruvkaTask("MST Computation", nodeCount),
63+
hierarchyTask("Dendrogram Creation", nodeCount),
64+
condenseTask("Condensed Tree Creation ", nodeCount),
65+
labellingTask("Node Labelling", nodeCount)
66+
)
67+
);
68+
69+
}
70+
5671
}

algo/src/test/java/org/neo4j/gds/hdbscan/HDBScanE2ETest.java

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,24 @@
1919
*/
2020
package org.neo4j.gds.hdbscan;
2121

22+
import org.assertj.core.api.Assertions;
2223
import org.junit.jupiter.api.Test;
24+
import org.neo4j.gds.compat.TestLog;
2325
import org.neo4j.gds.core.concurrency.Concurrency;
26+
import org.neo4j.gds.core.utils.logging.LoggerForProgressTrackingAdapter;
27+
import org.neo4j.gds.core.utils.progress.EmptyTaskRegistryFactory;
2428
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
29+
import org.neo4j.gds.core.utils.progress.tasks.TaskProgressTracker;
2530
import org.neo4j.gds.extension.GdlExtension;
2631
import org.neo4j.gds.extension.GdlGraph;
2732
import org.neo4j.gds.extension.Inject;
2833
import org.neo4j.gds.extension.TestGraph;
34+
import org.neo4j.gds.logging.GdsTestLog;
2935
import org.neo4j.gds.termination.TerminationFlag;
3036

3137
import static org.assertj.core.api.Assertions.assertThat;
38+
import static org.neo4j.gds.assertj.Extractors.removingThreadId;
39+
import static org.neo4j.gds.assertj.Extractors.replaceTimings;
3240

3341
@GdlExtension
3442
class HDBScanE2ETest {
@@ -78,5 +86,119 @@ void hdbscan() {
7886
assertThat(labels).containsExactly(expectedLabels);
7987
}
8088

89+
/**
 * Runs HDBScan with a {@code TaskProgressTracker} built from {@code hdbscanTask("foo", ...)} and
 * backed by a {@code GdsTestLog}, then asserts the exact, ordered list of INFO messages.
 * <p>
 * Messages are passed through the {@code removingThreadId()} and {@code replaceTimings()} extractors
 * before comparison. The expected list covers the start/finish and percentage lines of every phase:
 * KD-tree construction, nearest-neighbour search, MST computation, dendrogram creation,
 * condensed-tree creation and the three node-labelling sub-steps.
 */
@Test
90+
void shouldLogProgress(){
91+
92+
var progressTask = HDBScanProgressTrackerCreator.hdbscanTask("foo",graph.nodeCount());
93+
var log = new GdsTestLog();
94+
var progressTracker = new TaskProgressTracker(progressTask, new LoggerForProgressTrackingAdapter(log), new Concurrency(1), EmptyTaskRegistryFactory.INSTANCE);
95+
96+
new HDBScan(
97+
graph,
98+
graph.nodeProperties("point"),
99+
new Concurrency(1),
100+
1,
101+
2,
102+
2,
103+
progressTracker,
104+
TerminationFlag.RUNNING_TRUE
105+
).compute();
106+
107+
Assertions.assertThat(log.getMessages(TestLog.INFO))
108+
.extracting(removingThreadId())
109+
.extracting(replaceTimings())
110+
.containsExactly(
111+
"foo :: Start",
112+
"foo :: KD-Tree Construction :: Start",
113+
"foo :: KD-Tree Construction 10%",
114+
"foo :: KD-Tree Construction 20%",
115+
"foo :: KD-Tree Construction 30%",
116+
"foo :: KD-Tree Construction 40%",
117+
"foo :: KD-Tree Construction 50%",
118+
"foo :: KD-Tree Construction 60%",
119+
"foo :: KD-Tree Construction 70%",
120+
"foo :: KD-Tree Construction 80%",
121+
"foo :: KD-Tree Construction 90%",
122+
"foo :: KD-Tree Construction 100%",
123+
"foo :: KD-Tree Construction :: Finished",
124+
"foo :: Nearest Neighbors Search :: Start",
125+
"foo :: Nearest Neighbors Search 10%",
126+
"foo :: Nearest Neighbors Search 20%",
127+
"foo :: Nearest Neighbors Search 30%",
128+
"foo :: Nearest Neighbors Search 40%",
129+
"foo :: Nearest Neighbors Search 50%",
130+
"foo :: Nearest Neighbors Search 60%",
131+
"foo :: Nearest Neighbors Search 70%",
132+
"foo :: Nearest Neighbors Search 80%",
133+
"foo :: Nearest Neighbors Search 90%",
134+
"foo :: Nearest Neighbors Search 100%",
135+
"foo :: Nearest Neighbors Search :: Finished",
136+
"foo :: MST Computation :: Start",
137+
"foo :: MST Computation 11%",
138+
"foo :: MST Computation 22%",
139+
"foo :: MST Computation 33%",
140+
"foo :: MST Computation 44%",
141+
"foo :: MST Computation 55%",
142+
"foo :: MST Computation 66%",
143+
"foo :: MST Computation 77%",
144+
"foo :: MST Computation 88%",
145+
"foo :: MST Computation 100%",
146+
"foo :: MST Computation :: Finished",
147+
"foo :: Dendrogram Creation :: Start",
148+
"foo :: Dendrogram Creation 11%",
149+
"foo :: Dendrogram Creation 22%",
150+
"foo :: Dendrogram Creation 33%",
151+
"foo :: Dendrogram Creation 44%",
152+
"foo :: Dendrogram Creation 55%",
153+
"foo :: Dendrogram Creation 66%",
154+
"foo :: Dendrogram Creation 77%",
155+
"foo :: Dendrogram Creation 88%",
156+
"foo :: Dendrogram Creation 100%",
157+
"foo :: Dendrogram Creation :: Finished",
158+
"foo :: Condensed Tree Creation :: Start",
159+
"foo :: Condensed Tree Creation 11%",
160+
"foo :: Condensed Tree Creation 22%",
161+
"foo :: Condensed Tree Creation 33%",
162+
"foo :: Condensed Tree Creation 44%",
163+
"foo :: Condensed Tree Creation 55%",
164+
"foo :: Condensed Tree Creation 66%",
165+
"foo :: Condensed Tree Creation 77%",
166+
"foo :: Condensed Tree Creation 88%",
167+
"foo :: Condensed Tree Creation 100%",
168+
"foo :: Condensed Tree Creation :: Finished",
169+
"foo :: Node Labelling :: Start",
170+
"foo :: Node Labelling :: Stability calculation :: Start",
171+
"foo :: Node Labelling :: Stability calculation 11%",
172+
"foo :: Node Labelling :: Stability calculation 22%",
173+
"foo :: Node Labelling :: Stability calculation 100%",
174+
"foo :: Node Labelling :: Stability calculation :: Finished",
175+
"foo :: Node Labelling :: cluster selection :: Start",
176+
"foo :: Node Labelling :: cluster selection 11%",
177+
"foo :: Node Labelling :: cluster selection 22%",
178+
"foo :: Node Labelling :: cluster selection 33%",
179+
"foo :: Node Labelling :: cluster selection 100%",
180+
"foo :: Node Labelling :: cluster selection :: Finished",
181+
"foo :: Node Labelling :: labelling :: Start",
182+
"foo :: Node Labelling :: labelling 5%",
183+
"foo :: Node Labelling :: labelling 10%",
184+
"foo :: Node Labelling :: labelling 15%",
185+
"foo :: Node Labelling :: labelling 21%",
186+
"foo :: Node Labelling :: labelling 26%",
187+
"foo :: Node Labelling :: labelling 31%",
188+
"foo :: Node Labelling :: labelling 36%",
189+
"foo :: Node Labelling :: labelling 42%",
190+
"foo :: Node Labelling :: labelling 47%",
191+
"foo :: Node Labelling :: labelling 52%",
192+
"foo :: Node Labelling :: labelling 57%",
193+
"foo :: Node Labelling :: labelling 63%",
194+
"foo :: Node Labelling :: labelling 68%",
195+
"foo :: Node Labelling :: labelling 100%",
196+
"foo :: Node Labelling :: labelling :: Finished",
197+
"foo :: Node Labelling :: Finished",
198+
"foo :: Finished"
199+
);
200+
201+
}
202+
81203

82204
}

0 commit comments

Comments
 (0)