Skip to content

Commit bc1fd25

Browse files
Condense progress tracking
Co-authored-by: Veselin Nikolov <veselin.nikolov@neotechnology.com>
1 parent 4e31174 commit bc1fd25

File tree

5 files changed

+59
-11
lines changed

5 files changed

+59
-11
lines changed

algo/src/main/java/org/neo4j/gds/hdbscan/CondenseStep.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,15 @@
2323
import org.neo4j.gds.collections.ha.HugeLongArray;
2424
import org.neo4j.gds.core.utils.paged.HugeAtomicBitSet;
2525
import org.neo4j.gds.core.utils.paged.HugeLongArrayQueue;
26+
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
2627

2728
class CondenseStep {
2829
private final long nodeCount;
30+
private final ProgressTracker progressTracker;
2931

30-
CondenseStep(long nodeCount) {
32+
CondenseStep(long nodeCount, ProgressTracker progressTracker) {
3133
this.nodeCount = nodeCount;
34+
this.progressTracker = progressTracker;
3235
}
3336

3437
CondensedTree condense(ClusterHierarchy clusterHierarchy, long minClusterSize) {
@@ -46,7 +49,7 @@ CondensedTree condense(ClusterHierarchy clusterHierarchy, long minClusterSize) {
4649
// After walking through the whole hierarchy and doing this we end up
4750
// with a much smaller tree with a small number of nodes,
4851
// each of which has data about how the size of the cluster at that node decreases over varying distance.
49-
52+
progressTracker.beginSubTask();
5053
var clusterHierarchyRoot = clusterHierarchy.root();
5154
var parent = HugeLongArray.newArray(clusterHierarchyRoot + 1);
5255
var lambda = HugeDoubleArray.newArray(clusterHierarchyRoot + 1);
@@ -100,7 +103,9 @@ CondensedTree condense(ClusterHierarchy clusterHierarchy, long minClusterSize) {
100103
lambda.set(rightClusterId, fallingOutLambda);
101104
size.set(rightClusterId - nodeCount, rightSize);
102105
}
106+
progressTracker.logProgress();
103107
}
108+
progressTracker.endSubTask();
104109

105110
return new CondensedTree(currentCondensedRoot, parent, lambda, size, currentCondensedMaxClusterId, nodeCount);
106111
}

algo/src/main/java/org/neo4j/gds/hdbscan/HDBScan.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ public HugeLongArray compute() {
6868
var coreResult = computeCores(kdTree, nodeCount);
6969
var dualTreeMST = dualTreeMSTPhase(kdTree, coreResult);
7070
var clusterHierarchy = createClusterHierarchy(dualTreeMST);
71-
var condenseStep = new CondenseStep(nodeCount);
71+
var condenseStep = new CondenseStep(nodeCount,progressTracker);
7272
var condensedTree = condenseStep.condense(clusterHierarchy, minClusterSize);
7373
var labellingStep = new LabellingStep(condensedTree, nodeCount);
7474
return labellingStep.labels();

algo/src/main/java/org/neo4j/gds/hdbscan/HDBScanProgressTrackerCreator.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@ static Task kdBuildingTask(String name, long nodeCount){
2929
}
3030

3131
static Task hierarchyTask(String name, long nodeCount){
32-
return Tasks.leaf(name,nodeCount-1);
32+
return Tasks.leaf(name,nodeCount - 1);
33+
}
34+
35+
/**
 * Creates the leaf progress task used for the condense step.
 * The task carries the given {@code name} and a volume of {@code nodeCount - 1}.
 *
 * @param name      the name of the task
 * @param nodeCount the node count; the task volume is one less than this value
 * @return a leaf task with volume {@code nodeCount - 1}
 */
static Task condenseTask(String name, long nodeCount){
36+
return Tasks.leaf(name,nodeCount - 1);
3337
}
3438

3539
}

algo/src/test/java/org/neo4j/gds/hdbscan/ClusterHierarchyTest.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,8 @@ void shouldLogProgress(){
154154
var progressTask = HDBScanProgressTrackerCreator.hierarchyTask("foo",3);
155155
var log = new GdsTestLog();
156156
var progressTracker = new TaskProgressTracker(progressTask, new LoggerForProgressTrackingAdapter(log), new Concurrency(1), EmptyTaskRegistryFactory.INSTANCE);
157-
var clusterHierarchy = ClusterHierarchy.create(
158-
3,
159-
edges,
160-
progressTracker
161-
);
157+
158+
ClusterHierarchy.create(3, edges, progressTracker);
162159

163160
Assertions.assertThat(log.getMessages(TestLog.INFO))
164161
.extracting(removingThreadId())

algo/src/test/java/org/neo4j/gds/hdbscan/CondenseStepTest.java

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,21 @@
1919
*/
2020
package org.neo4j.gds.hdbscan;
2121

22+
import org.assertj.core.api.Assertions;
2223
import org.junit.jupiter.api.Test;
2324
import org.neo4j.gds.collections.ha.HugeDoubleArray;
2425
import org.neo4j.gds.collections.ha.HugeLongArray;
26+
import org.neo4j.gds.compat.TestLog;
27+
import org.neo4j.gds.core.concurrency.Concurrency;
28+
import org.neo4j.gds.core.utils.logging.LoggerForProgressTrackingAdapter;
29+
import org.neo4j.gds.core.utils.progress.EmptyTaskRegistryFactory;
30+
import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker;
31+
import org.neo4j.gds.core.utils.progress.tasks.TaskProgressTracker;
32+
import org.neo4j.gds.logging.GdsTestLog;
2533

2634
import static org.assertj.core.api.Assertions.assertThat;
35+
import static org.neo4j.gds.assertj.Extractors.removingThreadId;
36+
import static org.neo4j.gds.assertj.Extractors.replaceTimings;
2737

2838
class CondenseStepTest {
2939

@@ -38,7 +48,8 @@ void minClusterSizeTwo() {
3848

3949
var clusterHierarchy = new ClusterHierarchy(root, left, right, lambda, size, nodeCount);
4050

41-
var condensedTree = new CondenseStep(nodeCount).condense(clusterHierarchy, 2L);
51+
var condensedTree = new CondenseStep(nodeCount, ProgressTracker.NULL_TRACKER)
52+
.condense(clusterHierarchy, 2L);
4253

4354
assertThat(condensedTree.root()).isEqualTo(7L);
4455
assertThat(condensedTree.maximumClusterId()).isEqualTo(11L);
@@ -89,7 +100,8 @@ void minClusterSizeThree() {
89100

90101
var clusterHierarchy = new ClusterHierarchy(root, left, right, lambda, size, nodeCount);
91102

92-
var condensedTree = new CondenseStep(nodeCount).condense(clusterHierarchy, 3L);
103+
var condensedTree = new CondenseStep(nodeCount,ProgressTracker.NULL_TRACKER)
104+
.condense(clusterHierarchy, 3L);
93105

94106
assertThat(condensedTree.root()).isEqualTo(7L);
95107
assertThat(condensedTree.maximumClusterId()).isEqualTo(7L);
@@ -113,4 +125,34 @@ void minClusterSizeThree() {
113125
assertThat(condensedTree.fellOutOf(6L)).isEqualTo(7L);
114126
assertThat(condensedTree.lambda(6L)).isEqualTo(8d);
115127
}
128+
129+
/**
 * Runs the condense step over a small 7-node cluster hierarchy with a real
 * {@code TaskProgressTracker} and checks the exact sequence of INFO messages it logs.
 */
@Test
130+
void shouldLogProgress(){
131+
var nodeCount = 7L;
132+
133+
// condenseTask builds a leaf task with volume nodeCount - 1, i.e. 6 for this test.
var progressTask = HDBScanProgressTrackerCreator.condenseTask("condense",7);
134+
var log = new GdsTestLog();
135+
var progressTracker = new TaskProgressTracker(progressTask, new LoggerForProgressTrackingAdapter(log), new Concurrency(1), EmptyTaskRegistryFactory.INSTANCE);
136+
// Hand-built hierarchy fixture: root id 12, with six entries in each of left/right/lambda/size.
var root = 12L;
137+
var left = HugeLongArray.of(5, 4, 2, 9, 0, 11);
138+
var right = HugeLongArray.of(6, 7, 3, 8, 1, 10);
139+
var lambda = HugeDoubleArray.of(7d, 8d, 9d, 10d, 11d, 12d);
140+
var size = HugeLongArray.of(2, 3, 2, 5, 2, 7);
141+
142+
var clusterHierarchy = new ClusterHierarchy(root, left, right, lambda, size, nodeCount);
143+
144+
// Only the logging side effect matters here; the returned condensed tree is discarded.
new CondenseStep(nodeCount,progressTracker).condense(clusterHierarchy, 3L);
145+
146+
// Thread ids and timings are normalised before comparing the messages.
Assertions.assertThat(log.getMessages(TestLog.INFO))
147+
.extracting(removingThreadId())
148+
.extracting(replaceTimings())
149+
.containsExactly(
150+
"condense :: Start",
151+
// 16%, 33% and 50% correspond to 1, 2 and 3 progress units out of the task volume of 6.
"condense 16%",
152+
"condense 33%",
153+
"condense 50%",
154+
// NOTE(review): the jump from 50% to 100% is presumably emitted when the task is ended — confirm against TaskProgressTracker.
"condense 100%",
155+
"condense :: Finished"
156+
);
157+
}
116158
}

0 commit comments

Comments
 (0)