Skip to content

Commit c85d748

Browse files
Rename consider components to applywcc and add wcc mem est mention
1 parent 847f6dd commit c85d748

File tree

10 files changed

+113
-29
lines changed

10 files changed

+113
-29
lines changed

algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.neo4j.gds.similarity.nodesim.TopKMap;
3838
import org.neo4j.gds.similarity.nodesim.TopNList;
3939
import org.neo4j.gds.wcc.WccAlgorithmFactory;
40+
import org.neo4j.gds.wcc.WccMemoryEstimateDefinition;
4041
import org.neo4j.gds.wcc.WccStreamConfig;
4142
import org.neo4j.gds.wcc.WccStreamConfigImpl;
4243

@@ -97,12 +98,18 @@ public MemoryEstimation memoryEstimation(CONFIG config) {
9798
.rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize))
9899
.build();
99100
}));
100-
if (config.considerComponents()) {
101+
if (config.enableComponentsOptimization()) {
101102
builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation);
102103
builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation);
103-
}
104-
if (config.considerComponents() && config.componentProperty() != null) {
105-
builder.perNode("component mapping", HugeLongArray::memoryEstimation);
104+
105+
if (config.actuallyRunWCC()) {
106+
WccStreamConfig internalWccConfig = WccStreamConfigImpl.builder()
107+
.concurrency(config.concurrency())
108+
.build();
109+
builder.add("wcc", new WccMemoryEstimateDefinition().memoryEstimation(internalWccConfig));
110+
} else {
111+
builder.perNode("component mapping", HugeLongArray::memoryEstimation);
112+
}
106113
}
107114
if (config.computeToGraph() && !config.hasTopK()) {
108115
builder.add(
@@ -129,7 +136,7 @@ public MemoryEstimation memoryEstimation(CONFIG config) {
129136

130137
@Override
131138
public Task progressTask(Graph graph, CONFIG config) {
132-
if (config.runWCC()) {
139+
if (config.actuallyRunWCC()) {
133140
WccStreamConfig wccStreamConfig = WccStreamConfigImpl.builder().build();
134141
return Tasks.task(
135142
taskName(),

algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -205,16 +205,16 @@ private void prepare() {
205205
progressTracker.beginSubTask();
206206

207207
components = initComponents();
208-
if (config.runWCC()) {
208+
if (config.actuallyRunWCC()) {
209209
progressTracker.beginSubTask();
210210
}
211211
initNodeSpecificFields();
212212

213213
sourceNodesStream = initSourceNodesStream();
214214

215215
targetNodesStream = initTargetNodesStream();
216-
217-
if (config.runWCC()) {
216+
217+
if (config.actuallyRunWCC()) {
218218
progressTracker.endSubTask();
219219
}
220220
progressTracker.endSubTask();
@@ -239,7 +239,7 @@ private Stream<SimilarityResult> computeParallel() {
239239
}
240240

241241
private LongUnaryOperator initComponents() {
242-
if (!config.considerComponents()) {
242+
if (!config.enableComponentsOptimization()) {
243243
// considering everything as within the same component
244244
return n -> 0;
245245
}
@@ -410,7 +410,7 @@ private Function<Long, LongStream> initSourceNodesStream() {
410410
}
411411

412412
private BiFunction<Long, Long, LongStream> initTargetNodesStream() {
413-
if (!config.considerComponents()) {
413+
if (!config.enableComponentsOptimization()) {
414414
return (componentId, offset) -> new SetBitsIterable(targetNodes, offset).stream();
415415
}
416416

algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@ public interface NodeSimilarityBaseConfig extends AlgoBaseConfig, RelationshipWe
5252

5353
String COMPONENT_PROPERTY_KEY = "componentProperty";
5454

55-
String CONSIDER_COMPONENTS_KEY = "considerComponents";
56-
boolean CONSIDER_COMPONENTS = false;
55+
String APPLY_WCC = "applyWcc";
5756

5857
@Configuration.DoubleRange(min = 0, max = 1)
5958
default double similarityCutoff() {
@@ -104,8 +103,8 @@ default int bottomN() {
104103
@Configuration.Key(COMPONENT_PROPERTY_KEY)
105104
default @Nullable String componentProperty() { return null; }
106105

107-
@Configuration.Key(CONSIDER_COMPONENTS_KEY)
108-
default boolean considerComponents() { return CONSIDER_COMPONENTS; }
106+
@Configuration.Key(APPLY_WCC)
107+
default boolean applyWcc() {return false;}
109108

110109
@Configuration.Ignore
111110
default int normalizedK() {
@@ -186,8 +185,13 @@ default void validateComponentProperty(
186185
}
187186

188187
@Configuration.Ignore
189-
default boolean runWCC() {
190-
return considerComponents() && componentProperty() == null;
188+
default boolean actuallyRunWCC() {
189+
return enableComponentsOptimization() && componentProperty() == null;
190+
}
191+
192+
@Configuration.Ignore
193+
default boolean enableComponentsOptimization() {
194+
return applyWcc() || componentProperty() != null;
191195
}
192196

193197
}

algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.neo4j.gds.core.utils.progress.tasks.Tasks;
3535
import org.neo4j.gds.similarity.SimilarityGraphBuilder;
3636
import org.neo4j.gds.wcc.WccAlgorithmFactory;
37+
import org.neo4j.gds.wcc.WccMemoryEstimateDefinition;
3738
import org.neo4j.gds.wcc.WccStreamConfig;
3839
import org.neo4j.gds.wcc.WccStreamConfigImpl;
3940

@@ -92,12 +93,18 @@ public MemoryEstimation memoryEstimation(CONFIG config) {
9293
.rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize))
9394
.build();
9495
}));
95-
if (config.considerComponents()) {
96+
if (config.enableComponentsOptimization()) {
9697
builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation);
9798
builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation);
98-
}
99-
if (config.considerComponents() && config.componentProperty() != null) {
100-
builder.perNode("component mapping", HugeLongArray::memoryEstimation);
99+
100+
if (config.actuallyRunWCC()) {
101+
WccStreamConfig internalWccConfig = WccStreamConfigImpl.builder()
102+
.concurrency(config.concurrency())
103+
.build();
104+
builder.add("wcc", new WccMemoryEstimateDefinition().memoryEstimation(internalWccConfig));
105+
} else {
106+
builder.perNode("component mapping", HugeLongArray::memoryEstimation);
107+
}
101108
}
102109
if (config.computeToGraph() && !config.hasTopK()) {
103110
builder.add(
@@ -132,7 +139,7 @@ public Task progressTask(Graph graph, CONFIG config) {
132139
}
133140

134141
private Task prepareTask(Graph graph, CONFIG config) {
135-
if (config.runWCC()) {
142+
if (config.actuallyRunWCC()) {
136143
WccStreamConfig wccStreamConfig = WccStreamConfigImpl.builder().build();
137144
return Tasks.task("prepare", new WccAlgorithmFactory<>().progressTask(graph, wccStreamConfig),
138145
Tasks.leaf("initialize", graph.relationshipCount()));

algo/src/test/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ void shouldSurviveIoannisFurtherObjections(boolean enableWcc) {
139139
var config = FilteredNodeSimilarityStreamConfigImpl.builder()
140140
.sourceNodeFilter(NodeFilterSpecFactory.create(sourceNodeFilter))
141141
.concurrency(1)
142-
.considerComponents(enableWcc)
142+
.applyWcc(enableWcc)
143143
.topK(1)
144144
.topN(10)
145145
.build();

algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentNodesTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ void shouldComputeUpperIndexWithTargetFilter() {
297297
TreeSet<Long> treeSet = new TreeSet<>();
298298
treeSet.add(-1L);
299299
for (int i = 0; i < upperIndex.size(); ++i) {
300-
if (upperIndex.get(i) > 0) {
300+
if (upperIndex.get(i) >= 0) {
301301
treeSet.add(upperIndex.get(i));
302302
}
303303
}

algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
*/
2020
package org.neo4j.gds.similarity.nodesim;
2121

22+
import org.assertj.core.api.SoftAssertions;
2223
import org.junit.jupiter.params.ParameterizedTest;
2324
import org.junit.jupiter.params.provider.Arguments;
2425
import org.junit.jupiter.params.provider.MethodSource;
@@ -148,7 +149,7 @@ void shouldComputeMemrecWithOrWithoutComponentMapping(boolean componentPropertyS
148149
.topK(TOP_K_DEFAULT)
149150
.writeProperty("writeProperty")
150151
.writeRelationshipType("writeRelationshipType")
151-
.considerComponents(true)
152+
.applyWcc(true)
152153
.componentProperty(componentPropertySet ? "compid" : null)
153154
.build();
154155

@@ -175,15 +176,20 @@ void shouldComputeMemrecWithOrWithoutComponentMapping(boolean componentPropertyS
175176
.fixed("similarityComputer", 8);
176177
if (componentPropertySet) {
177178
builder.fixed("component mapping", 8000040);
179+
} else {
180+
builder.fixed("wcc", 8000064);
178181
}
179182

180183
long topKMapRangeMin = 248_000_016L;
181184
long topKMapRangeMax = 248_000_016L;
182185
builder.fixed("topK map", MemoryRange.of(topKMapRangeMin, topKMapRangeMax));
183186

184187
MemoryTree expected = builder.build().estimate(dimensions, 1);
188+
SoftAssertions softAssertions = new SoftAssertions();
189+
softAssertions.assertThat(expected.memoryUsage().max).isEqualTo(actual.memoryUsage().max);
190+
softAssertions.assertThat(expected.memoryUsage().min).isEqualTo(actual.memoryUsage().min);
185191

186-
assertEquals(expected.memoryUsage(), actual.memoryUsage());
192+
softAssertions.assertAll();
187193
}
188194

189195
@ParameterizedTest(name = "orientation: {0}, concurrency: {1}")
@@ -192,7 +198,6 @@ void shouldOptimizeForDistinctComponentsProperty(Orientation orientation, int co
192198
Graph graph = orientation == NATURAL ? naturalGraph : reverseGraph;
193199
var config = NodeSimilarityStreamConfigImpl.builder()
194200
.similarityCutoff(0.0)
195-
.considerComponents(true)
196201
.componentProperty("compid")
197202
.concurrency(concurrency)
198203
.build();
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.similarity.nodesim;
21+
22+
import org.junit.jupiter.api.Test;
23+
24+
import static org.assertj.core.api.Assertions.assertThat;
25+
26+
class NodeSimilarityBaseConfigTest {
27+
28+
@Test
29+
void shouldNotRequestAnythingIfNotEnabled(){
30+
var config = NodeSimilarityStreamConfigImpl.builder().build();
31+
32+
assertThat(config.actuallyRunWCC()).isFalse();
33+
assertThat(config.enableComponentsOptimization()).isFalse();
34+
}
35+
36+
@Test
37+
void shouldRequestWccIfEnabled(){
38+
var config = NodeSimilarityStreamConfigImpl.builder().applyWcc(true).build();
39+
40+
assertThat(config.actuallyRunWCC()).isTrue();
41+
assertThat(config.enableComponentsOptimization()).isTrue();
42+
}
43+
44+
@Test
45+
void shouldNotRequestWccIfPropertyGiven(){
46+
var config = NodeSimilarityStreamConfigImpl.builder().componentProperty("foo").build();
47+
48+
assertThat(config.actuallyRunWCC()).isFalse();
49+
assertThat(config.enableComponentsOptimization()).isTrue();
50+
}
51+
52+
@Test
53+
void shouldNotRequestWccIfPropertyGivenAndWccProvided(){
54+
var config = NodeSimilarityStreamConfigImpl.builder().applyWcc(true).componentProperty("foo").build();
55+
56+
assertThat(config.actuallyRunWCC()).isFalse();
57+
assertThat(config.enableComponentsOptimization()).isTrue();
58+
}
59+
60+
}

algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ void shouldLogProgress(int concurrency) {
878878
void shouldLogProgressForWccOptimization() {
879879
var graph = naturalGraph;
880880
var config = NodeSimilarityStreamConfigImpl.builder()
881-
.considerComponents(true)
881+
.applyWcc(true)
882882
.concurrency(4)
883883
.build();
884884
var progressTask = new NodeSimilarityFactory<>().progressTask(graph, config);

doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ If unspecified, the algorithm runs unweighted.
2121
| similarityMetric
2222
| String | JACCARD | yes | The metric used to compute similarity.
2323
Can be either `JACCARD`, `OVERLAP` or `COSINE`.
24-
| [[consider-components-{instance}]] considerComponents | Boolean | false | yes | If enabled applies an optimization which can increase performance for multi-component graphs. Makes use of the fact that nodes of distinct components always have a similarity of 0. If not already provided through xref:#component-property-{instance} [componentProperty], internally runs xref:algorithms/wcc.adoc[WCC].
25-
| [[component-property-{instance}]] componentProperty | String | null | yes | Name of the pre-computed node property to use for enabled xref:#consider-components-{instance} [component optimization] in case pre-computed values are available.
24+
| [[applywcc-{instance}]] applyWcc | Boolean | false | yes | If enabled, an optimization which can increase performance for multi-component graphs is applied: nodes in distinct components always have a similarity of 0, so comparing them can be avoided.
25+
As a pre-processing step, xref:algorithms/wcc.adoc[WCC] is run to obtain the components, unless they are already provided via xref:#component-property-{instance}[componentProperty].
26+
| [[component-property-{instance}]] componentProperty | String | null | yes | If the components are available as a node property, they can be passed via this parameter rather than running WCC as discussed in the above xref:#applywcc-{instance}[component optimization].

0 commit comments

Comments
 (0)