Skip to content

Commit 5165902

Browse files
s1cksoerenreichardt
andcommitted
Improve tests and refactor
Co-authored-by: Sören Reichardt <soren.reichardt@neotechnology.com>
1 parent 56d2ea2 commit 5165902

File tree

3 files changed

+72
-46
lines changed

3 files changed

+72
-46
lines changed

core/src/main/java/org/neo4j/gds/core/loading/construction/NodeLabelTokenToPropertyKeys.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,23 +70,27 @@ abstract Map<String, PropertySchema> propertySchemas(
7070
Map<String, PropertySchema> importPropertySchemas
7171
);
7272

73-
static NodeLabelTokenToPropertyKeys merge(
73+
/**
74+
* Computes the union of the two given mappings without
75+
* changing the contents of the mappings themselves.
76+
*/
77+
static NodeLabelTokenToPropertyKeys union(
7478
NodeLabelTokenToPropertyKeys left,
7579
NodeLabelTokenToPropertyKeys right,
7680
Map<String, PropertySchema> importPropertySchemas
7781
) {
78-
var merge = NodeLabelTokenToPropertyKeys.lazy();
82+
var union = NodeLabelTokenToPropertyKeys.lazy();
7983

8084
left.nodeLabels().forEach(nodeLabel -> {
8185
var propertyKeys = left.propertySchemas(nodeLabel, importPropertySchemas).keySet();
82-
merge.add(NodeLabelTokens.ofNodeLabel(nodeLabel), propertyKeys);
86+
union.add(NodeLabelTokens.ofNodeLabel(nodeLabel), propertyKeys);
8387
});
8488
right.nodeLabels().forEach(nodeLabel -> {
8589
var propertyKeys = right.propertySchemas(nodeLabel, importPropertySchemas).keySet();
86-
merge.add(NodeLabelTokens.ofNodeLabel(nodeLabel), propertyKeys);
90+
union.add(NodeLabelTokens.ofNodeLabel(nodeLabel), propertyKeys);
8791
});
8892

89-
return merge;
93+
return union;
9094
}
9195

9296
private static class Fixed extends NodeLabelTokenToPropertyKeys {

core/src/main/java/org/neo4j/gds/core/loading/construction/NodesBuilder.java

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -199,17 +199,11 @@ public Nodes build() {
199199
}
200200

201201
public Nodes build(long highestNeoId) {
202-
// Flush remaining buffer contents
203-
this.threadLocalBuilder.forEach(ThreadLocalBuilder::flush);
204-
// Collect token to property keys for final merge
205-
var nodeLabelTokenToPropertyKeysList = new ArrayList<NodeLabelTokenToPropertyKeys>();
206-
this.threadLocalBuilder.forEach(tlb -> nodeLabelTokenToPropertyKeysList.add(tlb.nodeLabelTokenToPropertyKeys));
207-
// Clean up resources held by local builders
208-
this.threadLocalBuilder.close();
202+
var localLabelTokenToPropertyKeys = closeThreadLocalBuilders();
209203

210204
var idMap = this.idMapBuilder.build(labelInformationBuilder, highestNeoId, concurrency);
211205
var nodeProperties = buildProperties(idMap);
212-
var nodeSchema = buildNodeSchema(idMap, nodeLabelTokenToPropertyKeysList, nodeProperties);
206+
var nodeSchema = buildNodeSchema(idMap, localLabelTokenToPropertyKeys, nodeProperties);
213207
var nodePropertyStore = NodePropertyStore.builder().properties(nodeProperties).build();
214208

215209
return ImmutableNodes.builder()
@@ -219,37 +213,53 @@ public Nodes build(long highestNeoId) {
219213
.build();
220214
}
221215

216+
private List<NodeLabelTokenToPropertyKeys> closeThreadLocalBuilders() {
217+
// Flush remaining buffer contents
218+
this.threadLocalBuilder.forEach(ThreadLocalBuilder::flush);
219+
// Collect token to property keys for final union
220+
var labelTokenToPropertyKeys = new ArrayList<NodeLabelTokenToPropertyKeys>();
221+
this.threadLocalBuilder.forEach(tlb -> labelTokenToPropertyKeys.add(tlb.nodeLabelTokenToPropertyKeys));
222+
// Clean up resources held by local builders
223+
this.threadLocalBuilder.close();
224+
225+
return labelTokenToPropertyKeys;
226+
}
227+
222228
private NodeSchema buildNodeSchema(
223229
IdMap idMap,
224-
Collection<NodeLabelTokenToPropertyKeys> localNodeLabelTokenToPropertyKeys,
230+
Collection<NodeLabelTokenToPropertyKeys> localLabelTokenToPropertyKeys,
225231
Map<String, NodeProperty> nodeProperties
226232
) {
227-
var nodeSchema = NodeSchema.empty();
228-
var importPropertySchemas = nodeProperties
233+
234+
// Collect the property schemas from the imported property values.
235+
var propertyKeysToSchema = nodeProperties
229236
.entrySet()
230237
.stream()
231238
.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().propertySchema()));
232-
233-
// consider node labels without properties
234-
var nodeLabels = new HashSet<>(idMap.availableNodeLabels());
235-
// and also node labels with associated properties
236-
localNodeLabelTokenToPropertyKeys.forEach(mapping -> nodeLabels.addAll(mapping.nodeLabels()));
237-
// merge into a global mapping
238-
var globalNodeLabelTokenToPropertyKeys = localNodeLabelTokenToPropertyKeys
239+
// Union the label to property key mappings from each import thread.
240+
var globalLabelTokenToPropertyKeys = localLabelTokenToPropertyKeys
239241
.stream()
240242
.reduce(
241243
NodeLabelTokenToPropertyKeys.lazy(),
242-
(left, right) -> NodeLabelTokenToPropertyKeys.merge(left, right, importPropertySchemas)
243-
);
244-
// and construct final node property schema
245-
nodeLabels.forEach(nodeLabel -> {
246-
nodeSchema.addLabel(
247-
nodeLabel,
248-
globalNodeLabelTokenToPropertyKeys.propertySchemas(nodeLabel, importPropertySchemas)
244+
(left, right) -> NodeLabelTokenToPropertyKeys.union(left, right, propertyKeysToSchema)
249245
);
250-
});
246+
// Collect node labels without properties from the id map
247+
// as they are not stored in the above union mapping.
248+
var nodeLabels = new HashSet<>(idMap.availableNodeLabels());
249+
// Add labels that actually have node properties attached.
250+
localLabelTokenToPropertyKeys.forEach(localMapping -> nodeLabels.addAll(localMapping.nodeLabels()));
251251

252-
return nodeSchema;
252+
// Use all labels and the global label to property
253+
// key mapping to construct the final node schema.
254+
return nodeLabels.stream()
255+
.reduce(
256+
NodeSchema.empty(),
257+
(unionSchema, nodeLabel) -> unionSchema.addLabel(
258+
nodeLabel,
259+
globalLabelTokenToPropertyKeys.propertySchemas(nodeLabel, propertyKeysToSchema)
260+
),
261+
(lhs, rhs) -> lhs
262+
);
253263
}
254264

255265
private Map<String, NodeProperty> buildProperties(IdMap idMap) {
@@ -344,10 +354,6 @@ private static class ThreadLocalBuilder implements AutoCloseable {
344354
this.batchNodeProperties = new ArrayList<>(buffer.capacity());
345355
}
346356

347-
NodeLabelTokenToPropertyKeys nodeLabelTokenToPropertyKeys() {
348-
return this.nodeLabelTokenToPropertyKeys;
349-
}
350-
351357
public void addNode(long originalId, NodeLabelToken nodeLabels) {
352358
if (!seenNodeIdPredicate.test(originalId)) {
353359
long[] labels = getOrCreateLabelTokens(nodeLabels);

core/src/test/java/org/neo4j/gds/core/loading/construction/NodeLabelTokenToPropertyKeysTest.java

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -204,24 +204,40 @@ void testNodeLabelsFixed() {
204204
void testMerge() {
205205
var importPropertySchemas = Map.of(
206206
"foo", PropertySchema.of("foo", ValueType.LONG, DefaultValue.forLong(), PropertyState.TRANSIENT),
207-
"bar", PropertySchema.of("bar", ValueType.DOUBLE, DefaultValue.forDouble(), PropertyState.PERSISTENT)
207+
"bar", PropertySchema.of("bar", ValueType.DOUBLE, DefaultValue.forDouble(), PropertyState.PERSISTENT),
208+
"baz", PropertySchema.of("baz", ValueType.LONG_ARRAY, DefaultValue.forLongArray(), PropertyState.PERSISTENT)
208209
);
209210

210-
var left = NodeLabelTokenToPropertyKeys.lazy();
211-
left.add(NodeLabelTokens.ofStrings("A"), List.of("foo"));
211+
var aLabel = NodeLabel.of("A");
212+
var bLabel = NodeLabel.of("B");
213+
var cLabel = NodeLabel.of("C");
212214

213-
var right = NodeLabelTokenToPropertyKeys.lazy();
214-
right.add(NodeLabelTokens.ofStrings("B"), List.of("bar"));
215+
var mapping0 = NodeLabelTokenToPropertyKeys.lazy();
216+
mapping0.add(NodeLabelTokens.ofNodeLabels(aLabel), List.of("foo"));
217+
mapping0.add(NodeLabelTokens.ofNodeLabels(cLabel), List.of("bar"));
215218

216-
var mapping = NodeLabelTokenToPropertyKeys.merge(left, right, importPropertySchemas);
219+
var mapping1 = NodeLabelTokenToPropertyKeys.lazy();
220+
mapping1.add(NodeLabelTokens.ofNodeLabels(bLabel), List.of("bar"));
217221

218-
assertThat(mapping.nodeLabels()).containsExactlyInAnyOrder(NodeLabel.of("A"), NodeLabel.of("B"));
219-
assertThat(mapping.propertySchemas(NodeLabel.of("A"), importPropertySchemas)).isEqualTo(Map.of(
220-
"foo", PropertySchema.of("foo", ValueType.LONG, DefaultValue.forLong(), PropertyState.TRANSIENT)
222+
var mapping2 = NodeLabelTokenToPropertyKeys.lazy();
223+
mapping2.add(NodeLabelTokens.ofNodeLabels(aLabel, cLabel), List.of("baz"));
224+
225+
var union = NodeLabelTokenToPropertyKeys.union(mapping0, mapping1, importPropertySchemas);
226+
union = NodeLabelTokenToPropertyKeys.union(union, mapping2, importPropertySchemas);
227+
228+
assertThat(union.nodeLabels()).containsExactlyInAnyOrder(aLabel, bLabel, cLabel);
229+
230+
assertThat(union.propertySchemas(aLabel, importPropertySchemas)).isEqualTo(Map.of(
231+
"foo", PropertySchema.of("foo", ValueType.LONG, DefaultValue.forLong(), PropertyState.TRANSIENT),
232+
"baz", PropertySchema.of("baz", ValueType.LONG_ARRAY, DefaultValue.forLongArray(), PropertyState.PERSISTENT)
221233
));
222-
assertThat(mapping.propertySchemas(NodeLabel.of("B"), importPropertySchemas)).isEqualTo(Map.of(
234+
assertThat(union.propertySchemas(bLabel, importPropertySchemas)).isEqualTo(Map.of(
223235
"bar", PropertySchema.of("bar", ValueType.DOUBLE, DefaultValue.forDouble(), PropertyState.PERSISTENT)
224236
));
237+
assertThat(union.propertySchemas(cLabel, importPropertySchemas)).isEqualTo(Map.of(
238+
"bar", PropertySchema.of("bar", ValueType.DOUBLE, DefaultValue.forDouble(), PropertyState.PERSISTENT),
239+
"baz", PropertySchema.of("baz", ValueType.LONG_ARRAY, DefaultValue.forLongArray(), PropertyState.PERSISTENT)
240+
));
225241
}
226242
}
227243

0 commit comments

Comments
 (0)