Skip to content

Commit 4f092ea

Browse files
s1cksoerenreichardt
andcommitted
Build node schema within NodesBuilder
Co-Authored-By: Sören Reichardt <soren.reichardt@neotechnology.com>
1 parent fce4f56 commit 4f092ea

File tree

6 files changed

+313
-12
lines changed

6 files changed

+313
-12
lines changed

core/src/main/java/org/neo4j/gds/core/loading/Nodes.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@
2727
import org.neo4j.gds.api.properties.nodes.NodeProperty;
2828
import org.neo4j.gds.api.properties.nodes.NodePropertyStore;
2929
import org.neo4j.gds.api.properties.nodes.NodePropertyValues;
30+
import org.neo4j.gds.api.schema.NodeSchema;
3031

3132
import java.util.Map;
3233

3334
@ValueClass
3435
public interface Nodes {
3536

37+
NodeSchema schema();
38+
3639
IdMap idMap();
3740

3841
@Value.Default
@@ -41,11 +44,11 @@ default NodePropertyStore properties() {
4144
}
4245

4346
static Nodes of(IdMap idmap) {
44-
return ImmutableNodes.of(idmap, NodePropertyStore.empty());
47+
return ImmutableNodes.of(NodeSchema.empty(), idmap, NodePropertyStore.empty());
4548
}
4649

4750
static Nodes of(IdMap idmap, NodePropertyStore nodePropertyStore) {
48-
return ImmutableNodes.of(idmap, nodePropertyStore);
51+
return ImmutableNodes.of(NodeSchema.empty(), idmap, nodePropertyStore);
4952
}
5053

5154
static Nodes of(IdMap idMap, Map<PropertyMapping, NodePropertyValues> properties, PropertyState propertyState) {
@@ -61,6 +64,6 @@ static Nodes of(IdMap idMap, Map<PropertyMapping, NodePropertyValues> properties
6164
: nodeProperties.valueType().fallbackValue()
6265
)
6366
));
64-
return ImmutableNodes.of(idMap, builder.build());
67+
return ImmutableNodes.of(NodeSchema.empty(), idMap, builder.build());
6568
}
6669
}

core/src/main/java/org/neo4j/gds/core/loading/construction/GraphFactory.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ static NodesBuilder nodesBuilder(
121121
maxOriginalId,
122122
threadCount,
123123
TokenToNodeLabelMap.lazy(),
124+
NodeLabelTokenToPropertyKeys.lazy(),
124125
new ConcurrentHashMap<>(),
125126
idMapBuilder,
126127
labelInformation,
@@ -161,6 +162,7 @@ private static NodesBuilder fromSchema(
161162
maxOriginalId,
162163
concurrency,
163164
TokenToNodeLabelMap.fixed(elementIdentifierLabelTokenMapping, labelTokenNodeLabelMapping),
165+
NodeLabelTokenToPropertyKeys.fixed(nodeSchema),
164166
new ConcurrentHashMap<>(propertyBuildersByPropertyKey),
165167
idMapBuilder,
166168
hasLabelInformation,
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
* Copyright (c) "Neo4j"
3+
* Neo4j Sweden AB [http://neo4j.com]
4+
*
5+
* This file is part of Neo4j.
6+
*
7+
* Neo4j is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* This program is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
*/
20+
package org.neo4j.gds.core.loading.construction;
21+
22+
import org.neo4j.gds.NodeLabel;
23+
import org.neo4j.gds.api.schema.NodeSchema;
24+
import org.neo4j.gds.api.schema.PropertySchema;
25+
26+
import java.util.HashSet;
27+
import java.util.Map;
28+
import java.util.Set;
29+
import java.util.concurrent.ConcurrentHashMap;
30+
import java.util.stream.Collectors;
31+
32+
abstract class NodeLabelTokenToPropertyKeys {
33+
34+
/**
35+
* Creates a thread-safe, mutable mapping.
36+
* <p>
37+
* The property schemas are inferred from the input data.
38+
*/
39+
static NodeLabelTokenToPropertyKeys lazy() {
40+
return new Lazy();
41+
}
42+
43+
/**
44+
* Creates thread-safe, immutable mapping.
45+
* <p>
46+
* The property schemas are inferred from given schema.
47+
*/
48+
static NodeLabelTokenToPropertyKeys fixed(NodeSchema nodeSchema) {
49+
return new Fixed(nodeSchema);
50+
}
51+
52+
/**
53+
* Assign the given property keys to the given label token.
54+
* <p>
55+
* If the token is already present, the property keys are added with set semantics.
56+
*/
57+
abstract void add(NodeLabelToken nodeLabelToken, Iterable<String> propertyKeys);
58+
59+
/**
60+
* Return the property schemas for the given node label.
61+
*/
62+
abstract Map<String, PropertySchema> propertySchemas(
63+
NodeLabel nodeLabelToken,
64+
Map<String, PropertySchema> importPropertySchemas
65+
);
66+
67+
static class Fixed extends NodeLabelTokenToPropertyKeys {
68+
69+
private final NodeSchema nodeSchema;
70+
71+
Fixed(NodeSchema nodeSchema) {
72+
this.nodeSchema = nodeSchema;
73+
}
74+
75+
@Override
76+
void add(NodeLabelToken nodeLabelToken, Iterable<String> propertyKeys) {
77+
// silence is golden
78+
}
79+
80+
@Override
81+
Map<String, PropertySchema> propertySchemas(
82+
NodeLabel nodeLabel,
83+
Map<String, PropertySchema> importPropertySchemas
84+
) {
85+
var inputPropertySchemas = nodeSchema.get(nodeLabel).properties();
86+
var loadPropertySchemas = importPropertySchemas
87+
.entrySet()
88+
.stream()
89+
.filter(entry -> inputPropertySchemas.containsKey(entry.getKey()))
90+
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
91+
92+
if (!inputPropertySchemas.equals(loadPropertySchemas)) {
93+
throw new IllegalStateException(
94+
"Property schemas inferred from loading do not match input property schema.");
95+
}
96+
97+
return inputPropertySchemas;
98+
}
99+
}
100+
101+
static class Lazy extends NodeLabelTokenToPropertyKeys {
102+
103+
private final ConcurrentHashMap<NodeLabelToken, Set<String>> labelToPropertyKeys;
104+
105+
Lazy() {
106+
this.labelToPropertyKeys = new ConcurrentHashMap<>();
107+
}
108+
109+
@Override
110+
void add(NodeLabelToken nodeLabelToken, Iterable<String> propertyKeys) {
111+
this.labelToPropertyKeys.compute(nodeLabelToken, (token, propertyKeySet) -> {
112+
var keys = (propertyKeySet == null) ? new HashSet<String>() : propertyKeySet;
113+
propertyKeys.forEach(keys::add);
114+
return keys;
115+
});
116+
117+
}
118+
119+
@Override
120+
Map<String, PropertySchema> propertySchemas(
121+
NodeLabel nodeLabel,
122+
Map<String, PropertySchema> importPropertySchemas
123+
) {
124+
return labelToPropertyKeys.keySet().stream()
125+
.filter(nodeLabelToken -> {
126+
for (int i = 0; i < nodeLabelToken.size(); i++) {
127+
if (nodeLabelToken.get(i).equals(nodeLabel)) {
128+
return true;
129+
}
130+
}
131+
return false;
132+
})
133+
.flatMap(nodeLabelToken -> this.labelToPropertyKeys.get(nodeLabelToken).stream())
134+
.collect(Collectors.toMap(propertyKey -> propertyKey, importPropertySchemas::get));
135+
}
136+
}
137+
}

core/src/main/java/org/neo4j/gds/core/loading/construction/NodesBuilder.java

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.neo4j.gds.api.properties.nodes.NodeProperty;
3030
import org.neo4j.gds.api.properties.nodes.NodePropertyStore;
3131
import org.neo4j.gds.api.properties.nodes.NodePropertyValues;
32+
import org.neo4j.gds.api.schema.NodeSchema;
3233
import org.neo4j.gds.api.schema.PropertySchema;
3334
import org.neo4j.gds.compat.LongPropertyReference;
3435
import org.neo4j.gds.core.concurrency.ParallelUtil;
@@ -56,6 +57,7 @@
5657
import java.util.concurrent.atomic.LongAdder;
5758
import java.util.function.Function;
5859
import java.util.function.LongPredicate;
60+
import java.util.stream.Collectors;
5961

6062
import static java.util.stream.Collectors.toMap;
6163

@@ -77,12 +79,14 @@ public final class NodesBuilder {
7779
private final NodeImporter nodeImporter;
7880

7981
private final ConcurrentMap<String, NodePropertiesFromStoreBuilder> propertyBuildersByPropertyKey;
80-
private final boolean hasProperties;
82+
83+
private final NodeLabelTokenToPropertyKeys nodeLabelTokenToPropertyKeys;
8184

8285
NodesBuilder(
8386
long maxOriginalId,
8487
int concurrency,
8588
TokenToNodeLabelMap tokenToNodeLabelMap,
89+
NodeLabelTokenToPropertyKeys nodeLabelTokenToPropertyKeys,
8690
ConcurrentMap<String, NodePropertiesFromStoreBuilder> propertyBuildersByPropertyKey,
8791
IdMapBuilder idMapBuilder,
8892
boolean hasLabelInformation,
@@ -92,13 +96,13 @@ public final class NodesBuilder {
9296
) {
9397
this.maxOriginalId = maxOriginalId;
9498
this.concurrency = concurrency;
99+
this.nodeLabelTokenToPropertyKeys = nodeLabelTokenToPropertyKeys;
95100
this.idMapBuilder = idMapBuilder;
96101
this.propertyStates = propertyStates;
97102
this.labelInformationBuilder = !hasLabelInformation
98103
? LabelInformationBuilders.allNodes()
99104
: LabelInformationBuilders.multiLabelWithCapacity(maxOriginalId + 1);
100105
this.propertyBuildersByPropertyKey = propertyBuildersByPropertyKey;
101-
this.hasProperties = hasProperties;
102106
this.importedNodes = new LongAdder();
103107
this.nodeImporter = new NodeImporter(
104108
idMapBuilder,
@@ -123,6 +127,7 @@ public final class NodesBuilder {
123127
hasLabelInformation,
124128
hasProperties,
125129
tokenToNodeLabelMap,
130+
nodeLabelTokenToPropertyKeys,
126131
propertyBuilderFn
127132
)
128133
);
@@ -200,13 +205,28 @@ public Nodes build(long highestNeoId) {
200205
this.threadLocalBuilder.close();
201206

202207
var idMap = this.idMapBuilder.build(labelInformationBuilder, highestNeoId, concurrency);
208+
var nodeProperties = buildProperties(idMap);
209+
var nodeSchema = buildNodeSchema(idMap, nodeProperties);
210+
var nodePropertyStore = NodePropertyStore.builder().properties(nodeProperties).build();
211+
212+
return ImmutableNodes.builder()
213+
.schema(nodeSchema)
214+
.idMap(idMap)
215+
.properties(nodePropertyStore)
216+
.build();
217+
}
203218

204-
var nodeImportResultBuilder = ImmutableNodes.builder().idMap(idMap);
205-
if (hasProperties) {
206-
var nodeProperties = buildProperties(idMap);
207-
nodeImportResultBuilder.properties(NodePropertyStore.builder().properties(nodeProperties).build());
208-
}
209-
return nodeImportResultBuilder.build();
219+
private NodeSchema buildNodeSchema(IdMap idMap, Map<String, NodeProperty> nodeProperties) {
220+
var nodeSchema = NodeSchema.empty();
221+
var importPropertySchemas = nodeProperties
222+
.entrySet()
223+
.stream()
224+
.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().propertySchema()));
225+
226+
idMap.availableNodeLabels().forEach(label -> {
227+
nodeSchema.addLabel(label, this.nodeLabelTokenToPropertyKeys.propertySchemas(label, importPropertySchemas));
228+
});
229+
return nodeSchema;
210230
}
211231

212232
private Map<String, NodeProperty> buildProperties(IdMap idMap) {
@@ -216,7 +236,11 @@ private Map<String, NodeProperty> buildProperties(IdMap idMap) {
216236
));
217237
}
218238

219-
private static NodeProperty entryToNodeProperty(Map.Entry<String, NodePropertiesFromStoreBuilder> entry, PropertyState propertyState, IdMap idMap) {
239+
private static NodeProperty entryToNodeProperty(
240+
Map.Entry<String, NodePropertiesFromStoreBuilder> entry,
241+
PropertyState propertyState,
242+
IdMap idMap
243+
) {
220244
var nodePropertyValues = entry.getValue().build(idMap);
221245
var valueType = nodePropertyValues.valueType();
222246
return ImmutableNodeProperty.builder()
@@ -264,6 +288,7 @@ private static class ThreadLocalBuilder implements AutoCloseable {
264288
private final LongAdder importedNodes;
265289
private final LongPredicate seenNodeIdPredicate;
266290
private final TokenToNodeLabelMap tokenToNodeLabelMap;
291+
private final NodeLabelTokenToPropertyKeys nodeLabelTokenToPropertyKeys;
267292
private final NodesBatchBuffer buffer;
268293
private final Function<String, NodePropertiesFromStoreBuilder> propertyBuilderFn;
269294
private final NodeImporter nodeImporter;
@@ -277,11 +302,13 @@ private static class ThreadLocalBuilder implements AutoCloseable {
277302
boolean hasLabelInformation,
278303
boolean hasProperties,
279304
TokenToNodeLabelMap tokenToNodeLabelMap,
305+
NodeLabelTokenToPropertyKeys nodeLabelTokenToPropertyKeys,
280306
Function<String, NodePropertiesFromStoreBuilder> propertyBuilderFn
281307
) {
282308
this.importedNodes = importedNodes;
283309
this.seenNodeIdPredicate = seenNodeIdPredicate;
284310
this.tokenToNodeLabelMap = tokenToNodeLabelMap;
311+
this.nodeLabelTokenToPropertyKeys = nodeLabelTokenToPropertyKeys;
285312
this.propertyBuilderFn = propertyBuilderFn;
286313

287314
this.buffer = new NodesBatchBufferBuilder()
@@ -309,6 +336,7 @@ public void addNode(long originalId, NodeLabelToken nodeLabels) {
309336
public void addNode(long originalId, NodeLabelToken nodeLabels, PropertyValues properties) {
310337
if (!seenNodeIdPredicate.test(originalId)) {
311338
long[] labels = labelTokens(nodeLabels);
339+
this.nodeLabelTokenToPropertyKeys.add(nodeLabels, properties.propertyKeys());
312340

313341
int propertyReference = batchNodeProperties.size();
314342
batchNodeProperties.add(properties);

core/src/main/java/org/neo4j/gds/core/loading/construction/PropertyValues.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.neo4j.values.virtual.MapValue;
2626

2727
import java.util.Map;
28+
import java.util.Set;
2829
import java.util.function.BiConsumer;
2930

3031
public abstract class PropertyValues {
@@ -33,6 +34,8 @@ public abstract class PropertyValues {
3334

3435
public abstract boolean isEmpty();
3536

37+
public abstract Iterable<String> propertyKeys();
38+
3639
public static PropertyValues of(MapValue mapValue) {
3740
return new CypherPropertyValues(mapValue);
3841
}
@@ -57,6 +60,11 @@ public void forEach(BiConsumer<String, Value> consumer) {
5760
public boolean isEmpty() {
5861
return this.properties.isEmpty();
5962
}
63+
64+
@Override
65+
public Set<String> propertyKeys() {
66+
return this.properties.keySet();
67+
}
6068
}
6169

6270
private static final class CypherPropertyValues extends PropertyValues {
@@ -79,5 +87,10 @@ public void forEach(BiConsumer<String, Value> consumer) {
7987
public boolean isEmpty() {
8088
return this.properties.isEmpty();
8189
}
90+
91+
@Override
92+
public Iterable<String> propertyKeys() {
93+
return this.properties.keySet();
94+
}
8295
}
8396
}

0 commit comments

Comments
 (0)