Skip to content

Commit a85b88b

Browse files
breakanalysis, adamnsch
authored and committed
Improve and extend hashgnn examples
1 parent 4c12338 commit a85b88b

File tree

2 files changed

+97
-53
lines changed

2 files changed

+97
-53
lines changed

doc-test/src/test/java/org/neo4j/gds/doc/HashGNNDocTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.neo4j.gds.catalog.GraphProjectProc;
2323
import org.neo4j.gds.embeddings.hashgnn.HashGNNMutateProc;
2424
import org.neo4j.gds.embeddings.hashgnn.HashGNNStreamProc;
25+
import org.neo4j.gds.scaling.ScalePropertiesMutateProc;
2526

2627
import java.util.List;
2728

@@ -32,6 +33,7 @@ protected List<Class<?>> procedures() {
3233
return List.of(
3334
HashGNNStreamProc.class,
3435
HashGNNMutateProc.class,
36+
ScalePropertiesMutateProc.class,
3537
GraphProjectProc.class
3638
);
3739
}

doc/modules/ROOT/pages/machine-learning/node-embeddings/hashgnn.adoc

Lines changed: 95 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -330,13 +330,13 @@ We will do this on a small {graph-description} graph of a handful of nodes connected
330330
[source, cypher, role=noplay setup-query]
331331
----
332332
CREATE
333-
(dan:Person {name: 'Dan', age: 18, experience: 53, hipster: 0}),
333+
(dan:Person {name: 'Dan', age: 18, experience: 63, hipster: 0}),
334334
(annie:Person {name: 'Annie', age: 12, experience: 5, hipster: 0}),
335335
(matt:Person {name: 'Matt', age: 22, experience: 42, hipster: 0}),
336-
(jeff:Person {name: 'Jeff', age: 51, experience: 51, hipster: 0}),
337-
(brie:Person {name: 'Brie', age: 45, experience: 23, hipster: 0}),
338-
(elsa:Person {name: 'Elsa', age: 65, experience: -99, hipster: 1}),
339-
(john:Person {name: 'John', age: 64, experience: 100, hipster: 0}),
336+
(jeff:Person {name: 'Jeff', age: 51, experience: 12, hipster: 0}),
337+
(brie:Person {name: 'Brie', age: 31, experience: 6, hipster: 0}),
338+
(elsa:Person {name: 'Elsa', age: 65, experience: 23, hipster: 1}),
339+
(john:Person {name: 'John', age: 4, experience: 100, hipster: 0}),
340340
(apple:Fruit {name: 'Apple', tropical: 0, sourness: 0.3, sweetness: 0.6}),
341341
(banana:Fruit {name: 'Banana', tropical: 1, sourness: 0.1, sweetness: 0.9}),
342342
(mango:Fruit {name: 'Mango', tropical: 1, sourness: 0.3, sweetness: 1.0}),
@@ -395,6 +395,17 @@ CALL gds.graph.project(
395395
)
396396
----
397397

398+
Since some of the examples use binarization and the properties have different scales, we will create a scaled version of the `experience` property.
399+
400+
.The following will scale the `experience` property and add the result to the in-memory graph as `experience_scaled`:
401+
[source, cypher, role=noplay graph-project-query]
402+
----
403+
CALL gds.alpha.scaleProperties.mutate('persons', {
404+
nodeProperties: ['experience'],
405+
scaler: 'Minmax',
406+
mutateProperty: 'experience_scaled'
407+
}) YIELD nodePropertiesWritten
408+
----
398409

399410
[[algorithms-embeddings-hashgnn-examples-memory-estimation]]
400411
=== Memory Estimation
@@ -407,15 +418,15 @@ include::partial$/algorithms/shared/examples-estimate-intro.adoc[]
407418
.The following will estimate the memory requirements for running the algorithm:
408419
[source, cypher, role=noplay]
409420
----
410-
CALL gds.alpha.hashgnn.stream.estimate('persons', {nodeLabels: ['Person'], iterations: 3, embeddingDensity: 2, binarizeFeatures: {dimension: 4, densityLevel: 1}, featureProperties: ['age', 'experience']})
421+
CALL gds.alpha.hashgnn.stream.estimate('persons', {nodeLabels: ['Person'], iterations: 3, embeddingDensity: 2, binarizeFeatures: {dimension: 4, threshold: 0}, featureProperties: ['age', 'experience']})
411422
YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory
412423
----
413424

414425
.Results
415426
[opts="header", cols="1,1,1,1,1"]
416427
|===
417428
| nodeCount | relationshipCount | bytesMin | bytesMax | requiredMemory
418-
| 7 | 18 | 134912 | 134912 | "131 KiB"
429+
| 7 | 18 | 59160 | 59160 | "57 KiB"
419430
|===
420431
--
421432

@@ -434,11 +445,11 @@ include::partial$/algorithms/shared/examples-stream-intro.adoc[]
434445
CALL gds.alpha.hashgnn.stream('persons',
435446
{
436447
nodeLabels: ['Person'],
437-
iterations: 3,
448+
iterations: 1,
438449
embeddingDensity: 2,
439-
binarizeFeatures: {dimension: 4, densityLevel: 1},
450+
binarizeFeatures: {dimension: 4, threshold: 32},
440451
featureProperties: ['age', 'experience'],
441-
randomSeed: 123
452+
randomSeed: 42
442453
}
443454
)
444455
YIELD nodeId, embedding
@@ -448,13 +459,13 @@ YIELD nodeId, embedding
448459
.Results
449460
|===
450461
| nodeId | embedding
451-
| 0 | [0.0, 0.0, 0.0, 1.0]
452-
| 1 | [0.0, 0.0, 0.0, 1.0]
453-
| 2 | [0.0, 0.0, 0.0, 1.0]
454-
| 3 | [0.0, 0.0, 0.0, 1.0]
455-
| 4 | [0.0, 0.0, 0.0, 1.0]
456-
| 5 | [0.0, 1.0, 0.0, 0.0]
457-
| 6 | [0.0, 0.0, 0.0, 1.0]
462+
| 0 | [0.0, 1.0, 0.0, 0.0]
463+
| 1 | [1.0, 0.0, 1.0, 0.0]
464+
| 2 | [1.0, 1.0, 0.0, 0.0]
465+
| 3 | [1.0, 0.0, 1.0, 0.0]
466+
| 4 | [1.0, 0.0, 0.0, 0.0]
467+
| 5 | [1.0, 0.0, 1.0, 0.0]
468+
| 6 | [1.0, 1.0, 0.0, 0.0]
458469
|===
459470
--
460471

@@ -468,7 +479,7 @@ Due to the random nature of the algorithm the results will vary between the runs
468479

469480
[role=query-example]
470481
--
471-
.The following will run the algorithm on Person nodes without binarization, and stream results:
482+
.The following will run the algorithm on Person nodes using binary properties directly (without binarization), and stream results:
472483
[source, cypher, role=noplay]
473484
----
474485
CALL gds.alpha.hashgnn.stream('persons',
@@ -499,6 +510,36 @@ YIELD nodeId, embedding
499510

500511
In this example the embedding dimension is `1`: without binarization, the embedding dimension equals the number of input features, which is `1` here because `hipster` is the only feature property.
501512

513+
[role=query-example]
514+
--
515+
.The following will run the algorithm on Person nodes using generated features, and stream results:
516+
[source, cypher, role=noplay]
517+
----
518+
CALL gds.alpha.hashgnn.stream('persons',
519+
{
520+
nodeLabels: ['Person'],
521+
iterations: 1,
522+
embeddingDensity: 2,
523+
generateFeatures: {dimension: 4, densityLevel: 1},
524+
randomSeed: 123
525+
}
526+
)
527+
YIELD nodeId, embedding
528+
----
529+
530+
[opts=header]
531+
.Results
532+
|===
533+
| nodeId | embedding
534+
| 0 | [0.0, 0.0, 1.0, 1.0]
535+
| 1 | [0.0, 0.0, 1.0, 0.0]
536+
| 2 | [0.0, 1.0, 1.0, 0.0]
537+
| 3 | [1.0, 0.0, 1.0, 0.0]
538+
| 4 | [0.0, 0.0, 1.0, 1.0]
539+
| 5 | [0.0, 0.0, 1.0, 0.0]
540+
| 6 | [1.0, 0.0, 0.0, 0.0]
541+
|===
542+
--
502543
[role=query-example]
503544
--
504545
.The following will run the algorithm in heterogeneous mode, and stream results:
@@ -507,11 +548,11 @@ In this example the embedding dimension becomes `1` because without binarization
507548
CALL gds.alpha.hashgnn.stream('persons',
508549
{
509550
heterogeneous: true,
510-
iterations: 3,
511-
embeddingDensity: 6,
512-
binarizeFeatures: {dimension: 6, densityLevel: 3},
513-
featureProperties: ['age', 'experience', 'sourness', 'sweetness', 'tropical'],
514-
randomSeed: 123
551+
iterations: 2,
552+
embeddingDensity: 4,
553+
binarizeFeatures: {dimension: 6, threshold: 0.2},
554+
featureProperties: ['experience_scaled', 'sourness', 'sweetness', 'tropical'],
555+
randomSeed: 42
515556
}
516557
)
517558
YIELD nodeId, embedding
@@ -521,17 +562,17 @@ YIELD nodeId, embedding
521562
.Results
522563
|===
523564
| nodeId | embedding
524-
| 0 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
565+
| 0 | [1.0, 1.0, 0.0, 0.0, 0.0, 1.0]
525566
| 1 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
526-
| 2 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
527-
| 3 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
567+
| 2 | [1.0, 1.0, 0.0, 0.0, 0.0, 0.0]
568+
| 3 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
528569
| 4 | [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]
529-
| 5 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
530-
| 6 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
531-
| 7 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
532-
| 8 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
533-
| 9 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
534-
| 10 | [1.0, 0.0, 1.0, 0.0, 1.0, 0.0]
570+
| 5 | [1.0, 1.0, 0.0, 0.0, 0.0, 0.0]
571+
| 6 | [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
572+
| 7 | [1.0, 0.0, 1.0, 0.0, 0.0, 1.0]
573+
| 8 | [1.0, 0.0, 0.0, 0.0, 0.0, 1.0]
574+
| 9 | [1.0, 0.0, 0.0, 0.0, 0.0, 1.0]
575+
| 10 | [0.0, 0.0, 1.0, 0.0, 1.0, 0.0]
535576
|===
536577
--
537578

@@ -543,12 +584,12 @@ YIELD nodeId, embedding
543584
CALL gds.alpha.hashgnn.stream('persons',
544585
{
545586
heterogeneous: true,
546-
iterations: 3,
547-
embeddingDensity: 6,
548-
binarizeFeatures: {dimension: 6, densityLevel: 3},
549-
featureProperties: ['age', 'experience', 'sourness', 'sweetness', 'tropical'],
587+
iterations: 2,
588+
embeddingDensity: 4,
589+
binarizeFeatures: {dimension: 6, threshold: 0.2},
590+
featureProperties: ['experience_scaled', 'sourness', 'sweetness', 'tropical'],
550591
outputDimension: 4,
551-
randomSeed: 123
592+
randomSeed: 42
552593
}
553594
)
554595
YIELD nodeId, embedding
@@ -558,17 +599,17 @@ YIELD nodeId, embedding
558599
.Results
559600
|===
560601
| nodeId | embedding
561-
| 0 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
562-
| 1 | [0.0, 0.8660253882408142, -0.8660253882408142, 0.0]
563-
| 2 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
564-
| 3 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
565-
| 4 | [0.0, 0.8660253882408142, -0.8660253882408142, 0.0]
566-
| 5 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
567-
| 6 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
568-
| 7 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
569-
| 8 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
570-
| 9 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
571-
| 10 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.0]
602+
| 0 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.8660253882408142]
603+
| 1 | [0.0, 0.0, -1.7320507764816284, 0.8660253882408142]
604+
| 2 | [0.0, 0.8660253882408142, -0.8660253882408142, 0.0]
605+
| 3 | [0.0, 0.0, -1.7320507764816284, 0.8660253882408142]
606+
| 4 | [0.0, 0.0, -1.7320507764816284, 0.8660253882408142]
607+
| 5 | [0.0, 0.8660253882408142, -0.8660253882408142, 0.0]
608+
| 6 | [0.0, 0.8660253882408142, -1.7320507764816284, 0.8660253882408142]
609+
| 7 | [0.0, 0.0, -2.5980761647224426, 1.7320507764816284]
610+
| 8 | [0.0, 0.0, -1.7320507764816284, 0.8660253882408142]
611+
| 9 | [0.0, 0.0, -1.7320507764816284, 0.8660253882408142]
612+
| 10 | [0.0, 0.0, -0.8660253882408142, 0.8660253882408142]
572613
|===
573614
--
574615

@@ -585,11 +626,12 @@ include::partial$/algorithms/shared/examples-mutate-intro.adoc[]
585626
CALL gds.alpha.hashgnn.mutate(
586627
'persons',
587628
{
588-
nodeLabels: ['Person'],
589629
mutateProperty: 'hashgnn-embedding',
590-
iterations: 3,
591-
embeddingDensity: 2,
592-
binarizeFeatures: {dimension: 4, densityLevel: 1},
630+
heterogeneous: true,
631+
iterations: 2,
632+
embeddingDensity: 4,
633+
binarizeFeatures: {dimension: 6, threshold: 0.2},
634+
featureProperties: ['experience_scaled', 'sourness', 'sweetness', 'tropical'],
593635
randomSeed: 42
594636
}
595637
)
@@ -600,7 +642,7 @@ YIELD nodePropertiesWritten
600642
.Results
601643
|===
602644
| nodePropertiesWritten
603-
| 7
645+
| 11
604646
|===
605647
--
606648

0 commit comments

Comments
 (0)