Handle empty table in Arrow-based stream

FlorentinD · FlorentinD · commit 80e199cb4820 · 2024-03-27T10:09:15.000+01:00
diff --git a/changelog.md b/changelog.md
@@ -21,6 +21,7 @@
 * Fixed an issue where configuration parameters such as `aggregation` were ignored by `gds.graph.toUndirected`.
 * Fixed an issue where the `database` given for the `GraphDataScience` construction was not used for metadata retrieval, causing an exception to be raised if the default "neo4j" database was missing.
 * Fixed an issue where progress bars would not always complete.
+* Fixed an issue where a relationship type without any relationships could not be streamed.
 
 
 ## Improvements
diff --git a/graphdatascience/query_runner/arrow_query_runner.py b/graphdatascience/query_runner/arrow_query_runner.py
@@ -325,6 +325,10 @@ def create_graph_constructor(
         )
 
     def _sanitize_arrow_table(self, arrow_table: Table) -> Table:
+        # empty columns cannot be used to build a chunked_array in pyarrow
+        if len(arrow_table) == 0:
+            return arrow_table
+
         dict_encoded_fields = [
             (idx, field) for idx, field in enumerate(arrow_table.schema) if is_dictionary(field.type)
         ]
diff --git a/graphdatascience/tests/integration/test_graph_ops.py b/graphdatascience/tests/integration/test_graph_ops.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 import pytest
-from pandas import Series
+from pandas import DataFrame, Series
 
 from graphdatascience.graph_data_science import GraphDataScience
 from graphdatascience.query_runner.arrow_query_runner import ArrowQueryRunner
@@ -999,3 +999,16 @@ def test_graph_nodeProperty_stream_via_run_query(gds: GraphDataScience) -> None:
         )
     )
     assert {e for e in result["degree"]} == {1, 2, 3}
+
+
+def test_empty_relationships_stream(gds: GraphDataScience) -> None:  # regression: stream a relationship type that holds no relationships
+    G = gds.graph.construct(GRAPH_NAME, nodes=DataFrame({"nodeId": [0, 1]}))  # two nodes, no relationships supplied
+    gds.nodeSimilarity.filtered.mutate(  # presumably registers "SIMILAR" without producing any relationships -- TODO confirm
+        G, mutateRelationshipType="SIMILAR", mutateProperty="score", similarityCutoff=0.99
+    )
+
+    assert G.relationship_count() == 0  # precondition: the graph holds no relationships at all
+    assert G.relationship_types()  # ...yet at least one relationship type is listed
+
+    result = gds.graph.relationships.stream(G, ["SIMILAR"])
+    assert result.empty  # streaming must succeed and yield an empty result

Original file line number	Diff line number	Diff line change
`@@ -325,6 +325,10 @@ def create_graph_constructor(`
`325`	`325`	`)`
`326`	`326`
`327`	`327`	`def _sanitize_arrow_table(self, arrow_table: Table) -> Table:`
	`328`	`+ # empty columns cannot be used to build a chunked_array in pyarrow`
	`329`	`+ if len(arrow_table) == 0:`
	`330`	`+ return arrow_table`
	`331`	`+`
`328`	`332`	`dict_encoded_fields = [`
`329`	`333`	`(idx, field) for idx, field in enumerate(arrow_table.schema) if is_dictionary(field.type)`
`330`	`334`	`]`