Skip to content

Commit cef5ccf

Browse files
authored
Merge pull request #611 from FlorentinD/progress-bar-fix
Fix progress bars getting stuck at 0%
2 parents 6be6b3e + 1f6f789 commit cef5ccf

File tree

7 files changed

+30
-20
lines changed

7 files changed

+30
-20
lines changed

changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
* Fixed an issue where source and target IDs of relationships in heterogeneous OGBL graphs were not parsed correctly.
2121
* Fixed an issue where configuration parameters such as `aggregation` were ignored by `gds.graph.toUndirected`.
2222
* Fixed an issue where the `database` given for the `GraphDataScience` construction was not used for metadata retrieval, causing an exception to be raised if the default "neo4j" database was missing.
23+
* Fixed an issue where progress bars would not always complete.
2324

2425

2526
## Improvements

graphdatascience/query_runner/arrow_graph_constructor.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
from __future__ import annotations
2+
13
import concurrent
24
import json
35
import math
46
import warnings
57
from concurrent.futures import ThreadPoolExecutor
6-
from typing import Any, Dict, List, Optional
8+
from typing import Any, Dict, List, NoReturn, Optional
79

810
import numpy
911
import pyarrow.flight as flight
@@ -93,7 +95,7 @@ def _send_action(self, action_type: str, meta_data: Dict[str, Any]) -> None:
9395

9496
json.loads(collected_result[0].body.to_pybytes().decode())
9597

96-
def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm) -> None:
98+
def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm[NoReturn]) -> None:
9799
table = Table.from_pandas(df)
98100
batches = table.to_batches(self._chunk_size)
99101
flight_descriptor = {"name": self._graph_name, "entity_type": entity_type}
@@ -108,6 +110,8 @@ def _send_df(self, df: DataFrame, entity_type: str, pbar: tqdm) -> None:
108110
for partition in batches:
109111
writer.write_batch(partition)
110112
pbar.update(partition.num_rows)
113+
# Force a refresh to avoid the progress bar getting stuck at 0%
114+
pbar.refresh()
111115

112116
def _send_dfs(self, dfs: List[DataFrame], entity_type: str) -> None:
113117
desc = "Uploading Nodes" if entity_type == "node" else "Uploading Relationships"

graphdatascience/query_runner/neo4j_query_runner.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import time
66
import warnings
77
from concurrent.futures import Future, ThreadPoolExecutor, wait
8-
from typing import Any, Dict, List, Optional, Tuple, Union
8+
from typing import Any, Dict, List, NoReturn, Optional, Tuple, Union
99
from uuid import uuid4
1010

1111
import neo4j
@@ -226,14 +226,19 @@ def _forward_cypher_warnings(self, notification: Dict[str, Any]) -> None:
226226
self._logger.info(notification)
227227

228228
def _log(self, job_id: str, future: "Future[Any]", database: Optional[str] = None) -> None:
229-
pbar = None
229+
pbar: Optional[tqdm[NoReturn]] = None
230230
warn_if_failure = True
231231

232232
while wait([future], timeout=self._LOG_POLLING_INTERVAL).not_done:
233233
try:
234234
tier = "beta." if self._server_version < ServerVersion(2, 5, 0) else ""
235+
# we only retrieve the progress of the root task
235236
progress = self.run_cypher(
236-
f"CALL gds.{tier}listProgress('{job_id}') YIELD taskName, progress", database=database
237+
f"CALL gds.{tier}listProgress('{job_id}')"
238+
+ " YIELD taskName, progress"
239+
+ " RETURN taskName, progress"
240+
+ " LIMIT 1",
241+
database=database,
237242
)
238243
except Exception as e:
239244
# Do nothing if the procedure either:
@@ -248,17 +253,19 @@ def _log(self, job_id: str, future: "Future[Any]", database: Optional[str] = Non
248253
continue
249254

250255
progress_percent = progress["progress"][0]
251-
if not progress_percent == "n/a":
252-
task_name = progress["taskName"][0].split("|--")[-1][1:]
253-
pbar = pbar or tqdm(total=100, unit="%", desc=task_name)
254-
else:
256+
if progress_percent == "n/a":
255257
return
256258

259+
root_task_name = progress["taskName"][0].split("|--")[-1][1:]
260+
if not pbar:
261+
pbar = tqdm(total=100, unit="%", desc=root_task_name, maxinterval=self._LOG_POLLING_INTERVAL)
262+
257263
parsed_progress = float(progress_percent[:-1])
258264
pbar.update(parsed_progress - pbar.n)
259265

260266
if pbar:
261267
pbar.update(100 - pbar.n)
268+
pbar.refresh()
262269

263270
def set_database(self, database: str) -> None:
264271
self._database = database

mypy.ini

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@ ignore_missing_imports = True
88
[mypy-pyarrow.flight]
99
ignore_missing_imports = True
1010

11-
[mypy-tqdm.auto]
12-
ignore_missing_imports = True
13-
1411
[mypy-textdistance]
1512
ignore_missing_imports = True
1613

requirements/dev/dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ tox == 4.11.3
1010
types-setuptools == 68.1.0.1
1111
sphinx == 7.2.6
1212
types-requests
13+
types-tqdm

scripts/checkstyle

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ set -o errexit
44
set -o nounset
55
set -o pipefail
66

7-
black --check .
8-
isort --check .
9-
flake8
10-
mypy .
7+
python -m black --check .
8+
python -m isort --check .
9+
python -m flake8
10+
python -m mypy .
1111

1212
NOTEBOOKS="./examples/*.ipynb" # ./examples/dev/*.ipynb"
1313
for f in $NOTEBOOKS
1414
do
1515
NB=$(cat $f)
16-
FORMATTED_NB=$(jupyter nbconvert \
16+
FORMATTED_NB=$(python -m jupyter nbconvert \
1717
--clear-output \
1818
--stdout \
1919
--ClearOutputPreprocessor.enabled=True \

scripts/makestyle

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ set -o nounset
55
set -o pipefail
66
set -o xtrace
77

8-
black .
9-
isort .
10-
jupyter nbconvert \
8+
python -m black .
9+
python -m isort .
10+
python -m jupyter nbconvert \
1111
--clear-output \
1212
--inplace \
1313
--ClearOutputPreprocessor.enabled=True \

0 commit comments

Comments
 (0)