Skip to content

Commit c91c9b5

Browse files
committed
Remove external dependency on tqdm, use self-made progress bar
1 parent a7cd5b2 commit c91c9b5

File tree

3 files changed

+67
-3
lines changed

3 files changed

+67
-3
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import sys
2+
from typing import List, Any
3+
4+
5+
class _ProgressBar:
    """Iterator wrapper that draws a text progress bar on ``sys.stdout``.

    The bar looks like ``[=====     ] ( 5/10)``. It is drawn once on
    construction (showing zero progress) and redrawn after every item that
    is handed out by ``__next__``. When the wrapped iterator is exhausted
    the line is terminated with a newline exactly once.

    Args:
        items: Sized iterable to walk over; ``len(items)`` is taken once,
            at construction time, as the total for the counter.
        progress_width: Number of character cells between the brackets.
    """

    def __init__(self, items: List[Any], progress_width: int = 40):
        self._len = len(items)
        self._iter = iter(items)
        # Width of the total, used to right-align the running index so
        # every redraw has the same length and fully covers the old one.
        self._len_digits = len(str(self._len))
        self._progress_width = progress_width
        self._current_index = 0
        # Guards the final newline so repeated next() calls on an
        # exhausted bar do not emit further output.
        self._finished = False

        # Show the empty bar right away.
        self._render()

    def __iter__(self):
        return self

    def __next__(self):
        try:
            result = next(self._iter)
        except StopIteration:
            # Close the progress line once, then keep propagating.
            if not self._finished:
                self._finished = True
                sys.stdout.write("\n")
                sys.stdout.flush()
            raise

        self._current_index += 1
        self._render()
        return result

    def _render(self) -> None:
        """Redraw the whole progress line in place."""
        if self._len:
            # Integer arithmetic avoids float accumulation drift; min()
            # keeps the bar inside the brackets even if the underlying
            # sequence grew after its length was sampled.
            filled = min(
                self._progress_width,
                self._current_index * self._progress_width // self._len,
            )
        else:
            filled = 0

        padded_index = str(self._current_index).rjust(self._len_digits)
        progress_info = f" ({padded_index}/{self._len})"

        # A carriage return moves to the start of the line, so the redraw
        # does not depend on how many backspaces a terminal will honour.
        sys.stdout.write(
            "\r["
            + "=" * filled
            + " " * (self._progress_width - filled)
            + "]"
            + progress_info
        )
        sys.stdout.flush()
61+
62+
63+
def ProgressBar(list: List[Any], progress_width: int = 40):
    """Return an iterator over ``list`` that is backed by ``_ProgressBar``.

    Args:
        list: Items to iterate over; passed through to ``_ProgressBar``.
        progress_width: Bar width forwarded to ``_ProgressBar``.

    Returns:
        The result of calling ``iter()`` on the new ``_ProgressBar``.
    """
    bar = _ProgressBar(list, progress_width)
    return iter(bar)

pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828

2929
import os
3030
import string
31-
from tqdm import tqdm
3231

3332
try:
3433
import pymupdf as fitz # available with v1.24.3
@@ -37,6 +36,7 @@
3736

3837
from pymupdf4llm.helpers.get_text_lines import get_raw_lines, is_white
3938
from pymupdf4llm.helpers.multi_column import column_boxes
39+
from pymupdf4llm.helpers.progress import ProgressBar
4040

4141
if fitz.pymupdf_version_tuple < (1, 24, 2):
4242
raise NotImplementedError("PyMuPDF version 1.24.2 or later is needed.")
@@ -767,7 +767,7 @@ def get_page_output(doc, pno, margins, textflags):
767767
textflags = fitz.TEXT_MEDIABOX_CLIP | fitz.TEXT_CID_FOR_UNKNOWN_UNICODE
768768
if show_progress:
769769
print(f"Processing {doc.name}...")
770-
pages = tqdm(pages)
770+
pages = ProgressBar(pages)
771771
for pno in pages:
772772
page_output, images, tables, graphics = get_page_output(
773773
doc, pno, margins, textflags

pymupdf4llm/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
"Programming Language :: Python :: 3",
1414
"Topic :: Utilities",
1515
]
16-
requires = ["pymupdf>=1.24.2", "tqdm>=4.66.5"]
16+
requires = ["pymupdf>=1.24.2"]
1717

1818
setuptools.setup(
1919
name="pymupdf4llm",

0 commit comments

Comments
 (0)