Skip to content

Commit a1993f5

Browse files
committed
Add show_progress option to to_markdown()
1 parent 78952f1 commit a1993f5

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ def to_markdown(
229229
page_height=None,
230230
table_strategy="lines_strict",
231231
graphics_limit=None,
232+
show_progress=False,
232233
) -> str:
233234
"""Process the document and return the text of the selected pages.
234235
@@ -247,6 +248,7 @@ def to_markdown(
247248
page_height: (float) assumed page height if the page layout is variable.
248249
table_strategy: (str) table detection strategy to use.
249250
graphics_limit: (int) ignore pages with too many vector graphics.
251+
show_progress: (bool) print progress as each page is processed.
250252
251253
"""
252254
if write_images is False and force_text is False:
@@ -763,6 +765,8 @@ def get_page_output(doc, pno, margins, textflags):
763765
toc = doc.get_toc()
764766
textflags = fitz.TEXT_MEDIABOX_CLIP | fitz.TEXT_CID_FOR_UNKNOWN_UNICODE
765767
for pno in pages:
768+
print(f"Processing page {pno} of {len(pages)}...", end=" ", flush=True)
769+
766770
page_output, images, tables, graphics = get_page_output(
767771
doc, pno, margins, textflags
768772
)
@@ -783,6 +787,8 @@ def get_page_output(doc, pno, margins, textflags):
783787
"text": page_output,
784788
}
785789
)
790+
791+
print("Processed!")
786792

787793
return document_output
788794

0 commit comments

Comments
 (0)