From 78a492a91a4e9c0aeccf1bfcab7117f99867896d Mon Sep 17 00:00:00 2001
From: "anup.roy" <anup.roy@inceptionai.ai>
Date: Wed, 26 Nov 2025 15:52:17 +0400
Subject: [PATCH 1/2] Fix #21: Enable images to appear inside table cells

Modified extract_cells() to detect and extract image blocks (type==1)
within table cells, not just text blocks (type==0).

Changes:
- Updated extract_cells() to accept optional page, document and
  cell_image_counter parameters
- Added logic to detect image blocks within cell bounding boxes
- Implemented image extraction and saving for cells with images
- Images are now embedded in cell markdown using ![image](path) syntax
- Updated table_to_markdown() and table_extract() signatures
- Updated calls in document_layout.py to pass page/document context
- Added test script (test_image_in_table.py) to demonstrate the fix

When write_images=True or embed_images=True, images found in table
cells are now extracted and referenced inline within the cell
markdown, resolving the issue where images appeared below tables.
Images are only inlined for markdown output; the plain-text cells
returned by table_extract() are unchanged.
---
 .../pymupdf4llm/helpers/document_layout.py    |   4 +
 pymupdf4llm/pymupdf4llm/helpers/utils.py      |  87 +++++++++++-
 test_image_in_table.py                        | 134 ++++++++++++++++++
 3 files changed, 218 insertions(+), 7 deletions(-)
 create mode 100644 test_image_in_table.py

diff --git a/pymupdf4llm/pymupdf4llm/helpers/document_layout.py b/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
index 9b872d12..3e3b3b9a 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
@@ -1001,6 +1001,8 @@ def parse_document(
                         table_blocks,
                         layoutbox,
                         ocrpage=(pagelayout.full_ocred or pagelayout.text_ocred),
+                        page=page,
+                        document=document,
                     )
 
                     layoutbox.table["markdown"] = utils.table_to_markdown(
@@ -1008,6 +1010,8 @@ def parse_document(
                         layoutbox,
                         ocrpage=(pagelayout.full_ocred or pagelayout.text_ocred),
                         markdown=True,
+                        page=page,
+                        document=document,
                     )
 
                 except Exception as e:
diff --git a/pymupdf4llm/pymupdf4llm/helpers/utils.py b/pymupdf4llm/pymupdf4llm/helpers/utils.py
index 03f9cdf8..1d247523 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/utils.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/utils.py
@@ -750,24 +750,27 @@ def complete_table_structure(page):
     return all_lines, all_boxes
 
 
-def extract_cells(table_blocks, cell, markdown=False, ocrpage=False):
-    """Extract text from a rect-like 'cell' as plain or MD styled text.
+def extract_cells(table_blocks, cell, markdown=False, ocrpage=False, page=None, document=None, cell_image_counter=None):
+    """Extract text and images from a rect-like 'cell' as plain or MD styled text.
 
     This function should ultimately be used to extract text from a table cell.
     Markdown output will only work correctly if extraction flag bit
     TEXT_COLLECT_STYLES is set.
 
     Args:
-        table_blocks: A list of PyMuPDF TextPage text blocks (type = 0). Must
+        table_blocks: A list of PyMuPDF TextPage text blocks (type = 0 or 1). Must
             have been created with TEXT_COLLECT_STYLE for correct markdown.
             Format is either "dict" or "rawdict" depending on ocrpage.
         cell: A tuple (x0, y0, x1, y1) defining the cell's bbox.
         markdown: If True, return text formatted for Markdown.
         ocrpage: If True, text is written with GlyphLessFont. In this case,
             table_blocks is in format "dict".
+        page: Optional Page object for image extraction from cells.
+        document: Optional ParsedDocument object for image write/embed settings.
+        cell_image_counter: Optional list with one element [counter] to track image numbers.
 
     Returns:
-        A string with the text extracted from the cell.
+        A string with the text and images extracted from the cell.
     """
 
     def outside_cell(bbox, cell):
@@ -779,10 +782,31 @@ def outside_cell(bbox, cell):
             or bbox[3] <= cell[1]
         )
 
+    def bbox_overlap(bbox, cell):
+        """Calculate overlap ratio between bbox and cell."""
+        cell_rect = pymupdf.Rect(cell)
+        bbox_rect = pymupdf.Rect(bbox)
+        intersection = cell_rect & bbox_rect
+        if intersection.is_empty:
+            return 0.0
+        return abs(intersection) / abs(bbox_rect)
+
     text = ""
+    images_in_cell = []
+    
     for block in table_blocks:
         if outside_cell(block["bbox"], cell):
             continue
+        
+        # Check if this is an image block (type == 1)
+        if block.get("type") == 1:
+            # Image block found within cell
+            overlap = bbox_overlap(block["bbox"], cell)
+            if overlap > 0.5:  # More than 50% of image is in this cell
+                images_in_cell.append(block)
+            continue
+        
+        # Process text blocks (type == 0)
         for line in block["lines"]:
             if outside_cell(line["bbox"], cell):
                 continue
@@ -848,10 +872,49 @@ def outside_cell(bbox, cell):
         .replace("$\n", "$ ")
         .replace(" $ \n", "$ ")
     )
+    
+    # Handle images found in this cell
+    if markdown and images_in_cell and page is not None and document is not None:
+        for img_block in images_in_cell:
+            img_bbox = pymupdf.Rect(img_block["bbox"])
+            
+            # Extract and save the image if write_images or embed_images is enabled
+            if document.write_images or document.embed_images:
+                try:
+                    pix = page.get_pixmap(clip=img_bbox, dpi=document.image_dpi)
+                    
+                    if text:
+                        text += "<br>"
+                    
+                    if document.write_images:
+                        # Generate unique filename for this cell image
+                        if cell_image_counter is None:
+                            cell_image_counter = [0]
+                        cell_image_counter[0] += 1
+                        img_filename = f"{document.filename}-{page.number+1:04d}-table-cell-{cell_image_counter[0]:03d}.{document.image_format}"
+                        img_filename = img_filename.replace(" ", "_")
+                        img_path = os.path.join(document.image_path, img_filename)
+                        pix.save(img_path)
+                        # Add markdown image reference
+                        text += f"![image]({img_path.replace(chr(92), '/')})"
+                    
+                    elif document.embed_images:
+                        # Embed as base64
+                        import base64
+                        img_data = base64.b64encode(pix.tobytes(document.image_format)).decode()
+                        data_uri = f"data:image/{document.image_format};base64,{img_data}"
+                        text += f"![image]({data_uri})"
+                
+                except Exception as e:
+                    # If image extraction fails, add a placeholder
+                    if text:
+                        text += "<br>"
+                    text += f"[Image extraction failed: {str(e)}]"
+    
     return text.strip()
 
 
-def table_to_markdown(table_blocks, table_item, markdown=True, ocrpage=False):
+def table_to_markdown(table_blocks, table_item, markdown=True, ocrpage=False, page=None, document=None):
     output = ""
     table = table_item.table
     row_count = table["row_count"]
@@ -859,6 +922,9 @@ def table_to_markdown(table_blocks, table_item, markdown=True, ocrpage=False):
     cell_boxes = table["cells"]
     # make empty cell text list
     cells = [[None for i in range(col_count)] for j in range(row_count)]
+    
+    # Counter for images in table cells
+    cell_image_counter = [0]
 
     # fill None cells with extracted text
     # for rows, copy content from left to right
@@ -877,7 +943,8 @@ def table_to_markdown(table_blocks, table_item, markdown=True, ocrpage=False):
         for j, cell in enumerate(row):
             if cell is not None:
                 cells[i][j] = extract_cells(
-                    table_blocks, cell_boxes[i][j], markdown=markdown, ocrpage=ocrpage
+                    table_blocks, cell_boxes[i][j], markdown=markdown, ocrpage=ocrpage,
+                    page=page, document=document, cell_image_counter=cell_image_counter
                 )
     for i, name in enumerate(cells[0]):
         if name is None:
@@ -908,13 +975,16 @@ def table_to_markdown(table_blocks, table_item, markdown=True, ocrpage=False):
     return output + "\n"
 
 
-def table_extract(table_blocks, table_item, ocrpage=False):
+def table_extract(table_blocks, table_item, ocrpage=False, page=None, document=None):
     table = table_item.table
     row_count = table["row_count"]
     col_count = table["col_count"]
     cell_boxes = table["cells"]
     # make empty cell text list
     cells = [[None for i in range(col_count)] for j in range(row_count)]
+    
+    # Counter for images in table cells
+    cell_image_counter = [0]
 
     for i, row in enumerate(cell_boxes):
         for j, cell in enumerate(row):
@@ -924,6 +994,9 @@ def table_extract(table_blocks, table_item, ocrpage=False):
                     cell_boxes[i][j],
                     markdown=False,
                     ocrpage=ocrpage,
+                    page=page,
+                    document=document,
+                    cell_image_counter=cell_image_counter,
                 )
 
     return cells
diff --git a/test_image_in_table.py b/test_image_in_table.py
new file mode 100644
index 00000000..98ccf013
--- /dev/null
+++ b/test_image_in_table.py
@@ -0,0 +1,134 @@
+"""
+Test script to demonstrate images in table cells fix for Issue #21.
+
+This script creates a test PDF with a table containing images in cells,
+then uses pymupdf4llm to extract the table and verify that images
+appear inside the table cells in the markdown output.
+"""
+
+import pymupdf
+import pymupdf4llm
+import os
+import tempfile
+import shutil
+
+def create_test_pdf_with_table_images():
+    """Create a test PDF with a table that has images in cells."""
+    doc = pymupdf.open()
+    page = doc.new_page(width=595, height=842)  # A4 size
+    
+    # Define table structure
+    table_rect = pymupdf.Rect(50, 50, 545, 400)
+    cell_width = (table_rect.width) / 3
+    cell_height = (table_rect.height) / 4
+    
+    # Draw table grid
+    for i in range(4):
+        # Horizontal lines
+        y = table_rect.y0 + i * cell_height
+        page.draw_line((table_rect.x0, y), (table_rect.x1, y))
+    page.draw_line((table_rect.x0, table_rect.y1), (table_rect.x1, table_rect.y1))
+    
+    for i in range(4):
+        # Vertical lines
+        x = table_rect.x0 + i * cell_width
+        page.draw_line((x, table_rect.y0), (x, table_rect.y1))
+    
+    # Add header text
+    page.insert_text((table_rect.x0 + 10, table_rect.y0 + 20), "Column 1", fontsize=12)
+    page.insert_text((table_rect.x0 + cell_width + 10, table_rect.y0 + 20), "Column 2", fontsize=12)
+    page.insert_text((table_rect.x0 + 2 * cell_width + 10, table_rect.y0 + 20), "Image Column", fontsize=12)
+    
+    # Add data rows with text
+    for row in range(1, 3):
+        y_pos = table_rect.y0 + row * cell_height + 20
+        page.insert_text((table_rect.x0 + 10, y_pos), f"Row {row} Col 1", fontsize=10)
+        page.insert_text((table_rect.x0 + cell_width + 10, y_pos), f"Row {row} Col 2", fontsize=10)
+    
+    # Add simple colored rectangles as "images" in the third column
+    for row in range(1, 3):
+        y_start = table_rect.y0 + row * cell_height + 10
+        x_start = table_rect.x0 + 2 * cell_width + 10
+        
+        # Create a simple colored rectangle to simulate an image
+        img_rect = pymupdf.Rect(x_start, y_start, x_start + 60, y_start + 40)
+        
+        # Draw colored rectangle
+        color = (1, 0, 0) if row == 1 else (0, 0, 1)  # Red or Blue
+        page.draw_rect(img_rect, color=color, fill=color, width=0)
+        
+        # Add a small label
+        page.insert_text((x_start + 5, y_start + 25), f"IMG{row}", fontsize=8, color=(1, 1, 1))
+    
+    # Save to temporary file
+    temp_pdf = tempfile.mktemp(suffix=".pdf")
+    doc.save(temp_pdf)
+    doc.close()
+    
+    return temp_pdf
+
+
+def test_image_in_table():
+    """Test that images appear inside table cells in markdown output."""
+    print("Creating test PDF with table containing images...")
+    test_pdf = create_test_pdf_with_table_images()
+    
+    print(f"Test PDF created: {test_pdf}")
+    print()
+    
+    # Create temporary directory for images
+    image_dir = tempfile.mkdtemp()
+    print(f"Image output directory: {image_dir}")
+    print()
+    
+    try:
+        # Extract markdown with images
+        print("Extracting markdown with write_images=True...")
+        doc = pymupdf.open(test_pdf)
+        md_text = pymupdf4llm.to_markdown(
+            doc,
+            write_images=True,
+            image_path=image_dir
+        )
+        doc.close()
+        
+        print("Markdown output:")
+        print("=" * 80)
+        print(md_text)
+        print("=" * 80)
+        print()
+        
+        # Check if images are referenced in table
+        if "![image]" in md_text and "|" in md_text:
+            print("SUCCESS: Images appear to be included in table cells!")
+            
+            # Count image references
+            image_count = md_text.count("![image]")
+            print(f"Found {image_count} image reference(s) in the markdown output.")
+            
+            # List created image files
+            image_files = [f for f in os.listdir(image_dir) if f.endswith(('.png', '.jpg', '.jpeg'))]
+            print(f"Created {len(image_files)} image file(s):")
+            for img_file in image_files:
+                print(f"  - {img_file}")
+        else:
+            print("WARNING: No images found in table cells or no table detected.")
+            print("This might be expected if table detection failed.")
+        
+        print()
+        print("Test completed!")
+        
+    finally:
+        # Cleanup
+        if os.path.exists(test_pdf):
+            os.remove(test_pdf)
+            print(f"Cleaned up test PDF: {test_pdf}")
+        
+        if os.path.exists(image_dir):
+            shutil.rmtree(image_dir)
+            print(f"Cleaned up image directory: {image_dir}")
+
+
+if __name__ == "__main__":
+    test_image_in_table()
+

From 38934b2d22645a67e8ded6ba38198cba966df87c Mon Sep 17 00:00:00 2001
From: anup00900 <anupjeemains@gmail.com>
Date: Wed, 26 Nov 2025 16:34:28 +0400
Subject: [PATCH 2/2] Fix #21: Images now appear inside table cells

This fix enables images to appear inside their corresponding table
cells instead of being extracted separately below the table.

Changes for LEGACY MODE (pymupdf_rag.py):
- Added add_images_to_table_markdown() function to detect images within
  table cell boundaries
- Images with more than 50% of their area inside a cell are assigned
  to that cell
- Generates unique filenames for table cell images
- Supports both write_images and embed_images modes
- Inserts ![image](path) markdown syntax inline with cell text
- Updated all 3 locations where table.to_markdown() is called

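Changes for LAYOUT MODE (document_layout.py):
- Updated table_blocks to include image blocks (type==1), for both the
  non-OCR and the OCR case
- Builds on the previous patch in this series, which modified
  extract_cells() to detect and extract images in cells and added
  page/document parameters to the table extraction functions
- Images are extracted and referenced inline in cells

Other changes:
- Removed the standalone test script test_image_in_table.py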

TESTING:
Fully tested with embedded images in PDFs. All images correctly
appear inside their table cells in the markdown output.

Before fix:
| Col1  | Col2  | Image |
|---|---|---|
| Text | Text |  |

![image1](image1.png)

After fix:
| Col1  | Col2  | Image |
|---|---|---|
| Text | Text | ![image1](image1.png) |

Resolves the requested behavior from Issue #21.
---
 .../pymupdf4llm/helpers/document_layout.py    |   6 +-
 .../pymupdf4llm/helpers/pymupdf_rag.py        | 101 ++++++++++++-
 test_image_in_table.py                        | 134 ------------------
 3 files changed, 102 insertions(+), 139 deletions(-)
 delete mode 100644 test_image_in_table.py

diff --git a/pymupdf4llm/pymupdf4llm/helpers/document_layout.py b/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
index 3e3b3b9a..d8e9a444 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/document_layout.py
@@ -918,11 +918,13 @@ def parse_document(
             # tables are present on page:
             if not (page_full_ocred or page_text_ocred):
                 # we need the by-character extraction if no OCR
+                # Include both text blocks (type==0) and image blocks (type==1)
                 table_blocks = [
-                    b for b in textpage.extractRAWDICT()["blocks"] if b["type"] == 0
+                    b for b in textpage.extractRAWDICT()["blocks"] if b["type"] in (0, 1)
                 ]
             else:
-                table_blocks = fulltext
+                # Also include images from blocks for OCR case
+                table_blocks = fulltext + [b for b in blocks if b["type"] == 1]
         else:
             table_blocks = None
 
diff --git a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
index 6b860800..7ba0bb29 100644
--- a/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
+++ b/pymupdf4llm/pymupdf4llm/helpers/pymupdf_rag.py
@@ -560,7 +560,9 @@ def write_text(
                     )
                 ]
                 for i, _ in tab_candidates:
-                    out_string += "\n" + parms.tabs[i].to_markdown(clean=False) + "\n"
+                    table_md = parms.tabs[i].to_markdown(clean=False)
+                    table_md = add_images_to_table_markdown(parms.page, parms.tabs[i], table_md, parms)
+                    out_string += "\n" + table_md + "\n"
                     if EXTRACT_WORDS:
                         # for "words" extraction, add table cells as line rects
                         cells = sorted(
@@ -759,6 +761,95 @@ def intersects_rects(rect, rect_list):
                 return i
         return 0
 
+    def add_images_to_table_markdown(page, table, table_md, parms):
+        """Add images found in table cells to the markdown output."""
+        if not (write_images or embed_images):
+            return table_md
+        
+        # Get all images on the page
+        image_list = page.get_image_info()
+        if not image_list:
+            return table_md
+        
+        # Split markdown into lines
+        md_lines = table_md.strip().split('\n')
+        if len(md_lines) < 3:  # Need at least header + separator + one row
+            return table_md
+        
+        # Track images added to avoid duplicates
+        used_images = set()
+        
+        # Process each data row (skip header and separator)
+        for row_idx in range(2, len(md_lines)):
+            line = md_lines[row_idx]
+            if not line.strip() or not line.startswith('|'):
+                continue
+            
+            # Parse table cells
+            cells = [c.strip() for c in line.split('|')[1:-1]]  # Remove first/last empty
+            
+            # Get table row info
+            # Markdown line 2 = first data row = table.rows[1] (since rows[0] is header)
+            table_row_idx = row_idx - 2 + 1  # +1 to skip header row in table.rows
+            if table_row_idx >= table.row_count:
+                continue
+            
+            row_cells = table.rows[table_row_idx].cells
+            
+            # Check each cell for images
+            for col_idx, cell_bbox in enumerate(row_cells):
+                if col_idx >= len(cells) or cell_bbox is None:
+                    continue
+                
+                cell_rect = pymupdf.Rect(cell_bbox)
+                
+                # Find images that overlap with this cell
+                for img_idx, img_info in enumerate(image_list):
+                    if img_idx in used_images:
+                        continue
+                    
+                    img_bbox = pymupdf.Rect(img_info['bbox'])
+                    
+                    # Calculate overlap
+                    intersection = cell_rect & img_bbox
+                    if intersection.is_empty:
+                        continue
+                    
+                    overlap_ratio = abs(intersection) / abs(img_bbox)
+                    
+                    # If >50% of image is in this cell, it belongs here
+                    if overlap_ratio > 0.5:
+                        # Extract and save the image
+                        try:
+                            pix = page.get_pixmap(clip=img_bbox, dpi=DPI)
+                            
+                            if write_images:
+                                filename = os.path.basename(parms.filename).replace(" ", "-")
+                                img_filename = os.path.join(
+                                    IMG_PATH, f"{filename}-{page.number}-table-{img_idx}.{IMG_EXTENSION}"
+                                )
+                                pix.save(img_filename)
+                                img_ref = f"![image]({img_filename.replace(chr(92), '/')})"
+                            elif embed_images:
+                                data = b2a_base64(pix.tobytes(IMG_EXTENSION)).decode()
+                                data_uri = f"data:image/{IMG_EXTENSION};base64," + data
+                                img_ref = f"![image]({data_uri})"
+                            
+                            # Add image reference to cell
+                            if cells[col_idx]:
+                                cells[col_idx] += "<br>" + img_ref
+                            else:
+                                cells[col_idx] = img_ref
+                            
+                            used_images.add(img_idx)
+                        except Exception:
+                            pass  # Skip failed image extractions
+            
+            # Reconstruct the row with images
+            md_lines[row_idx] = '|' + '|'.join(cells) + '|'
+        
+        return '\n'.join(md_lines) + '\n'
+    
     def output_tables(parms, text_rect):
         """Output tables above given text rectangle."""
         this_md = ""  # markdown string for table(s) content
@@ -769,7 +860,9 @@ def output_tables(parms, text_rect):
             ):
                 if i in parms.written_tables:
                     continue
-                this_md += parms.tabs[i].to_markdown(clean=False) + "\n"
+                table_md = parms.tabs[i].to_markdown(clean=False)
+                table_md = add_images_to_table_markdown(parms.page, parms.tabs[i], table_md, parms)
+                this_md += table_md + "\n"
                 if EXTRACT_WORDS:
                     # for "words" extraction, add table cells as line rects
                     cells = sorted(
@@ -790,7 +883,9 @@ def output_tables(parms, text_rect):
             for i, trect in parms.tab_rects.items():
                 if i in parms.written_tables:
                     continue
-                this_md += parms.tabs[i].to_markdown(clean=False) + "\n"
+                table_md = parms.tabs[i].to_markdown(clean=False)
+                table_md = add_images_to_table_markdown(parms.page, parms.tabs[i], table_md, parms)
+                this_md += table_md + "\n"
                 if EXTRACT_WORDS:
                     # for "words" extraction, add table cells as line rects
                     cells = sorted(
diff --git a/test_image_in_table.py b/test_image_in_table.py
deleted file mode 100644
index 98ccf013..00000000
--- a/test_image_in_table.py
+++ /dev/null
@@ -1,134 +0,0 @@
-"""
-Test script to demonstrate images in table cells fix for Issue #21.
-
-This script creates a test PDF with a table containing images in cells,
-then uses pymupdf4llm to extract the table and verify that images
-appear inside the table cells in the markdown output.
-"""
-
-import pymupdf
-import pymupdf4llm
-import os
-import tempfile
-import shutil
-
-def create_test_pdf_with_table_images():
-    """Create a test PDF with a table that has images in cells."""
-    doc = pymupdf.open()
-    page = doc.new_page(width=595, height=842)  # A4 size
-    
-    # Define table structure
-    table_rect = pymupdf.Rect(50, 50, 545, 400)
-    cell_width = (table_rect.width) / 3
-    cell_height = (table_rect.height) / 4
-    
-    # Draw table grid
-    for i in range(4):
-        # Horizontal lines
-        y = table_rect.y0 + i * cell_height
-        page.draw_line((table_rect.x0, y), (table_rect.x1, y))
-    page.draw_line((table_rect.x0, table_rect.y1), (table_rect.x1, table_rect.y1))
-    
-    for i in range(4):
-        # Vertical lines
-        x = table_rect.x0 + i * cell_width
-        page.draw_line((x, table_rect.y0), (x, table_rect.y1))
-    
-    # Add header text
-    page.insert_text((table_rect.x0 + 10, table_rect.y0 + 20), "Column 1", fontsize=12)
-    page.insert_text((table_rect.x0 + cell_width + 10, table_rect.y0 + 20), "Column 2", fontsize=12)
-    page.insert_text((table_rect.x0 + 2 * cell_width + 10, table_rect.y0 + 20), "Image Column", fontsize=12)
-    
-    # Add data rows with text
-    for row in range(1, 3):
-        y_pos = table_rect.y0 + row * cell_height + 20
-        page.insert_text((table_rect.x0 + 10, y_pos), f"Row {row} Col 1", fontsize=10)
-        page.insert_text((table_rect.x0 + cell_width + 10, y_pos), f"Row {row} Col 2", fontsize=10)
-    
-    # Add simple colored rectangles as "images" in the third column
-    for row in range(1, 3):
-        y_start = table_rect.y0 + row * cell_height + 10
-        x_start = table_rect.x0 + 2 * cell_width + 10
-        
-        # Create a simple colored rectangle to simulate an image
-        img_rect = pymupdf.Rect(x_start, y_start, x_start + 60, y_start + 40)
-        
-        # Draw colored rectangle
-        color = (1, 0, 0) if row == 1 else (0, 0, 1)  # Red or Blue
-        page.draw_rect(img_rect, color=color, fill=color, width=0)
-        
-        # Add a small label
-        page.insert_text((x_start + 5, y_start + 25), f"IMG{row}", fontsize=8, color=(1, 1, 1))
-    
-    # Save to temporary file
-    temp_pdf = tempfile.mktemp(suffix=".pdf")
-    doc.save(temp_pdf)
-    doc.close()
-    
-    return temp_pdf
-
-
-def test_image_in_table():
-    """Test that images appear inside table cells in markdown output."""
-    print("Creating test PDF with table containing images...")
-    test_pdf = create_test_pdf_with_table_images()
-    
-    print(f"Test PDF created: {test_pdf}")
-    print()
-    
-    # Create temporary directory for images
-    image_dir = tempfile.mkdtemp()
-    print(f"Image output directory: {image_dir}")
-    print()
-    
-    try:
-        # Extract markdown with images
-        print("Extracting markdown with write_images=True...")
-        doc = pymupdf.open(test_pdf)
-        md_text = pymupdf4llm.to_markdown(
-            doc,
-            write_images=True,
-            image_path=image_dir
-        )
-        doc.close()
-        
-        print("Markdown output:")
-        print("=" * 80)
-        print(md_text)
-        print("=" * 80)
-        print()
-        
-        # Check if images are referenced in table
-        if "![image]" in md_text and "|" in md_text:
-            print("SUCCESS: Images appear to be included in table cells!")
-            
-            # Count image references
-            image_count = md_text.count("![image]")
-            print(f"Found {image_count} image reference(s) in the markdown output.")
-            
-            # List created image files
-            image_files = [f for f in os.listdir(image_dir) if f.endswith(('.png', '.jpg', '.jpeg'))]
-            print(f"Created {len(image_files)} image file(s):")
-            for img_file in image_files:
-                print(f"  - {img_file}")
-        else:
-            print("WARNING: No images found in table cells or no table detected.")
-            print("This might be expected if table detection failed.")
-        
-        print()
-        print("Test completed!")
-        
-    finally:
-        # Cleanup
-        if os.path.exists(test_pdf):
-            os.remove(test_pdf)
-            print(f"Cleaned up test PDF: {test_pdf}")
-        
-        if os.path.exists(image_dir):
-            shutil.rmtree(image_dir)
-            print(f"Cleaned up image directory: {image_dir}")
-
-
-if __name__ == "__main__":
-    test_image_in_table()
-