@@ -188,9 +188,9 @@ def join_rects_phase1(bboxes):
188188
189189 Joins any rectangles that "touch" each other.
190190 This means that their intersection is valid (but may be empty).
191- To prefer vertical joins, we will ignore small horizontal gaps.
191+ To prefer vertical joins, we will ignore small gaps below a rectangle.
192192 """
193- delta = (0 , 0 , 0 , 2 ) # allow this gap below
193+ delta = (0 , 0 , 0 , 10 ) # allow this gap below
194194 prects = bboxes [:]
195195 new_rects = []
196196 while prects :
@@ -199,7 +199,7 @@ def join_rects_phase1(bboxes):
199199 while repeat :
200200 repeat = False
201201 for i in range (len (prects ) - 1 , 0 , - 1 ):
202- if not ((prect0 + delta ) & prects [i ]).is_empty :
202+ if ((prect0 + delta ) & prects [i ]).is_valid :
203203 prect0 |= prects [i ]
204204 del prects [i ]
205205 repeat = True
@@ -228,11 +228,11 @@ def join_rects_phase2(bboxes):
228228 prects .sort (key = lambda b : (b .x0 , b .y0 ))
229229 new_rects = [prects [0 ]] # initialize with first item
230230
231- # walk through the rest, top to bottom, thwn left to right
231+ # walk through the rest, top to bottom, then left to right
232232 for r in prects [1 :]:
233233 r0 = new_rects [- 1 ] # previous bbox
234234
235- # join if we have similar borders and are not to far down
235+ # join if we have similar borders and are not too far down
236236 if (
237237 abs (r .x0 - r0 .x0 ) <= 3
238238 and abs (r .x1 - r0 .x1 ) <= 3
@@ -259,6 +259,7 @@ def join_rects_phase3(bboxes, path_rects, cache):
259259 # do not join across columns
260260 if prect1 .x0 > prect0 .x1 or prect1 .x1 < prect0 .x0 :
261261 continue
262+
262263 # do not join different backgrounds
263264 if in_bbox_using_cache (prect0 , path_rects , cache ) != in_bbox_using_cache (prect1 , path_rects , cache ):
264265 continue
@@ -318,7 +319,7 @@ def join_rects_phase3(bboxes, path_rects, cache):
318319 sort_rects .sort (key = lambda sr : sr [1 ]) # by computed key
319320 new_rects = [sr [0 ] for sr in sort_rects ] # extract sorted rectangles
320321
321- # move shaded text rects into a separate list
322+ # move text rects with background color into a separate list
322323 shadow_rects = []
323324 # for i in range(len(new_rects) - 1, 0, -1):
324325 # r = +new_rects[i]
0 commit comments