@@ -168,9 +168,9 @@ def join_rects_phase1(bboxes):
168168
169169 Joins any rectangles that "touch" each other.
170170 This means that their intersection is valid (but may be empty).
171- To prefer vertical joins, we will ignore small horizontal gaps.
171+ To prefer vertical joins, we will ignore small gaps.
172172 """
173- delta = (0 , 0 , 0 , 2 ) # allow this gap below
173+ delta = (0 , 0 , 0 , 10 ) # allow this gap below
174174 prects = bboxes [:]
175175 new_rects = []
176176 while prects :
@@ -179,7 +179,7 @@ def join_rects_phase1(bboxes):
179179 while repeat :
180180 repeat = False
181181 for i in range (len (prects ) - 1 , 0 , - 1 ):
182- if not ((prect0 + delta ) & prects [i ]).is_empty :
182+ if ((prect0 + delta ) & prects [i ]).is_valid :
183183 prect0 |= prects [i ]
184184 del prects [i ]
185185 repeat = True
@@ -208,11 +208,11 @@ def join_rects_phase2(bboxes):
208208 prects .sort (key = lambda b : (b .x0 , b .y0 ))
209209 new_rects = [prects [0 ]] # initialize with first item
210210
211- # walk through the rest, top to bottom, thwn left to right
211+ # walk through the rest, top to bottom, then left to right
212212 for r in prects [1 :]:
213213 r0 = new_rects [- 1 ] # previous bbox
214214
215- # join if we have similar borders and are not to far down
215+ # join if we have similar borders and are not too far down
216216 if (
217217 abs (r .x0 - r0 .x0 ) <= 3
218218 and abs (r .x1 - r0 .x1 ) <= 3
@@ -239,7 +239,7 @@ def join_rects_phase3(bboxes, path_rects):
239239 # do not join across columns
240240 if prect1 .x0 > prect0 .x1 or prect1 .x1 < prect0 .x0 :
241241 continue
242- # do not join different backgrounds
242+ # do not join areas with a different background
243243 if in_bbox (prect0 , path_rects ) != in_bbox (prect1 , path_rects ):
244244 continue
245245 temp = prect0 | prect1
@@ -297,7 +297,7 @@ def join_rects_phase3(bboxes, path_rects):
297297 sort_rects .sort (key = lambda sr : sr [1 ]) # by computed key
298298 new_rects = [sr [0 ] for sr in sort_rects ] # extract sorted rectangles
299299
300- # move shaded text rects into a separate list
300+ # move text rects with background color into a separate list
301301 shadow_rects = []
302302 # for i in range(len(new_rects) - 1, 0, -1):
303303 # r = +new_rects[i]
0 commit comments