2323
2424# Just for local readability. TODO: later switch to real type declarations for these aliases.
2525_Op = Literal ["+" , "-" ]
26- _PK = Any
26+ _PK = Sequence [ Any ]
2727_Row = Tuple [Any ]
2828
2929
@@ -34,24 +34,27 @@ def diff_sets(
3434 json_cols : dict = None ,
3535 columns1 : Sequence [str ],
3636 columns2 : Sequence [str ],
37+ key_columns1 : Sequence [str ],
38+ key_columns2 : Sequence [str ],
3739 ignored_columns1 : Collection [str ],
3840 ignored_columns2 : Collection [str ],
3941) -> Iterator :
4042 # Diff only on the columns of interest (PKs + relevant, minus ignored), but yield the full rows, ignored columns included.
4143 sa : Set [_Row ] = {tuple (val for col , val in safezip (columns1 , row ) if col not in ignored_columns1 ) for row in a }
4244 sb : Set [_Row ] = {tuple (val for col , val in safezip (columns2 , row ) if col not in ignored_columns2 ) for row in b }
4345
44- # The first item is always the key (see TableDiffer.relevant_columns)
45- # TODO update when we add compound keys to hashdiff
46+ # The first items are always the PK (see TableSegment.relevant_columns)
4647 diffs_by_pks : Dict [_PK , List [Tuple [_Op , _Row ]]] = defaultdict (list )
4748 for row in a :
49+ pk : _PK = tuple (val for col , val in zip (key_columns1 , row ))
4850 cutrow : _Row = tuple (val for col , val in zip (columns1 , row ) if col not in ignored_columns1 )
4951 if cutrow not in sb :
50- diffs_by_pks [row [ 0 ] ].append (("-" , row ))
52+ diffs_by_pks [pk ].append (("-" , row ))
5153 for row in b :
54+ pk : _PK = tuple (val for col , val in zip (key_columns2 , row ))
5255 cutrow : _Row = tuple (val for col , val in zip (columns2 , row ) if col not in ignored_columns2 )
5356 if cutrow not in sa :
54- diffs_by_pks [row [ 0 ] ].append (("+" , row ))
57+ diffs_by_pks [pk ].append (("+" , row ))
5558
5659 warned_diff_cols = set ()
5760 for diffs in (diffs_by_pks [pk ] for pk in sorted (diffs_by_pks )):
@@ -232,6 +235,8 @@ def _bisect_and_diff_segments(
232235 json_cols = json_cols ,
233236 columns1 = table1 .relevant_columns ,
234237 columns2 = table2 .relevant_columns ,
238+ key_columns1 = table1 .key_columns ,
239+ key_columns2 = table2 .key_columns ,
235240 ignored_columns1 = self .ignored_columns1 ,
236241 ignored_columns2 = self .ignored_columns2 ,
237242 )
0 commit comments