Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 21 additions & 17 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
import pandas._testing as tm


@pytest.fixture
def reordered_frame(float_frame):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you inline this logic in the two tests that use this fixture instead?

head, tail = float_frame[:5], float_frame[5:]
combined = head.combine_first(tail)
reordered_frame = float_frame.reindex(combined.index)
return reordered_frame


class TestDataFrameCombineFirst:
def test_combine_first_mixed(self):
a = Series(["a", "b"], index=range(2))
Expand All @@ -30,17 +38,18 @@ def test_combine_first_mixed(self):
combined = f.combine_first(g)
tm.assert_frame_equal(combined, exp)

def test_combine_first(self, float_frame):
# disjoint
def test_combine_first_disjoint(self, float_frame):
head, tail = float_frame[:5], float_frame[5:]

combined = head.combine_first(tail)
reordered_frame = float_frame.reindex(combined.index)

tm.assert_frame_equal(combined, reordered_frame)
tm.assert_index_equal(combined.columns, float_frame.columns)
tm.assert_series_equal(combined["A"], reordered_frame["A"])
tm.assert_series_equal(combined["A"].reindex(head.index), head["A"])
tm.assert_series_equal(combined["A"].reindex(tail.index), tail["A"])

# same index
def test_combine_first_same_index(self, float_frame):
fcopy = float_frame.copy()
fcopy["A"] = 1
del fcopy["C"]
Expand All @@ -56,36 +65,31 @@ def test_combine_first(self, float_frame):
tm.assert_series_equal(combined["C"], fcopy2["C"])
tm.assert_series_equal(combined["D"], fcopy["D"])

# overlap
head, tail = reordered_frame[:10].copy(), reordered_frame
def test_combine_first_overlap(self, reordered_frame):
head, tail = reordered_frame[:10].copy(), reordered_frame.copy()
head["A"] = 1

combined = head.combine_first(tail)
assert (combined["A"][:10] == 1).all()

# reverse overlap
def test_combine_first_reverse_overlap(self, reordered_frame):
head, tail = reordered_frame[:10].copy(), reordered_frame.copy()
tail.iloc[:10, tail.columns.get_loc("A")] = 0
combined = tail.combine_first(head)
assert (combined["A"][:10] == 0).all()

# no overlap
Copy link
Contributor Author

@Tsmith-77 Tsmith-77 Dec 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The no-overlap case had the same setup as the disjoint case, just with assertions that had a different focus, so I moved those assertions up into test_combine_first_disjoint and then removed the now-redundant no-overlap code.

f = float_frame[:10]
g = float_frame[10:]
combined = f.combine_first(g)
tm.assert_series_equal(combined["A"].reindex(f.index), f["A"])
tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])

# corner cases
def test_combine_first_with_empty(self, float_frame):
comb = float_frame.combine_first(DataFrame())
tm.assert_frame_equal(comb, float_frame)

comb = DataFrame().combine_first(float_frame)
tm.assert_frame_equal(comb, float_frame.sort_index())

def test_combine_first_with_new_index(self, float_frame):
comb = float_frame.combine_first(DataFrame(index=["faz", "boo"]))
assert "faz" in comb.index

# #2525
def test_combine_first_column_union(self):
# GH#2525
df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)])
df2 = DataFrame(columns=["b"])
result = df.combine_first(df2)
Expand Down
Loading