Skip to content

Commit 1c69e29

Browse files
test transform ordered cat func
1 parent ff48847 commit 1c69e29

File tree

1 file changed

+138
-0
lines changed

1 file changed

+138
-0
lines changed

pandas/tests/methods/corr.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Tests for core/methods/corr.py
3+
"""
4+
5+
import pytest
6+
import numpy as np
7+
from pandas import DataFrame, Series, Categorical
8+
import pandas._testing as tm
9+
from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
10+
11+
12+
@pytest.mark.parametrize(
    ("input_df", "expected_df"),
    [
        pytest.param(
            # 1) Simple: two ordered categorical columns (with and without None)
            DataFrame(
                {
                    "ord_cat": Series(
                        Categorical(
                            ["low", "m", "h", "vh"],
                            categories=["low", "m", "h", "vh"],
                            ordered=True,
                        )
                    ),
                    "ord_cat_none": Series(
                        Categorical(
                            ["low", "m", "h", None],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                }
            ),
            DataFrame(
                {
                    # codes: low=0, m=1, h=2, vh=3
                    "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
                    # codes: low=0, m=1, h=2, None -> NaN (hence float values)
                    "ord_cat_none": Series([0.0, 1.0, 2.0, np.nan]),
                }
            ),
            id="ordered-categoricals-basic",
        ),
        pytest.param(
            # 2) Mixed dtypes: only the ordered categorical should change
            DataFrame(
                {
                    "ordered": Series(
                        Categorical(
                            ["a", "c", "b"],
                            categories=["a", "b", "c"],
                            ordered=True,
                        )
                    ),
                    "unordered": Series(
                        Categorical(["x", "y", "x"], ordered=False)
                    ),
                    "num": Series([10, 20, 30]),
                    "text": Series(["u", "v", "w"]),
                }
            ),
            DataFrame(
                {
                    # codes: a=0, c=2, b=1
                    "ordered": Series([0, 2, 1], dtype="int8"),
                    # unordered categorical should be untouched (still categorical)
                    "unordered": Series(
                        Categorical(["x", "y", "x"], ordered=False)
                    ),
                    "num": Series([10, 20, 30]),
                    "text": Series(["u", "v", "w"]),
                }
            ),
            id="mixed-types-only-ordered-changes",
        ),
        pytest.param(
            # 3) Duplicate column names: first 'dup' is ordered categorical,
            # second 'dup' is non-categorical.
            # NOTE: a dict literal keeps only the LAST value for a repeated key,
            # so the frames are built with unique keys and relabelled afterwards
            # to obtain genuinely duplicated column names.
            DataFrame(
                {
                    "dup_0": Series(
                        Categorical(
                            ["low", "m", "h"],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                    "dup_1": Series([5, 6, 7]),  # duplicate name, later column
                }
            ).set_axis(["dup", "dup"], axis=1),
            DataFrame(
                {
                    # Expected: position 0 (ordered cat) becomes codes [0, 1, 2],
                    # position 1 remains untouched numbers [5, 6, 7].
                    "dup_0": Series([0, 1, 2], dtype="int8"),
                    "dup_1": Series([5, 6, 7]),
                }
            ).set_axis(["dup", "dup"], axis=1),
            id="duplicate-names-ordered-first",
        ),
        pytest.param(
            # 4) Duplicate column names: first 'dup' is non-categorical, second
            # 'dup' is ordered categorical containing None, third 'dup' is
            # ordered categorical without missing values.
            DataFrame(
                {
                    "dup_0": Series(["a", "b", "c"]),  # non-categorical (object)
                    "dup_1": Series(
                        Categorical(
                            ["p", "q", None],
                            categories=["p", "q"],
                            ordered=True,
                        )
                    ),
                    "dup_2": Series(
                        Categorical(
                            ["low", "m", "h"],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                }
            ).set_axis(["dup", "dup", "dup"], axis=1),
            DataFrame(
                {
                    # Expected: first stays object; second turns into codes
                    # [0, 1, NaN]; third turns into codes [0, 1, 2].
                    "dup_0": Series(["a", "b", "c"]),
                    "dup_1": Series([0.0, 1.0, np.nan]),
                    "dup_2": Series([0, 1, 2], dtype="int8"),
                }
            ).set_axis(["dup", "dup", "dup"], axis=1),
            id="duplicate-names-ordered-and-non-categorical-and-none",
        ),
    ],
)
def test_transform_ord_cat_cols_to_coded_cols(input_df, expected_df):
    """
    Check transform_ord_cat_cols_to_coded_cols against the expected frame.

    The column labels must match exactly, and every column is then compared
    by position so that frames with duplicated column names are checked
    unambiguously.
    """
    out_df = transform_ord_cat_cols_to_coded_cols(input_df)
    assert list(out_df.columns) == list(expected_df.columns)
    # Positional access: label-based selection would return several columns
    # at once when names are duplicated.
    for i in range(len(expected_df.columns)):
        tm.assert_series_equal(out_df.iloc[:, i], expected_df.iloc[:, i])

0 commit comments

Comments
 (0)