1+ """
2+ Tests for core/methods/corr.py
3+ """
4+
5+ import pytest
6+ import numpy as np
7+ from pandas import DataFrame , Series , Categorical
8+ import pandas ._testing as tm
9+ from pandas .core .methods .corr import transform_ord_cat_cols_to_coded_cols
10+
11+
def frame_from_positional_columns(columns, names):
    """
    Build a DataFrame from ``columns`` in order and label them with ``names``.

    A dict literal cannot express duplicate column labels: repeated keys
    collapse to the last value, so ``DataFrame({"dup": a, "dup": b})`` ends up
    with a single column. The frame is therefore built under unique
    positional keys first and relabelled afterwards.

    Parameters
    ----------
    columns : list of Series
        Column data, in the positional order it should appear in the frame.
    names : list
        Column labels, one per entry of ``columns``; duplicates are allowed.

    Returns
    -------
    DataFrame
    """
    frame = DataFrame(dict(enumerate(columns)))
    frame.columns = names
    return frame


@pytest.mark.parametrize(
    ("input_df", "expected_df"),
    [
        pytest.param(
            # 1) Simple: two ordered categorical columns (with and without None)
            DataFrame(
                {
                    "ord_cat": Series(
                        Categorical(
                            ["low", "m", "h", "vh"],
                            categories=["low", "m", "h", "vh"],
                            ordered=True,
                        )
                    ),
                    "ord_cat_none": Series(
                        Categorical(
                            ["low", "m", "h", None],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                }
            ),
            DataFrame(
                {
                    # codes: low=0, m=1, h=2, vh=3
                    "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
                    # codes: low=0, m=1, h=2, None -> NaN
                    "ord_cat_none": Series([0, 1.0, 2.0, np.nan]),
                }
            ),
            id="ordered-categoricals-basic",
        ),
        pytest.param(
            # 2) Mixed dtypes: only the ordered categorical should change
            DataFrame(
                {
                    "ordered": Series(
                        Categorical(
                            ["a", "c", "b"],
                            categories=["a", "b", "c"],
                            ordered=True,
                        )
                    ),
                    "unordered": Series(
                        Categorical(["x", "y", "x"], ordered=False)
                    ),
                    "num": Series([10, 20, 30]),
                    "text": Series(["u", "v", "w"]),
                }
            ),
            DataFrame(
                {
                    # codes: a=0, c=2, b=1
                    "ordered": Series([0, 2, 1], dtype="int8"),
                    # unordered categorical should be untouched (still categorical)
                    "unordered": Series(
                        Categorical(["x", "y", "x"], ordered=False)
                    ),
                    "num": Series([10, 20, 30]),
                    "text": Series(["u", "v", "w"]),
                }
            ),
            id="mixed-types-only-ordered-changes",
        ),
        pytest.param(
            # 3) Duplicate column names: first 'dup' is ordered categorical,
            #    second 'dup' is non-categorical
            frame_from_positional_columns(
                [
                    Series(
                        Categorical(
                            ["low", "m", "h"],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                    Series([5, 6, 7]),  # duplicate name, later column
                ],
                ["dup", "dup"],
            ),
            # After transform: position 0 (ordered cat) becomes codes [0,1,2],
            # position 1 remains untouched numbers [5,6,7].
            frame_from_positional_columns(
                [
                    Series([0, 1, 2], dtype="int8"),
                    Series([5, 6, 7]),
                ],
                ["dup", "dup"],
            ),
            id="duplicate-names-ordered-first",
        ),
        pytest.param(
            # 4) Duplicate column names: first 'dup' is non-categorical,
            #    second and third 'dup' are ordered categoricals
            frame_from_positional_columns(
                [
                    Series(["a", "b", "c"]),  # non-categorical (object)
                    Series(
                        Categorical(
                            ["p", "q", None],
                            categories=["p", "q"],
                            ordered=True,
                        )
                    ),
                    Series(
                        Categorical(
                            ["low", "m", "h"],
                            categories=["low", "m", "h"],
                            ordered=True,
                        )
                    ),
                ],
                ["dup", "dup", "dup"],
            ),
            # First stays object; second turns into codes [0,1,NaN];
            # third turns into codes [0,1,2].
            frame_from_positional_columns(
                [
                    Series(["a", "b", "c"]),
                    Series([0.0, 1.0, np.nan]),
                    Series([0, 1, 2], dtype="int8"),
                ],
                ["dup", "dup", "dup"],
            ),
            id="duplicate-names-ordered-and-non-categorical-and-none",
        ),
    ],
)
def test_transform_ord_cat_cols_to_coded_cols(input_df, expected_df):
    """
    Check that each column of the transformed frame matches the expected one.

    Columns are compared by position rather than by label, because label-based
    selection is ambiguous when column names are duplicated.
    """
    out_df = transform_ord_cat_cols_to_coded_cols(input_df)
    # Comparing the label lists also guarantees both frames have the same
    # number of columns before the positional comparison below.
    assert list(out_df.columns) == list(expected_df.columns)
    for position in range(expected_df.shape[1]):
        tm.assert_series_equal(
            out_df.iloc[:, position], expected_df.iloc[:, position]
        )
0 commit comments