clean up

pandeconscious · pandeconscious · commit e997747a6892 · 2025-11-16T18:49:36.000Z
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -12017,21 +12017,23 @@ def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame:
 
         data = self.copy(deep=False)
         cols_convert = categ.loc[:, categ.agg(lambda x: x.cat.ordered)].columns.unique()
-        single_cols = [col for col in cols_convert if isinstance(data[col], Series)]
-        duplicated_cols = [
+        ser_generating_cols = [
+            col for col in cols_convert if isinstance(data[col], Series)
+        ]
+        df_generating_cols = [
             col for col in cols_convert if isinstance(data[col], DataFrame)
         ]
 
-        if not single_cols and not duplicated_cols:
+        if not ser_generating_cols and not df_generating_cols:
             return self
 
-        if single_cols:
-            data[single_cols] = data[single_cols].apply(
+        if ser_generating_cols:
+            data[ser_generating_cols] = data[ser_generating_cols].apply(
                 lambda x: x.cat.codes.replace(-1, np.nan)
             )
 
-        if duplicated_cols:
-            data[duplicated_cols] = data[duplicated_cols].apply(
+        for df_col in df_generating_cols:
+            data[df_col] = data[df_col].apply(
                 lambda x: x.cat.codes.replace(-1, np.nan)
                 if isinstance(x.dtype, CategoricalDtype) and bool(x.dtype.ordered)
                 else x
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -294,6 +294,39 @@ def test_corr_rank_ordered_categorical(
             corr_expected = df[col1].corr(df[col2], method=method)
             tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
 
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @td.skip_if_no("scipy")
+    def test_corr_rank_ordered_categorical_duplicate_columns(
+        self,
+        method,
+    ):
+        df = DataFrame(
+            {
+                "a": [1, 2, 3, 4],
+                "b": [4, 3, 2, 1],
+                "c": [4, 3, 2, 1],
+                "d": [10, 20, 30, 40],
+                "e": [100, 200, 300, 400],
+            }
+        )
+        df["a"] = (
+            df["a"].astype("category").cat.set_categories([4, 3, 2, 1], ordered=True)
+        )
+        df["b"] = (
+            df["b"].astype("category").cat.set_categories([4, 3, 2, 1], ordered=True)
+        )
+        df["c"] = (
+            df["c"].astype("category").cat.set_categories([4, 3, 2, 1], ordered=True)
+        )
+        df.columns = ["a", "a", "c", "c", "e"]
+
+        corr_calc = df.corr(method=method)
+        for col1_idx, col2_idx in combinations(range(len(df.columns)), r=2):
+            corr_expected = df.iloc[:, col1_idx].corr(
+                df.iloc[:, col2_idx], method=method
+            )
+            tm.assert_almost_equal(corr_calc.iloc[col1_idx, col2_idx], corr_expected)
+
 
 class TestDataFrameCorrWith:
     @pytest.mark.parametrize(
diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py
@@ -186,75 +186,58 @@ def test_corr_callable_method(self, datetime_series):
         tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
 
     @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize(
+        "ord_cat_series",
+        [
+            Series(  # ordered categorical series
+                pd.Categorical(
+                    ["low", "med", "high", "very_high"],
+                    categories=["low", "med", "high", "very_high"],
+                    ordered=True,
+                )
+            ),
+            Series(  # ordered categorical series with nan and a different ranking
+                pd.Categorical(
+                    ["h", "low", "vh", None],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                )
+            ),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "other_series",
+        [
+            Series(  # int series against which ord cat series is correlated
+                [0, 1, 2, 3]
+            ),
+            Series(  # float series against which ord cat series is correlated
+                [2.0, 3.0, 4.5, 6.5]
+            ),
+            Series(  # other ord cat series against which ord cat series is correlated
+                pd.Categorical(
+                    ["high", "low", "very_high", "med"],
+                    categories=["low", "med", "high", "very_high"],
+                    ordered=True,
+                )
+            ),
+        ],
+    )
     def test_corr_rank_ordered_categorical(
         self,
         method,
+        ord_cat_series,
+        other_series,
     ):
         stats = pytest.importorskip("scipy.stats")
         method_scipy_func = {"kendall": stats.kendalltau, "spearman": stats.spearmanr}
-        ser_ord_cat = Series(
-            pd.Categorical(
-                ["low", "med", "high", "very_high"],
-                categories=["low", "med", "high", "very_high"],
-                ordered=True,
-            )
-        )
-        ser_ord_cat_codes = ser_ord_cat.cat.codes.replace(-1, np.nan)
-        ser_ord_int = Series([0, 1, 2, 3])
-        ser_ord_float = Series([2.0, 3.0, 4.5, 6.5])
-
-        corr_calc = ser_ord_cat.corr(ser_ord_int, method=method)
-        corr_expected = method_scipy_func[method](
-            ser_ord_cat_codes, ser_ord_int, nan_policy="omit"
-        )[0]
-        tm.assert_almost_equal(corr_calc, corr_expected)
-
-        corr_calc = ser_ord_cat.corr(ser_ord_float, method=method)
-        corr_expected = method_scipy_func[method](
-            ser_ord_cat_codes, ser_ord_float, nan_policy="omit"
-        )[0]
-        tm.assert_almost_equal(corr_calc, corr_expected)
-
-        corr_calc = ser_ord_cat.corr(ser_ord_cat, method=method)
-        corr_expected = method_scipy_func[method](
-            ser_ord_cat_codes, ser_ord_cat_codes, nan_policy="omit"
-        )[0]
-        tm.assert_almost_equal(corr_calc, corr_expected)
-
-        ser_ord_cat_shuff = Series(
-            pd.Categorical(
-                ["high", "low", "very_high", "med"],
-                categories=["low", "med", "high", "very_high"],
-                ordered=True,
-            )
-        )
-        ser_ord_cat_shuff_codes = ser_ord_cat_shuff.cat.codes.replace(-1, np.nan)
-
-        corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat, method=method)
-        corr_expected = method_scipy_func[method](
-            ser_ord_cat_shuff_codes, ser_ord_cat_codes, nan_policy="omit"
-        )[0]
-        tm.assert_almost_equal(corr_calc, corr_expected)
+        ord_ser_cat_codes = ord_cat_series.cat.codes.replace(-1, np.nan)
 
-        corr_calc = ser_ord_cat_shuff.corr(ser_ord_cat_shuff, method=method)
-        corr_expected = method_scipy_func[method](
-            ser_ord_cat_shuff_codes, ser_ord_cat_shuff_codes, nan_policy="omit"
-        )[0]
-        tm.assert_almost_equal(corr_calc, corr_expected)
+        if other_series.dtype == "category" and other_series.cat.ordered:
+            other_series = other_series.cat.codes.replace(-1, np.nan)
 
-        ser_ord_cat_with_nan = Series(
-            pd.Categorical(
-                ["h", "low", "vh", None, "m"],
-                categories=["low", "m", "h", "vh"],
-                ordered=True,
-            )
-        )
-        ser_ord_cat_shuff_with_nan_codes = ser_ord_cat_with_nan.cat.codes.replace(
-            -1, np.nan
-        )
-        ser_ord_int = Series([2, 0, 1, 3, None])
-        corr_calc = ser_ord_cat_with_nan.corr(ser_ord_int, method=method)
+        corr_calc = ord_cat_series.corr(other_series, method=method)
         corr_expected = method_scipy_func[method](
-            ser_ord_cat_shuff_with_nan_codes, ser_ord_int, nan_policy="omit"
+            ord_ser_cat_codes, other_series, nan_policy="omit"
         )[0]
         tm.assert_almost_equal(corr_calc, corr_expected)