|
114 | 114 | from pandas.core.dtypes.dtypes import ( |
115 | 115 | ArrowDtype, |
116 | 116 | BaseMaskedDtype, |
117 | | - CategoricalDtype, |
118 | 117 | ExtensionDtype, |
119 | 118 | ) |
120 | 119 | from pandas.core.dtypes.generic import ( |
|
185 | 184 | treat_as_nested, |
186 | 185 | ) |
187 | 186 | from pandas.core.methods import selectn |
| 187 | +from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols |
188 | 188 | from pandas.core.reshape.melt import melt |
189 | 189 | from pandas.core.series import Series |
190 | 190 | from pandas.core.shared_docs import _shared_docs |
@@ -11682,7 +11682,7 @@ def corr( |
11682 | 11682 | idx = cols.copy() |
11683 | 11683 |
|
11684 | 11684 | if method in ("spearman", "kendall"): |
11685 | | - data = data._transform_ord_cat_cols_to_coded_cols() |
| 11685 | + data = transform_ord_cat_cols_to_coded_cols(data) |
11686 | 11686 |
|
11687 | 11687 | mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False) |
11688 | 11688 |
|
@@ -11973,8 +11973,8 @@ def corrwith( |
11973 | 11973 | correl = num / dom |
11974 | 11974 |
|
11975 | 11975 | elif method in ["kendall", "spearman"] or callable(method): |
11976 | | - left = left._transform_ord_cat_cols_to_coded_cols() |
11977 | | - right = right._transform_ord_cat_cols_to_coded_cols() |
| 11976 | + left = transform_ord_cat_cols_to_coded_cols(left) |
| 11977 | + right = transform_ord_cat_cols_to_coded_cols(right) |
11978 | 11978 |
|
11979 | 11979 | def c(x): |
11980 | 11980 | return nanops.nancorr(x[0], x[1], method=method) |
@@ -12006,41 +12006,6 @@ def c(x): |
12006 | 12006 |
|
12007 | 12007 | return correl |
12008 | 12008 |
|
12009 | | - def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame: |
12010 | | - """ |
12011 | | - any ordered categorical columns are transformed to the respective |
12012 | | - categorical codes while other columns remain untouched |
12013 | | - """ |
12014 | | - categ = self.select_dtypes("category") |
12015 | | - if len(categ.columns) == 0: |
12016 | | - return self |
12017 | | - |
12018 | | - data = self.copy(deep=False) |
12019 | | - cols_convert = categ.loc[:, categ.agg(lambda x: x.cat.ordered)].columns.unique() |
12020 | | - ser_generating_cols = [ |
12021 | | - col for col in cols_convert if isinstance(data[col], Series) |
12022 | | - ] |
12023 | | - df_generating_cols = [ |
12024 | | - col for col in cols_convert if isinstance(data[col], DataFrame) |
12025 | | - ] |
12026 | | - |
12027 | | - if not ser_generating_cols and not df_generating_cols: |
12028 | | - return self |
12029 | | - |
12030 | | - if ser_generating_cols: |
12031 | | - data[ser_generating_cols] = data[ser_generating_cols].apply( |
12032 | | - lambda x: x.cat.codes.replace(-1, np.nan) |
12033 | | - ) |
12034 | | - |
12035 | | - for df_col in df_generating_cols: |
12036 | | - data[df_col] = data[df_col].apply( |
12037 | | - lambda x: x.cat.codes.replace(-1, np.nan) |
12038 | | - if isinstance(x.dtype, CategoricalDtype) and bool(x.dtype.ordered) |
12039 | | - else x |
12040 | | - ) |
12041 | | - |
12042 | | - return data |
12043 | | - |
12044 | 12009 | # ---------------------------------------------------------------------- |
12045 | 12010 | # ndarray-like stats methods |
12046 | 12011 |
|
|
0 commit comments