Skip to content

Commit 2673281

Browse files
clean up
1 parent 8bcd3dc commit 2673281

File tree

2 files changed

+26
-39
lines changed

2 files changed

+26
-39
lines changed

pandas/core/frame.py

Lines changed: 4 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@
114114
from pandas.core.dtypes.dtypes import (
115115
ArrowDtype,
116116
BaseMaskedDtype,
117-
CategoricalDtype,
118117
ExtensionDtype,
119118
)
120119
from pandas.core.dtypes.generic import (
@@ -185,6 +184,7 @@
185184
treat_as_nested,
186185
)
187186
from pandas.core.methods import selectn
187+
from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
188188
from pandas.core.reshape.melt import melt
189189
from pandas.core.series import Series
190190
from pandas.core.shared_docs import _shared_docs
@@ -11682,7 +11682,7 @@ def corr(
1168211682
idx = cols.copy()
1168311683

1168411684
if method in ("spearman", "kendall"):
11685-
data = data._transform_ord_cat_cols_to_coded_cols()
11685+
data = transform_ord_cat_cols_to_coded_cols(data)
1168611686

1168711687
mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
1168811688

@@ -11973,8 +11973,8 @@ def corrwith(
1197311973
correl = num / dom
1197411974

1197511975
elif method in ["kendall", "spearman"] or callable(method):
11976-
left = left._transform_ord_cat_cols_to_coded_cols()
11977-
right = right._transform_ord_cat_cols_to_coded_cols()
11976+
left = transform_ord_cat_cols_to_coded_cols(left)
11977+
right = transform_ord_cat_cols_to_coded_cols(right)
1197811978

1197911979
def c(x):
1198011980
return nanops.nancorr(x[0], x[1], method=method)
@@ -12006,41 +12006,6 @@ def c(x):
1200612006

1200712007
return correl
1200812008

12009-
def _transform_ord_cat_cols_to_coded_cols(self) -> DataFrame:
12010-
"""
12011-
Replace every ordered categorical column with its integer category codes
(code -1 is replaced by NaN) while other columns remain untouched.
12012-
Returns ``self`` as-is when no column is an ordered categorical; otherwise
returns a shallow copy of ``self`` holding the converted columns.
12013-
"""
12014-
categ = self.select_dtypes("category")
12015-
if len(categ.columns) == 0:
12016-
return self
12017-
12018-
# Column assignments below are made on this shallow copy, not on ``self``.
data = self.copy(deep=False)
12019-
# Unique labels of the categorical columns whose dtype is ordered.
cols_convert = categ.loc[:, categ.agg(lambda x: x.cat.ordered)].columns.unique()
12020-
# Labels are split by what ``data[col]`` yields: a single Series, or a
# DataFrame (presumably when the label is duplicated among the columns --
# TODO confirm against pandas indexing semantics).
ser_generating_cols = [
12021-
col for col in cols_convert if isinstance(data[col], Series)
12022-
]
12023-
df_generating_cols = [
12024-
col for col in cols_convert if isinstance(data[col], DataFrame)
12025-
]
12026-
12027-
if not ser_generating_cols and not df_generating_cols:
12028-
return self
12029-
12030-
if ser_generating_cols:
12031-
data[ser_generating_cols] = data[ser_generating_cols].apply(
12032-
lambda x: x.cat.codes.replace(-1, np.nan)
12033-
)
12034-
12035-
for df_col in df_generating_cols:
12036-
# Within a DataFrame-yielding label, only members that are themselves
# ordered categoricals are converted; the others pass through unchanged.
data[df_col] = data[df_col].apply(
12037-
lambda x: x.cat.codes.replace(-1, np.nan)
12038-
if isinstance(x.dtype, CategoricalDtype) and bool(x.dtype.ordered)
12039-
else x
12040-
)
12041-
12042-
return data
12043-
1204412009
# ----------------------------------------------------------------------
1204512010
# ndarray-like stats methods
1204612011

pandas/core/methods/corr.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import numpy as np
2+
3+
from pandas import DataFrame
4+
5+
6+
def transform_ord_cat_cols_to_coded_cols(df: DataFrame) -> DataFrame:
    """
    Replace ordered categorical columns with their integer category codes.

    Columns are visited by position. For each column whose dtype is an
    ordered categorical, the column is replaced by its ``.cat.codes`` with
    the code ``-1`` swapped for ``NaN``. All other columns are left as is.

    Parameters
    ----------
    df : DataFrame
        Frame whose ordered categorical columns should be converted.

    Returns
    -------
    DataFrame
        ``df`` itself when it holds no ordered categorical column; otherwise
        a shallow copy of ``df`` carrying the converted columns.
    """
    out = df
    for position, col_dtype in enumerate(df.dtypes):
        if col_dtype == "category" and col_dtype.ordered:
            column = out._ixs(position, axis=1)
            # Copy lazily: only once the first column to convert is found.
            if out is df:
                out = df.copy(deep=False)
            coded = column.cat.codes.replace(-1, np.nan)
            out._iset_item(position, coded)
    return out

0 commit comments

Comments
 (0)