From ec08c989726031bfea7328a1afb66302ad2cf5d3 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Thu, 20 Nov 2025 10:07:22 +0100 Subject: [PATCH 1/5] pandas-dev/pandas#63155 stable sort value_counts --- pandas/core/algorithms.py | 2 +- pandas/core/base.py | 7 ++++++- pandas/core/frame.py | 9 +++++++-- pandas/core/groupby/generic.py | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3569a578943d4..4e0cc93e64458 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -948,7 +948,7 @@ def value_counts_internal( result = Series(counts, index=idx, name=name, copy=False) if sort: - result = result.sort_values(ascending=ascending) + result = result.sort_values(ascending=ascending, kind="stable") if normalize: result = result / counts.sum() diff --git a/pandas/core/base.py b/pandas/core/base.py index b417cf1487417..200b16b4b6b1a 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -993,7 +993,12 @@ def value_counts( If True then the object returned will contain the relative frequencies of the unique values. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. bins : int, optional diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2b9adb6230028..60141548db9f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7761,11 +7761,16 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. Preserve the order of the data when False. + Stable sort by frequencies when True. Preserve the order of the data + when False. .. 
versionchanged:: 3.0.0 Prior to 3.0.0, ``sort=False`` would sort by the columns values. + + .. versionchanged:: 3.0.0 + + Prior to 3.0.0, the sort was unstable. ascending : bool, default False Sort in ascending order. dropna : bool, default True @@ -7875,7 +7880,7 @@ def value_counts( counts.name = name if sort: - counts = counts.sort_values(ascending=ascending) + counts = counts.sort_values(ascending=ascending, kind="stable") if normalize: counts /= counts.sum() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 93e04fe61555e..dfa875249afff 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2776,8 +2776,8 @@ def value_counts( normalize : bool, default False Return proportions rather than frequencies. sort : bool, default True - Sort by frequencies when True. When False, non-grouping columns will appear - in the order they occur in within groups. + Stable sort by frequencies when True. When False, non-grouping + columns will appear in the order they occur within groups. .. 
versionchanged:: 3.0.0 From b7f38cba7bc95835c327475bcbdeb7c4e45165a6 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 23 Nov 2025 10:55:01 +0100 Subject: [PATCH 2/5] add test --- pandas/tests/test_algos.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index c876aae6dea5e..4a84ac4501258 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1446,6 +1446,12 @@ def test_value_counts_series(self): ) tm.assert_series_equal(result, expected) + def test_value_counts_stability(self): + # GH 63155 + arr = np.random.default_rng(2).integers(0, 128, 8192) + result = algos.value_counts_internal(arr, sort=True) + expected = Series(arr).value_counts(sort=False).sort_values(ascending=False, kind="stable") + tm.assert_series_equal(result, expected) class TestDuplicated: def test_duplicated_with_nas(self): From 908fd7e3254f7a6c097293b14b7e92cbc7bfdcd0 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Sun, 23 Nov 2025 11:03:14 +0100 Subject: [PATCH 3/5] pre-commit --- pandas/tests/test_algos.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4a84ac4501258..03c1bef848acb 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1450,9 +1450,14 @@ def test_value_counts_stability(self): # GH 63155 arr = np.random.default_rng(2).integers(0, 128, 8192) result = algos.value_counts_internal(arr, sort=True) - expected = Series(arr).value_counts(sort=False).sort_values(ascending=False, kind="stable") + expected = ( + Series(arr) + .value_counts(sort=False) + .sort_values(ascending=False, kind="stable") + ) tm.assert_series_equal(result, expected) + class TestDuplicated: def test_duplicated_with_nas(self): keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) From 9d91bd14be746a36cc9964e40091ef6408e8cc81 Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Tue, 2 Dec 2025 22:49:03 +0100 Subject: [PATCH 4/5] 
https://github.com/pandas-dev/pandas/pull/63158#discussion_r2582822930 --- pandas/tests/test_algos.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 03c1bef848acb..65fc8e6d127bc 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1448,15 +1448,17 @@ def test_value_counts_series(self): def test_value_counts_stability(self): # GH 63155 - arr = np.random.default_rng(2).integers(0, 128, 8192) + arr = np.random.default_rng(2).integers(0, 16, 64) result = algos.value_counts_internal(arr, sort=True) - expected = ( - Series(arr) - .value_counts(sort=False) - .sort_values(ascending=False, kind="stable") - ) + + value_counts = Series(arr).value_counts(sort=False) + expected = value_counts.sort_values(ascending=False, kind="stable") tm.assert_series_equal(result, expected) + unstable_sorted = value_counts.sort_values(ascending=False, kind="quicksort") + with pytest.raises(AssertionError): + tm.assert_series_equal(result, unstable_sorted) + class TestDuplicated: def test_duplicated_with_nas(self): From e72b9404244fd406d16c8e8dcb1253f4e36e75ec Mon Sep 17 00:00:00 2001 From: cmp0xff Date: Wed, 3 Dec 2025 09:22:01 +0100 Subject: [PATCH 5/5] attempt to fix on some platforms --- pandas/tests/test_algos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 65fc8e6d127bc..588cd08a6b618 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1448,7 +1448,7 @@ def test_value_counts_series(self): def test_value_counts_stability(self): # GH 63155 - arr = np.random.default_rng(2).integers(0, 16, 64) + arr = np.random.default_rng(2).integers(0, 32, 64) result = algos.value_counts_internal(arr, sort=True) value_counts = Series(arr).value_counts(sort=False)