Skip to content

Commit e43ff94

Browse files
feat(collection): Replace quickSort with pdqsort for performance and robustness (#922)
Co-authored-by: Nate Bosch <nbosch1@gmail.com>
1 parent f2efaaf commit e43ff94

File tree

5 files changed

+976
-55
lines changed

5 files changed

+976
-55
lines changed

pkgs/collection/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
- Add `PriorityQueue.of` constructor and optimize adding many elements.
1212
- Address diagnostics from `strict_top_level_inference`.
1313
- Run `dart format` with the new style.
14+
- Replace `quickSort` implementation with a more performant and robust
15+
Pattern-defeating Quicksort (pdqsort) algorithm.
1416

1517
## 1.19.1
1618

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// Copyright (c) 2025, the Dart project authors. Please see the AUTHORS file
2+
// for details. All rights reserved. Use of this source code is governed by a
3+
// BSD-style license that can be found in the LICENSE file.
4+
5+
/// Reusable utilities for benchmarking sorting algorithms.
6+
library;
7+
8+
import 'dart:math';
9+
import 'package:benchmark_harness/benchmark_harness.dart';
10+
11+
// Sink variable to prevent the compiler from optimizing away benchmark code.
12+
/// Global accumulator that [SortBenchmarkBase.updateChecksum] XORs sampled
/// list elements into.
int sink = 0;
13+
14+
/// Summary statistics describing one benchmark run.
///
/// Instances are plain value holders; the statistics are computed by the
/// caller (see [runBenchmark], which records samples in microseconds).
class BenchmarkResult {
  /// Creates a result from already-computed statistics.
  BenchmarkResult(this.mean, this.median, this.stdDev, this.allTimes);

  /// Arithmetic mean of the samples.
  final double mean;

  /// Median sample.
  final int median;

  /// Standard deviation of the samples.
  final double stdDev;

  /// Every individual sample that the statistics were derived from.
  final List<int> allTimes;
}
23+
24+
/// Base class for sorting benchmarks with dataset generation.
///
/// Subclasses provide the input data through [generateDatasets] and the
/// operation being measured through [performSort]. Each call to [run]
/// delegates to [performSort], which is expected to obtain its input from
/// [nextList].
abstract class SortBenchmarkBase extends BenchmarkBase {
  /// The size this benchmark was created with.
  ///
  /// NOTE(review): presumably the length of each generated list; this class
  /// itself never reads it, so confirm against the subclasses.
  final int size;

  // Deliberately not `final`: `setup` can run more than once on the same
  // instance (for example when a benchmark object is measured repeatedly), and
  // a `late final` field would throw on the second assignment.
  late List<List<int>> _datasets;

  /// Index into [_datasets] of the list handed out by the next [nextList].
  int _iteration = 0;

  /// Monotonic counter mixed into [sink] by [updateChecksum].
  int _checksum = 0;

  SortBenchmarkBase(super.name, this.size);

  /// Generates the datasets for this benchmark condition.
  List<List<int>> generateDatasets();

  /// Regenerates the datasets and rewinds the dataset cursor.
  @override
  void setup() {
    _datasets = generateDatasets();
    // Restart from the first dataset so repeated setups are deterministic and
    // the cursor can never point past a shorter, freshly generated list.
    _iteration = 0;
  }

  /// Returns a fresh copy of the next dataset, cycling through all of them.
  ///
  /// A copy is returned so that sorting it leaves the stored dataset intact
  /// for later iterations.
  List<int> get nextList {
    final dataset = _datasets[_iteration];
    _iteration++;
    if (_iteration == _datasets.length) _iteration = 0;
    return dataset.toList();
  }

  /// Folds a few elements of [list] into [sink] to prevent the compiler from
  /// optimizing the sort away.
  ///
  /// An empty [list] contributes only the internal counter.
  void updateChecksum(List<int> list) {
    final counter = _checksum++;
    if (list.isEmpty) {
      sink ^= counter;
      return;
    }
    sink ^= list.first ^ list.last ^ list[list.length >> 1] ^ counter;
  }

  /// The core sorting operation to benchmark.
  void performSort();

  @override
  void run() => performSort();
}
60+
61+
/// Factory functions producing the input patterns used by sort benchmarks.
///
/// Every generator returns a list of datasets, each dataset being a growable
/// list of integers.
class DatasetGenerators {
  /// Returns [count] lists of [size] pseudo-random integers in `[0, size)`.
  ///
  /// The generator is seeded with [seed], or `12345` when it is omitted, so
  /// the output is reproducible.
  static List<List<int>> random(int size, {int count = 128, int? seed}) {
    final rng = Random(seed ?? 12345);
    List<int> buildList(int _) => List.generate(size, (_) => rng.nextInt(size));
    return List.generate(count, buildList);
  }

  /// Returns a single ascending list `0, 1, ..., size - 1`.
  static List<List<int>> sorted(int size) {
    final ascending = List<int>.generate(size, (index) => index);
    return [ascending];
  }

  /// Returns a single descending list `size - 1, ..., 1, 0`.
  static List<List<int>> reverse(int size) {
    final last = size - 1;
    final descending = List<int>.generate(size, (index) => last - index);
    return [descending];
  }

  /// Returns [count] lists of [size] pseudo-random integers drawn from only
  /// [uniqueCount] distinct values, `[0, uniqueCount)`.
  ///
  /// The generator is seeded with [seed], or `67890` when it is omitted.
  static List<List<int>> fewUnique(int size,
      {int uniqueCount = 7, int count = 128, int? seed}) {
    final rng = Random(seed ?? 67890);
    List<int> buildList(int _) =>
        List.generate(size, (_) => rng.nextInt(uniqueCount));
    return List.generate(count, buildList);
  }

  /// Returns a single list intended as a worst case for naive quicksort.
  ///
  /// The list holds the even values `0, 2, 4, ...` in ascending order,
  /// followed by the odd values in descending order.
  static List<List<int>> pathological(int size) {
    final ascending =
        List<int>.generate(size, (index) => index, growable: false);
    // Highest odd index below `size`; negative when there is none.
    final lastOdd = size.isEven ? size - 1 : size - 2;
    final result = <int>[];
    for (var even = 0; even < size; even += 2) {
      result.add(ascending[even]);
    }
    for (var odd = lastOdd; odd >= 0; odd -= 2) {
      result.add(ascending[odd]);
    }
    return [result];
  }

  /// Returns [count] ascending lists of [size] elements, each disturbed by
  /// `size * swapPercent` (rounded) swaps of two randomly chosen positions.
  ///
  /// The generator is seeded with [seed], or `11111` when it is omitted.
  static List<List<int>> nearlySorted(int size,
      {double swapPercent = 0.05, int count = 128, int? seed}) {
    final rng = Random(seed ?? 11111);

    List<int> buildList(int _) {
      final values = List<int>.generate(size, (index) => index);
      final swapCount = (size * swapPercent).round();
      for (var remaining = swapCount; remaining > 0; remaining--) {
        final first = rng.nextInt(size);
        final second = rng.nextInt(size);
        final held = values[first];
        values[first] = values[second];
        values[second] = held;
      }
      return values;
    }

    return List.generate(count, buildList);
  }
}
118+
119+
/// Runs [benchmark] [samples] times and collects timing statistics.
///
/// Calls `benchmark.setup()` once, performs three untimed warm-up runs, and
/// then times [samples] individual calls to `benchmark.run()`, recording each
/// elapsed time in microseconds.
///
/// The returned [BenchmarkResult] holds the mean, the median (the upper of the
/// two middle samples when [samples] is even), the population standard
/// deviation, and the sorted list of all samples.
///
/// Throws an [ArgumentError] if [samples] is not positive, since no
/// statistics can be derived from zero samples.
BenchmarkResult runBenchmark(SortBenchmarkBase benchmark, int samples) {
  // Fail before doing any work; otherwise the statistics below would fail on
  // an empty sample list with an unhelpful "No element" error.
  if (samples <= 0) {
    throw ArgumentError.value(samples, 'samples', 'Must be positive');
  }

  // Setup datasets
  benchmark.setup();

  // Warmup runs (not timed)
  for (var i = 0; i < 3; i++) {
    benchmark.run();
  }

  // Timed runs
  final times = <int>[];
  for (var i = 0; i < samples; i++) {
    final stopwatch = Stopwatch()..start();
    benchmark.run();
    stopwatch.stop();
    times.add(stopwatch.elapsedMicroseconds);
  }

  // Sorting makes the median a simple index lookup.
  times.sort();
  final total = times.fold<int>(0, (sum, time) => sum + time);
  final mean = total / samples;
  final median = times[samples >> 1];

  // Population variance: mean of the squared deviations from the mean.
  final squaredDeviations = times.fold<double>(0.0, (sum, time) {
    final deviation = time - mean;
    return sum + deviation * deviation;
  });
  final stdDev = sqrt(squaredDeviations / samples);

  return BenchmarkResult(mean, median, stdDev, times);
}
150+
151+
/// Prints benchmark results as a markdown table.
///
/// [results] maps a data-condition label to a `(baseline, comparison)` pair
/// of results, and [size] is the input size shown in the banner.
/// [baselineName] and [comparisonName] are the labels for the two
/// implementations being compared (e.g., "Legacy", "pdqsort", "MergeSort").
///
/// The table has a `**Mean**` section followed by a `**Median**` section, each
/// with one row per entry of [results]. The "Improvement" column is the
/// percentage by which the comparison value is lower than the baseline value.
/// The last column names the faster implementation, or, when [showStdDev] is
/// true, shows `baseline/comparison` standard deviations in the mean section
/// and is left blank in the median section.
///
/// Every row, including the header and the section labels, is padded to the
/// same column widths so the output also lines up when read as plain text.
void printResultsAsMarkdownTable(
    Map<String, (BenchmarkResult, BenchmarkResult)> results, int size,
    {required String baselineName,
    required String comparisonName,
    bool showStdDev = false}) {
  // Widths of the fixed columns, shared by the header, divider and data rows.
  const conditionColWidth = 19;
  const improvementColWidth = 11;
  final lastColWidth = showStdDev ? 13 : 15;

  // The two timing columns grow with the implementation names: each header is
  // the name followed by the 5-character suffix ' (µs)'.
  final baselineColWidth = max(baselineName.length + 5, 13);
  final comparisonColWidth = max(comparisonName.length + 5, 13);

  // Joins five already-padded cells into one markdown table row.
  String row(String condition, String baseline, String comparison,
          String improvement, String last) =>
      '| $condition | $baseline | $comparison | $improvement | $last |';

  // Divider cell of the given width requesting centered alignment.
  String centered(int width) => ':${'-' * (width - 2)}:';

  // Prints a section label row followed by one data row per condition.
  // `metricOf` selects the statistic being compared; `withStdDev` controls
  // whether the standard deviations are shown when `showStdDev` is set.
  void printSection(
    String label,
    num Function(BenchmarkResult result) metricOf, {
    required bool withStdDev,
  }) {
    print(row(
      label.padRight(conditionColWidth),
      ' ' * baselineColWidth,
      ' ' * comparisonColWidth,
      ' ' * improvementColWidth,
      ' ' * lastColWidth,
    ));

    for (final entry in results.entries) {
      final (baseline, comparison) = entry.value;
      final baselineValue = metricOf(baseline);
      final comparisonValue = metricOf(comparison);

      // Positive when the comparison implementation took less time.
      final improvement =
          (baselineValue - comparisonValue) / baselineValue * 100;
      final sign = improvement > 0 ? '+' : '';
      final improvementString = '$sign${improvement.toStringAsFixed(2)}%';

      final String lastCell;
      if (!showStdDev) {
        lastCell = improvement > 0 ? '$comparisonName 🚀' : baselineName;
      } else if (withStdDev) {
        lastCell = '${baseline.stdDev.round()}/${comparison.stdDev.round()}';
      } else {
        lastCell = '';
      }

      print(row(
        entry.key.padRight(conditionColWidth),
        '${baselineValue.round()}'.padLeft(baselineColWidth),
        '${comparisonValue.round()}'.padLeft(comparisonColWidth),
        improvementString.padLeft(improvementColWidth),
        lastCell.padLeft(lastColWidth),
      ));
    }
  }

  final separator = '=' * 100;
  print('\n$separator');
  print('Benchmark Results (Size: $size): $comparisonName vs. $baselineName');
  print(separator);

  // Header row, padded to the same widths as the data rows.
  print(row(
    'Data Condition'.padRight(conditionColWidth),
    '$baselineName (µs)'.padRight(baselineColWidth),
    '$comparisonName (µs)'.padRight(comparisonColWidth),
    'Improvement'.padRight(improvementColWidth),
    (showStdDev ? 'StdDev' : 'Winner').padRight(lastColWidth),
  ));

  // Divider row: first column left-aligned, all others centered.
  print(row(
    ':${'-' * (conditionColWidth - 1)}',
    centered(baselineColWidth),
    centered(comparisonColWidth),
    centered(improvementColWidth),
    centered(lastColWidth),
  ));

  printSection('**Mean**', (result) => result.mean, withStdDev: true);
  printSection('**Median**', (result) => result.median, withStdDev: false);

  print(separator);
}
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/// Legacy quickSort implementation preserved for benchmarking purposes.
2+
/// This code is ONLY for benchmarking and should not be used in production.
3+
library;
4+
5+
import 'dart:math';
6+
import 'package:collection/src/utils.dart';
7+
8+
/// Binary insertion sort of `list[start..end)` into [target], beginning at
/// [targetOffset].
///
/// Elements are ordered by applying [compare] to the keys produced by
/// [keyOf]. An element whose key compares equal to existing ones is placed
/// after them, so the original relative order of equal elements is kept.
///
/// [target] may be the same list as [list]; sorting in place works as well.
void _movingInsertionSort<E, K>(
  List<E> list,
  K Function(E element) keyOf,
  int Function(K, K) compare,
  int start,
  int end,
  List<E> target,
  int targetOffset,
) {
  final count = end - start;
  if (count == 0) return;

  // The first element forms the initial one-element sorted prefix.
  target[targetOffset] = list[start];

  // `sortedEnd` is the exclusive end of the sorted prefix inside `target`.
  var sortedEnd = targetOffset + 1;
  for (var source = start + 1; source < end; source++, sortedEnd++) {
    final incoming = list[source];
    final incomingKey = keyOf(incoming);

    // Binary search for the first position whose key is greater than the
    // incoming key.
    var low = targetOffset;
    var high = sortedEnd;
    while (low < high) {
      final probe = low + ((high - low) >> 1);
      if (compare(incomingKey, keyOf(target[probe])) >= 0) {
        low = probe + 1;
      } else {
        high = probe;
      }
    }

    // Shift the tail of the prefix one slot right and drop the element in.
    target.setRange(low + 1, sortedEnd + 1, target, low);
    target[low] = incoming;
  }
}
41+
42+
/// Sorts [elements] in place with a quick-sort algorithm.
///
/// Elements are ordered by calling [compare] on them directly.
/// When [start] and [end] are given, only the range from [start] (inclusive)
/// to [end] (exclusive) is sorted; [end] defaults to the length of
/// [elements]. The range is validated with [RangeError.checkValidRange].
///
/// Short sublists are finished with an insertion sort.
void quickSort<E>(
  List<E> elements,
  int Function(E a, E b) compare, [
  int start = 0,
  int? end,
]) {
  final rangeEnd = RangeError.checkValidRange(start, end, elements.length);
  _quickSort<E, E>(elements, identity, compare, Random(), start, rangeEnd);
}
57+
58+
/// Sorts [list] in place with a quick-sort algorithm, ordering by a key.
///
/// Each element is mapped to a key with [keyOf], and the keys are ordered
/// with [compare].
/// When [start] and [end] are given, only the range from [start] (inclusive)
/// to [end] (exclusive) is sorted; [end] defaults to the length of [list].
/// The range is validated with [RangeError.checkValidRange].
///
/// Short sublists are finished with an insertion sort.
void quickSortBy<E, K>(
  List<E> list,
  K Function(E element) keyOf,
  int Function(K a, K b) compare, [
  int start = 0,
  int? end,
]) {
  final rangeEnd = RangeError.checkValidRange(start, end, list.length);
  _quickSort<E, K>(list, keyOf, compare, Random(), start, rangeEnd);
}
75+
76+
/// Sorts `list[start..end)` in place using quicksort with a randomly chosen
/// pivot and a three-way partition.
///
/// Elements are ordered by applying [compare] to the keys produced by
/// [keyOf]; [random] supplies the pivot positions. After partitioning, the
/// function recurses into one side and keeps looping on the other, which is
/// never the larger of the two. Ranges shorter than 24 elements are handed to
/// [_movingInsertionSort].
void _quickSort<E, K>(
  List<E> list,
  K Function(E element) keyOf,
  int Function(K a, K b) compare,
  Random random,
  int start,
  int end,
) {
  const insertionSortThreshold = 24;
  while (end - start >= insertionSortThreshold) {
    final chosen = random.nextInt(end - start) + start;
    final pivot = list[chosen];
    final pivotKey = keyOf(pivot);

    // Partition layout while scanning:
    //   [start, lessEnd)            keys smaller than the pivot key
    //   [lessEnd, equalStart)       not yet examined
    //   [equalStart, greaterStart)  keys equal to the pivot key
    //   [greaterStart, end)         keys greater than the pivot key
    var lessEnd = start;
    var greaterStart = end;
    var equalStart = end - 1;

    // Park the pivot in the last slot, which seeds the "equal" region.
    list[chosen] = list[equalStart];
    list[equalStart] = pivot;

    while (lessEnd < equalStart) {
      final candidate = list[lessEnd];
      final order = compare(keyOf(candidate), pivotKey);
      if (order < 0) {
        lessEnd++;
        continue;
      }

      // Grow the "equal" region downwards; the unexamined element that was
      // sitting there moves into the slot currently being scanned.
      equalStart--;
      list[lessEnd] = list[equalStart];
      var destination = equalStart;
      if (order > 0) {
        // Grow the "greater" region downwards as well, shifting the "equal"
        // region by moving its topmost element to its new bottom slot.
        greaterStart--;
        list[equalStart] = list[greaterStart];
        destination = greaterStart;
      }
      list[destination] = candidate;
    }

    final smallerCount = lessEnd - start;
    final greaterCount = end - greaterStart;
    if (smallerCount >= greaterCount) {
      _quickSort(list, keyOf, compare, random, greaterStart, end);
      end = lessEnd;
    } else {
      _quickSort(list, keyOf, compare, random, start, lessEnd);
      start = greaterStart;
    }
  }
  _movingInsertionSort<E, K>(list, keyOf, compare, start, end, list, start);
}

0 commit comments

Comments
 (0)