Skip to content

Commit d6c1319

Browse files
committed
tweaks
1 parent cc62c68 commit d6c1319

File tree

3 files changed

+13
-7
lines changed

3 files changed

+13
-7
lines changed

benchmark/decode-string.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/util
55
import Benchmark from "benchmark";
66

77
for (const baseStr of ["A", "あ", "🌏"]) {
8-
const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
8+
const dataSet = [10, 100, 500, 1_000].map((n) => {
99
return baseStr.repeat(n);
1010
});
1111

@@ -14,7 +14,7 @@ for (const baseStr of ["A", "あ", "🌏"]) {
1414
const bytes = new Uint8Array(new ArrayBuffer(byteLength));
1515
utf8EncodeJs(str, bytes, 0);
1616

17-
console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
17+
console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);
1818

1919
const suite = new Benchmark.Suite();
2020

benchmark/encode-string.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@ import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";
55
import Benchmark from "benchmark";
66

77
for (const baseStr of ["A", "あ", "🌏"]) {
8-
const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
8+
const dataSet = [10, 30, 50, 100].map((n) => {
99
return baseStr.repeat(n);
1010
});
1111

1212
for (const str of dataSet) {
1313
const byteLength = utf8Count(str);
1414
const buffer = new Uint8Array(byteLength);
1515

16-
console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
16+
console.log(`\n## string "${baseStr}" (strLength=${str.length}, byteLength=${byteLength})\n`);
1717

1818
const suite = new Benchmark.Suite();
1919

src/utils/utf8.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
/* eslint-disable @typescript-eslint/no-unnecessary-condition */
2-
import { UINT32_MAX } from "./int";
31

42
export function utf8Count(str: string): number {
53
const strLength = str.length;
@@ -88,9 +86,14 @@ export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: numb
8886
// https://encoding.spec.whatwg.org/
8987
// and available in all the modern browsers:
9088
// https://caniuse.com/textencoder
89+
// They are available in Node.js since v12 LTS as well:
90+
// https://nodejs.org/api/globals.html#textencoder
9191

9292
const sharedTextEncoder = new TextEncoder();
93-
const TEXT_ENCODER_THRESHOLD = 200;
93+
94+
// This threshold should be determined by benchmarking; the optimal value might vary across engines and input data.
95+
// Run `npx ts-node benchmark/encode-string.ts` for details.
96+
const TEXT_ENCODER_THRESHOLD = 50;
9497

9598
export function utf8EncodeTE(str: string, output: Uint8Array, outputOffset: number): void {
9699
sharedTextEncoder.encodeInto(str, output.subarray(outputOffset));
@@ -156,6 +159,9 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
156159
}
157160

158161
const sharedTextDecoder = new TextDecoder();
162+
163+
// This threshold should be determined by benchmarking; the optimal value might vary across engines and input data.
164+
// Run `npx ts-node benchmark/decode-string.ts` for details.
159165
const TEXT_DECODER_THRESHOLD = 200;
160166

161167
export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {

0 commit comments

Comments
 (0)