Skip to content

Commit 4bc4a6b

Browse files
committed
LibLine: Correctly count multi-code-point glyphs towards line length
This also adds some tests to make sure things don't break.
1 parent 00eb9d1 commit 4bc4a6b

File tree

5 files changed

+131
-1
lines changed

5 files changed

+131
-1
lines changed

Meta/Lagom/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ if (BUILD_LAGOM)
691691
LibHID
692692
LibHTTP
693693
LibIMAP
694+
LibLine
694695
LibLocale
695696
LibMarkdown
696697
LibMedia

Tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ add_subdirectory(LibGfx)
1616
add_subdirectory(LibHID)
1717
add_subdirectory(LibIMAP)
1818
add_subdirectory(LibJS)
19+
add_subdirectory(LibLine)
1920
add_subdirectory(LibLocale)
2021
add_subdirectory(LibMarkdown)
2122
add_subdirectory(LibMedia)

Tests/LibLine/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Source files that make up the LibLine test suite.
set(TEST_SOURCES
2+
TestMetrics.cpp
3+
)
4+
5+
# Register every listed source through serenity_test, linking LibLine and LibUnicode.
foreach(source IN LISTS TEST_SOURCES)
6+
serenity_test("${source}" LibLine LIBS LibLine LibUnicode)
7+
endforeach()

Tests/LibLine/TestMetrics.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Copyright (c) 2025, the SerenityOS developers.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
#include "LibUnicode/Forward.h"
8+
9+
#include <LibTest/TestCase.h>
10+
11+
#include <LibLine/Editor.h>
12+
13+
// Pure-ASCII input passed as a string view: bytes, code points and glyphs all count 13,
// so every metric below is expected to agree on that number.
TEST_CASE(count_ascii_glyphs_u8)
14+
{
15+
constexpr auto string = "Hello, World!"sv; // length in bytes: 13, code points: 13, glyphs: 13
16+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
17+
// One grapheme break is expected per glyph.
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u);
18+
// The input contains no line break; exactly one line entry is expected.
EXPECT_EQ(metrics.line_metrics.size(), 1u);
19+
EXPECT_EQ(metrics.line_metrics[0].length, 13u);
20+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u);
21+
}
22+
23+
// Same ASCII text as the u8 variant, but supplied as an array of u32 code points
// wrapped in a Utf32View; the expected metrics are identical.
TEST_CASE(count_ascii_glyphs_u32)
24+
{
25+
constexpr u32 string[] = { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!' }; // length in code points: 13, glyphs: 13
26+
auto metrics = Line::Editor::actual_rendered_string_metrics(Utf32View(string));
27+
// One grapheme break is expected per glyph.
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u);
28+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
29+
EXPECT_EQ(metrics.line_metrics[0].length, 13u);
30+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u);
31+
}
32+
33+
// Input with two characters that need two UTF-8 bytes each: the byte count (15) differs
// from the code point count (13), and the expectations below follow the code point count.
TEST_CASE(count_simple_multibyte_glyphs_u8)
34+
{
35+
constexpr auto string = "Héllo, Wörld!"sv; // length in bytes: 15, code points: 13, glyphs: 13
36+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
37+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u);
38+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
39+
// Lengths are expected in code points (13), not in bytes (15).
EXPECT_EQ(metrics.line_metrics[0].length, 13u);
40+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u);
41+
}
42+
43+
// UTF-32 counterpart of the multibyte test: the accented letters are given directly
// as single code points (0xe9 is U+00E9 'é', 0xf6 is U+00F6 'ö').
TEST_CASE(count_simple_multibyte_glyphs_u32)
44+
{
45+
constexpr u32 string[] = { 'H', 0xe9, 'l', 'l', 'o', ',', ' ', 'W', 0xf6, 'r', 'l', 'd', '!' }; // length in code points: 13, glyphs: 13
46+
auto metrics = Line::Editor::actual_rendered_string_metrics(Utf32View(string));
47+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u);
48+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
49+
EXPECT_EQ(metrics.line_metrics[0].length, 13u);
50+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u);
51+
}
52+
53+
// Input ending in an emoji built from three code points (woman, zero-width joiner, laptop)
// that forms a single glyph: the number of grapheme breaks (15) is expected to be smaller
// than the line length in code points (17).
TEST_CASE(count_multi_codepoint_glyphs_u8)
54+
{
55+
// Returns early, checking nothing, when Unicode data is unavailable
// (presumably required for grapheme segmentation; confirm).
if (!Unicode::is_data_available())
56+
return;
57+
58+
// Bytes: 15 for "Héllo, Wörld!", 1 for the space, 4 + 3 + 4 for the emoji sequence.
constexpr auto string = "Héllo, Wörld! 👩‍💻"sv; // length in bytes: 27, code points: 17, glyphs: 15
59+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
60+
// One break per glyph: the three-code-point emoji counts once.
EXPECT_EQ(metrics.grapheme_breaks.size(), 15u);
61+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
62+
// Line lengths count every code point, including those inside the emoji cluster.
EXPECT_EQ(metrics.line_metrics[0].length, 17u);
63+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 17u);
64+
}
65+
66+
// Japanese inputs in three sub-cases: one where every glyph is a single code point, and two
// where glyphs consist of more than one code point, so the grapheme break count is expected
// to be smaller than the line length in code points.
TEST_CASE(count_jp_glyphs_u8)
67+
{
68+
// Returns early, checking nothing, when Unicode data is unavailable
// (presumably required for grapheme segmentation; confirm).
if (!Unicode::is_data_available())
69+
return;
70+
71+
{
72+
// NOTE(review): 33 bytes holds only if all 11 code points take 3 bytes, i.e. the trailing
// exclamation mark is the fullwidth form; with an ASCII '!' it would be 31. Confirm.
constexpr auto string = "コンニチハ、ワールド!"sv; // length in bytes: 33, code points: 11, glyphs: 11
73+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
74+
EXPECT_EQ(metrics.grapheme_breaks.size(), 11u);
75+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
76+
EXPECT_EQ(metrics.line_metrics[0].length, 11u);
77+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 11u);
78+
}
79+
80+
{
81+
// Ten code points forming five glyphs (two code points per glyph); each of these
// code points takes 3 bytes in UTF-8, hence 30 bytes.
constexpr auto string = "がぎぐげご"sv; // length in bytes: 30, code points: 10, glyphs: 5
82+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
83+
// One break per glyph, while the line lengths below count all ten code points.
EXPECT_EQ(metrics.grapheme_breaks.size(), 5u);
84+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
85+
EXPECT_EQ(metrics.line_metrics[0].length, 10u);
86+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 10u);
87+
}
88+
89+
{
90+
// Four code points forming three glyphs: one glyph is made of two code points.
constexpr auto string = "食べる"sv; // length in bytes: 12, code points: 4, glyphs: 3
91+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
92+
EXPECT_EQ(metrics.grapheme_breaks.size(), 3u);
93+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
94+
EXPECT_EQ(metrics.line_metrics[0].length, 4u);
95+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 4u);
96+
}
97+
}
98+
99+
// Single input mixing Latin text with accents, katakana, a three-code-point emoji and
// Persian letters. Only the emoji spans several code points, so the expected number of
// grapheme breaks (34) is two less than the line length in code points (36).
TEST_CASE(count_multi_codepoint_glyphs_mixed_u8)
100+
{
101+
// Returns early, checking nothing, when Unicode data is unavailable
// (presumably required for grapheme segmentation; confirm).
if (!Unicode::is_data_available())
102+
return;
103+
104+
constexpr auto string = "Héllo, コンニチハ! 👩‍💻 persian word: کتاب"sv; // length in bytes: 59, code points: 36, glyphs: 34
105+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
106+
// One break per glyph.
EXPECT_EQ(metrics.grapheme_breaks.size(), 34u);
107+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
108+
// Line lengths count code points, not glyphs or bytes.
EXPECT_EQ(metrics.line_metrics[0].length, 36u);
109+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 36u);
110+
}

Userland/Libraries/LibLine/Editor.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1940,7 +1940,6 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
19401940

19411941
for (size_t break_index = 0; break_index < grapheme_breaks.size(); ++break_index) {
19421942
auto i = grapheme_breaks[break_index];
1943-
auto c = view[i];
19441943
if (!mask_it.is_end() && mask_it.key() <= i)
19451944
mask = *mask_it;
19461945

@@ -1950,8 +1949,20 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
19501949
continue;
19511950
}
19521951

1952+
auto next_grapheme_start = break_index + 1 < grapheme_breaks.size() ? grapheme_breaks[break_index + 1] : view.length();
19531953
auto next_c = break_index + 1 < grapheme_breaks.size() ? view.code_points()[grapheme_breaks[break_index + 1]] : 0;
1954+
auto c = view[i];
19541955
state = actual_rendered_string_length_step(metrics, i, current_line, c, next_c, state, mask, maximum_line_width, last_return);
1956+
1957+
for (size_t j = i + 1; j < next_grapheme_start; ++j) {
1958+
// Consume the rest of the code points in this grapheme cluster without updating the state; this is just to account for their length properly.
1959+
current_line.length++;
1960+
current_line.visible_length++;
1961+
metrics.total_length++;
1962+
if (current_line.bit_length.has_value())
1963+
current_line.bit_length.value() += code_point_length_in_utf8(view[j]);
1964+
}
1965+
19551966
if (!mask_it.is_end() && mask_it.key() <= i) {
19561967
auto mask_it_peek = mask_it;
19571968
++mask_it_peek;

0 commit comments

Comments
 (0)