Skip to content

Commit 9583121

Browse files
committed
LibLine: Correctly count multi-code-point glyphs towards line length
This also adds some tests to make sure things don't break.
1 parent 00eb9d1 commit 9583121

File tree

5 files changed

+130
-1
lines changed

5 files changed

+130
-1
lines changed

Meta/Lagom/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,6 +691,7 @@ if (BUILD_LAGOM)
691691
LibHID
692692
LibHTTP
693693
LibIMAP
694+
LibLine
694695
LibLocale
695696
LibMarkdown
696697
LibMedia

Tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ add_subdirectory(LibGfx)
1616
add_subdirectory(LibHID)
1717
add_subdirectory(LibIMAP)
1818
add_subdirectory(LibJS)
19+
add_subdirectory(LibLine)
1920
add_subdirectory(LibLocale)
2021
add_subdirectory(LibMarkdown)
2122
add_subdirectory(LibMedia)

Tests/LibLine/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
set(TEST_SOURCES # Test translation units under Tests/LibLine.
2+
TestMetrics.cpp
3+
)
4+
5+
foreach(source IN LISTS TEST_SOURCES) # One serenity_test() call per listed source file.
6+
serenity_test("${source}" LibLine LIBS LibLine LibUnicode) # NOTE(review): presumably links each test against LibLine and LibUnicode; confirm against serenity_test's definition.
7+
endforeach()

Tests/LibLine/TestMetrics.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/*
2+
* Copyright (c) 2025, the SerenityOS developers.
3+
*
4+
* SPDX-License-Identifier: BSD-2-Clause
5+
*/
6+
7+
#include <LibTest/TestCase.h>
8+
9+
#include <LibLine/Editor.h>
10+
#include <LibUnicode/Forward.h>
11+
12+
TEST_CASE(count_ascii_glyphs_u8) // Pure-ASCII input passed as a string view: bytes, code points and glyphs all coincide.
13+
{
14+
constexpr auto string = "Hello, World!"sv; // length in bytes: 13, code points: 13, glyphs: 13
15+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
16+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u); // Expected to equal the glyph count noted above.
17+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The literal contains no line break; a single line entry is expected.
18+
EXPECT_EQ(metrics.line_metrics[0].length, 13u); // Expected to equal the code-point count noted above.
19+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u); // Expected to equal the length of the line.
20+
}
21+
22+
TEST_CASE(count_ascii_glyphs_u32) // Same ASCII text as the u8 variant, but supplied as an array of u32 code points via Utf32View.
23+
{
24+
constexpr u32 string[] = { 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd', '!' }; // length in code points: 13, glyphs: 13
25+
auto metrics = Line::Editor::actual_rendered_string_metrics(Utf32View(string));
26+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u); // Expected to equal the glyph count noted above.
27+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The array contains no line break; a single line entry is expected.
28+
EXPECT_EQ(metrics.line_metrics[0].length, 13u); // Expected to equal the code-point count noted above.
29+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u); // Expected to equal the length of the line.
30+
}
31+
32+
TEST_CASE(count_simple_multibyte_glyphs_u8) // Two characters take two UTF-8 bytes each, so the byte count (15) exceeds the code-point count (13).
33+
{
34+
constexpr auto string = "Héllo, Wörld!"sv; // length in bytes: 15, code points: 13, glyphs: 13
35+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
36+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u); // Expected to equal the glyph count, not the byte count.
37+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The literal contains no line break; a single line entry is expected.
38+
EXPECT_EQ(metrics.line_metrics[0].length, 13u); // Expected to equal the code-point count, not the byte count.
39+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u); // Expected to equal the length of the line.
40+
}
41+
42+
TEST_CASE(count_simple_multibyte_glyphs_u32) // Same text as the u8 variant, supplied as u32 code points (0xe9 and 0xf6 stand in for the accented letters).
43+
{
44+
constexpr u32 string[] = { 'H', 0xe9, 'l', 'l', 'o', ',', ' ', 'W', 0xf6, 'r', 'l', 'd', '!' }; // length in code points: 13, glyphs: 13
45+
auto metrics = Line::Editor::actual_rendered_string_metrics(Utf32View(string));
46+
EXPECT_EQ(metrics.grapheme_breaks.size(), 13u); // Expected to equal the glyph count noted above.
47+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The array contains no line break; a single line entry is expected.
48+
EXPECT_EQ(metrics.line_metrics[0].length, 13u); // Expected to equal the code-point count noted above.
49+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 13u); // Expected to equal the length of the line.
50+
}
51+
52+
TEST_CASE(count_multi_codepoint_glyphs_u8) // Text ending in an emoji sequence that spans three code points but is expected to count as one glyph.
53+
{
54+
if (!Unicode::is_data_available())
55+
return; // Nothing is checked when Unicode data is reported as unavailable.
56+
57+
constexpr auto string = "Héllo, Wörld! 👩‍💻"sv; // length in bytes: 27 (16 for the text and space, 4 + 3 + 4 for the emoji sequence), code points: 17, glyphs: 15
58+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
59+
EXPECT_EQ(metrics.grapheme_breaks.size(), 15u); // One break per glyph: the three-code-point emoji sequence counts once.
60+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The literal contains no line break; a single line entry is expected.
61+
EXPECT_EQ(metrics.line_metrics[0].length, 17u); // Length is expected in code points (17), not glyphs (15).
62+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 17u); // Expected to equal the length of the line.
63+
}
64+
65+
TEST_CASE(count_jp_glyphs_u8) // Japanese text in three scoped cases; per the counts in the comments, the second and third contain glyphs made of more than one code point.
66+
{
67+
if (!Unicode::is_data_available())
68+
return; // Nothing is checked when Unicode data is reported as unavailable.
69+
70+
{
71+
constexpr auto string = "コンニチハ、ワールド!"sv; // length in bytes: 33, code points: 11, glyphs: 11
72+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
73+
EXPECT_EQ(metrics.grapheme_breaks.size(), 11u); // Expected to equal the glyph count noted above.
74+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
75+
EXPECT_EQ(metrics.line_metrics[0].length, 11u); // Expected to equal the code-point count noted above.
76+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 11u);
77+
}
78+
79+
{
80+
constexpr auto string = "がぎぐげご"sv; // length in bytes: 30 (10 code points of 3 bytes each), code points: 10, glyphs: 5
81+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
82+
EXPECT_EQ(metrics.grapheme_breaks.size(), 5u); // Two code points per glyph, so only five breaks are expected.
83+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
84+
EXPECT_EQ(metrics.line_metrics[0].length, 10u); // Length is expected in code points (10), not glyphs (5).
85+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 10u);
86+
}
87+
88+
{
89+
constexpr auto string = "食べる"sv; // length in bytes: 12, code points: 4, glyphs: 3
90+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
91+
EXPECT_EQ(metrics.grapheme_breaks.size(), 3u); // One of the three glyphs spans two code points.
92+
EXPECT_EQ(metrics.line_metrics.size(), 1u);
93+
EXPECT_EQ(metrics.line_metrics[0].length, 4u); // Length is expected in code points (4), not glyphs (3).
94+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 4u);
95+
}
96+
}
97+
98+
TEST_CASE(count_multi_codepoint_glyphs_mixed_u8) // Mixed Latin, katakana, emoji-sequence and Persian text in a single line.
99+
{
100+
if (!Unicode::is_data_available())
101+
return; // Nothing is checked when Unicode data is reported as unavailable.
102+
103+
constexpr auto string = "Héllo, コンニチハ! 👩‍💻 persian word: کتاب"sv; // length in bytes: 59, code points: 36, glyphs: 34
104+
auto metrics = Line::Editor::actual_rendered_string_metrics(string);
105+
EXPECT_EQ(metrics.grapheme_breaks.size(), 34u); // Two fewer than the code-point count: the emoji sequence's three code points form one glyph.
106+
EXPECT_EQ(metrics.line_metrics.size(), 1u); // The literal contains no line break; a single line entry is expected.
107+
EXPECT_EQ(metrics.line_metrics[0].length, 36u); // Length is expected in code points (36), not glyphs (34).
108+
EXPECT_EQ(metrics.line_metrics[0].visible_length, 36u); // Expected to equal the length of the line.
109+
}

Userland/Libraries/LibLine/Editor.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1940,7 +1940,6 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
19401940

19411941
for (size_t break_index = 0; break_index < grapheme_breaks.size(); ++break_index) {
19421942
auto i = grapheme_breaks[break_index];
1943-
auto c = view[i];
19441943
if (!mask_it.is_end() && mask_it.key() <= i)
19451944
mask = *mask_it;
19461945

@@ -1950,8 +1949,20 @@ StringMetrics Editor::actual_rendered_string_metrics(Utf32View const& view, RedB
19501949
continue;
19511950
}
19521951

1952+
auto next_grapheme_start = break_index + 1 < grapheme_breaks.size() ? grapheme_breaks[break_index + 1] : view.length();
19531953
auto next_c = break_index + 1 < grapheme_breaks.size() ? view.code_points()[grapheme_breaks[break_index + 1]] : 0;
1954+
auto c = view[i];
19541955
state = actual_rendered_string_length_step(metrics, i, current_line, c, next_c, state, mask, maximum_line_width, last_return);
1956+
1957+
for (size_t j = i + 1; j < next_grapheme_start; ++j) {
1958+
// Consume the rest of the code points in this grapheme cluster without updating the state; this is just to account for their length properly.
1959+
current_line.length++;
1960+
current_line.visible_length++;
1961+
metrics.total_length++;
1962+
if (current_line.bit_length.has_value())
1963+
current_line.bit_length.value() += code_point_length_in_utf8(view[j]);
1964+
}
1965+
19551966
if (!mask_it.is_end() && mask_it.key() <= i) {
19561967
auto mask_it_peek = mask_it;
19571968
++mask_it_peek;

0 commit comments

Comments
 (0)