Skip to content

Commit c1e3d4f

Browse files
author
José Valim
committed
Optimize rstrip
This new implementation no longer takes time linear in the length of the string, and it achieves this without slowing down smaller samples. For a string that is 100 bytes long, it is 25x faster than the previous implementation. Signed-off-by: José Valim <jose.valim@plataformatec.com.br>
1 parent b4fc2b3 commit c1e3d4f

File tree

1 file changed

+27
-15
lines changed

1 file changed

+27
-15
lines changed

lib/elixir/unicode/unicode.ex

Lines changed: 27 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -99,27 +99,39 @@ defmodule String.Unicode do
9999

100100
def lstrip(other) when is_binary(other), do: other
101101

102-
def rstrip(string) when is_binary(string) do
103-
do_rstrip(string, [], [])
104-
end
105-
102+
@whitespace_max_size 3
106103
for codepoint <- whitespace do
107-
c = :binary.bin_to_list(codepoint) |> :lists.reverse
108-
109-
defp do_rstrip(unquote(codepoint) <> rest, acc1, acc2) do
110-
do_rstrip(rest, unquote(c) ++ (acc1 || acc2), acc2)
104+
# We need to increment @whitespace_max_size
105+
# if we add a new entry here.
106+
case byte_size(codepoint) do
107+
3 ->
108+
defp do_rstrip(unquote(codepoint)), do: -3
109+
2 ->
110+
defp do_rstrip(<<_, unquote(codepoint)>>), do: -2
111+
1 ->
112+
defp do_rstrip(<<unquote(codepoint), unquote(codepoint), unquote(codepoint)>>), do: -3
113+
defp do_rstrip(<<_, unquote(codepoint), unquote(codepoint)>>), do: -2
114+
defp do_rstrip(<<_, _, unquote(codepoint)>>), do: -1
111115
end
112116
end
113117

114-
defp do_rstrip(<< char, rest :: binary >>, nil, acc2) do
115-
do_rstrip(rest, nil, [char|acc2])
116-
end
118+
defp do_rstrip(_), do: 0
117119

118-
defp do_rstrip(<< char, rest :: binary >>, acc1, _acc2) do
119-
do_rstrip(rest, nil, [char|acc1])
120-
end
120+
def rstrip(string) when is_binary(string) do
121+
size = byte_size(string)
122+
123+
trail =
124+
if size < @whitespace_max_size do
125+
string
126+
else
127+
binary_part(string, size, -@whitespace_max_size)
128+
end
121129

122-
defp do_rstrip(<<>>, _acc1, acc2), do: acc2 |> :lists.reverse |> IO.iodata_to_binary
130+
case do_rstrip(trail) do
131+
0 -> string
132+
x -> rstrip(binary_part(string, 0, size + x))
133+
end
134+
end
123135

124136
# Split
125137

0 commit comments

Comments
 (0)