Skip to content

Commit 7963e44

Browse files
author
José Valim
committed
Speed up upcase and downcase for large strings
Signed-off-by: José Valim <jose.valim@plataformatec.com.br>
1 parent 3ba5e44 commit 7963e44

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

lib/elixir/unicode/unicode.ex

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@ defmodule String.Unicode do
1515
data_path = Path.join(__DIR__, "UnicodeData.txt")
1616

1717
{codes, whitespace} = Enum.reduce File.stream!(data_path), {[], []}, fn(line, {cacc, wacc}) ->
18-
[ codepoint, _name, _category,
19-
_class, bidi, _decomposition,
20-
_numeric_1, _numeric_2, _numeric_3,
21-
_bidi_mirror, _unicode_1, _iso,
22-
upper, lower, title ] = :binary.split(line, ";", [:global])
18+
[codepoint, _name, _category,
19+
_class, bidi, _decomposition,
20+
_numeric_1, _numeric_2, _numeric_3,
21+
_bidi_mirror, _unicode_1, _iso,
22+
upper, lower, title] = :binary.split(line, ";", [:global])
2323

2424
title = :binary.part(title, 0, byte_size(title) - 1)
2525

@@ -36,42 +36,42 @@ defmodule String.Unicode do
3636
special_path = Path.join(__DIR__, "SpecialCasing.txt")
3737

3838
codes = Enum.reduce File.stream!(special_path), codes, fn(line, acc) ->
39-
[ codepoint, lower, title, upper, _comment ] = :binary.split(line, "; ", [:global])
39+
[codepoint, lower, title, upper, _comment] = :binary.split(line, "; ", [:global])
4040
key = to_binary.(codepoint)
4141
:lists.keystore(key, 1, acc, {key, to_binary.(upper), to_binary.(lower), to_binary.(title)})
4242
end
4343

4444
# Downcase
4545

46-
def downcase(string), do: do_downcase(string) |> IO.iodata_to_binary
46+
def downcase(string), do: downcase(string, "")
4747

4848
for {codepoint, _upper, lower, _title} <- codes, lower && lower != codepoint do
49-
defp do_downcase(unquote(codepoint) <> rest) do
50-
unquote(:binary.bin_to_list(lower)) ++ downcase(rest)
49+
defp downcase(unquote(codepoint) <> rest, acc) do
50+
downcase(rest, acc <> unquote(lower))
5151
end
5252
end
5353

54-
defp do_downcase(<< char, rest :: binary >>) do
55-
[char|do_downcase(rest)]
54+
defp downcase(<<char, rest :: binary>>, acc) do
55+
downcase(rest, <<acc::binary, char>>)
5656
end
5757

58-
defp do_downcase(""), do: []
58+
defp downcase("", acc), do: acc
5959

6060
# Upcase
6161

62-
def upcase(string), do: do_upcase(string) |> IO.iodata_to_binary
62+
def upcase(string), do: upcase(string, "")
6363

6464
for {codepoint, upper, _lower, _title} <- codes, upper && upper != codepoint do
65-
defp do_upcase(unquote(codepoint) <> rest) do
66-
unquote(:binary.bin_to_list(upper)) ++ do_upcase(rest)
65+
defp upcase(unquote(codepoint) <> rest, acc) do
66+
upcase(rest, acc <> unquote(upper))
6767
end
6868
end
6969

70-
defp do_upcase(<< char, rest :: binary >>) do
71-
[char|do_upcase(rest)]
70+
defp upcase(<<char, rest :: binary>>, acc) do
71+
upcase(rest, <<acc::binary, char>>)
7272
end
7373

74-
defp do_upcase(""), do: []
74+
defp upcase("", acc), do: acc
7575

7676
# Titlecase once
7777

@@ -207,7 +207,7 @@ defmodule String.Graphemes do
207207
end
208208

209209
cluster = Enum.reduce File.stream!(cluster_path), HashDict.new, fn(line, dict) ->
210-
[ _full, first, last, class ] = Regex.run(regex, line)
210+
[_full, first, last, class] = Regex.run(regex, line)
211211

212212
# Skip surrogates
213213
if first == "D800" and last == "DFFF" do

0 commit comments

Comments (0)