Skip to content

Commit ba88ee7

Browse files
author
José Valim
committed
Improve binary handling to properly consider chars and bitstring with specifiers
Closes #1963
1 parent 9cd5a47 commit ba88ee7

File tree

4 files changed

+70
-59
lines changed

4 files changed

+70
-59
lines changed

lib/elixir/lib/kernel.ex

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,25 +1421,19 @@ defmodule Kernel do
14211421
# Extracts concatenations in order to optimize many
14221422
# concatenations into one single clause.
14231423
defp extract_concatenations({ :<>, _, [left, right] }) do
1424-
wrap_concatenation(left) ++ extract_concatenations(right)
1424+
[wrap_concatenation(left)|extract_concatenations(right)]
14251425
end
14261426

14271427
defp extract_concatenations(other) do
1428-
wrap_concatenation(other)
1428+
[wrap_concatenation(other)]
14291429
end
14301430

1431-
# If it is a binary, we don't need to add the binary
1432-
# tag. This allows us to use <> on pattern matching.
14331431
defp wrap_concatenation(binary) when is_binary(binary) do
1434-
[binary]
1435-
end
1436-
1437-
defp wrap_concatenation({ :<<>>, _, parts }) do
1438-
parts
1432+
binary
14391433
end
14401434

14411435
defp wrap_concatenation(other) do
1442-
[{ :::, [], [other, { :binary, [], nil }] }]
1436+
{ :::, [], [other, { :binary, [], nil }] }
14431437
end
14441438

14451439
@doc """

lib/elixir/src/elixir_bitstring.erl

Lines changed: 46 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -124,41 +124,59 @@ build_bitstr_each(Fun, [{'::',_,[H,V]}|T], Meta, S, Acc) ->
124124
build_bitstr_each(Fun, [H|T], Meta, S, Acc) ->
125125
build_bitstr_each(Fun, T, Meta, S, Acc, H, default, default).
126126

127+
build_bitstr_each(Fun, T, Meta, S, Acc, H, default, Types) when is_binary(H) ->
128+
Element =
129+
case types_allow_splice(Types, []) of
130+
true ->
131+
%% See explanation in elixir_utils:elixir_to_erl/1 to know
132+
%% why we can simply convert the binary to a list.
133+
{ bin_element, ?line(Meta), { string, 0, binary_to_list(H) }, default, default };
134+
false ->
135+
case types_require_conversion(Types) of
136+
true ->
137+
{ bin_element, ?line(Meta), { string, 0, elixir_utils:characters_to_list(H) }, default, Types };
138+
false ->
139+
elixir_errors:compile_error(Meta, S#elixir_scope.file, "invalid types for literal string in <<>>. "
140+
"Accepted types are: little, big, utf8, utf16, utf32, bits, bytes, binary, bitstring")
141+
end
142+
end,
143+
144+
build_bitstr_each(Fun, T, Meta, S, [Element|Acc]);
145+
146+
build_bitstr_each(_Fun, _T, Meta, S, _Acc, H, _Size, _Types) when is_binary(H) ->
147+
elixir_errors:compile_error(Meta, S#elixir_scope.file, "size is not supported for literal string in <<>>");
148+
149+
build_bitstr_each(_Fun, _T, Meta, S, _Acc, H, _Size, _Types) when is_list(H); is_atom(H) ->
150+
elixir_errors:compile_error(Meta, S#elixir_scope.file, "invalid literal ~ts in <<>>",
151+
['Elixir.Macro':to_string(H)]);
152+
127153
build_bitstr_each(Fun, T, Meta, S, Acc, H, Size, Types) ->
128154
{ Expr, NS } = Fun(H, S),
129155

130-
AllowString = types_allow_string(Types),
131-
AllowSplice = types_allow_splice(Types),
132-
AllowAny = (AllowString orelse AllowSplice) andalso (Size == default),
133-
134-
case AllowAny andalso Expr of
135-
{ bin, _, [{ bin_element, 0, { string, 0, String }, default, default }] } when AllowString ->
136-
build_bitstr_each(Fun, T, Meta, NS, [{ bin_element, ?line(Meta), { string, 0, String }, Size, Types }|Acc]);
137-
{ bin, _, Elements } when AllowSplice ->
138-
build_bitstr_each(Fun, T, Meta, NS, lists:reverse(Elements) ++ Acc);
139-
{ cons, _, _, _ } = Cons ->
140-
build_bitstr_each(Fun, T, Meta, NS, rehash_cons(Cons, Size, Types, []) ++ Acc);
141-
{ nil, _ } ->
142-
build_bitstr_each(Fun, T, Meta, NS, Acc);
156+
case Expr of
157+
{ bin, _, Elements } ->
158+
case (Size == default) andalso types_allow_splice(Types, Elements) of
159+
true -> build_bitstr_each(Fun, T, Meta, NS, lists:reverse(Elements) ++ Acc);
160+
false -> build_bitstr_each(Fun, T, Meta, NS, [{ bin_element, ?line(Meta), Expr, Size, Types }|Acc])
161+
end;
143162
_ ->
144163
build_bitstr_each(Fun, T, Meta, NS, [{ bin_element, ?line(Meta), Expr, Size, Types }|Acc])
145164
end.
146165

147-
rehash_cons({ nil, _ }, _Size, _Types, Acc) -> Acc;
148-
rehash_cons({ cons, Line, Left, Right }, Size, Types, Acc) ->
149-
rehash_cons(Right, Size, Types, [{ bin_element, Line, Left, Size, Types }|Acc]).
150-
151-
types_allow_string([End|T]) when End == little; End == big -> types_allow_string(T);
152-
types_allow_string([UTF|T]) when UTF == utf8; UTF == utf16; UTF == utf32 -> types_allow_string(T);
153-
types_allow_string([]) -> true;
154-
types_allow_string(_) -> false.
155-
156-
types_allow_splice(default) -> true;
157-
types_allow_splice([bytes]) -> true;
158-
types_allow_splice([binary]) -> true;
159-
types_allow_splice([bits]) -> true;
160-
types_allow_splice([bitstring]) -> true;
161-
types_allow_splice(_) -> false.
166+
types_require_conversion([End|T]) when End == little; End == big -> types_require_conversion(T);
167+
types_require_conversion([UTF|T]) when UTF == utf8; UTF == utf16; UTF == utf32 -> types_require_conversion(T);
168+
types_require_conversion([]) -> true;
169+
types_require_conversion(_) -> false.
170+
171+
types_allow_splice([bytes], Elements) -> lists:all(fun has_default_size/1, Elements);
172+
types_allow_splice([binary], Elements) -> lists:all(fun has_default_size/1, Elements);
173+
types_allow_splice([bits], _) -> true;
174+
types_allow_splice([bitstring], _) -> true;
175+
types_allow_splice(default, _) -> true;
176+
types_allow_splice(_, _) -> false.
177+
178+
has_default_size({ bin_element, _, _, default, _ }) -> true;
179+
has_default_size({ bin_element, _, _, _, _ }) -> false.
162180

163181
%% Extra bitstring specifiers
164182

lib/elixir/src/elixir_utils.erl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ elixir_to_erl(Tree) when is_float(Tree) ->
7171
{ float, 0, Tree };
7272

7373
elixir_to_erl(Tree) when is_binary(Tree) ->
74+
%% Note that our binaries are utf-8 encoded and we are converting
75+
%% to a list using binary_to_list. The reason for this is that Erlang
76+
%% considers a string in a binary to be encoded in latin1, so the bytes
77+
%% are not changed in any fashion.
7478
{ bin, 0, [{ bin_element, 0, { string, 0, binary_to_list(Tree) }, default, default }] };
7579

7680
elixir_to_erl(Function) when is_function(Function) ->

lib/elixir/test/elixir/kernel/binary_test.exs

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,44 +107,39 @@ bar
107107
end
108108

109109
test :literal do
110-
assert <<106, 111, 115, 101>> == << "jose" :: binary >>
111-
assert <<106, 111, 115, 101>> == << "jose" :: bits >>
112-
assert <<106, 111, 115, 101>> == << "jose" :: bitstring >>
113-
assert <<106, 111, 115, 101>> == << "jose" :: bytes >>
110+
assert <<106,111,115,195,169>> == << "josé" :: binary >>
111+
assert <<106,111,115,195,169>> == << "josé" :: bits >>
112+
assert <<106,111,115,195,169>> == << "josé" :: bitstring >>
113+
assert <<106,111,115,195,169>> == << "josé" :: bytes >>
114114

115-
assert <<106, 111, 115, 101>> == << "jose" :: utf8 >>
116-
assert <<106, 111, 115, 101>> == << 'jose' :: utf8 >>
117-
118-
assert <<0, 106, 0, 111, 0, 115, 0, 101>> == << "jose" :: utf16 >>
119-
assert <<0, 106, 0, 111, 0, 115, 0, 101>> == << 'jose' :: utf16 >>
120-
121-
assert <<106, 0, 111, 0, 115, 0, 101, 0>> == << "jose" :: [utf16, little] >>
122-
assert <<106, 0, 111, 0, 115, 0, 101, 0>> == << 'jose' :: [utf16, little] >>
123-
124-
assert <<0, 0, 0, 106, 0, 0, 0, 111, 0, 0, 0, 115, 0, 0, 0, 101>> == << "jose" :: utf32 >>
125-
assert <<0, 0, 0, 106, 0, 0, 0, 111, 0, 0, 0, 115, 0, 0, 0, 101>> == << 'jose' :: utf32 >>
115+
assert <<106,111,115,195,169>> == << "josé" :: utf8 >>
116+
assert <<0,106,0,111,0,115,0,233>> == << "josé" :: utf16 >>
117+
assert <<106,0,111,0,115,0,233,0>> == << "josé" :: [utf16, little] >>
118+
assert <<0,0,0,106,0,0,0,111,0,0,0,115,0,0,0,233>> == << "josé" :: utf32 >>
126119
end
127120

128121
test :literal_errors do
129-
assert_raise ArgumentError, fn ->
122+
assert_raise CompileError, fn ->
130123
Code.eval_string(%s[<< "foo" :: integer >>])
131124
end
132125

133-
assert_raise ArgumentError, fn ->
126+
assert_raise CompileError, fn ->
134127
Code.eval_string(%s[<< "foo" :: float >>])
135128
end
136129

137-
assert_raise ArgumentError, fn ->
130+
assert_raise CompileError, fn ->
138131
Code.eval_string(%s[<< 'foo' :: binary >>])
139132
end
133+
134+
assert_raise ArgumentError, fn ->
135+
Code.eval_string(%s[<<1::size(4)>> <> "foo"])
136+
end
140137
end
141138

142-
@binary "new "
143-
@charlist 'old '
139+
@binary "new "
144140

145141
test :bitsyntax_with_expansion do
146142
assert <<@binary, "world">> == "new world"
147-
assert <<@charlist, "world">> == "old world"
148143
end
149144

150145
test :bitsyntax_translation do

0 commit comments

Comments
 (0)