Skip to content

Commit 89bebea

Browse files
author
José Valim
committed
Merge pull request #1651 from vanstee/string-split-trim
Add support for `trim` option to `String.split` and `Regex.split`
2 parents c826aad + e304ea9 commit 89bebea

File tree

8 files changed: 59 additions & 31 deletions

lib/elixir/lib/kernel.ex

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3621,15 +3621,15 @@ defmodule Kernel do
36213621
case is_binary(string) do
36223622
true ->
36233623
case mod do
3624-
?b -> lc p inlist String.split(string), p != "", do: p
3625-
?a -> lc p inlist String.split(string), p != "", do: binary_to_atom(p)
3626-
?c -> lc p inlist String.split(string), p != "", do: String.to_char_list!(p)
3624+
?b -> String.split(string)
3625+
?a -> lc p inlist String.split(string), do: binary_to_atom(p)
3626+
?c -> lc p inlist String.split(string), do: String.to_char_list!(p)
36273627
end
36283628
false ->
36293629
case mod do
3630-
?b -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: p)
3631-
?a -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: binary_to_atom(p))
3632-
?c -> quote do: lc(p inlist String.split(unquote(string)), p != "", do: String.to_char_list!(p))
3630+
?b -> quote do: String.split(unquote(string))
3631+
?a -> quote do: lc(p inlist String.split(unquote(string)), do: binary_to_atom(p))
3632+
?c -> quote do: lc(p inlist String.split(unquote(string)), do: String.to_char_list!(p))
36333633
end
36343634
end
36353635
end

lib/elixir/lib/regex.ex

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -264,16 +264,17 @@ defmodule Regex do
264264
def split(regex, string, options // [])
265265

266266
def split(regex(re_pattern: compiled), string, options) do
267-
parts =
268-
cond do
269-
Keyword.get(options, :global) == false -> 2
270-
p = Keyword.get(options, :parts) -> p
271-
true -> :infinity
272-
end
267+
defaults = [global: true, trim: true, parts: :infinity, return: return_for(string)]
268+
options = Keyword.merge(defaults, options)
273269

274-
return = Keyword.get(options, :return, return_for(string))
275-
opts = [return: return, parts: parts]
276-
:re.split(string, compiled, opts)
270+
unless options[:global], do: options = Keyword.put(options, :parts, 2)
271+
272+
valid_options = Dict.take(options, [:parts, :return])
273+
splits = :re.split(string, compiled, valid_options)
274+
275+
if options[:trim], do: splits = Enum.filter(splits, &(&1 != ""))
276+
277+
splits
277278
end
278279

279280
@doc %B"""

lib/elixir/lib/string.ex

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ defmodule String do
140140

141141
@doc """
142142
Splits a string on substrings at each Unicode whitespace
143-
occurrence.
143+
occurrence with leading and trailing whitespace ignored.
144144
145145
## Examples
146146
@@ -149,7 +149,7 @@ defmodule String do
149149
iex> String.split("foo" <> <<194, 133>> <> "bar")
150150
["foo", "bar"]
151151
iex> String.split(" foo bar ")
152-
["", "foo", "bar", ""]
152+
["foo", "bar"]
153153
154154
"""
155155
@spec split(t) :: [t]
@@ -163,12 +163,17 @@ defmodule String do
163163
The string is split into as many parts as possible by
164164
default, unless the `global` option is set to `false`.
165165
166+
Empty strings are removed from the result, unless the
167+
`trim` option is set to `false`.
168+
166169
## Examples
167170
168171
iex> String.split("a,b,c", ",")
169172
["a", "b", "c"]
170173
iex> String.split("a,b,c", ",", global: false)
171174
["a", "b,c"]
175+
iex> String.split(" a b c ", " ", trim: false)
176+
["", "a", "b", "c", ""]
172177
173178
iex> String.split("1,2 3,4", [" ", ","])
174179
["1", "2", "3", "4"]
@@ -188,12 +193,19 @@ defmodule String do
188193
def split("", _pattern, _options), do: [""]
189194

190195
def split(binary, pattern, options) when is_regex(pattern) do
191-
Regex.split(pattern, binary, global: options[:global])
196+
Regex.split(pattern, binary, options)
192197
end
193198

194199
def split(binary, pattern, options) do
195-
opts = if options[:global] != false, do: [:global], else: []
196-
:binary.split(binary, pattern, opts)
200+
defaults = [global: true, trim: true]
201+
options = Keyword.merge(defaults, options)
202+
203+
option_keys = Enum.filter_map(options, &elem(&1, 1), &elem(&1, 0))
204+
splits = :binary.split(binary, pattern, option_keys)
205+
206+
if options[:trim], do: splits = Enum.filter(splits, &(&1 != ""))
207+
208+
splits
197209
end
198210

199211
@doc """

lib/elixir/priv/unicode.ex

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,11 @@ defmodule String.Unicode do
145145

146146
lc codepoint inlist whitespace do
147147
defp do_split(unquote(codepoint) <> rest, buffer, acc) do
148-
do_split(rest, "", [buffer | acc])
148+
if buffer != "" do
149+
do_split(rest, "", [buffer | acc])
150+
else
151+
do_split(rest, buffer, acc)
152+
end
149153
end
150154
end
151155

@@ -154,7 +158,11 @@ defmodule String.Unicode do
154158
end
155159

156160
defp do_split(<<>>, buffer, acc) do
157-
[buffer | acc]
161+
if buffer != "" do
162+
[buffer | acc]
163+
else
164+
acc
165+
end
158166
end
159167

160168
# Graphemes

lib/elixir/test/elixir/regex_test.exs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,13 +113,14 @@ defmodule Regex.BinaryTest do
113113
end
114114
115115
test :split do
116-
assert Regex.split(%r",", "") == [""]
116+
assert Regex.split(%r",", "") == []
117117
assert Regex.split(%r" ", "foo bar baz") == ["foo", "bar", "baz"]
118118
assert Regex.split(%r" ", "foo bar baz", parts: 2) == ["foo", "bar baz"]
119119
assert Regex.split(%r"\s", "foobar") == ["foobar"]
120120
assert Regex.split(%r" ", "foo bar baz") == ["foo", "bar", "baz"]
121-
assert Regex.split(%r"=", "key=") == ["key", ""]
122-
assert Regex.split(%r"=", "=value") == ["", "value"]
121+
assert Regex.split(%r" ", " foo bar baz ", trim: false) == ["", "foo", "bar", "baz", ""]
122+
assert Regex.split(%r"=", "key=") == ["key"]
123+
assert Regex.split(%r"=", "=value") == ["value"]
123124
end
124125
125126
test :replace do

lib/elixir/test/elixir/string_test.exs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,33 @@ defmodule StringTest do
1818
test :split do
1919
assert String.split("") == [""]
2020
assert String.split("foo bar") == ["foo", "bar"]
21-
assert String.split(" foo bar") == ["", "foo", "bar"]
22-
assert String.split("foo bar ") == ["foo", "bar", ""]
23-
assert String.split(" foo bar ") == ["", "foo", "bar", ""]
24-
assert String.split("foo\t\n\v\f\r\sbar\n") == ["foo", "", "", "", "", "", "bar", ""]
21+
assert String.split(" foo bar") == ["foo", "bar"]
22+
assert String.split("foo bar ") == ["foo", "bar"]
23+
assert String.split(" foo bar ") == ["foo", "bar"]
24+
assert String.split("foo\t\n\v\f\r\sbar\n") == ["foo", "bar"]
2525
assert String.split("foo" <> <<31>> <> "bar") == ["foo", "bar"]
2626
assert String.split("foo" <> <<194, 133>> <> "bar") == ["foo", "bar"]
2727

2828
assert String.split("", ",") == [""]
2929
assert String.split("a,b,c", ",") == ["a", "b", "c"]
3030
assert String.split("a,b", ".") == ["a,b"]
3131
assert String.split("1,2 3,4", [" ", ","]) == ["1", "2", "3", "4"]
32+
assert String.split(" a b c ", " ") == ["a", "b", "c"]
3233

3334
assert String.split("a,b,c", ",", global: false) == ["a", "b,c"]
3435
assert String.split("1,2 3,4", [" ", ","], global: false) == ["1", "2 3,4"]
36+
37+
assert String.split(" a b c ", " ", trim: false) == ["", "a", "b", "c", ""]
38+
assert String.split(" a b c ", " ", trim: false, global: false) == ["", "a b c "]
3539
end
3640

3741
test :split_with_regex do
3842
assert String.split("", %r{,}) == [""]
3943
assert String.split("a,b", %r{,}) == ["a", "b"]
4044
assert String.split("a,b,c", %r{,}) == ["a", "b", "c"]
4145
assert String.split("a,b,c", %r{,}, global: false) == ["a", "b,c"]
46+
assert String.split("a,b.c ", %r{\W}) == ["a", "b", "c"]
47+
assert String.split("a,b.c ", %r{\W}, trim: false) == ["a", "b", "c", ""]
4248
assert String.split("a,b", %r{\.}) == ["a,b"]
4349
end
4450

lib/ex_unit/lib/ex_unit/doc_test.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ defmodule ExUnit.DocTest do
331331
end
332332

333333
defp extract_tests(line, doc) do
334-
lines = String.split(doc, %r/\n/) |> adjust_indent
334+
lines = String.split(doc, %r/\n/, trim: false) |> adjust_indent
335335
extract_tests(lines, line, "", "", [], true)
336336
end
337337

lib/iex/test/iex/helpers_test.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ defmodule IEx.HelpersTest do
133133
assert ["ebin", "lib", "mix.exs", "test"]
134134
= capture_io(fn -> ls end)
135135
|> String.split
136-
|> Enum.filter(&(&1 != ""))
136+
|> Enum.map(String.strip(&1))
137137
|> Enum.sort
138138
assert capture_io(fn -> ls "~" end) == capture_io(fn -> ls System.user_home end)
139139
end

0 commit comments

Comments
 (0)