Skip to content

Commit 9ff3280

Browse files
author
José Valim
committed
Merge pull request #1827 from pminten/unicode-default
Make unicode the default for regular expressions
2 parents 2d1facb + 6fbc92b commit 9ff3280

File tree

3 files changed

+15
-10
lines changed

3 files changed

+15
-10
lines changed

lib/elixir/lib/regex.ex

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,12 @@ defmodule Regex do
1010
# A simple regular expression that matches foo anywhere in the string
1111
%r/foo/
1212
13-
# A regular expression with case insensitive options and handling for unicode chars
14-
%r/foo/iu
13+
# A regular expression with case insensitive options
14+
%r/foo/i
1515
1616
The `re` module provides several options, the ones available in Elixir, followed by
1717
their shortcut in parentheses, are:
1818
19-
* `unicode` (u) - enables unicode specific patterns like \p
2019
* `caseless` (i) - adds case insensitivity
2120
* `dotall` (s) - causes dot to match newlines and also sets newline to anycrlf.
2221
The new line setting can be overridden by setting `(*CR)` or `(*LF)` or
@@ -75,7 +74,9 @@ defmodule Regex do
7574
{ :error, { :invalid_option, rest } }
7675

7776
translated_options ->
78-
compile(source, translated_options, options)
77+
# Always use the unicode option, we don't have a latin1 legacy like
78+
# Erlang.
79+
compile(source, [:unicode|translated_options], options)
7980
end
8081
end
8182

@@ -367,7 +368,10 @@ defmodule Regex do
367368
defp return_for(element) when is_binary(element), do: :binary
368369
defp return_for(element) when is_list(element), do: :list
369370

370-
defp translate_options(<<?u, t :: binary>>), do: [:unicode|translate_options(t)]
371+
defp translate_options(<<?u, t :: binary>>) do
372+
IO.write "The /u flag for regular expressions is no longer needed\n#{Exception.format_stacktrace}"
373+
translate_options(t)
374+
end
371375
defp translate_options(<<?i, t :: binary>>), do: [:caseless|translate_options(t)]
372376
defp translate_options(<<?x, t :: binary>>), do: [:extended|translate_options(t)]
373377
defp translate_options(<<?f, t :: binary>>), do: [:firstline|translate_options(t)]

lib/elixir/lib/string.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ defmodule String do
222222

223223
def split("", _pattern, _options), do: [""]
224224

225-
def split(binary, "", options), do: split(binary, %r""u, options)
225+
def split(binary, "", options), do: split(binary, %r"", options)
226226

227227
def split(binary, pattern, options) when is_regex(pattern) do
228228
Regex.split(pattern, binary, options)

lib/elixir/test/elixir/regex_test.exs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ defmodule Regex.BinaryTest do
4242
end
4343

4444
test :opts do
45-
assert Regex.opts(Regex.compile!("foo", "u")) == "u"
45+
assert Regex.opts(Regex.compile!("foo", "i")) == "i"
4646
end
4747

48-
test :unicode do
49-
assert ("josé" =~ %r"\p{Latin}$"u)
48+
test :unicode_by_default do
49+
assert ("josé" =~ %r"\p{Latin}$")
50+
refute ("£" =~ %r/\p{Lu}/)
5051
end
5152

5253
test :groups do
@@ -168,7 +169,7 @@ defmodule Regex.BinaryTest do
168169
end
169170
170171
defp matches_escaped?(string, match) do
171-
Regex.match? %r/#{Regex.escape(string)}/usimx, match
172+
Regex.match? %r/#{Regex.escape(string)}/simx, match
172173
end
173174
end
174175

0 commit comments

Comments
 (0)