Skip to content

Commit ae819fc

Browse files
[String] improve AbstractUnicodeString::ascii() fallback logic
1 parent 9dcea42 commit ae819fc

File tree

1 file changed

+31
-12
lines changed

1 file changed

+31
-12
lines changed

AbstractUnicodeString.php

Lines changed: 31 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -73,18 +73,21 @@ public static function fromCodePoints(int ...$codes): self
7373
*
7474
* Install the intl extension for best results.
7575
*
76-
* @param string[] $rules See "*-Latin" rules from Transliterator::listIDs()
76+
* @param string[]|\Transliterator[] $rules See "*-Latin" rules from Transliterator::listIDs()
7777
*/
7878
public function ascii(array $rules = []): self
7979
{
8080
$str = clone $this;
8181
$s = $str->string;
8282
$str->string = '';
83-
$step = 0;
83+
84+
$rules[] = 'nfkd';
85+
$rules[] = '[:nonspacing mark:] remove';
8486

8587
if (\function_exists('transliterator_transliterate')) {
88+
$rules[] = 'any-latin/bgn';
89+
$rules[] = 'nfkd';
8690
$rules[] = '[:nonspacing mark:] remove';
87-
$rules[] = 'any-latin';
8891
}
8992

9093
while (\strlen($s) !== $i = strspn($s, self::ASCII)) {
@@ -93,32 +96,48 @@ public function ascii(array $rules = []): self
9396
$s = substr($s, $i);
9497
}
9598

96-
if (1 === ++$step) {
97-
if (!normalizer_is_normalized($s, self::NFKD)) {
98-
$s = normalizer_normalize($s, self::NFKD);
99+
if ($rules && !$rule = array_shift($rules)) {
100+
$rules = []; // An empty rule interrupts the next ones
101+
}
102+
103+
if ($rules && $rule) {
104+
if ($rule instanceof \Transliterator) {
105+
$s = $rule->transliterate($s);
106+
continue;
99107
}
100-
} elseif (2 === $step) {
101-
$s = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $s);
102-
} elseif (3 === $step && '' !== $rule = strtolower(array_shift($rules))) {
103-
$step = 2;
104108

105-
if ('[:nonspacing mark:] remove' === $rule) {
109+
if ('nfkd' === $rule = strtolower($rule)) {
110+
if (!normalizer_is_normalized($s, self::NFKD)) {
111+
$s = normalizer_normalize($s, self::NFKD);
112+
}
113+
} elseif ('[:nonspacing mark:] remove' === $rule) {
106114
$s = preg_replace('/\p{Mn}++/u', '', $s);
107115
} elseif ('de-ascii' === $rule) {
108116
$s = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $s);
109117
$s = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $s);
110118
} elseif (\function_exists('transliterator_transliterate')) {
111119
if (null === $transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule)) {
112-
throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
120+
if ('any-latin/bgn' === $rule) {
121+
$rule = 'any-latin';
122+
$transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule);
123+
}
124+
125+
if (null === $transliterator) {
126+
throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
127+
}
128+
129+
self::$transliterators['any-latin/bgn'] = $transliterator;
113130
}
114131

115132
$s = $transliterator->transliterate($s);
116133
}
117134
} elseif (!\function_exists('iconv')) {
135+
$s = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $s);
118136
$s = preg_replace('/[^\x00-\x7F]/u', '?', $s);
119137
} elseif (\ICONV_IMPL === 'glibc') {
120138
$s = iconv('UTF-8', 'ASCII//TRANSLIT', $s);
121139
} else {
140+
$s = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $s);
122141
$s = preg_replace_callback('/[^\x00-\x7F]/u', static function ($c) {
123142
$c = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c[0]);
124143

0 commit comments

Comments (0)