@@ -73,18 +73,21 @@ public static function fromCodePoints(int ...$codes): self
7373 *
7474 * Install the intl extension for best results.
7575 *
76- * @param string[] $rules See "*-Latin" rules from Transliterator::listIDs()
76+ * @param string[]|\Transliterator[] $rules See "*-Latin" rules from Transliterator::listIDs()
7777 */
7878 public function ascii (array $ rules = []): self
7979 {
8080 $ str = clone $ this ;
8181 $ s = $ str ->string ;
8282 $ str ->string = '' ;
83- $ step = 0 ;
83+
84+ $ rules [] = 'nfkd ' ;
85+ $ rules [] = '[:nonspacing mark:] remove ' ;
8486
8587 if (\function_exists ('transliterator_transliterate ' )) {
88+ $ rules [] = 'any-latin/bgn ' ;
89+ $ rules [] = 'nfkd ' ;
8690 $ rules [] = '[:nonspacing mark:] remove ' ;
87- $ rules [] = 'any-latin ' ;
8891 }
8992
9093 while (\strlen ($ s ) !== $ i = strspn ($ s , self ::ASCII )) {
@@ -93,32 +96,48 @@ public function ascii(array $rules = []): self
9396 $ s = substr ($ s , $ i );
9497 }
9598
96- if (1 === ++$ step ) {
97- if (!normalizer_is_normalized ($ s , self ::NFKD )) {
98- $ s = normalizer_normalize ($ s , self ::NFKD );
99+ if ($ rules && !$ rule = array_shift ($ rules )) {
100+ $ rules = []; // An empty rule interrupts the next ones
101+ }
102+
103+ if ($ rules && $ rule ) {
104+ if ($ rule instanceof \Transliterator) {
105+ $ s = $ rule ->transliterate ($ s );
106+ continue ;
99107 }
100- } elseif (2 === $ step ) {
101- $ s = str_replace (self ::TRANSLIT_FROM , self ::TRANSLIT_TO , $ s );
102- } elseif (3 === $ step && '' !== $ rule = strtolower (array_shift ($ rules ))) {
103- $ step = 2 ;
104108
105- if ('[:nonspacing mark:] remove ' === $ rule ) {
109+ if ('nfkd ' === $ rule = strtolower ($ rule )) {
110+ if (!normalizer_is_normalized ($ s , self ::NFKD )) {
111+ $ s = normalizer_normalize ($ s , self ::NFKD );
112+ }
113+ } elseif ('[:nonspacing mark:] remove ' === $ rule ) {
106114 $ s = preg_replace ('/\p{Mn}++/u ' , '' , $ s );
107115 } elseif ('de-ascii ' === $ rule ) {
108116 $ s = preg_replace ("/([AUO]) \u{0308}(?=\p{Ll})/u " , '$1e ' , $ s );
109117 $ s = str_replace (["a \u{0308}" , "o \u{0308}" , "u \u{0308}" , "A \u{0308}" , "O \u{0308}" , "U \u{0308}" ], ['ae ' , 'oe ' , 'ue ' , 'AE ' , 'OE ' , 'UE ' ], $ s );
110118 } elseif (\function_exists ('transliterator_transliterate ' )) {
111119 if (null === $ transliterator = self ::$ transliterators [$ rule ] ?? self ::$ transliterators [$ rule ] = \Transliterator::create ($ rule )) {
112- throw new InvalidArgumentException (sprintf ('Unknown transliteration rule "%s". ' , $ rule ));
120+ if ('any-latin/bgn ' === $ rule ) {
121+ $ rule = 'any-latin ' ;
122+ $ transliterator = self ::$ transliterators [$ rule ] ?? self ::$ transliterators [$ rule ] = \Transliterator::create ($ rule );
123+ }
124+
125+ if (null === $ transliterator ) {
126+ throw new InvalidArgumentException (sprintf ('Unknown transliteration rule "%s". ' , $ rule ));
127+ }
128+
129+ self ::$ transliterators ['any-latin/bgn ' ] = $ transliterator ;
113130 }
114131
115132 $ s = $ transliterator ->transliterate ($ s );
116133 }
117134 } elseif (!\function_exists ('iconv ' )) {
135+ $ s = str_replace (self ::TRANSLIT_FROM , self ::TRANSLIT_TO , $ s );
118136 $ s = preg_replace ('/[^\x00-\x7F]/u ' , '? ' , $ s );
119137 } elseif (\ICONV_IMPL === 'glibc ' ) {
120138 $ s = iconv ('UTF-8 ' , 'ASCII//TRANSLIT ' , $ s );
121139 } else {
140+ $ s = str_replace (self ::TRANSLIT_FROM , self ::TRANSLIT_TO , $ s );
122141 $ s = preg_replace_callback ('/[^\x00-\x7F]/u ' , static function ($ c ) {
123142 $ c = iconv ('UTF-8 ' , 'ASCII//IGNORE//TRANSLIT ' , $ c [0 ]);
124143
0 commit comments