@@ -490,18 +490,12 @@ defmodule String.Normalizer do
490490 normalize_nfd ( rest , acc <> binary )
491491 end
492492
493- for { binary , decomposition } <- decompositions do
494- defp normalize_nfd ( unquote ( binary ) <> rest , acc ) do
495- normalize_nfd ( unquote ( IO . iodata_to_binary ( decomposition ) ) <> rest , acc )
496- end
497- end
498-
# Fallback clause of the NFD walk: takes the leading grapheme off `binary`
# and appends its normalized form to the accumulator binary `acc`.
#
# `String.Unicode.next_grapheme_size/1` returns the byte size `n` of the
# leading grapheme together with the remaining binary.
defp normalize_nfd(binary, acc) do
  {n, rest} = String.Unicode.next_grapheme_size(binary)
  part = :binary.part(binary, 0, n)

  case n do
    # A one-byte grapheme is appended untouched. The recursion has to stay in
    # normalize_nfd/2: continuing with normalize_nfc/2 would normalize the
    # remainder of the string to a different form.
    1 -> normalize_nfd(rest, acc <> part)
    # Longer graphemes are decomposed and reordered by canonical_order/2.
    _ -> normalize_nfd(rest, acc <> canonical_order(part, []))
  end
end
507501
@@ -520,11 +514,21 @@ defmodule String.Normalizer do
520514 end
521515 end
522516
# canonical_order/2 decomposes a grapheme and puts its combining marks into
# canonical order. `acc` collects `{codepoint, combining_class}` tuples, most
# recent first.

# One clause per known decomposition: a matching prefix is replaced by its
# decomposition and the result is processed again, so a decomposition whose
# output is itself decomposable gets expanded as well.
for {binary, decomposition} <- decompositions do
  defp canonical_order(unquote(binary) <> rest, acc) do
    canonical_order(unquote(IO.iodata_to_binary(decomposition)) <> rest, acc)
  end
end

# Any other codepoint is recorded together with its combining class.
defp canonical_order(<<h::utf8, t::binary>>, acc) do
  canonical_order(t, [{h, combining_class(h)} | acc])
end

# A single collected codepoint needs no reordering.
defp canonical_order(<<>>, [{x, _}]) do
  <<x::utf8>>
end

# End of input: restore the original order, then sort by combining class.
# Canonical ordering may only swap adjacent marks whose classes are both
# non-zero, so the (stable) key sort is applied separately to each run
# between starters (class 0) and never moves a mark across a starter.
defp canonical_order(<<>>, acc) do
  acc
  |> Enum.reverse()
  |> Enum.chunk_by(fn {_codepoint, class} -> class == 0 end)
  |> Enum.flat_map(fn run -> :lists.keysort(2, run) end)
  |> Enum.map(fn {codepoint, _class} -> <<codepoint::utf8>> end)
  |> IO.iodata_to_binary()
end
529533
530534 for { codepoint , class } <- combining_classes do
@@ -533,8 +537,6 @@ defmodule String.Normalizer do
533537
534538 defp combining_class ( _ ) , do: 0
535539
536- defp compose ( << _ :: utf8 >> = binary ) , do: binary
537-
538540 defp compose ( << lead :: utf8 , vowel :: utf8 , rest :: binary >> ) when lead in 0x1100 .. 0x1112 and vowel in 0x1161 .. 0x1175 do
539541 codepoint = 0xAC00 + ( ( lead - 0x1100 ) * 588 ) + ( ( vowel - 0x1161 ) * 28 )
540542 case rest do
@@ -545,29 +547,28 @@ defmodule String.Normalizer do
545547 end
546548 end
547549
# General clause of compose/1. A direct hit in compose_one/1 is returned as
# is; otherwise the first codepoint becomes the base and the remaining
# codepoints are folded in one at a time by compose_many/4.
defp compose(binary) do
  case compose_one(binary) do
    nil ->
      <<first::utf8, tail::binary>> = binary
      # Starting one below the base's own class lets the next codepoint pass
      # the `last_class < part_class` check in compose_many/4 whenever its
      # class is at least the base's class.
      compose_many(tail, <<first::utf8>>, "", combining_class(first) - 1)

    composed ->
      composed
  end
end
555556
# compose_many/4 walks the codepoints that follow the base of a grapheme.
#
#   * `base`       - the composed binary built so far
#   * `accents`    - codepoints that could not be merged into `base`, in order
#   * `last_class` - combining class of the most recent unmerged codepoint
#
# Once the input is exhausted the result is the base followed by the
# leftover accents.
defp compose_many("", base, accents, _), do: base <> accents

defp compose_many(<<cp::utf8, rest::binary>>, base, accents, last_class) do
  part_class = combining_class(cp)

  # A merge is only attempted when the codepoint's class is strictly greater
  # than the class of the last codepoint that stayed unmerged.
  candidate =
    if last_class < part_class do
      compose_one(<<base::binary, cp::utf8>>)
    end

  case candidate do
    nil ->
      # No merge: keep the codepoint as an accent and remember its class.
      compose_many(rest, base, <<accents::binary, cp::utf8>>, part_class)

    composed ->
      # Merged: the composed value becomes the new base; `last_class` is
      # left unchanged.
      compose_many(rest, composed, accents, last_class)
  end
end
567568
# compose_one/1 is a compile-time lookup table: one clause is generated for
# every entry of `compositions` whose source is a two-element list, mapping
# the binary form of that pair to its composition.
for {composed, [_, _] = pair} <- compositions do
  key = IO.iodata_to_binary(pair)
  defp compose_one(unquote(key)), do: unquote(composed)
end

# Anything without a table entry does not compose.
defp compose_one(_other), do: nil
573574end
0 commit comments