1515 T1 == $[ , T2 == $]
1616).
1717
18- % % New ops table
19-
2018-define (at_op (T ),
2119 T == $@ ).
2220
2321-define (unary_op (T ),
24- % T == $&;
22+ T == $& ;
2523 T == $! ;
2624 T == $^ ).
2725
@@ -259,7 +257,7 @@ tokenize([$.,T1,T2|Rest], Line, Scope, Tokens) when
259257% ## Single Token Operators
260258tokenize ([$. ,T |Rest ], Line , Scope , Tokens ) when
261259 ? at_op (T ); ? unary_op (T ); ? dual_op (T ); ? mult_op (T ); ? comp_op (T );
262- ? match_op (T ); ? tail_op (T ); T == $& ->
260+ ? match_op (T ); ? tail_op (T ) ->
263261 handle_call_identifier (Rest , Line , list_to_atom ([T ]), Scope , Tokens );
264262
265263% Dot call
@@ -339,7 +337,7 @@ tokenize([$:,T1,T2|Rest], Line, Scope, Tokens) when
339337% ## Single Token Operators
340338tokenize ([$: ,T |Rest ], Line , Scope , Tokens ) when
341339 ? at_op (T ); ? unary_op (T ); ? dual_op (T ); ? mult_op (T ); ? comp_op (T );
342- ? match_op (T ); ? tail_op (T ); T == $& ; T == $ . ->
340+ ? match_op (T ); ? tail_op (T ); T == $. ->
343341 tokenize (Rest , Line , Scope , [{ atom , Line , list_to_atom ([T ]) }|Tokens ]);
344342
345343% End of line
@@ -364,10 +362,6 @@ tokenize("\r\n" ++ Rest, Line, Scope, Tokens) ->
364362
365363% Stand-alone tokens
366364
367- % ## &
368- tokenize ([$& ,H |Rest ], Line , Scope , Tokens ) when ? is_digit (H ) ->
369- tokenize (Rest , Line , Scope , [{ '&' , Line , [list_to_integer ([H ])] }|Tokens ]);
370-
371365% ## Three token operators
372366tokenize ([T1 ,T2 ,T3 |Rest ], Line , Scope , Tokens ) when ? unary_op3 (T1 , T2 , T3 ) ->
373367 handle_nonl_op (Rest , Line , unary_op , list_to_atom ([T1 ,T2 ,T3 ]), Scope , Tokens );
@@ -474,17 +468,13 @@ tokenize([H|_] = String, Line, Scope, Tokens) when ?is_upcase(H) ->
474468% Identifier
475469
476470tokenize ([H |_ ] = String , Line , Scope , Tokens ) when ? is_downcase (H ); H == $_ ->
477- case tokenize_any_identifier (String , Line , [], Scope ) of
478- { error , _ } = Error -> Error ;
479- { Rest , { Kind , _ , Identifier } } ->
480- case check_keyword (Line , Kind , Identifier , Tokens ) of
481- nomatch ->
482- tokenize (Rest , Line , Scope , [{ Kind , Line , Identifier }|Tokens ]);
483- { ok , [Check |T ] } ->
484- handle_terminator (Rest , Line , Scope , Check , T );
485- { error , Token } ->
486- { error , { Line , " syntax error before: " , Token } }
487- end
471+ case tokenize_any_identifier (String , Line , Scope , Tokens ) of
472+ { keyword , Rest , Check , T } ->
473+ handle_terminator (Rest , Line , Scope , Check , T );
474+ { identifier , Rest , Token } ->
475+ tokenize (Rest , Line , Scope , [Token |Tokens ]);
476+ { error , _ } = Error ->
477+ Error
488478 end ;
489479
490480% Ambiguous unary/binary operators tokens
@@ -763,50 +753,39 @@ tokenize_identifier(Rest, Acc) ->
763753
764754% % Tokenize any identifier, handling kv, punctuated, paren, bracket and do identifiers.
765755
766- tokenize_any_identifier (String , Line , Acc , Scope ) ->
767- { Rest , Identifier } = tokenize_identifier (String , Acc ),
756+ tokenize_any_identifier (String , Line , Scope , Tokens ) ->
757+ { Rest , Identifier } = tokenize_identifier (String , [] ),
768758
769759 case Rest of
770760 [H |T ] when H == $? ; H == $! ->
771761 Atom = unsafe_to_atom (Identifier ++ [H ], Scope ),
772- tokenize_kw_or_call_identifier ( punctuated_identifier , Line , Atom , T );
762+ tokenize_kw_or_other ( T , punctuated_identifier , Line , Atom , Tokens );
773763 _ ->
774764 Atom = unsafe_to_atom (Identifier , Scope ),
775- tokenize_kw_or_call_identifier ( identifier , Line , Atom , Rest )
765+ tokenize_kw_or_other ( Rest , identifier , Line , Atom , Tokens )
776766 end .
777767
778- % % Tokenize kw or call identifier (paren | bracket | do)
779-
780- tokenize_kw_or_call_identifier (_Kind , Line , Atom , [$: ,H |T ]) when ? is_space (H ) ->
781- { [H |T ], { kw_identifier , Line , Atom } };
768+ tokenize_kw_or_other ([$: ,H |T ], _Kind , Line , Atom , _Tokens ) when ? is_space (H ) ->
769+ { identifier , [H |T ], { kw_identifier , Line , Atom } };
782770
783- tokenize_kw_or_call_identifier ( _Kind , Line , Atom , [ $: , H | _ ] ) when ? is_atom_start (H ) ->
771+ tokenize_kw_or_other ([ $: , H | _ ], _Kind , Line , Atom , _Tokens ) when ? is_atom_start (H ) ->
784772 { error , { Line , " keyword argument must be followed by space after: " , atom_to_list (Atom ) ++ [$: ] } };
785773
786- tokenize_kw_or_call_identifier (Kind , Line , Atom , Rest ) ->
787- { Rest , check_call_identifier (Kind , Line , Atom , Rest ) }.
774+ tokenize_kw_or_other (Rest , Kind , Line , Atom , Tokens ) ->
775+ case check_keyword (Line , Atom , Tokens ) of
776+ nomatch ->
777+ { identifier , Rest , check_call_identifier (Kind , Line , Atom , Rest ) };
778+ { ok , [Check |T ] } ->
779+ { keyword , Rest , Check , T };
780+ { error , Token } ->
781+ { error , { Line , " syntax error before: " , Token } }
782+ end .
788783
789784% % Check if it is a call identifier (paren | bracket); do identifiers are resolved in check_keyword
790785
791786check_call_identifier (_Kind , Line , Atom , [$( |_ ]) -> { paren_identifier , Line , Atom };
792787check_call_identifier (_Kind , Line , Atom , [$[ |_ ]) -> { bracket_identifier , Line , Atom };
793- check_call_identifier (Kind , Line , Atom , Rest ) ->
794- case next_is_block (Rest ) of
795- false -> { Kind , Line , Atom };
796- BlockIdentifier -> { BlockIdentifier , Line , Atom }
797- end .
798-
799- next_is_block ([Space |Tokens ]) when Space == $\t ; Space == $\s ->
800- next_is_block (Tokens );
801-
802- next_is_block ([$d ,$o ,H |_ ]) when ? is_identifier (H ); ? is_terminator (H ) ->
803- false ;
804-
805- next_is_block ([$d ,$o |_ ]) ->
806- do_identifier ;
807-
808- next_is_block (_ ) ->
809- false .
788+ check_call_identifier (Kind , Line , Atom , _Rest ) -> { Kind , Line , Atom }.
810789
811790add_token_with_nl (Left , [{eol ,_ ,newline }|T ]) -> [Left |T ];
812791add_token_with_nl (Left , T ) -> [Left |T ].
@@ -884,40 +863,47 @@ terminator('<<') -> '>>'.
884863
885864% % Keywords checking
886865
887- check_keyword (Line , Identifier , Atom , [{ '.' , _ }|_ ] = Tokens ) ->
888- { ok , [{ Identifier , Line , Atom }|Tokens ] };
866+ check_keyword (_Line , _Atom , [{ '.' , _ }|_ ]) ->
867+ nomatch ;
868+
869+ check_keyword (Line , do , [{ identifier , Line , Atom }|T ]) ->
870+ { ok , [{ do , Line }, { do_identifier , Line , Atom }|T ] };
889871
890- check_keyword (Line , Identifier , Atom , Tokens ) when
891- Identifier == identifier ; Identifier == do_identifier ;
892- Identifier == bracket_identifier ; Identifier == paren_identifier ->
872+ check_keyword (Line , do , Tokens ) ->
873+ case do_keyword_valid (Tokens ) of
874+ true -> { ok , [{ do , Line }|Tokens ] };
875+ false -> { error , " do" }
876+ end ;
877+
878+ check_keyword (Line , Atom , Tokens ) ->
893879 case keyword (Atom ) of
894- do ->
895- case do_keyword_valid (Tokens ) of
896- true -> { ok , [{ Atom , Line }|Tokens ] };
897- false -> { error , " do" }
898- end ;
899880 false -> nomatch ;
900- true -> { ok , [{ Atom , Line }|Tokens ] };
881+ token -> { ok , [{ Atom , Line }|Tokens ] };
901882 block -> { ok , [{ block_identifier , Line , Atom }|Tokens ] };
902883 unary_op -> { ok , [{ unary_op , Line , Atom }|Tokens ] };
903884 Kind -> { ok , add_token_with_nl ({ Kind , Line , Atom }, Tokens ) }
904- end ;
905-
906- check_keyword (_ , _ , _ , _ ) -> nomatch .
885+ end .
907886
887+ % % After a keyword token, do is only valid following end, true, false and nil; any non-keyword token may precede it
908888do_keyword_valid ([{ Atom , _ }|_ ]) ->
909- is_boolean (keyword (Atom ));
910- do_keyword_valid (_ ) -> true .
889+ case Atom of
890+ 'end' -> true ;
891+ nil -> true ;
892+ true -> true ;
893+ false -> true ;
894+ _ -> keyword (Atom ) == false
895+ end ;
896+
897+ do_keyword_valid (_ ) ->
898+ true .
911899
912900% Regular keywords
913- keyword ('fn' ) -> true ;
914- keyword ('end' ) -> true ;
915- keyword ('true' ) -> true ;
916- keyword ('false' ) -> true ;
917- keyword ('nil' ) -> true ;
918-
919- % Special handling for do
920- keyword ('do' ) -> do ;
901+ keyword ('fn' ) -> token ;
902+ keyword ('do' ) -> token ;
903+ keyword ('end' ) -> token ;
904+ keyword ('true' ) -> token ;
905+ keyword ('false' ) -> token ;
906+ keyword ('nil' ) -> token ;
921907
922908% Operator keywords
923909keyword ('not' ) -> unary_op ;
0 commit comments