@@ -119,20 +119,29 @@ tokenize(String, Line, Column, Opts) ->
119119 false -> <<" nofile" >>
120120 end ,
121121
122- Existing = case lists :keyfind (existing_atoms_only , 1 , Opts ) of
123- {existing_atoms_only , true } -> true ;
124- false -> false
122+ ExistingAtomsOnly = case lists :keyfind (existing_atoms_only , 1 , Opts ) of
123+ {existing_atoms_only , ExistingAtomsOnlyBool } when
124+ is_boolean (ExistingAtomsOnlyBool ) -> ExistingAtomsOnlyBool ;
125+ _ -> false
125126 end ,
126127
127- Check = case lists :keyfind (check_terminators , 1 , Opts ) of
128- {check_terminators , false } -> false ;
129- false -> true
128+ CheckTerminators = case lists :keyfind (check_terminators , 1 , Opts ) of
129+ {check_terminators , CheckTerminatorsBool } when
130+ is_boolean (CheckTerminatorsBool ) -> CheckTerminatorsBool ;
131+ _ -> true
132+ end ,
133+
134+ PreserveComments = case lists :keyfind (preserve_comments , 1 , Opts ) of
135+ {preserve_comments , PreserveCommentsBool } when
136+ is_boolean (PreserveCommentsBool ) -> PreserveCommentsBool ;
137+ _ -> false
130138 end ,
131139
132140 tokenize (String , Line , Column , # elixir_tokenizer {
133141 file = File ,
134- existing_atoms_only = Existing ,
135- check_terminators = Check ,
142+ existing_atoms_only = ExistingAtomsOnly ,
143+ check_terminators = CheckTerminators ,
144+ preserve_comments = PreserveComments ,
136145 identifier_tokenizer = elixir_config :get (identifier_tokenizer )
137146 }).
138147
@@ -170,8 +179,14 @@ tokenize([$0, $o, H | T], Line, Column, Scope, Tokens) when ?is_octal(H) ->
170179% Comments
171180
%% A `#` opens a comment. The comment text is collected by tokenize_comment/3,
%% which stops in front of the next line break (or at the end of the input).
%% A `comment` token is only emitted when the scope has preserve_comments set;
%% otherwise the comment is dropped and tokenizing resumes on what follows it.
tokenize([$# | String], Line, Column, Scope, Tokens) ->
  {Rest, Comment, Length} = tokenize_comment(String, [$#], 1),
  KeepComments = Scope#elixir_tokenizer.preserve_comments,
  case KeepComments of
    false ->
      %% The column is intentionally left as is: nothing was emitted.
      tokenize(Rest, Line, Column, Scope, Tokens);
    true ->
      EndColumn = Column + Length,
      Token = {comment, {Line, Column, EndColumn}, Comment},
      tokenize(Rest, Line, EndColumn, Scope, [Token | Tokens])
  end;
175190
176191% Sigils
177192
@@ -453,8 +468,8 @@ tokenize([$% | T], Line, Column, Scope, Tokens) ->
453468 tokenize (T , Line , Column + 1 , Scope , [{'%' , {Line , Column , Column + 1 }} | Tokens ]);
454469
%% A dot may be separated from what it applies to by horizontal space, line
%% breaks and comments. Skip over those first (collecting comment tokens and
%% the number of line breaks crossed), then let handle_dot/8 decide what kind
%% of dot this is.
tokenize([$. | T], Line, Column, Scope, Tokens) ->
  AfterDot = Column + 1,
  {Rest, Counter, Offset, CommentTokens} = strip_dot_space(T, 0, AfterDot, Line, []),
  DotAndRest = [$. | Rest],
  handle_dot(DotAndRest, Line, Offset - 1, Column, Scope, Tokens, CommentTokens, Counter);
458473
459474% Identifiers
460475
@@ -498,12 +513,18 @@ strip_horizontal_space([H | T], Counter) when ?is_horizontal_space(H) ->
498513strip_horizontal_space (T , Counter ) ->
499514 {T , Counter }.
500515
%% Skips horizontal space, line breaks and comments that follow a dot.
%%
%%   T         - input right after the dot
%%   Counter   - number of line breaks crossed so far
%%   Column    - column of the first character of T
%%   StartLine - line the dot is on
%%   Tokens    - comment tokens collected so far, most recent first
%%
%% Returns {Rest, Counter, Column, Tokens} where Rest is the remaining input,
%% Counter the total number of line breaks crossed, Column the column of the
%% first character of Rest, and Tokens the collected comment tokens (most
%% recent first). Comment tokens are always collected here; whether they are
%% kept is decided by the caller.
strip_dot_space(T, Counter, Column, StartLine, Tokens) ->
  case strip_horizontal_space(T) of
    {"#" ++ R, Spaces} ->
      {Rest, Comment, Length} = tokenize_comment(R, [$#], 1),
      %% The comment starts after any horizontal space that preceded it, so
      %% that space must be counted in the reported columns.
      CommentColumn = Column + Spaces,
      CommentToken = {comment, {StartLine + Counter, CommentColumn, CommentColumn + Length}, Comment},
      strip_dot_space(Rest, Counter, 1, StartLine, [CommentToken | Tokens]);
    {"\r\n" ++ Rest, _} ->
      strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens);
    {"\n" ++ Rest, _} ->
      strip_dot_space(Rest, Counter + 1, 1, StartLine, Tokens);
    {Rest, Length} ->
      {Rest, Counter, Column + Length, Tokens}
  end.
508529
509530handle_char (7 ) -> {" \\ a" , " alert" };
@@ -572,51 +593,59 @@ handle_op(Rest, Line, Column, Kind, Length, Op, Scope, Tokens) ->
572593 add_token_with_nl ({Kind , {Line , Column , Column + Length }, Op }, Tokens ))
573594 end .
574595
%% Puts the collected comment tokens in front of the token accumulator when
%% the scope asks for comments to be preserved; otherwise returns the
%% accumulator untouched and the comment tokens are discarded.
handle_comments(CommentTokens, Tokens, Scope) ->
  Preserve = Scope#elixir_tokenizer.preserve_comments,
  case Preserve of
    false -> Tokens;
    true -> CommentTokens ++ Tokens
  end.
601+
%% Handles a dot and whatever follows it.
%%
%%   [$. | Rest]   - the dot followed by the input left after skipping space,
%%                   line breaks and comments
%%   Line          - line the dot itself is on
%%   Column        - column just before the first character after the dot
%%   DotColumn     - column of the dot
%%   Scope, Tokens - tokenizer scope and token accumulator (most recent first)
%%   CommentTokens - comments found between the dot and what follows it
%%   Counter       - number of line breaks between the dot and what follows it
%%
%% The dot token is reported on Line; everything after it is on Line + Counter.

% ## Three Token Operators
handle_dot([$., T1, T2, T3 | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?unary_op3(T1, T2, T3); ?comp_op3(T1, T2, T3); ?and_op3(T1, T2, T3); ?or_op3(T1, T2, T3);
    ?arrow_op3(T1, T2, T3); ?three_op(T1, T2, T3) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 3, list_to_atom([T1, T2, T3]), Scope, Tokens, CommentTokens, Counter);

% ## Two Token Operators
handle_dot([$., T1, T2 | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?comp_op2(T1, T2); ?rel_op2(T1, T2); ?and_op(T1, T2); ?or_op(T1, T2);
    ?arrow_op(T1, T2); ?in_match_op(T1, T2); ?two_op(T1, T2); ?stab_op(T1, T2);
    ?type_op(T1, T2) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 2, list_to_atom([T1, T2]), Scope, Tokens, CommentTokens, Counter);

% ## Single Token Operators
handle_dot([$., T | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when
    ?at_op(T); ?unary_op(T); ?capture_op(T); ?dual_op(T); ?mult_op(T);
    ?rel_op(T); ?match_op(T); ?pipe_op(T) ->
  handle_call_identifier(Rest, Line, Column + 1, DotColumn, 1, list_to_atom([T]), Scope, Tokens, CommentTokens, Counter);

% ## Exception for .( as it needs to be treated specially in the parser
handle_dot([$., $( | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
  TokensSoFar = add_token_with_nl({dot_call_op, {Line, DotColumn, DotColumn + 1}, '.'}, Tokens),
  tokenize([$( | Rest], Line + Counter, Column + 2, Scope, handle_comments(CommentTokens, TokensSoFar, Scope));

% ## Quoted function names, as in foo."bar"
handle_dot([$., H | T] = Original, Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) when ?is_quote(H) ->
  %% The quoted name starts on the line reached after skipping any line breaks
  %% that followed the dot. Extraction must start from that line, otherwise
  %% the skipped line breaks are lost from NewLine and every later token.
  CallLine = Line + Counter,
  case elixir_interpolation:extract(CallLine, Column + 2, Scope, true, T, H) of
    {NewLine, NewColumn, [Part], Rest} when is_binary(Part) ->
      case unsafe_to_atom(Part, CallLine, Scope) of
        {ok, Atom} ->
          Token = check_call_identifier(CallLine, Column, max(NewColumn - Column, 0), Atom, Rest),
          TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
          tokenize(Rest, NewLine, NewColumn, Scope, [Token | handle_comments(CommentTokens, TokensSoFar, Scope)]);
        {error, Reason} ->
          {error, Reason, Original, Tokens}
      end;
    {error, Reason} ->
      interpolation_error(Reason, Original, Tokens, " (for function name starting at line ~B)", [CallLine])
  end;

% ## Plain dot
handle_dot([$. | Rest], Line, Column, DotColumn, Scope, Tokens, CommentTokens, Counter) ->
  TokensSoFar = add_token_with_nl({'.', {Line, DotColumn, DotColumn + 1}}, Tokens),
  tokenize(Rest, Line + Counter, Column + 1, Scope, handle_comments(CommentTokens, TokensSoFar, Scope)).
615644
%% Emits the tokens for an operator used as a remote call name (the Op that
%% follows a dot). The dot token is reported on Line, the call identifier on
%% Line + Counter; any comment tokens found between the two go in between
%% (subject to handle_comments/3). Tokenizing then resumes at the position
%% reported by the call identifier token.
handle_call_identifier(Rest, Line, Column, DotColumn, Length, Op, Scope, Tokens, CommentTokens, Counter) ->
  CallToken = check_call_identifier(Line + Counter, Column, Length, Op, Rest),
  {_, {NextLine, _, NextColumn}, _} = CallToken,
  DotToken = {'.', {Line, DotColumn, DotColumn + 1}},
  WithDot = add_token_with_nl(DotToken, Tokens),
  WithComments = handle_comments(CommentTokens, WithDot, Scope),
  tokenize(Rest, NextLine, NextColumn, Scope, [CallToken | WithComments]).
620649
621650% ## Ambiguous unary/binary operators tokens
622651handle_space_sensitive_tokens ([Sign , NotMarker | T ], Line , Column , Scope , [{Identifier , _ , _ } = H | Tokens ]) when
@@ -825,10 +854,14 @@ tokenize_bin(Rest, Acc, Length) ->
825854
826855% % Comments
827856
%% Consumes a comment up to, but not including, the next line break or the
%% end of the input.
%%
%%   Input  - characters following what has already been consumed
%%   Acc    - comment characters consumed so far, in reverse order
%%   Length - number of characters in Acc
%%
%% Returns {Rest, Comment, Length}: the unconsumed input (starting at the line
%% break, or [] at end of input), the comment text in reading order, and its
%% length in characters.
tokenize_comment("\r\n" ++ _ = Rest, Acc, Length) ->
  {Rest, lists:reverse(Acc), Length};
tokenize_comment("\n" ++ _ = Rest, Acc, Length) ->
  {Rest, lists:reverse(Acc), Length};
tokenize_comment([H | Rest], Acc, Length) ->
  tokenize_comment(Rest, [H | Acc], Length + 1);
tokenize_comment([], Acc, Length) ->
  %% A comment that ends the input must be reversed just like one that ends
  %% at a line break; Acc is built back to front.
  {[], lists:reverse(Acc), Length}.
832865
833866% % Identifiers
834867
0 commit comments