@@ -128,23 +128,44 @@ private string getACallBasedTokenFeatureComponent(
128128
129129/** This module provides functionality for getting the function body feature associated with a particular entity. */
130130module FunctionBodies {
/** Gets a string token for `node`: an identifier name, the property name of an index expression, the raw value of a number literal, the value of any other literal, or the raw value of a template element. Has no result for nodes matching none of these cases. */
131+ string getTokenizedAstNode ( ASTNode node ) {
132+ // NB: Unary and binary operator expressions e.g. -a, a + b and compound
133+ // assignments e.g. a += b can be identified by the expression type,
134+ // so this predicate does not produce a token for the operator itself.
134+ result = node .( Identifier ) .getName ( )
135+ or
136+ // Computed property accesses for which we can predetermine the property being accessed.
137+ // NB: May alias with operators e.g. could have '+' as a property name.
138+ result = node .( IndexExpr ) .getPropertyName ( )
139+ or
140+ // We use `getRawValue` to give us distinct representations for `0xa`, `0xA`, and `10`.
141+ result = node .( NumberLiteral ) .getRawValue ( )
142+ or
143+ // We use `getValue` rather than `getRawValue` so we assign `"a"` and `'a'` the same representation.
// Number literals are excluded here so that they only contribute the raw value from the case above
// (presumably NumberLiteral is also a Literal; TODO confirm against the library).
// `and` binds tighter than `or`, so the exclusion applies to this disjunct only.
144+ not node instanceof NumberLiteral and
145+ result = node .( Literal ) .getValue ( )
146+ or
// Template elements are tokenized by their raw value.
147+ result = node .( TemplateElement ) .getRawValue ( )
148+ }
149+
131150 /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
132151 private predicate bodyTokens ( DatabaseFeatures:: Entity entity , Location location , string token ) {
133152 // Performance optimization: Restrict the set of entities to those containing an endpoint to featurize.
134153 entity =
135154 getRepresentativeEntityForEndpoint ( any ( FeaturizationConfig cfg ) .getAnEndpointToFeaturize ( ) ) and
136- // Performance optimization: If a function has more than 256 body tokens , then featurize it as
137- // absent. This approximates the behavior of the classifier on non-generic body features where
138- // large body features are replaced by the absent token.
155+ // Performance optimization: If a function has more than 256 body subtokens , then featurize it as absent. This
156+ // approximates the behavior of the classifier on non-generic body features where large body
157+ // features are replaced by the absent token.
139158 //
140159 // We count nodes instead of tokens because tokens are often not unique.
141- strictcount ( DatabaseFeatures:: AstNode node |
142- DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
143- exists ( string t | DatabaseFeatures:: nodeAttributes ( node , t ) )
// `strictcount` has no result when no node matches, so an entity without any tokenized node
// does not satisfy this predicate at all.
160+ strictcount ( ASTNode node |
// `getParent*()` is the reflexive transitive closure: the defined function itself or any descendant.
161+ node .getParent * ( ) = entity .getDefinedFunction ( ) and
// Skip the identifier returned by `getIdentifier()` on the defined function
// (presumably the name of the function itself; TODO confirm).
162+ not node = entity .getDefinedFunction ( ) .getIdentifier ( ) and
// Only nodes for which `getTokenizedAstNode` has a result are counted.
163+ exists ( getTokenizedAstNode ( node ) )
144164 ) <= 256 and
145- exists ( DatabaseFeatures:: AstNode node |
146- DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
147- token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) ) and
// Same node filter as in the count: each qualifying node contributes its token and its location.
165+ exists ( ASTNode node |
166+ node .getParent * ( ) = entity .getDefinedFunction ( ) and
167+ not node = entity .getDefinedFunction ( ) .getIdentifier ( ) and
168+ token = getTokenizedAstNode ( node ) and
148169 location = node .getLocation ( )
149170 )
150171 }
0 commit comments