@@ -25,9 +25,8 @@ private string getTokenFeature(DataFlow::Node endpoint, string featureName) {
2525 result = unique( string x | x = FunctionBodies:: getBodyTokenFeatureForEntity ( entity ) )
2626 )
2727 or
28- exists ( getACallBasedTokenFeatureComponent ( endpoint , _, featureName ) ) and
2928 result =
30- concat ( DataFlow:: CallNode call , string component |
29+ strictconcat ( DataFlow:: CallNode call , string component |
3130 component = getACallBasedTokenFeatureComponent ( endpoint , call , featureName )
3231 |
3332 component , " "
@@ -110,12 +109,13 @@ private string getACallBasedTokenFeatureComponent(
110109
111110/** This module provides functionality for getting the function body feature associated with a particular entity. */
112111module FunctionBodies {
113- /** Holds if `node` is an AST node within the entity `entity` and `token` is a node attribute associated with `node`. */
114- private predicate bodyTokens (
115- DatabaseFeatures:: Entity entity , DatabaseFeatures:: AstNode node , string token
116- ) {
117- DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
118- token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) )
112+ /** Holds if `location` is the location of an AST node within the entity `entity` and `token` is a node attribute associated with that AST node. */
113+ private predicate bodyTokens ( DatabaseFeatures:: Entity entity , Location location , string token ) {
114+ exists ( DatabaseFeatures:: AstNode node |
115+ DatabaseFeatures:: astNodes ( entity , _, _, node , _) and
116+ token = unique( string t | DatabaseFeatures:: nodeAttributes ( node , t ) ) and
117+ location = node .getLocation ( )
118+ )
119119 }
120120
121121 /**
@@ -127,23 +127,18 @@ module FunctionBodies {
127127 // If a function has more than 256 body subtokens, then featurize it as absent. This
128128 // approximates the behavior of the classifier on non-generic body features where large body
129129 // features are replaced by the absent token.
130- if count ( DatabaseFeatures:: AstNode node , string token | bodyTokens ( entity , node , token ) ) > 256
131- then result = ""
132- else
133- result =
134- concat ( int i , string rankedToken |
135- rankedToken =
136- rank [ i ] ( DatabaseFeatures:: AstNode node , string token , Location l |
137- bodyTokens ( entity , node , token ) and l = node .getLocation ( )
138- |
139- token
140- order by
141- l .getFile ( ) .getAbsolutePath ( ) , l .getStartLine ( ) , l .getStartColumn ( ) , l .getEndLine ( ) ,
142- l .getEndColumn ( ) , token
143- )
144- |
145- rankedToken , " " order by i
146- )
130+ //
131+ // We count locations instead of tokens because tokens are often not unique.
132+ strictcount ( Location l | bodyTokens ( entity , l , _) ) <= 256 and
133+ result =
134+ strictconcat ( string token , Location l |
135+ bodyTokens ( entity , l , token )
136+ |
137+ token , " "
138+ order by
139+ l .getFile ( ) .getAbsolutePath ( ) , l .getStartLine ( ) , l .getStartColumn ( ) , l .getEndLine ( ) ,
140+ l .getEndColumn ( ) , token
141+ )
147142 }
148143}
149144
@@ -247,11 +242,12 @@ private module AccessPaths {
247242 else accessPath = previousAccessPath + " " + paramName
248243 )
249244 or
250- exists ( string callbackName , string index |
245+ exists ( string callbackName , int index |
251246 node =
252- getNamedParameter ( previousNode .getASuccessor ( "param " + index ) .getMember ( callbackName ) ,
253- paramName ) and
254- index != "-1" and // ignore receiver
247+ getNamedParameter ( previousNode
248+ .getASuccessor ( API:: Label:: parameter ( index ) )
249+ .getMember ( callbackName ) , paramName ) and
250+ index != - 1 and // ignore receiver
255251 if includeStructuralInfo = true
256252 then
257253 accessPath =
@@ -280,10 +276,13 @@ private string getASupportedFeatureName() {
280276 * `featureValue` for the endpoint `endpoint`.
281277 */
282278predicate tokenFeatures ( DataFlow:: Node endpoint , string featureName , string featureValue ) {
283- featureName = getASupportedFeatureName ( ) and
279+ ModelScoring :: endpoints ( endpoint ) and
284280 (
285- featureValue = unique( string x | x = getTokenFeature ( endpoint , featureName ) )
286- or
287- not exists ( unique( string x | x = getTokenFeature ( endpoint , featureName ) ) ) and featureValue = ""
281+ if strictcount ( getTokenFeature ( endpoint , featureName ) ) = 1
282+ then featureValue = getTokenFeature ( endpoint , featureName )
283+ else (
284+ // Performance note: this is a Cartesian product between all endpoints and feature names.
285+ featureValue = "" and featureName = getASupportedFeatureName ( )
286+ )
288287 )
289288}
0 commit comments