@@ -127,7 +127,7 @@ string captureQualifierFlow(DataFlowSummaryTargetApi api) {
127127 api = returnNodeEnclosingCallable ( ret ) and
128128 isOwnInstanceAccessNode ( ret )
129129 ) and
130- result = Printing:: asValueModel ( api , qualifierString ( ) , "ReturnValue" )
130+ result = Printing:: asLiftedValueModel ( api , qualifierString ( ) , "ReturnValue" )
131131}
132132
/**
 * Gets the constant `2`.
 *
 * NOTE(review): presumably the base value for the access path length limit;
 * confirm against the callers of this predicate.
 */
private int accessPathLimit0() {
  result = 2
}
@@ -237,7 +237,7 @@ string captureThroughFlow0(
237237 input = parameterNodeAsInput ( p ) and
238238 output = getOutput ( returnNodeExt ) and
239239 input != output and
240- result = Printing:: asTaintModel ( api , input , output )
240+ result = Printing:: asLiftedTaintModel ( api , input , output )
241241 )
242242}
243243
@@ -291,26 +291,269 @@ private string getContent(PropagateContentFlow::AccessPath ap, int i) {
291291 )
292292}
293293
/**
 * Gets the MaD string for the store access path `ap`: the strings produced by
 * `getContent(ap, idx)` concatenated in ascending index order.
 */
private string printStoreAccessPath(PropagateContentFlow::AccessPath ap) {
  result =
    concat(int idx |
      |
      getContent(ap, idx), "" order by idx
    )
}
297300
/**
 * Gets the MaD string for the read access path `ap`: the strings produced by
 * `getContent(ap, idx)` concatenated in descending index order.
 */
private string printReadAccessPath(PropagateContentFlow::AccessPath ap) {
  result =
    concat(int idx |
      |
      getContent(ap, idx), "" order by idx desc
    )
}
301307
302- string captureContentFlow ( DataFlowSummaryTargetApi api ) {
/**
 * Holds if the access path `ap` has both a head and a tail, and either the head
 * satisfies `isField` or the tail (recursively) mentions a field.
 */
private predicate mentionsField(PropagateContentFlow::AccessPath ap) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = ap.getHead() and
    rest = ap.getTail()
  |
    isField(first)
    or
    mentionsField(rest)
  )
}
318+
/**
 * Holds if `PropagateContentFlow::flow` relates parameter `p` (read through `reads`)
 * to `returnNodeExt` (stored through `stores`), and both `p` and `returnNodeExt`
 * have `api` as their enclosing callable.
 */
private predicate apiFlow(
  DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads,
  ReturnNodeExt returnNodeExt, PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  api = p.getEnclosingCallable() and
  api = returnNodeExt.getEnclosingCallable() and
  PropagateContentFlow::flow(p, reads, returnNodeExt, stores, preservesValue)
}
327+
/**
 * An API for which content based flow modeling is considered relevant.
 *
 * Heuristic: an API qualifies only if
 *   #distinct (input, output) content flows <= 2 * #parameters + 3
 *
 * When an API yields more content flows than that, it is likely that
 * 1. its types are not sufficiently constrained, causing a combinatorial
 *    explosion in dispatch and therefore in the generated summaries, and
 * 2. the non-content based flow detection is a reasonable approximation,
 *    because the reads and stores touch a significant part of an object's
 *    internal state.
 */
private class ContentDataFlowSummaryTargetApi extends DataFlowSummaryTargetApi {
  ContentDataFlowSummaryTargetApi() {
    2 * this.getNumberOfParameters() + 3 >=
      count(string inputSpec, string outputSpec |
        exists(
          DataFlow::ParameterNode param, ReturnNodeExt ret,
          PropagateContentFlow::AccessPath readPath, PropagateContentFlow::AccessPath storePath
        |
          apiFlow(this, param, readPath, ret, storePath, _) and
          inputSpec = parameterNodeAsContentInput(param) + printReadAccessPath(readPath) and
          outputSpec = getContentOutput(ret) + printStoreAccessPath(storePath)
        )
      )
  }
}
354+
/**
 * Holds if `api` is an API that is relevant for content flow modeling and there is
 * content flow from parameter `p` (read through `reads`) to `returnNodeExt`
 * (stored through `stores`), where both nodes are enclosed by `api`.
 *
 * This is `apiFlow` restricted to `ContentDataFlowSummaryTargetApi`; the restriction
 * comes solely from the declared type of `api`, so the flow conditions are shared
 * with `apiFlow` instead of being repeated here.
 */
pragma[nomagic]
private predicate apiContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath stores, boolean preservesValue
) {
  apiFlow(api, p, reads, returnNodeExt, stores, preservesValue)
}
365+
/**
 * Holds if `path` has both a head and a tail, and either `getSyntheticName` has a
 * result for the head or the tail (recursively) has synthetic content.
 */
private predicate hasSyntheticContent(PropagateContentFlow::AccessPath path) {
  exists(ContentSet first, PropagateContentFlow::AccessPath rest |
    first = path.getHead() and
    rest = path.getTail()
  |
    hasSyntheticContent(rest)
    or
    exists(getSyntheticName(first))
  )
}
379+
/**
 * Predicates for validating access paths whose content sets translate into
 * synthetic fields, before such paths are used in generated summary models.
 */
private module AccessPathSyntheticValidation {
  /**
   * Holds if some API has content flow that reads `read` from a parameter of
   * type `t1` and stores `store` into a `ReturnNodeExt` of type `t2`.
   */
  private predicate step(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    exists(DataFlow::ParameterNode param, ReturnNodeExt ret |
      apiContentFlow(_, param, read, ret, store, _) and
      t1 = param.getType() and
      t2 = ret.getType()
    )
  }

  /**
   * Holds if there is a `step` from `read` (on type `t1`) to `store` (on type `t2`)
   * where only `store` has synthetic content.
   *
   * Step A -> Synth.
   */
  private predicate synthPathEntry(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(store) and
    not hasSyntheticContent(read)
  }

  /**
   * Holds if there is a `step` from `read` (on type `t1`) to `store` (on type `t2`)
   * where only `read` has synthetic content.
   *
   * Step Synth -> A.
   */
  private predicate synthPathExit(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    step(t1, read, t2, store) and
    hasSyntheticContent(read) and
    not hasSyntheticContent(store)
  }

  /**
   * Holds if `store` can be reached from `read` by one or more chained steps,
   * where both `read` and `store` have synthetic content. Steps are chained by
   * reversing the stored access path of one step to obtain the read access path
   * of the next.
   *
   * Synth ->+ Synth
   */
  private predicate synthPathStepRec(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    hasSyntheticContent(read) and
    hasSyntheticContent(store) and
    (
      step(t1, read, t2, store)
      or
      exists(PropagateContentFlow::AccessPath via, Type viaType |
        step(t1, read, viaType, via) and
        synthPathStepRec(viaType, via.reverse(), t2, store)
      )
    )
  }

  /**
   * Holds if a chain of steps leads from `read` (on type `t`) to an exit step.
   *
   * read ->* Synth -> A
   */
  private predicate reachesSynthExit(Type t, PropagateContentFlow::AccessPath read) {
    synthPathExit(t, read, _, _)
    or
    exists(PropagateContentFlow::AccessPath via, Type viaType |
      synthPathStepRec(t, read, viaType, via) and
      synthPathExit(viaType, via.reverse(), _, _)
    )
  }

  /**
   * Holds if a chain of steps leads from an entry step to `store` (on type `t`).
   *
   * A -> Synth ->* store
   */
  private predicate synthEntryReaches(Type t, PropagateContentFlow::AccessPath store) {
    synthPathEntry(_, _, t, store)
    or
    exists(PropagateContentFlow::AccessPath via, Type viaType |
      synthPathEntry(_, _, viaType, via) and
      synthPathStepRec(viaType, via.reverse(), t, store)
    )
  }

  /**
   * Holds if at least one of the access paths `read` (on type `t1`) and `store`
   * (on type `t2`) contains content that will be translated into a synthetic field
   * when used in a MaD summary model, and there is a range of APIs whose flow
   * access paths can be chained such that there exist access paths `A` and `B`
   * with A ->* read -> store ->* B, where neither `A` nor `B` contains content
   * that will be translated into a synthetic field.
   *
   * This prevents summaries that read from, or store into, a "dead" synthetic field.
   *
   * Example:
   * Take a type `t` (so `t1` = `t2`) with methods `getX` and `setX` that get and
   * set a private field `X` on `t`. The resulting content flows are
   *   getX : Argument[this].SyntheticField[t.X] -> ReturnValue.
   *   setX : Argument[0] -> Argument[this].SyntheticField[t.X]
   * Because the reads and stores are on synthetic fields, summaries should only
   * be produced when both methods exist.
   */
  pragma[nomagic]
  predicate acceptReadStore(
    Type t1, PropagateContentFlow::AccessPath read, Type t2, PropagateContentFlow::AccessPath store
  ) {
    // Case 1: this step enters synthetic content, and an exit is reachable afterwards.
    synthPathEntry(t1, read, t2, store) and
    reachesSynthExit(t2, store.reverse())
    or
    // Cases 2 and 3: an entry reaches the path that `read` is the reverse of, and
    // this step either is the exit itself or leads on to an exit.
    exists(PropagateContentFlow::AccessPath prior |
      prior.reverse() = read and
      synthEntryReaches(t1, prior) and
      (
        synthPathExit(t1, read, t2, store)
        or
        step(t1, read, t2, store) and
        reachesSynthExit(t2, store.reverse())
      )
    )
  }
}
507+
/**
 * Holds if the API `api` has relevant content flow from `read` on `p` to `store`
 * on `returnNodeExt`.
 *
 * Flow is relevant when either
 * 1. neither `read` nor `store` contains a content set that translates into a
 *    synthetic field, or
 * 2. the pair is accepted by `AccessPathSyntheticValidation::acceptReadStore` for
 *    the types of `p` and `returnNodeExt`, i.e. the synthetic content involved
 *    is "live".
 */
private predicate apiRelevantContentFlow(
  ContentDataFlowSummaryTargetApi api, DataFlow::ParameterNode p,
  PropagateContentFlow::AccessPath read, ReturnNodeExt returnNodeExt,
  PropagateContentFlow::AccessPath store, boolean preservesValue
) {
  apiContentFlow(api, p, read, returnNodeExt, store, preservesValue) and
  (
    AccessPathSyntheticValidation::acceptReadStore(p.getType(), read, returnNodeExt.getType(),
      store)
    or
    not (hasSyntheticContent(read) or hasSyntheticContent(store))
  )
}
527+
/**
 * Holds if `api` has relevant content flow (see `apiRelevantContentFlow`) from a
 * parameter to a return node, where `input` and `output` are the printed
 * parameter/read-path and return-node/store-path strings respectively, and
 * `input` differs from `output`.
 *
 * `preservesValue` is the flag reported for the underlying flow.
 * `lift` is `false` if the read or the store access path mentions a field
 * (see `mentionsField`), and `true` otherwise.
 */
528+ pragma [ nomagic]
529+ private predicate captureContentFlow0 (
530+ ContentDataFlowSummaryTargetApi api , string input , string output , boolean preservesValue ,
531+ boolean lift
532+ ) {
303533 exists (
304- DataFlow:: ParameterNode p , ReturnNodeExt returnNodeExt , string input , string output ,
305- PropagateContentFlow:: AccessPath reads , PropagateContentFlow:: AccessPath stores ,
306- boolean preservesValue
534+ DataFlow:: ParameterNode p , ReturnNodeExt returnNodeExt , PropagateContentFlow:: AccessPath reads ,
535+ PropagateContentFlow:: AccessPath stores
307536 |
308- PropagateContentFlow:: flow ( p , reads , returnNodeExt , stores , preservesValue ) and
309- returnNodeExt .getEnclosingCallable ( ) = api and
537+ apiRelevantContentFlow ( api , p , reads , returnNodeExt , stores , preservesValue ) and
310538 input = parameterNodeAsContentInput ( p ) + printReadAccessPath ( reads ) and
311539 output = getContentOutput ( returnNodeExt ) + printStoreAccessPath ( stores ) and
312540 input != output and
313- result = Printing:: asModel ( api , input , output , preservesValue )
541+ ( if mentionsField ( reads ) or mentionsField ( stores ) then lift = false else lift = true )
542+ )
543+ }
544+
/**
 * Gets the content based summary model(s) of the API `api`, one per distinct
 * (`input`, `output`, `lift`) combination reported by `captureContentFlow0`.
 *
 * When the same combination is reported with more than one `preservesValue`
 * flag, the maximum flag is used for the model.
 *
 * Models are lifted to the best type only when neither the read nor the store
 * access path contains a field or synthetic field access.
 */
string captureContentFlow(ContentDataFlowSummaryTargetApi api) {
  exists(string inputSpec, string outputSpec, boolean lifted, boolean strongest |
    captureContentFlow0(api, inputSpec, outputSpec, _, lifted) and
    strongest =
      max(boolean candidate |
        captureContentFlow0(api, inputSpec, outputSpec, candidate, lifted)
      |
        candidate
      ) and
    result = Printing::asModel(api, inputSpec, outputSpec, strongest, lifted)
  )
}
316559
0 commit comments