Skip to content

Commit 7fd4755

Browse files
authored
Merge pull request #20919 from yoff/python/header-splitting-experiments
Python: detecting header splitting in synthetic app
2 parents 5784a21 + 2c835dc commit 7fd4755

File tree

6 files changed

+119
-6
lines changed

6 files changed

+119
-6
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added taint flow model and type model for `urllib.parse.urlparse`.

python/ql/lib/semmle/python/frameworks/Stdlib.model.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ extensions:
142142
- ["typing", "Member[cast]", "Argument[1,val:]", "ReturnValue", "value"]
143143
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs
144144
- ["urllib", "Member[parse].Member[parse_qs]", "Argument[0,qs:]", "ReturnValue", "taint"]
145+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
146+
- ["urllib", "Member[parse].Member[urlparse]", "Argument[0,urlstring:]", "ReturnValue", "taint"]
145147
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote
146148
- ["urllib", "Member[parse].Member[quote]", "Argument[0,string:]", "ReturnValue", "taint"]
147149
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.quote_plus
@@ -181,7 +183,9 @@ extensions:
181183
- addsTo:
182184
pack: codeql/python-all
183185
extensible: typeModel
184-
data: []
186+
data:
187+
# See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
188+
- ["urllib.parse.ParseResult~Subclass", 'urllib', 'Member[parse].Member[urlparse]']
185189

186190
- addsTo:
187191
pack: codeql/python-all

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,67 @@ module Stdlib {
245245
}
246246
}
247247

248+
/**
249+
* Provides models for the `urllib.parse.ParseResult` class
250+
*
251+
* See https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.ParseResult.
252+
*/
253+
module ParseResult {
254+
/** Gets a reference to the `urllib.parse.ParseResult` class. */
255+
API::Node classRef() {
256+
result = API::moduleImport("urllib").getMember("parse").getMember("ParseResult")
257+
or
258+
result = ModelOutput::getATypeNode("urllib.parse.ParseResult~Subclass").getASubclass*()
259+
}
260+
261+
/**
262+
* A source of instances of `urllib.parse.ParseResult`. Extend this class to model new instances.
263+
*
264+
* This can include instantiations of the class, return values from function
265+
* calls, or a special parameter that will be set when functions are called by an external
266+
* library.
267+
*
268+
* Use the predicate `ParseResult::instance()` to get references to instances of `urllib.parse.ParseResult`.
269+
*/
270+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
271+
272+
/** A direct instantiation of `urllib.parse.ParseResult`. */
273+
private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode {
274+
ClassInstantiation() { this = classRef().getACall() }
275+
}
276+
277+
/** Gets a reference to an instance of `urllib.parse.ParseResult`, tracked with type tracker `t`. */
278+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
279+
t.start() and
280+
result instanceof InstanceSource
281+
or
282+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
283+
}
284+
285+
/** Gets a reference to an instance of `urllib.parse.ParseResult`. */
286+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
287+
288+
/**
289+
* Taint propagation for `urllib.parse.ParseResult`.
290+
*/
291+
private class InstanceTaintSteps extends InstanceTaintStepsHelper {
292+
InstanceTaintSteps() { this = "urllib.parse.ParseResult" }
293+
294+
override DataFlow::Node getInstance() { result = instance() }
295+
296+
override string getAttributeName() {
297+
result in [
298+
"netloc", "path", "params", "query", "fragment", "username", "password", "hostname",
299+
"port"
300+
]
301+
}
302+
303+
override string getMethodName() { none() }
304+
305+
override string getAsyncMethodName() { none() }
306+
}
307+
}
308+
248309
// ---------------------------------------------------------------------------
249310
// logging
250311
// ---------------------------------------------------------------------------

python/ql/test/experimental/query-tests/Security/CWE-409/DecompressionBombs.expected

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,23 @@
11
edges
22
| test.py:10:16:10:24 | ControlFlowNode for file_path | test.py:11:21:11:29 | ControlFlowNode for file_path | provenance | |
33
| test.py:11:5:11:35 | ControlFlowNode for Attribute() | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
4-
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
4+
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
55
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:11:5:11:52 | ControlFlowNode for Attribute() | provenance | Config |
66
| test.py:11:21:11:29 | ControlFlowNode for file_path | test.py:12:21:12:29 | ControlFlowNode for file_path | provenance | |
77
| test.py:12:5:12:35 | ControlFlowNode for Attribute() | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
8-
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
8+
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
99
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:12:5:12:48 | ControlFlowNode for Attribute() | provenance | Config |
1010
| test.py:12:21:12:29 | ControlFlowNode for file_path | test.py:14:26:14:34 | ControlFlowNode for file_path | provenance | |
1111
| test.py:14:10:14:35 | ControlFlowNode for Attribute() | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
12-
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
12+
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:14:10:14:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
1313
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:15:14:15:29 | ControlFlowNode for Attribute() | provenance | Config |
1414
| test.py:14:26:14:34 | ControlFlowNode for file_path | test.py:18:26:18:34 | ControlFlowNode for file_path | provenance | |
1515
| test.py:18:10:18:35 | ControlFlowNode for Attribute() | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
16-
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:85 |
16+
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:18:10:18:35 | ControlFlowNode for Attribute() | provenance | MaD:86 |
1717
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:19:14:19:39 | ControlFlowNode for Attribute() | provenance | Config |
1818
| test.py:18:26:18:34 | ControlFlowNode for file_path | test.py:22:21:22:29 | ControlFlowNode for file_path | provenance | |
1919
| test.py:22:5:22:30 | ControlFlowNode for Attribute() | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
20-
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:85 |
20+
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:30 | ControlFlowNode for Attribute() | provenance | MaD:86 |
2121
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:22:5:22:60 | ControlFlowNode for Attribute() | provenance | Config |
2222
| test.py:22:21:22:29 | ControlFlowNode for file_path | test.py:24:18:24:26 | ControlFlowNode for file_path | provenance | |
2323
| test.py:24:18:24:26 | ControlFlowNode for file_path | test.py:24:5:24:52 | ControlFlowNode for Attribute() | provenance | Config |

python/ql/test/query-tests/Security/CWE-113-HeaderInjection/Tests1/HeaderInjection.expected

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,16 @@ edges
1111
| flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | provenance | |
1212
| flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | provenance | |
1313
| flask_tests.py:31:18:31:24 | ControlFlowNode for request | flask_tests.py:31:5:31:14 | ControlFlowNode for rfs_header | provenance | AdditionalTaintStep |
14+
| http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:6:45:6:53 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
15+
| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | http_test.py:7:40:7:56 | ControlFlowNode for Attribute | provenance | AdditionalTaintStep |
16+
| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | provenance | |
17+
| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | provenance | MaD:77 |
18+
| http_test.py:7:9:7:14 | ControlFlowNode for params | http_test.py:8:23:8:28 | ControlFlowNode for params | provenance | |
19+
| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | http_test.py:7:9:7:14 | ControlFlowNode for params | provenance | |
20+
| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | provenance | MaD:76 |
21+
| http_test.py:8:9:8:19 | ControlFlowNode for input_value | http_test.py:12:40:12:50 | ControlFlowNode for input_value | provenance | |
22+
| http_test.py:8:23:8:28 | ControlFlowNode for params | http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | provenance | dict.get |
23+
| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | http_test.py:8:9:8:19 | ControlFlowNode for input_value | provenance | |
1424
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | provenance | |
1525
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | provenance | |
1626
| wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | provenance | |
@@ -28,6 +38,17 @@ nodes
2838
| flask_tests.py:31:18:31:24 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
2939
| flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header |
3040
| flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | semmle.label | ControlFlowNode for rfs_header |
41+
| http_test.py:5:16:5:19 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
42+
| http_test.py:6:9:6:19 | ControlFlowNode for parsed_path | semmle.label | ControlFlowNode for parsed_path |
43+
| http_test.py:6:23:6:54 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
44+
| http_test.py:6:45:6:53 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
45+
| http_test.py:7:9:7:14 | ControlFlowNode for params | semmle.label | ControlFlowNode for params |
46+
| http_test.py:7:18:7:57 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
47+
| http_test.py:7:40:7:56 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
48+
| http_test.py:8:9:8:19 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value |
49+
| http_test.py:8:23:8:28 | ControlFlowNode for params | semmle.label | ControlFlowNode for params |
50+
| http_test.py:8:23:8:47 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
51+
| http_test.py:12:40:12:50 | ControlFlowNode for input_value | semmle.label | ControlFlowNode for input_value |
3152
| wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | semmle.label | ControlFlowNode for environ |
3253
| wsgiref_tests.py:6:5:6:10 | ControlFlowNode for h_name | semmle.label | ControlFlowNode for h_name |
3354
| wsgiref_tests.py:7:5:7:9 | ControlFlowNode for h_val | semmle.label | ControlFlowNode for h_val |
@@ -39,5 +60,6 @@ subpaths
3960
| flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:20:36:20:61 | ControlFlowNode for Subscript | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
4061
| flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:33:11:33:20 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
4162
| flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | flask_tests.py:35:12:35:21 | ControlFlowNode for rfs_header | This HTTP header is constructed from a $@. | flask_tests.py:1:29:1:35 | ControlFlowNode for ImportMember | user-provided value |
63+
| http_test.py:12:40:12:50 | ControlFlowNode for input_value | http_test.py:5:16:5:19 | ControlFlowNode for self | http_test.py:12:40:12:50 | ControlFlowNode for input_value | This HTTP header is constructed from a $@. | http_test.py:5:16:5:19 | ControlFlowNode for self | user-provided value |
4264
| wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:17:8:22 | ControlFlowNode for h_name | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value |
4365
| wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | wsgiref_tests.py:8:42:8:46 | ControlFlowNode for h_val | This HTTP header is constructed from a $@. | wsgiref_tests.py:4:14:4:20 | ControlFlowNode for environ | user-provided value |
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from http.server import HTTPServer, BaseHTTPRequestHandler
2+
import urllib.parse
3+
4+
class VulnerableHandler(BaseHTTPRequestHandler):
5+
def do_GET(self):
6+
parsed_path = urllib.parse.urlparse(self.path)
7+
params = urllib.parse.parse_qs(parsed_path.query)
8+
input_value = params.get("input", [""])[0]
9+
# Unsafe: Directly including user input in headers
10+
self.send_response(200)
11+
try:
12+
self.send_header("X-Info", input_value) # BAD
13+
except Exception as e:
14+
print(f"[!] Header injection failed: {e}")
15+
self.end_headers()
16+
self.wfile.write(b"Hello world!")
17+
18+
19+
# if __name__ == "__main__":
20+
# print("Serving vulnerable app on http://127.0.0.1:8080")
21+
# httpd = HTTPServer(("127.0.0.1", 8080), VulnerableHandler)
22+
# httpd.serve_forever()

0 commit comments

Comments
 (0)