|
14 | 14 |
|
15 | 15 | import pytest |
16 | 16 |
|
| 17 | +from sagemaker.core import git_utils |
17 | 18 | from sagemaker.core.git_utils import _validate_git_config |
18 | 19 |
|
19 | 20 |
|
@@ -135,3 +136,200 @@ def test_validate_git_config_repo_none(): |
135 | 136 |
|
136 | 137 | with pytest.raises(ValueError, match="'repo' must be a string"): |
137 | 138 | _validate_git_config(git_config) |
| 139 | + |
| 140 | + |
| 141 | +class TestGitUrlSanitization: |
| 142 | + """Test cases for Git URL sanitization to prevent injection attacks.""" |
| 143 | + |
| 144 | + def test_sanitize_git_url_valid_https_urls(self): |
| 145 | + """Test that valid HTTPS URLs pass sanitization.""" |
| 146 | + valid_urls = [ |
| 147 | + "https://github.com/user/repo.git", |
| 148 | + "https://gitlab.com/user/repo.git", |
| 149 | + "https://token@github.com/user/repo.git", |
| 150 | + "https://user:pass@github.com/user/repo.git", |
| 151 | + "http://internal-git.company.com/repo.git", |
| 152 | + ] |
| 153 | + |
| 154 | + for url in valid_urls: |
| 155 | + result = git_utils._sanitize_git_url(url) |
| 156 | + assert result == url |
| 157 | + |
| 158 | + def test_sanitize_git_url_valid_ssh_urls(self): |
| 159 | + """Test that valid SSH URLs pass sanitization.""" |
| 160 | + valid_urls = [ |
| 161 | + "git@github.com:user/repo.git", |
| 162 | + "git@gitlab.com:user/repo.git", |
| 163 | + "ssh://git@github.com/user/repo.git", |
| 164 | + "ssh://git-codecommit.us-west-2.amazonaws.com/v1/repos/test-repo/", |
| 165 | + "git@internal-git.company.com:repo.git", |
| 166 | + ] |
| 167 | + |
| 168 | + for url in valid_urls: |
| 169 | + result = git_utils._sanitize_git_url(url) |
| 170 | + assert result == url |
| 171 | + |
| 172 | + def test_sanitize_git_url_blocks_multiple_at_https(self): |
| 173 | + """Test that HTTPS URLs with multiple @ symbols are blocked.""" |
| 174 | + malicious_urls = [ |
| 175 | + "https://user@attacker.com@github.com/repo.git", |
| 176 | + "https://token@evil.com@gitlab.com/user/repo.git", |
| 177 | + "https://a@b@c@github.com/repo.git", |
| 178 | + "https://user@malicious-host@github.com/legit/repo.git", |
| 179 | + ] |
| 180 | + |
| 181 | + for url in malicious_urls: |
| 182 | + with pytest.raises(ValueError) as error: |
| 183 | + git_utils._sanitize_git_url(url) |
| 184 | + assert "multiple @ symbols detected" in str(error.value) |
| 185 | + |
| 186 | + def test_sanitize_git_url_blocks_multiple_at_ssh(self): |
| 187 | + """Test that SSH URLs with multiple @ symbols are blocked.""" |
| 188 | + malicious_urls = [ |
| 189 | + "git@attacker.com@github.com:repo.git", |
| 190 | + "git@evil@gitlab.com:user/repo.git", |
| 191 | + "ssh://git@malicious@github.com/repo.git", |
| 192 | + "git@a@b@c:repo.git", |
| 193 | + ] |
| 194 | + |
| 195 | + for url in malicious_urls: |
| 196 | + with pytest.raises(ValueError) as error: |
| 197 | + git_utils._sanitize_git_url(url) |
| 198 | + assert any( |
| 199 | + phrase in str(error.value) |
| 200 | + for phrase in ["multiple @ symbols detected", "exactly one @ symbol"] |
| 201 | + ) |
| 202 | + |
| 203 | + def test_sanitize_git_url_blocks_invalid_schemes_and_git_at_format(self): |
| 204 | + """Test that invalid schemes and git@ format violations are blocked.""" |
| 205 | + unsupported_scheme_urls = [ |
| 206 | + "git-github.com:user/repo.git", |
| 207 | + ] |
| 208 | + |
| 209 | + for url in unsupported_scheme_urls: |
| 210 | + with pytest.raises(ValueError) as error: |
| 211 | + git_utils._sanitize_git_url(url) |
| 212 | + assert "Unsupported URL scheme" in str(error.value) |
| 213 | + |
| 214 | + invalid_git_at_urls = [ |
| 215 | + "git@github.com@evil.com:repo.git", |
| 216 | + ] |
| 217 | + |
| 218 | + for url in invalid_git_at_urls: |
| 219 | + with pytest.raises(ValueError) as error: |
| 220 | + git_utils._sanitize_git_url(url) |
| 221 | + assert "exactly one @ symbol" in str(error.value) |
| 222 | + |
| 223 | + def test_sanitize_git_url_blocks_url_encoding_obfuscation(self): |
| 224 | + """Test that URL-encoded obfuscation attempts are blocked.""" |
| 225 | + obfuscated_urls = [ |
| 226 | + "https://github.com%25evil.com/repo.git", |
| 227 | + "https://user@github.com%40attacker.com/repo.git", |
| 228 | + "https://github.com%2Fevil.com/repo.git", |
| 229 | + "https://github.com%3Aevil.com/repo.git", |
| 230 | + ] |
| 231 | + |
| 232 | + for url in obfuscated_urls: |
| 233 | + with pytest.raises(ValueError) as error: |
| 234 | + git_utils._sanitize_git_url(url) |
| 235 | + assert any( |
| 236 | + phrase in str(error.value) |
| 237 | + for phrase in ["Suspicious URL encoding detected", "Invalid characters in hostname"] |
| 238 | + ) |
| 239 | + |
| 240 | + def test_sanitize_git_url_blocks_invalid_hostname_chars(self): |
| 241 | + """Test that hostnames with invalid characters are blocked.""" |
| 242 | + invalid_urls = [ |
| 243 | + "https://github<script>.com/repo.git", |
| 244 | + "https://github>.com/repo.git", |
| 245 | + "https://github[].com/repo.git", |
| 246 | + "https://github{}.com/repo.git", |
| 247 | + ] |
| 248 | + |
| 249 | + for url in invalid_urls: |
| 250 | + with pytest.raises(ValueError) as error: |
| 251 | + git_utils._sanitize_git_url(url) |
| 252 | + assert any( |
| 253 | + phrase in str(error.value) |
| 254 | + for phrase in [ |
| 255 | + "Invalid characters in hostname", |
| 256 | + "Failed to parse URL", |
| 257 | + "does not appear to be an IPv4 or IPv6 address", |
| 258 | + ] |
| 259 | + ) |
| 260 | + |
| 261 | + def test_sanitize_git_url_blocks_unsupported_schemes(self): |
| 262 | + """Test that unsupported URL schemes are blocked.""" |
| 263 | + unsupported_urls = [ |
| 264 | + "ftp://github.com/repo.git", |
| 265 | + "file:///local/repo.git", |
| 266 | + "javascript:alert('xss')", |
| 267 | + "data:text/html,<script>alert('xss')</script>", |
| 268 | + ] |
| 269 | + |
| 270 | + for url in unsupported_urls: |
| 271 | + with pytest.raises(ValueError) as error: |
| 272 | + git_utils._sanitize_git_url(url) |
| 273 | + assert "Unsupported URL scheme" in str(error.value) |
| 274 | + |
| 275 | + def test_git_clone_repo_blocks_malicious_https_url(self): |
| 276 | + """Test that git_clone_repo blocks malicious HTTPS URLs.""" |
| 277 | + malicious_git_config = { |
| 278 | + "repo": "https://user@attacker.com@github.com/legit/repo.git", |
| 279 | + "branch": "main", |
| 280 | + } |
| 281 | + entry_point = "train.py" |
| 282 | + |
| 283 | + with pytest.raises(ValueError) as error: |
| 284 | + git_utils.git_clone_repo(malicious_git_config, entry_point) |
| 285 | + assert "multiple @ symbols detected" in str(error.value) |
| 286 | + |
| 287 | + def test_git_clone_repo_blocks_malicious_ssh_url(self): |
| 288 | + """Test that git_clone_repo blocks malicious SSH URLs.""" |
| 289 | + malicious_git_config = { |
| 290 | + "repo": "git@OBVIOUS@github.com:sage-maker/temp-sev2.git", |
| 291 | + "branch": "main", |
| 292 | + } |
| 293 | + entry_point = "train.py" |
| 294 | + |
| 295 | + with pytest.raises(ValueError) as error: |
| 296 | + git_utils.git_clone_repo(malicious_git_config, entry_point) |
| 297 | + assert "exactly one @ symbol" in str(error.value) |
| 298 | + |
| 299 | + def test_git_clone_repo_blocks_url_encoded_attack(self): |
| 300 | + """Test that git_clone_repo blocks URL-encoded attacks.""" |
| 301 | + malicious_git_config = { |
| 302 | + "repo": "https://github.com%40attacker.com/repo.git", |
| 303 | + "branch": "main", |
| 304 | + } |
| 305 | + entry_point = "train.py" |
| 306 | + |
| 307 | + with pytest.raises(ValueError) as error: |
| 308 | + git_utils.git_clone_repo(malicious_git_config, entry_point) |
| 309 | + assert "Suspicious URL encoding detected" in str(error.value) |
| 310 | + |
| 311 | + def test_sanitize_git_url_comprehensive_attack_scenarios(self): |
| 312 | + attack_scenarios = [ |
| 313 | + "https://USER@YOUR_NGROK_OR_LOCALHOST/malicious.git@github.com%25legit%25repo.git", |
| 314 | + "https://user@malicious-host@github.com/legit/repo.git", |
| 315 | + "git@attacker.com@github.com:user/repo.git", |
| 316 | + "ssh://git@evil.com@github.com/repo.git", |
| 317 | + "https://github.com%40evil.com/repo.git", |
| 318 | + "https://user@github.com%2Fevil.com/repo.git", |
| 319 | + ] |
| 320 | + |
| 321 | + entry_point = "train.py" |
| 322 | + |
| 323 | + for malicious_url in attack_scenarios: |
| 324 | + git_config = {"repo": malicious_url} |
| 325 | + with pytest.raises(ValueError) as error: |
| 326 | + git_utils.git_clone_repo(git_config, entry_point) |
| 327 | + assert any( |
| 328 | + phrase in str(error.value) |
| 329 | + for phrase in [ |
| 330 | + "multiple @ symbols detected", |
| 331 | + "exactly one @ symbol", |
| 332 | + "Suspicious URL encoding detected", |
| 333 | + "Invalid characters in hostname", |
| 334 | + ] |
| 335 | + ) |
0 commit comments