Skip to content

Commit a40ddc9

Browse files
committed
Merge remote-tracking branch 'origin/ACP2E-4328' into PR_2025_11_20_flowers
2 parents 436510c + a2e91ce commit a40ddc9

File tree

2 files changed

+239
-0
lines changed

2 files changed

+239
-0
lines changed

app/code/Magento/UrlRewrite/Model/Storage/DbStorage.php

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ protected function doFindOneByData(array $data)
112112
$result = null;
113113
$requestPath = $data[UrlRewrite::REQUEST_PATH];
114114
$decodedRequestPath = urldecode($requestPath);
115+
116+
// Validate UTF-8 encoding to prevent collation errors and reject malicious input
117+
if (!$this->isValidRequestPath($decodedRequestPath)) {
118+
return null;
119+
}
120+
115121
$data[UrlRewrite::REQUEST_PATH] = array_unique(
116122
[
117123
rtrim($requestPath, '/'),
@@ -130,6 +136,39 @@ protected function doFindOneByData(array $data)
130136
return $this->connection->fetchRow($this->prepareSelect($data));
131137
}
132138

139+
/**
140+
* Validate request path for valid UTF-8 and prevent collation errors
141+
*
142+
* This method checks for:
143+
* - Invalid UTF-8 sequences (e.g., overlong encodings like %c0%ae)
144+
* - 4-byte UTF-8 characters (emojis) that may cause collation mismatches
145+
* - Non-printable and control characters
146+
*
147+
* @param string $path
148+
* @return bool
149+
*/
150+
private function isValidRequestPath(string $path): bool
151+
{
152+
// Check for valid UTF-8 encoding
153+
if (!mb_check_encoding($path, 'UTF-8')) {
154+
return false;
155+
}
156+
157+
// Check for 4-byte UTF-8 characters (emojis, etc.) that can cause collation issues
158+
// 4-byte UTF-8 chars start with bytes 0xF0-0xF7 (11110xxx in binary)
159+
if (preg_match('/[\x{10000}-\x{10FFFF}]/u', $path)) {
160+
return false;
161+
}
162+
163+
// Check for control characters and other problematic characters
164+
// Allow: letters, numbers, hyphen, underscore, period, forward slash, and common URL-safe chars
165+
if (preg_match('/[\x00-\x1F\x7F]/', $path)) {
166+
return false;
167+
}
168+
169+
return true;
170+
}
171+
133172
/**
134173
* Extract most relevant url rewrite from url rewrites list
135174
*

app/code/Magento/UrlRewrite/Test/Unit/Model/Storage/DbStorageTest.php

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,4 +618,204 @@ public function testDeleteByData(): void
618618

619619
$this->storage->deleteByData($data);
620620
}
621+
622+
/**
623+
* Test that invalid UTF-8 sequences are rejected to prevent collation errors
624+
*
625+
* @dataProvider invalidRequestPathDataProvider
626+
* @param string $requestPath
627+
* @param string $description
628+
* @return void
629+
*/
630+
public function testFindOneByDataRejectsInvalidUtf8Sequences(string $requestPath, string $description): void
631+
{
632+
$data = [
633+
UrlRewrite::REQUEST_PATH => $requestPath,
634+
UrlRewrite::STORE_ID => 1
635+
];
636+
637+
// Database should never be queried for invalid paths
638+
$this->connectionMock->expects($this->never())
639+
->method('fetchAll');
640+
641+
$this->connectionMock->expects($this->never())
642+
->method('fetchRow');
643+
644+
$result = $this->storage->findOneByData($data);
645+
646+
$this->assertNull($result, "Failed for case: {$description}");
647+
}
648+
649+
/**
650+
* Test that valid UTF-8 paths with normal characters work correctly
651+
*
652+
* @dataProvider validRequestPathDataProvider
653+
* @param string $requestPath
654+
* @param string $description
655+
* @return void
656+
*/
657+
public function testFindOneByDataAcceptsValidUtf8Paths(string $requestPath, string $description): void
658+
{
659+
$data = [
660+
UrlRewrite::REQUEST_PATH => $requestPath,
661+
UrlRewrite::STORE_ID => 1
662+
];
663+
664+
$this->connectionMock->method('quoteIdentifier')
665+
->willReturnArgument(0);
666+
667+
$this->select->method('where')
668+
->willReturnSelf();
669+
670+
// Database should be queried normally
671+
$this->connectionMock->expects($this->once())
672+
->method('fetchAll')
673+
->with($this->select)
674+
->willReturn([]);
675+
676+
$result = $this->storage->findOneByData($data);
677+
678+
// Result should be null (no matching URL found), but query should have executed
679+
$this->assertNull($result, "Failed for case: {$description}");
680+
}
681+
682+
/**
683+
* Data provider for invalid request paths that should be rejected
684+
*
685+
* @return array
686+
*/
687+
public function invalidRequestPathDataProvider(): array
688+
{
689+
return [
690+
// Path traversal attempts with overlong UTF-8 encoding (invalid UTF-8)
691+
[
692+
'%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/etc/passwd',
693+
'Path traversal with overlong encoding (%c0%ae) - invalid UTF-8'
694+
],
695+
// Invalid single UTF-8 byte
696+
[
697+
'%C0',
698+
'Invalid single UTF-8 byte'
699+
],
700+
// Emojis (4-byte UTF-8 characters that cause collation issues)
701+
[
702+
'🔎',
703+
'Magnifying glass emoji (U+1F50E) - 4-byte UTF-8'
704+
],
705+
[
706+
'search/🔎',
707+
'Emoji in path segment - 4-byte UTF-8'
708+
],
709+
[
710+
'😀',
711+
'Grinning face emoji (U+1F600) - 4-byte UTF-8'
712+
],
713+
[
714+
'🎉',
715+
'Party popper emoji (U+1F389) - 4-byte UTF-8'
716+
],
717+
// Control characters
718+
[
719+
"test\x00path",
720+
'Null byte in path'
721+
],
722+
[
723+
"test\x1Fpath",
724+
'Unit separator control character'
725+
],
726+
[
727+
"test\x7Fpath",
728+
'DEL control character'
729+
],
730+
// Mixed invalid sequences
731+
[
732+
'%c0%ae%c0%ae/🔎/test',
733+
'Overlong encoding with emoji - invalid UTF-8'
734+
],
735+
// More 4-byte UTF-8 characters
736+
[
737+
'𝕳𝖊𝖑𝖑𝖔',
738+
'Mathematical alphanumeric symbols (U+1D573-U+1D586) - 4-byte UTF-8'
739+
],
740+
[
741+
'🏠',
742+
'House emoji (U+1F3E0) - 4-byte UTF-8'
743+
],
744+
];
745+
}
746+
747+
/**
748+
* Data provider for valid request paths that should be accepted
749+
*
750+
* @return array
751+
*/
752+
public function validRequestPathDataProvider(): array
753+
{
754+
return [
755+
// Standard ASCII paths
756+
[
757+
'products/laptop',
758+
'Simple product path'
759+
],
760+
[
761+
'category/electronics/computers',
762+
'Multi-level category path'
763+
],
764+
[
765+
'about-us',
766+
'Simple CMS page'
767+
],
768+
// Valid 3-byte UTF-8 characters (should work)
769+
[
770+
'café-menu',
771+
'French accented character (é)'
772+
],
773+
[
774+
'products/niño',
775+
'Spanish ñ character'
776+
],
777+
[
778+
'münchen-store',
779+
'German umlaut (ü)'
780+
],
781+
[
782+
'товар',
783+
'Cyrillic characters'
784+
],
785+
[
786+
'产品',
787+
'Chinese characters (3-byte UTF-8)'
788+
],
789+
[
790+
'المنتجات',
791+
'Arabic characters'
792+
],
793+
// Special URL-safe characters
794+
[
795+
'product-name-123',
796+
'Hyphens and numbers'
797+
],
798+
[
799+
'product_name_test',
800+
'Underscores'
801+
],
802+
[
803+
'category/sub.category',
804+
'Dots in path'
805+
],
806+
[
807+
'path/to/page',
808+
'Forward slashes'
809+
],
810+
// URL-encoded valid characters
811+
[
812+
'product%20name',
813+
'URL-encoded space'
814+
],
815+
[
816+
'category%2Fsubcategory',
817+
'URL-encoded forward slash'
818+
],
819+
];
820+
}
621821
}

0 commit comments

Comments
 (0)