@@ -618,4 +618,204 @@ public function testDeleteByData(): void
618618
619619 $ this ->storage ->deleteByData ($ data );
620620 }
621+
622+ /**
623+ * Test that invalid UTF-8 sequences are rejected to prevent collation errors
624+ *
625+ * @dataProvider invalidRequestPathDataProvider
626+ * @param string $requestPath
627+ * @param string $description
628+ * @return void
629+ */
630+ public function testFindOneByDataRejectsInvalidUtf8Sequences (string $ requestPath , string $ description ): void
631+ {
632+ $ data = [
633+ UrlRewrite::REQUEST_PATH => $ requestPath ,
634+ UrlRewrite::STORE_ID => 1
635+ ];
636+
637+ // Database should never be queried for invalid paths
638+ $ this ->connectionMock ->expects ($ this ->never ())
639+ ->method ('fetchAll ' );
640+
641+ $ this ->connectionMock ->expects ($ this ->never ())
642+ ->method ('fetchRow ' );
643+
644+ $ result = $ this ->storage ->findOneByData ($ data );
645+
646+ $ this ->assertNull ($ result , "Failed for case: {$ description }" );
647+ }
648+
649+ /**
650+ * Test that valid UTF-8 paths with normal characters work correctly
651+ *
652+ * @dataProvider validRequestPathDataProvider
653+ * @param string $requestPath
654+ * @param string $description
655+ * @return void
656+ */
657+ public function testFindOneByDataAcceptsValidUtf8Paths (string $ requestPath , string $ description ): void
658+ {
659+ $ data = [
660+ UrlRewrite::REQUEST_PATH => $ requestPath ,
661+ UrlRewrite::STORE_ID => 1
662+ ];
663+
664+ $ this ->connectionMock ->method ('quoteIdentifier ' )
665+ ->willReturnArgument (0 );
666+
667+ $ this ->select ->method ('where ' )
668+ ->willReturnSelf ();
669+
670+ // Database should be queried normally
671+ $ this ->connectionMock ->expects ($ this ->once ())
672+ ->method ('fetchAll ' )
673+ ->with ($ this ->select )
674+ ->willReturn ([]);
675+
676+ $ result = $ this ->storage ->findOneByData ($ data );
677+
678+ // Result should be null (no matching URL found), but query should have executed
679+ $ this ->assertNull ($ result , "Failed for case: {$ description }" );
680+ }
681+
682+ /**
683+ * Data provider for invalid request paths that should be rejected
684+ *
685+ * @return array
686+ */
687+ public function invalidRequestPathDataProvider (): array
688+ {
689+ return [
690+ // Path traversal attempts with overlong UTF-8 encoding (invalid UTF-8)
691+ [
692+ '%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/%c0%ae%c0%ae/etc/passwd ' ,
693+ 'Path traversal with overlong encoding (%c0%ae) - invalid UTF-8 '
694+ ],
695+ // Invalid single UTF-8 byte
696+ [
697+ '%C0 ' ,
698+ 'Invalid single UTF-8 byte '
699+ ],
700+ // Emojis (4-byte UTF-8 characters that cause collation issues)
701+ [
702+ '🔎 ' ,
703+ 'Magnifying glass emoji (U+1F50E) - 4-byte UTF-8 '
704+ ],
705+ [
706+ 'search/🔎 ' ,
707+ 'Emoji in path segment - 4-byte UTF-8 '
708+ ],
709+ [
710+ '😀 ' ,
711+ 'Grinning face emoji (U+1F600) - 4-byte UTF-8 '
712+ ],
713+ [
714+ '🎉 ' ,
715+ 'Party popper emoji (U+1F389) - 4-byte UTF-8 '
716+ ],
717+ // Control characters
718+ [
719+ "test \x00path " ,
720+ 'Null byte in path '
721+ ],
722+ [
723+ "test \x1Fpath " ,
724+ 'Unit separator control character '
725+ ],
726+ [
727+ "test \x7Fpath " ,
728+ 'DEL control character '
729+ ],
730+ // Mixed invalid sequences
731+ [
732+ '%c0%ae%c0%ae/🔎/test ' ,
733+ 'Overlong encoding with emoji - invalid UTF-8 '
734+ ],
735+ // More 4-byte UTF-8 characters
736+ [
737+ '𝕳𝖊𝖑𝖑𝖔 ' ,
738+ 'Mathematical alphanumeric symbols (U+1D573-U+1D586) - 4-byte UTF-8 '
739+ ],
740+ [
741+ '🏠 ' ,
742+ 'House emoji (U+1F3E0) - 4-byte UTF-8 '
743+ ],
744+ ];
745+ }
746+
747+ /**
748+ * Data provider for valid request paths that should be accepted
749+ *
750+ * @return array
751+ */
752+ public function validRequestPathDataProvider (): array
753+ {
754+ return [
755+ // Standard ASCII paths
756+ [
757+ 'products/laptop ' ,
758+ 'Simple product path '
759+ ],
760+ [
761+ 'category/electronics/computers ' ,
762+ 'Multi-level category path '
763+ ],
764+ [
765+ 'about-us ' ,
766+ 'Simple CMS page '
767+ ],
768+ // Valid 3-byte UTF-8 characters (should work)
769+ [
770+ 'café-menu ' ,
771+ 'French accented character (é) '
772+ ],
773+ [
774+ 'products/niño ' ,
775+ 'Spanish ñ character '
776+ ],
777+ [
778+ 'münchen-store ' ,
779+ 'German umlaut (ü) '
780+ ],
781+ [
782+ 'товар ' ,
783+ 'Cyrillic characters '
784+ ],
785+ [
786+ '产品 ' ,
787+ 'Chinese characters (3-byte UTF-8) '
788+ ],
789+ [
790+ 'المنتجات ' ,
791+ 'Arabic characters '
792+ ],
793+ // Special URL-safe characters
794+ [
795+ 'product-name-123 ' ,
796+ 'Hyphens and numbers '
797+ ],
798+ [
799+ 'product_name_test ' ,
800+ 'Underscores '
801+ ],
802+ [
803+ 'category/sub.category ' ,
804+ 'Dots in path '
805+ ],
806+ [
807+ 'path/to/page ' ,
808+ 'Forward slashes '
809+ ],
810+ // URL-encoded valid characters
811+ [
812+ 'product%20name ' ,
813+ 'URL-encoded space '
814+ ],
815+ [
816+ 'category%2Fsubcategory ' ,
817+ 'URL-encoded forward slash '
818+ ],
819+ ];
820+ }
621821}
0 commit comments