22/// This is a replacement for the `url` crate dependency.
33#[ derive( Debug ) ]
44pub ( crate ) struct ParsedUrl < ' a > {
5- scheme : String , // Owned to allow normalization to lowercase
6- username : & ' a str ,
7- password : Option < & ' a str > ,
8- host : Option < String > , // Owned to allow normalization to lowercase
9- port : Option < u16 > ,
10- path : & ' a str ,
5+ pub scheme : String , // Owned to allow normalization to lowercase
6+ pub username : & ' a str ,
7+ pub password : Option < & ' a str > ,
8+ pub host : Option < String > , // Owned to allow normalization to lowercase
9+ pub port : Option < u16 > ,
10+ pub path : & ' a str ,
1111}
1212
1313/// Minimal parse error type to replace url::ParseError
1414#[ derive( Debug , Clone , PartialEq , Eq , thiserror:: Error ) ]
15- pub enum ParseError {
15+ #[ allow( missing_docs) ]
16+ pub enum UrlParseError {
1617 #[ error( "relative URL without a base" ) ]
1718 RelativeUrlWithoutBase ,
18- #[ error( "invalid port number" ) ]
19+ #[ error( "invalid port number - must be between 1-65535 " ) ]
1920 InvalidPort ,
2021 #[ error( "invalid domain character" ) ]
2122 InvalidDomainCharacter ,
23+ #[ error( "Scheme requires host" ) ]
24+ SchemeRequiresHost ,
2225}
2326
2427/// Check if a character is valid in a URL scheme.
@@ -30,30 +33,24 @@ fn is_valid_scheme_char(c: char) -> bool {
3033impl < ' a > ParsedUrl < ' a > {
3134 /// Parse a URL string into its components.
3235 /// Expected format: scheme://[user[:password]@]host[:port]/path
33- pub ( crate ) fn parse ( input : & ' a str ) -> Result < Self , ParseError > {
36+ pub ( crate ) fn parse ( input : & ' a str ) -> Result < Self , UrlParseError > {
3437 // Find scheme by looking for first ':'
35- let first_colon = input. find ( ':' ) . ok_or ( ParseError :: RelativeUrlWithoutBase ) ?;
36-
38+ let first_colon = input. find ( ':' ) . ok_or ( UrlParseError :: RelativeUrlWithoutBase ) ?;
3739 let scheme_str = & input[ ..first_colon] ;
3840 // Normalize scheme to lowercase for case-insensitive matching (matches url crate behavior)
3941 let scheme = scheme_str. to_ascii_lowercase ( ) ;
40-
41- // Verify it's followed by "//"
42- if !input[ first_colon..] . starts_with ( "://" ) {
43- return Err ( ParseError :: RelativeUrlWithoutBase ) ;
44- }
45-
46- // Start after "://"
47- let after_scheme = & input[ first_colon + 3 ..] ;
42+ let Some ( after_scheme) = input[ first_colon..] . strip_prefix ( "://" ) else {
43+ return Err ( UrlParseError :: RelativeUrlWithoutBase ) ;
44+ } ;
4845
4946 // Check for relative URL (scheme without proper authority)
5047 if scheme_str. is_empty ( ) {
51- return Err ( ParseError :: RelativeUrlWithoutBase ) ;
48+ return Err ( UrlParseError :: RelativeUrlWithoutBase ) ;
5249 }
5350
5451 // Validate scheme characters (check original before lowercase conversion)
5552 if !scheme_str. chars ( ) . all ( is_valid_scheme_char) {
56- return Err ( ParseError :: RelativeUrlWithoutBase ) ;
53+ return Err ( UrlParseError :: RelativeUrlWithoutBase ) ;
5754 }
5855
5956 // Find path start (first '/' after scheme)
@@ -84,7 +81,7 @@ impl<'a> ParsedUrl<'a> {
8481 let ( h, p) = Self :: parse_host_port ( host_port) ?;
8582 // If we have user info, we must have a host
8683 if h. is_none ( ) {
87- return Err ( ParseError :: InvalidDomainCharacter ) ;
84+ return Err ( UrlParseError :: InvalidDomainCharacter ) ;
8885 }
8986 ( user, pass, h, p)
9087 } else {
@@ -97,7 +94,7 @@ impl<'a> ParsedUrl<'a> {
9794 // Scheme is already lowercase at this point
9895 let requires_host = matches ! ( scheme. as_str( ) , "http" | "https" | "git" | "ssh" | "ftp" | "ftps" ) ;
9996 if requires_host && host. is_none ( ) {
100- return Err ( ParseError :: InvalidDomainCharacter ) ;
97+ return Err ( UrlParseError :: SchemeRequiresHost ) ;
10198 }
10299
103100 Ok ( ParsedUrl {
@@ -110,7 +107,7 @@ impl<'a> ParsedUrl<'a> {
110107 } )
111108 }
112109
113- fn parse_host_port ( host_port : & str ) -> Result < ( Option < String > , Option < u16 > ) , ParseError > {
110+ fn parse_host_port ( host_port : & str ) -> Result < ( Option < String > , Option < u16 > ) , UrlParseError > {
114111 if host_port. is_empty ( ) {
115112 return Ok ( ( None , None ) ) ;
116113 }
@@ -119,23 +116,23 @@ impl<'a> ParsedUrl<'a> {
119116 if host_port. starts_with ( '[' ) {
120117 if let Some ( bracket_end) = host_port. find ( ']' ) {
121118 // IPv6 addresses are case-insensitive, normalize to lowercase
122- let host = Some ( host_port[ ..bracket_end + 1 ] . to_ascii_lowercase ( ) ) ;
119+ let host = Some ( host_port[ ..= bracket_end] . to_ascii_lowercase ( ) ) ;
123120 let remaining = & host_port[ bracket_end + 1 ..] ;
124121
125122 if remaining. is_empty ( ) {
126123 return Ok ( ( host, None ) ) ;
127124 } else if let Some ( port_str) = remaining. strip_prefix ( ':' ) {
128- let port = port_str. parse :: < u16 > ( ) . map_err ( |_| ParseError :: InvalidPort ) ?;
125+ let port = port_str. parse :: < u16 > ( ) . map_err ( |_| UrlParseError :: InvalidPort ) ?;
129126 // Validate port is in valid range (1-65535, port 0 is invalid)
130127 if port == 0 {
131- return Err ( ParseError :: InvalidPort ) ;
128+ return Err ( UrlParseError :: InvalidPort ) ;
132129 }
133130 return Ok ( ( host, Some ( port) ) ) ;
134131 } else {
135- return Err ( ParseError :: InvalidDomainCharacter ) ;
132+ return Err ( UrlParseError :: InvalidDomainCharacter ) ;
136133 }
137134 } else {
138- return Err ( ParseError :: InvalidDomainCharacter ) ;
135+ return Err ( UrlParseError :: InvalidDomainCharacter ) ;
139136 }
140137 }
141138
@@ -151,10 +148,10 @@ impl<'a> ParsedUrl<'a> {
151148 } else if potential_port. chars ( ) . all ( |c| c. is_ascii_digit ( ) ) {
152149 let host_str = & host_port[ ..colon_pos] ;
153150 let host = Self :: normalize_hostname ( host_str) ?;
154- let port = potential_port. parse :: < u16 > ( ) . map_err ( |_| ParseError :: InvalidPort ) ?;
151+ let port = potential_port. parse :: < u16 > ( ) . map_err ( |_| UrlParseError :: InvalidPort ) ?;
155152 // Validate port is in valid range (1-65535, port 0 is invalid)
156153 if port == 0 {
157- return Err ( ParseError :: InvalidPort ) ;
154+ return Err ( UrlParseError :: InvalidPort ) ;
158155 }
159156 return Ok ( ( Some ( host) , Some ( port) ) ) ;
160157 }
@@ -175,10 +172,10 @@ impl<'a> ParsedUrl<'a> {
175172 /// Validate and possibly normalize a hostname
176173 /// Valid DNS hostnames are normalized to lowercase
177174 /// Invalid strings (like injection attempts) are preserved as-is but ? is rejected
178- fn normalize_hostname ( host : & str ) -> Result < String , ParseError > {
175+ fn normalize_hostname ( host : & str ) -> Result < String , UrlParseError > {
179176 // Reject ? character which git's url parser rejects
180177 if host. contains ( '?' ) {
181- return Err ( ParseError :: InvalidDomainCharacter ) ;
178+ return Err ( UrlParseError :: InvalidDomainCharacter ) ;
182179 }
183180
184181 // Only normalize if it looks like a valid DNS hostname
@@ -189,36 +186,6 @@ impl<'a> ParsedUrl<'a> {
189186 Ok ( host. to_owned ( ) )
190187 }
191188 }
192-
193- pub ( crate ) fn scheme ( & self ) -> & str {
194- & self . scheme
195- }
196-
197- pub ( crate ) fn username ( & self ) -> & str {
198- self . username
199- }
200-
201- pub ( crate ) fn password ( & self ) -> Option < & str > {
202- self . password
203- }
204-
205- pub ( crate ) fn host_str ( & self ) -> Option < & str > {
206- self . host . as_deref ( )
207- }
208-
209- pub ( crate ) fn port ( & self ) -> Option < u16 > {
210- self . port
211- }
212-
213- pub ( crate ) fn path ( & self ) -> & str {
214- self . path
215- }
216-
217- /// Check if this URL cannot be a base (is relative).
218- /// For our purposes, since we only parse URLs with "://", they can all be a base.
219- pub ( crate ) fn cannot_be_a_base ( & self ) -> bool {
220- false
221- }
222189}
223190
224191#[ cfg( test) ]
@@ -228,56 +195,56 @@ mod tests {
228195 #[ test]
229196 fn test_simple_url ( ) {
230197 let url = ParsedUrl :: parse ( "http://example.com/path" ) . unwrap ( ) ;
231- assert_eq ! ( url. scheme( ) , "http" ) ;
232- assert_eq ! ( url. host_str ( ) , Some ( "example.com" ) ) ;
233- assert_eq ! ( url. path( ) , "/path" ) ;
234- assert_eq ! ( url. username( ) , "" ) ;
235- assert_eq ! ( url. password( ) , None ) ;
236- assert_eq ! ( url. port( ) , None ) ;
198+ assert_eq ! ( url. scheme, "http" ) ;
199+ assert_eq ! ( url. host . as_deref ( ) , Some ( "example.com" ) ) ;
200+ assert_eq ! ( url. path, "/path" ) ;
201+ assert_eq ! ( url. username, "" ) ;
202+ assert_eq ! ( url. password, None ) ;
203+ assert_eq ! ( url. port, None ) ;
237204 }
238205
239206 #[ test]
240207 fn test_url_with_port ( ) {
241208 let url = ParsedUrl :: parse ( "http://example.com:8080/path" ) . unwrap ( ) ;
242- assert_eq ! ( url. scheme( ) , "http" ) ;
243- assert_eq ! ( url. host_str ( ) , Some ( "example.com" ) ) ;
244- assert_eq ! ( url. port( ) , Some ( 8080 ) ) ;
245- assert_eq ! ( url. path( ) , "/path" ) ;
209+ assert_eq ! ( url. scheme, "http" ) ;
210+ assert_eq ! ( url. host . as_deref ( ) , Some ( "example.com" ) ) ;
211+ assert_eq ! ( url. port, Some ( 8080 ) ) ;
212+ assert_eq ! ( url. path, "/path" ) ;
246213 }
247214
248215 #[ test]
249216 fn test_url_with_user ( ) {
250217 let url = ParsedUrl :: parse ( "http://user@example.com/path" ) . unwrap ( ) ;
251- assert_eq ! ( url. scheme( ) , "http" ) ;
252- assert_eq ! ( url. username( ) , "user" ) ;
253- assert_eq ! ( url. host_str ( ) , Some ( "example.com" ) ) ;
254- assert_eq ! ( url. path( ) , "/path" ) ;
218+ assert_eq ! ( url. scheme, "http" ) ;
219+ assert_eq ! ( url. username, "user" ) ;
220+ assert_eq ! ( url. host . as_deref ( ) , Some ( "example.com" ) ) ;
221+ assert_eq ! ( url. path, "/path" ) ;
255222 }
256223
257224 #[ test]
258225 fn test_url_with_user_and_password ( ) {
259226 let url = ParsedUrl :: parse ( "http://user:pass@example.com/path" ) . unwrap ( ) ;
260- assert_eq ! ( url. scheme( ) , "http" ) ;
261- assert_eq ! ( url. username( ) , "user" ) ;
262- assert_eq ! ( url. password( ) , Some ( "pass" ) ) ;
263- assert_eq ! ( url. host_str ( ) , Some ( "example.com" ) ) ;
264- assert_eq ! ( url. path( ) , "/path" ) ;
227+ assert_eq ! ( url. scheme, "http" ) ;
228+ assert_eq ! ( url. username, "user" ) ;
229+ assert_eq ! ( url. password, Some ( "pass" ) ) ;
230+ assert_eq ! ( url. host . as_deref ( ) , Some ( "example.com" ) ) ;
231+ assert_eq ! ( url. path, "/path" ) ;
265232 }
266233
267234 #[ test]
268235 fn test_url_with_ipv6 ( ) {
269236 let url = ParsedUrl :: parse ( "http://[::1]/path" ) . unwrap ( ) ;
270- assert_eq ! ( url. scheme( ) , "http" ) ;
271- assert_eq ! ( url. host_str ( ) , Some ( "[::1]" ) ) ;
272- assert_eq ! ( url. path( ) , "/path" ) ;
237+ assert_eq ! ( url. scheme, "http" ) ;
238+ assert_eq ! ( url. host . as_deref ( ) , Some ( "[::1]" ) ) ;
239+ assert_eq ! ( url. path, "/path" ) ;
273240 }
274241
275242 #[ test]
276243 fn test_url_with_ipv6_and_port ( ) {
277244 let url = ParsedUrl :: parse ( "http://[::1]:8080/path" ) . unwrap ( ) ;
278- assert_eq ! ( url. scheme( ) , "http" ) ;
279- assert_eq ! ( url. host_str ( ) , Some ( "[::1]" ) ) ;
280- assert_eq ! ( url. port( ) , Some ( 8080 ) ) ;
281- assert_eq ! ( url. path( ) , "/path" ) ;
245+ assert_eq ! ( url. scheme, "http" ) ;
246+ assert_eq ! ( url. host . as_deref ( ) , Some ( "[::1]" ) ) ;
247+ assert_eq ! ( url. port, Some ( 8080 ) ) ;
248+ assert_eq ! ( url. path, "/path" ) ;
282249 }
283250}
0 commit comments