11use scraper:: { Html , Selector } ;
2- use std:: { collections:: HashMap , fs:: { self , File , create_dir_all} , io:: Write , path:: PathBuf } ; // Added PathBuf, HashMap
3- use cargo:: core:: resolver:: features:: CliFeatures ;
2+ use std:: { collections:: HashMap , fs:: { self , File , create_dir_all} , io:: { Write } , path:: { Path , PathBuf } } ; // Removed BufRead, BufReader
3+ use cargo:: core:: resolver:: features:: { CliFeatures } ;
44// use cargo::core::SourceId; // Removed unused import
55// use cargo::util::Filesystem; // Removed unused import
66
@@ -122,48 +122,8 @@ edition = "2021"
122122 ops:: doc ( & ws, & doc_opts) . map_err ( DocLoaderError :: CargoLib ) ?; // Use ws
123123 // --- End Cargo API ---
124124
125- // --- Find the actual documentation directory ---
126- // Iterate through subdirectories in `target/doc` and find the one containing `index.html`.
127125 let base_doc_path = temp_dir_path. join ( "doc" ) ;
128-
129- let mut target_docs_path: Option < PathBuf > = None ;
130- let mut found_count = 0 ;
131-
132- if base_doc_path. is_dir ( ) {
133- for entry_result in fs:: read_dir ( & base_doc_path) ? {
134- let entry = entry_result?;
135- if entry. file_type ( ) ?. is_dir ( ) {
136- let dir_path = entry. path ( ) ;
137- let index_html_path = dir_path. join ( "index.html" ) ;
138- if index_html_path. is_file ( ) {
139- if target_docs_path. is_none ( ) {
140- target_docs_path = Some ( dir_path) ;
141- }
142- found_count += 1 ;
143- } else {
144- }
145- }
146- }
147- }
148-
149- let docs_path = match ( found_count, target_docs_path) {
150- ( 1 , Some ( path) ) => {
151- path
152- } ,
153- ( 0 , _) => {
154- return Err ( DocLoaderError :: CargoLib ( anyhow:: anyhow!(
155- "Could not find any subdirectory containing index.html within '{}'. Cargo doc might have failed or produced unexpected output." ,
156- base_doc_path. display( )
157- ) ) ) ;
158- } ,
159- ( count, _) => {
160- return Err ( DocLoaderError :: CargoLib ( anyhow:: anyhow!(
161- "Expected exactly one subdirectory containing index.html within '{}', but found {}. Cannot determine the correct documentation path." ,
162- base_doc_path. display( ) , count
163- ) ) ) ;
164- }
165- } ;
166- // --- End finding documentation directory ---
126+ let docs_path = find_documentation_path ( & base_doc_path, crate_name) ?;
167127
168128 eprintln ! ( "Using documentation path: {}" , docs_path. display( ) ) ; // Log the path we are actually using
169129
@@ -201,7 +161,7 @@ edition = "2021"
201161 }
202162 }
203163
204- // --- Initialize paths_to_process and explicitly add the root index.html if it exists ---
164+ // --- Initialize paths_to_process and explicitly add the root index.html if it exists ---
205165 let mut paths_to_process: Vec < PathBuf > = Vec :: new ( ) ;
206166 let root_index_path = docs_path. join ( "index.html" ) ;
207167 if root_index_path. is_file ( ) {
@@ -311,4 +271,255 @@ edition = "2021"
311271
312272 eprintln ! ( "Finished document loading. Found {} final documents." , documents. len( ) ) ;
313273 Ok ( documents)
314- }
274+ }
275+
276+ /// Finds the correct documentation directory for a specific crate within a base 'doc' directory.
277+ ///
278+ /// Handles cases where multiple subdirectories might exist (e.g., due to dependencies)
279+ /// or where the directory name doesn't exactly match the crate name (hyphens vs underscores, renames).
280+ fn find_documentation_path ( base_doc_path : & Path , crate_name : & str ) -> Result < PathBuf , DocLoaderError > {
281+ let mut candidate_doc_paths: Vec < PathBuf > = Vec :: new ( ) ;
282+
283+ if base_doc_path. is_dir ( ) {
284+ for entry_result in fs:: read_dir ( base_doc_path) ? {
285+ let entry = entry_result?;
286+ if entry. file_type ( ) ?. is_dir ( ) {
287+ let dir_path = entry. path ( ) ;
288+ // Check if index.html exists within the subdirectory
289+ if dir_path. join ( "index.html" ) . is_file ( ) {
290+ candidate_doc_paths. push ( dir_path) ;
291+ }
292+ }
293+ }
294+ }
295+
296+ match candidate_doc_paths. len ( ) {
297+ 0 => Err ( DocLoaderError :: CargoLib ( anyhow:: anyhow!(
298+ "Could not find any subdirectory containing index.html within '{}'. Cargo doc might have failed or produced unexpected output." ,
299+ base_doc_path. display( )
300+ ) ) ) ,
301+ 1 => Ok ( candidate_doc_paths. remove ( 0 ) ) , // Exactly one candidate, assume it's correct
302+ _ => {
303+ // Multiple candidates, try to disambiguate
304+ let mut matched_path: Option < PathBuf > = None ;
305+ let normalized_input_crate_name = crate_name. replace ( '-' , "_" ) ;
306+
307+ // Prepare scraper selector for title tag
308+ let title_selector = Selector :: parse ( "html > head > title" )
309+ . map_err ( |e| DocLoaderError :: Selector ( format ! ( "Failed to parse title selector: {}" , e) ) ) ?;
310+
311+ for candidate_path in candidate_doc_paths {
312+ // 1. Check index.html's title tag
313+ let index_html_path = candidate_path. join ( "index.html" ) ;
314+ let mut found_match_in_file = false ;
315+ if index_html_path. is_file ( ) {
316+ if let Ok ( html_content) = fs:: read_to_string ( & index_html_path) {
317+ let document = Html :: parse_document ( & html_content) ;
318+ if let Some ( title_element) = document. select ( & title_selector) . next ( ) {
319+ let title_text = title_element. text ( ) . collect :: < String > ( ) ;
320+ // Normalize the extracted title part for comparison
321+ let normalized_title_crate_part = title_text
322+ . split ( " - Rust" ) // Split at " - Rust"
323+ . next ( ) // Take the first part
324+ . unwrap_or ( "" ) // Handle cases where " - Rust" isn't present
325+ . trim ( ) // Trim whitespace
326+ . replace ( '-' , "_" ) ; // Normalize hyphens
327+
328+ if normalized_title_crate_part == normalized_input_crate_name {
329+ found_match_in_file = true ;
330+ }
331+ }
332+ } else {
333+ eprintln ! ( "[WARN] Failed to read index.html at '{}'" , index_html_path. display( ) ) ;
334+ }
335+ }
336+
337+ // 2. If matched via title, track it
338+ if found_match_in_file {
339+ if matched_path. is_some ( ) {
340+ // Found a second match! Ambiguous.
341+ return Err ( DocLoaderError :: CargoLib ( anyhow:: anyhow!(
342+ "Found multiple documentation directories matching crate name '{}' based on index.html title within '{}' (e.g., '{}' and '{}'). Cannot determine the correct path." ,
343+ crate_name, base_doc_path. display( ) , matched_path. unwrap( ) . display( ) , candidate_path. display( )
344+ ) ) ) ;
345+ }
346+ matched_path = Some ( candidate_path) ;
347+ }
348+ }
349+
350+ // 3. Return the unique match or error
351+ matched_path. ok_or_else ( || DocLoaderError :: CargoLib ( anyhow:: anyhow!(
352+ "Found multiple candidate documentation directories within '{}', but none could be uniquely identified as matching crate '{}' using the index.html title." ,
353+ base_doc_path. display( ) , crate_name
354+
355+
356+ ) ) )
357+ }
358+ }
359+ }
360+
361+ #[ cfg( test) ]
362+ mod tests {
363+ use super :: * ;
364+ use std:: fs;
365+ use std:: io:: Write ;
366+ use tempfile:: tempdir;
367+
368+ // Helper to create dummy doc structure including index.html content
369+ fn setup_test_dir_with_titles ( base : & Path , dirs : & [ ( & str , Option < & str > ) ] ) -> std:: io:: Result < ( ) > {
370+ for ( name, title_content) in dirs {
371+ let dir_path = base. join ( name) ;
372+ fs:: create_dir_all ( & dir_path) ?; // Use create_dir_all
373+
374+ if let Some ( title) = title_content {
375+ // Create index.html with the specified title
376+ let index_path = dir_path. join ( "index.html" ) ;
377+ let mut index_file = File :: create ( index_path) ?;
378+ // Basic HTML structure with the title
379+ writeln ! ( index_file, "<!DOCTYPE html><html><head><title>{}</title></head><body>Content</body></html>" , title) ?;
380+ } else {
381+ // Create an empty index.html if no title specified (or handle differently if needed)
382+ File :: create ( dir_path. join ( "index.html" ) ) ?;
383+ }
384+
385+ // Optionally create search-index.js if needed for other tests, but not used for title check
386+ // let search_index_path = dir_path.join("search-index.js");
387+ // if fs::metadata(&search_index_path).is_err() { // Avoid overwriting if exists
388+ // // Example: Create a dummy search-index.js if required by other logic
389+ // // File::create(search_index_path)?;
390+ // }
391+ }
392+ Ok ( ( ) )
393+ }
394+
395+
396+ #[ test]
397+ fn test_find_docs_no_dirs ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
398+ let temp = tempdir ( ) ?;
399+ let base_path = temp. path ( ) ;
400+ let result = find_documentation_path ( base_path, "my_crate" ) ;
401+ assert ! ( result. is_err( ) ) ;
402+ assert ! ( result. unwrap_err( ) . to_string( ) . contains( "Could not find any subdirectory" ) ) ;
403+ Ok ( ( ) )
404+ }
405+
406+ #[ test]
407+ fn test_find_docs_one_dir_correct ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
408+ let temp = tempdir ( ) ?;
409+ let base_path = temp. path ( ) ;
410+ setup_test_dir_with_titles ( base_path, & [ ( "my_crate" , Some ( "my_crate - Rust" ) ) ] ) ?;
411+ let result = find_documentation_path ( base_path, "my_crate" ) ?;
412+ assert_eq ! ( result, base_path. join( "my_crate" ) ) ;
413+ Ok ( ( ) )
414+ }
415+
416+ #[ test]
417+ fn test_find_docs_one_dir_wrong_name ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
418+ let temp = tempdir ( ) ?;
419+ let base_path = temp. path ( ) ;
420+ setup_test_dir_with_titles ( base_path, & [ ( "other_crate" , Some ( "other_crate - Rust" ) ) ] ) ?;
421+ let result = find_documentation_path ( base_path, "my_crate" ) ?;
422+ // If only one dir exists, we assume it's the right one, even if name doesn't match
423+ assert_eq ! ( result, base_path. join( "other_crate" ) ) ;
424+ Ok ( ( ) )
425+ }
426+
427+ #[ test]
428+ fn test_find_docs_multiple_dirs_disambiguate_ok ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
429+ let temp = tempdir ( ) ?;
430+ let base_path = temp. path ( ) ;
431+ setup_test_dir_with_titles ( base_path, & [
432+ ( "dep_crate" , Some ( "dep_crate - Rust" ) ) ,
433+ ( "my_crate" , Some ( "my_crate - Rust" ) ) , // Correct title
434+ ] ) ?;
435+ let result = find_documentation_path ( base_path, "my_crate" ) ?;
436+ assert_eq ! ( result, base_path. join( "my_crate" ) ) ;
437+ Ok ( ( ) )
438+ }
439+
440+ #[ test]
441+ fn test_find_docs_multiple_dirs_hyphen_ok ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
442+ let temp = tempdir ( ) ?;
443+ let base_path = temp. path ( ) ;
444+ setup_test_dir_with_titles ( base_path, & [
445+ ( "dep_crate" , Some ( "dep_crate - Rust" ) ) ,
446+ // Crate name has hyphen, title might use underscore or hyphen
447+ ( "my_crate_docs" , Some ( "my_crate - Rust" ) ) , // Title uses underscore matching normalized name
448+ ] ) ?;
449+ let result = find_documentation_path ( base_path, "my-crate" ) ?; // Input has hyphen
450+ assert_eq ! ( result, base_path. join( "my_crate_docs" ) ) ;
451+ Ok ( ( ) )
452+ }
453+
454+ #[ test]
455+ fn test_find_docs_multiple_dirs_no_match ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
456+ let temp = tempdir ( ) ?;
457+ let base_path = temp. path ( ) ;
458+ setup_test_dir_with_titles ( base_path, & [
459+ ( "dep_crate" , Some ( "dep_crate - Rust" ) ) ,
460+ ( "another_dep" , Some ( "another_dep - Rust" ) ) ,
461+ ] ) ?;
462+ let result = find_documentation_path ( base_path, "my_crate" ) ;
463+ assert ! ( result. is_err( ) ) ;
464+ assert ! ( result. unwrap_err( ) . to_string( ) . contains( "none could be uniquely identified" ) ) ;
465+ Ok ( ( ) )
466+ }
467+
468+ #[ test]
469+ fn test_find_docs_multiple_dirs_ambiguous_match ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
470+ let temp = tempdir ( ) ?;
471+ let base_path = temp. path ( ) ;
472+ setup_test_dir_with_titles ( base_path, & [
473+ ( "my_crate_v1" , Some ( "my_crate - Rust" ) ) , // Matches normalized "my_crate"
474+ ( "my_crate_v2" , Some ( "my_crate - Rust" ) ) , // Also matches normalized "my_crate"
475+ ] ) ?;
476+ let result = find_documentation_path ( base_path, "my_crate" ) ;
477+ assert ! ( result. is_err( ) ) ;
478+ assert ! ( result. unwrap_err( ) . to_string( ) . contains( "Found multiple documentation directories matching" ) ) ;
479+ Ok ( ( ) )
480+ }
481+
482+ #[ test]
483+ fn test_find_docs_multiple_dirs_missing_index_html ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
484+ let temp = tempdir ( ) ?;
485+ let base_path = temp. path ( ) ;
486+ // Create dirs but only one with index.html
487+ fs:: create_dir_all ( base_path. join ( "dep_crate" ) ) ?;
488+ setup_test_dir_with_titles ( base_path, & [ ( "my_crate" , Some ( "my_crate - Rust" ) ) ] ) ?;
489+
490+ let result = find_documentation_path ( base_path, "my_crate" ) ?;
491+ // Should still find the correct one as the other is not a candidate
492+ assert_eq ! ( result, base_path. join( "my_crate" ) ) ;
493+ Ok ( ( ) )
494+ }
495+
496+ #[ test]
497+ fn test_find_docs_multiple_dirs_malformed_title ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
498+ let temp = tempdir ( ) ?;
499+ let base_path = temp. path ( ) ;
500+ setup_test_dir_with_titles ( base_path, & [
501+ ( "dep_crate" , Some ( "Completely Wrong Title Format" ) ) , // Malformed title
502+ ( "my_crate" , Some ( "my_crate - Rust" ) ) ,
503+ ] ) ?;
504+ let result = find_documentation_path ( base_path, "my_crate" ) ?;
505+ // Should ignore malformed and find correct one
506+ assert_eq ! ( result, base_path. join( "my_crate" ) ) ;
507+ Ok ( ( ) )
508+ }
509+
510+ #[ test]
511+ fn test_find_docs_multiple_dirs_disambiguate_by_title ( ) -> Result < ( ) , Box < dyn std:: error:: Error > > {
512+ let temp = tempdir ( ) ?;
513+ let base_path = temp. path ( ) ;
514+ // Simulate missing search-index.js but correct title in index.html
515+ setup_test_dir_with_titles ( base_path, & [
516+ ( "stdio_fixture" , Some ( "stdio_fixture - Rust" ) ) ,
517+ ( "clap" , Some ( "clap - Rust" ) ) ,
518+ ] ) ?;
519+
520+ let result = find_documentation_path ( base_path, "clap" ) ?;
521+ assert_eq ! ( result, base_path. join( "clap" ) ) ;
522+ Ok ( ( ) )
523+ }
524+
525+ }
0 commit comments