Skip to content

Commit 0a55551

Browse files
committed
fix(doc_loader): use HTML title to find doc path
Refactored documentation path finding logic into `find_documentation_path` and added comprehensive unit tests. Previous attempts using directory names or search-index.js failed when `cargo doc` generated multiple directories or when directory names didn't match crate names (e.g., `async-stripe` -> `stripe`), or when search-index.js was missing. This fix uses the HTML <title> tag within the index.html of candidate directories to reliably identify the correct documentation path. It handles name normalization (hyphens/underscores) and ambiguity. Removed unused imports (`BufReader`, `BufRead`) after refactoring.
1 parent 41912a2 commit 0a55551

File tree

3 files changed

+258
-45
lines changed

3 files changed

+258
-45
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ tempfile = "3.19.1"
2424
anyhow = "1.0.97"
2525
schemars = "0.8.22"
2626
clap = { version = "4.5.34", features = ["cargo", "derive", "env"] }
27+
regex = "1.11.1"
2728

2829

2930
# --- Platform Specific Dependencies ---

src/doc_loader.rs

Lines changed: 256 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use scraper::{Html, Selector};
2-
use std::{collections::HashMap, fs::{self, File, create_dir_all}, io::Write, path::PathBuf}; // Added PathBuf, HashMap
3-
use cargo::core::resolver::features::CliFeatures;
2+
use std::{collections::HashMap, fs::{self, File, create_dir_all}, io::{Write}, path::{Path, PathBuf}}; // Removed BufRead, BufReader
3+
use cargo::core::resolver::features::{CliFeatures};
44
// use cargo::core::SourceId; // Removed unused import
55
// use cargo::util::Filesystem; // Removed unused import
66

@@ -122,48 +122,8 @@ edition = "2021"
122122
ops::doc(&ws, &doc_opts).map_err(DocLoaderError::CargoLib)?; // Use ws
123123
// --- End Cargo API ---
124124

125-
// --- Find the actual documentation directory ---
126-
// Iterate through subdirectories in `target/doc` and find the one containing `index.html`.
127125
let base_doc_path = temp_dir_path.join("doc");
128-
129-
let mut target_docs_path: Option<PathBuf> = None;
130-
let mut found_count = 0;
131-
132-
if base_doc_path.is_dir() {
133-
for entry_result in fs::read_dir(&base_doc_path)? {
134-
let entry = entry_result?;
135-
if entry.file_type()?.is_dir() {
136-
let dir_path = entry.path();
137-
let index_html_path = dir_path.join("index.html");
138-
if index_html_path.is_file() {
139-
if target_docs_path.is_none() {
140-
target_docs_path = Some(dir_path);
141-
}
142-
found_count += 1;
143-
} else {
144-
}
145-
}
146-
}
147-
}
148-
149-
let docs_path = match (found_count, target_docs_path) {
150-
(1, Some(path)) => {
151-
path
152-
},
153-
(0, _) => {
154-
return Err(DocLoaderError::CargoLib(anyhow::anyhow!(
155-
"Could not find any subdirectory containing index.html within '{}'. Cargo doc might have failed or produced unexpected output.",
156-
base_doc_path.display()
157-
)));
158-
},
159-
(count, _) => {
160-
return Err(DocLoaderError::CargoLib(anyhow::anyhow!(
161-
"Expected exactly one subdirectory containing index.html within '{}', but found {}. Cannot determine the correct documentation path.",
162-
base_doc_path.display(), count
163-
)));
164-
}
165-
};
166-
// --- End finding documentation directory ---
126+
let docs_path = find_documentation_path(&base_doc_path, crate_name)?;
167127

168128
eprintln!("Using documentation path: {}", docs_path.display()); // Log the path we are actually using
169129

@@ -201,7 +161,7 @@ edition = "2021"
201161
}
202162
}
203163

204-
// --- Initialize paths_to_process and explicitly add the root index.html if it exists ---
164+
// --- Initialize paths_to_process and explicitly add the root index.html if it exists ---
205165
let mut paths_to_process: Vec<PathBuf> = Vec::new();
206166
let root_index_path = docs_path.join("index.html");
207167
if root_index_path.is_file() {
@@ -311,4 +271,255 @@ edition = "2021"
311271

312272
eprintln!("Finished document loading. Found {} final documents.", documents.len());
313273
Ok(documents)
314-
}
274+
}
275+
276+
/// Finds the correct documentation directory for a specific crate within a base 'doc' directory.
277+
///
278+
/// Handles cases where multiple subdirectories might exist (e.g., due to dependencies)
279+
/// or where the directory name doesn't exactly match the crate name (hyphens vs underscores, renames).
280+
fn find_documentation_path(base_doc_path: &Path, crate_name: &str) -> Result<PathBuf, DocLoaderError> {
281+
let mut candidate_doc_paths: Vec<PathBuf> = Vec::new();
282+
283+
if base_doc_path.is_dir() {
284+
for entry_result in fs::read_dir(base_doc_path)? {
285+
let entry = entry_result?;
286+
if entry.file_type()?.is_dir() {
287+
let dir_path = entry.path();
288+
// Check if index.html exists within the subdirectory
289+
if dir_path.join("index.html").is_file() {
290+
candidate_doc_paths.push(dir_path);
291+
}
292+
}
293+
}
294+
}
295+
296+
match candidate_doc_paths.len() {
297+
0 => Err(DocLoaderError::CargoLib(anyhow::anyhow!(
298+
"Could not find any subdirectory containing index.html within '{}'. Cargo doc might have failed or produced unexpected output.",
299+
base_doc_path.display()
300+
))),
301+
1 => Ok(candidate_doc_paths.remove(0)), // Exactly one candidate, assume it's correct
302+
_ => {
303+
// Multiple candidates, try to disambiguate
304+
let mut matched_path: Option<PathBuf> = None;
305+
let normalized_input_crate_name = crate_name.replace('-', "_");
306+
307+
// Prepare scraper selector for title tag
308+
let title_selector = Selector::parse("html > head > title")
309+
.map_err(|e| DocLoaderError::Selector(format!("Failed to parse title selector: {}", e)))?;
310+
311+
for candidate_path in candidate_doc_paths {
312+
// 1. Check index.html's title tag
313+
let index_html_path = candidate_path.join("index.html");
314+
let mut found_match_in_file = false;
315+
if index_html_path.is_file() {
316+
if let Ok(html_content) = fs::read_to_string(&index_html_path) {
317+
let document = Html::parse_document(&html_content);
318+
if let Some(title_element) = document.select(&title_selector).next() {
319+
let title_text = title_element.text().collect::<String>();
320+
// Normalize the extracted title part for comparison
321+
let normalized_title_crate_part = title_text
322+
.split(" - Rust") // Split at " - Rust"
323+
.next() // Take the first part
324+
.unwrap_or("") // Handle cases where " - Rust" isn't present
325+
.trim() // Trim whitespace
326+
.replace('-', "_"); // Normalize hyphens
327+
328+
if normalized_title_crate_part == normalized_input_crate_name {
329+
found_match_in_file = true;
330+
}
331+
}
332+
} else {
333+
eprintln!("[WARN] Failed to read index.html at '{}'", index_html_path.display());
334+
}
335+
}
336+
337+
// 2. If matched via title, track it
338+
if found_match_in_file {
339+
if matched_path.is_some() {
340+
// Found a second match! Ambiguous.
341+
return Err(DocLoaderError::CargoLib(anyhow::anyhow!(
342+
"Found multiple documentation directories matching crate name '{}' based on index.html title within '{}' (e.g., '{}' and '{}'). Cannot determine the correct path.",
343+
crate_name, base_doc_path.display(), matched_path.unwrap().display(), candidate_path.display()
344+
)));
345+
}
346+
matched_path = Some(candidate_path);
347+
}
348+
}
349+
350+
// 3. Return the unique match or error
351+
matched_path.ok_or_else(|| DocLoaderError::CargoLib(anyhow::anyhow!(
352+
"Found multiple candidate documentation directories within '{}', but none could be uniquely identified as matching crate '{}' using the index.html title.",
353+
base_doc_path.display(), crate_name
354+
355+
356+
)))
357+
}
358+
}
359+
}
360+
361+
#[cfg(test)]
362+
mod tests {
363+
use super::*;
364+
use std::fs;
365+
use std::io::Write;
366+
use tempfile::tempdir;
367+
368+
// Helper to create dummy doc structure including index.html content
369+
fn setup_test_dir_with_titles(base: &Path, dirs: &[(&str, Option<&str>)]) -> std::io::Result<()> {
370+
for (name, title_content) in dirs {
371+
let dir_path = base.join(name);
372+
fs::create_dir_all(&dir_path)?; // Use create_dir_all
373+
374+
if let Some(title) = title_content {
375+
// Create index.html with the specified title
376+
let index_path = dir_path.join("index.html");
377+
let mut index_file = File::create(index_path)?;
378+
// Basic HTML structure with the title
379+
writeln!(index_file, "<!DOCTYPE html><html><head><title>{}</title></head><body>Content</body></html>", title)?;
380+
} else {
381+
// Create an empty index.html if no title specified (or handle differently if needed)
382+
File::create(dir_path.join("index.html"))?;
383+
}
384+
385+
// Optionally create search-index.js if needed for other tests, but not used for title check
386+
// let search_index_path = dir_path.join("search-index.js");
387+
// if fs::metadata(&search_index_path).is_err() { // Avoid overwriting if exists
388+
// // Example: Create a dummy search-index.js if required by other logic
389+
// // File::create(search_index_path)?;
390+
// }
391+
}
392+
Ok(())
393+
}
394+
395+
396+
#[test]
397+
fn test_find_docs_no_dirs() -> Result<(), Box<dyn std::error::Error>> {
398+
let temp = tempdir()?;
399+
let base_path = temp.path();
400+
let result = find_documentation_path(base_path, "my_crate");
401+
assert!(result.is_err());
402+
assert!(result.unwrap_err().to_string().contains("Could not find any subdirectory"));
403+
Ok(())
404+
}
405+
406+
#[test]
407+
fn test_find_docs_one_dir_correct() -> Result<(), Box<dyn std::error::Error>> {
408+
let temp = tempdir()?;
409+
let base_path = temp.path();
410+
setup_test_dir_with_titles(base_path, &[("my_crate", Some("my_crate - Rust"))])?;
411+
let result = find_documentation_path(base_path, "my_crate")?;
412+
assert_eq!(result, base_path.join("my_crate"));
413+
Ok(())
414+
}
415+
416+
#[test]
417+
fn test_find_docs_one_dir_wrong_name() -> Result<(), Box<dyn std::error::Error>> {
418+
let temp = tempdir()?;
419+
let base_path = temp.path();
420+
setup_test_dir_with_titles(base_path, &[("other_crate", Some("other_crate - Rust"))])?;
421+
let result = find_documentation_path(base_path, "my_crate")?;
422+
// If only one dir exists, we assume it's the right one, even if name doesn't match
423+
assert_eq!(result, base_path.join("other_crate"));
424+
Ok(())
425+
}
426+
427+
#[test]
428+
fn test_find_docs_multiple_dirs_disambiguate_ok() -> Result<(), Box<dyn std::error::Error>> {
429+
let temp = tempdir()?;
430+
let base_path = temp.path();
431+
setup_test_dir_with_titles(base_path, &[
432+
("dep_crate", Some("dep_crate - Rust")),
433+
("my_crate", Some("my_crate - Rust")), // Correct title
434+
])?;
435+
let result = find_documentation_path(base_path, "my_crate")?;
436+
assert_eq!(result, base_path.join("my_crate"));
437+
Ok(())
438+
}
439+
440+
#[test]
441+
fn test_find_docs_multiple_dirs_hyphen_ok() -> Result<(), Box<dyn std::error::Error>> {
442+
let temp = tempdir()?;
443+
let base_path = temp.path();
444+
setup_test_dir_with_titles(base_path, &[
445+
("dep_crate", Some("dep_crate - Rust")),
446+
// Crate name has hyphen, title might use underscore or hyphen
447+
("my_crate_docs", Some("my_crate - Rust")), // Title uses underscore matching normalized name
448+
])?;
449+
let result = find_documentation_path(base_path, "my-crate")?; // Input has hyphen
450+
assert_eq!(result, base_path.join("my_crate_docs"));
451+
Ok(())
452+
}
453+
454+
#[test]
455+
fn test_find_docs_multiple_dirs_no_match() -> Result<(), Box<dyn std::error::Error>> {
456+
let temp = tempdir()?;
457+
let base_path = temp.path();
458+
setup_test_dir_with_titles(base_path, &[
459+
("dep_crate", Some("dep_crate - Rust")),
460+
("another_dep", Some("another_dep - Rust")),
461+
])?;
462+
let result = find_documentation_path(base_path, "my_crate");
463+
assert!(result.is_err());
464+
assert!(result.unwrap_err().to_string().contains("none could be uniquely identified"));
465+
Ok(())
466+
}
467+
468+
#[test]
469+
fn test_find_docs_multiple_dirs_ambiguous_match() -> Result<(), Box<dyn std::error::Error>> {
470+
let temp = tempdir()?;
471+
let base_path = temp.path();
472+
setup_test_dir_with_titles(base_path, &[
473+
("my_crate_v1", Some("my_crate - Rust")), // Matches normalized "my_crate"
474+
("my_crate_v2", Some("my_crate - Rust")), // Also matches normalized "my_crate"
475+
])?;
476+
let result = find_documentation_path(base_path, "my_crate");
477+
assert!(result.is_err());
478+
assert!(result.unwrap_err().to_string().contains("Found multiple documentation directories matching"));
479+
Ok(())
480+
}
481+
482+
#[test]
483+
fn test_find_docs_multiple_dirs_missing_index_html() -> Result<(), Box<dyn std::error::Error>> {
484+
let temp = tempdir()?;
485+
let base_path = temp.path();
486+
// Create dirs but only one with index.html
487+
fs::create_dir_all(base_path.join("dep_crate"))?;
488+
setup_test_dir_with_titles(base_path, &[("my_crate", Some("my_crate - Rust"))])?;
489+
490+
let result = find_documentation_path(base_path, "my_crate")?;
491+
// Should still find the correct one as the other is not a candidate
492+
assert_eq!(result, base_path.join("my_crate"));
493+
Ok(())
494+
}
495+
496+
#[test]
497+
fn test_find_docs_multiple_dirs_malformed_title() -> Result<(), Box<dyn std::error::Error>> {
498+
let temp = tempdir()?;
499+
let base_path = temp.path();
500+
setup_test_dir_with_titles(base_path, &[
501+
("dep_crate", Some("Completely Wrong Title Format")), // Malformed title
502+
("my_crate", Some("my_crate - Rust")),
503+
])?;
504+
let result = find_documentation_path(base_path, "my_crate")?;
505+
// Should ignore malformed and find correct one
506+
assert_eq!(result, base_path.join("my_crate"));
507+
Ok(())
508+
}
509+
510+
#[test]
511+
fn test_find_docs_multiple_dirs_disambiguate_by_title() -> Result<(), Box<dyn std::error::Error>> {
512+
let temp = tempdir()?;
513+
let base_path = temp.path();
514+
// Simulate missing search-index.js but correct title in index.html
515+
setup_test_dir_with_titles(base_path, &[
516+
("stdio_fixture", Some("stdio_fixture - Rust")),
517+
("clap", Some("clap - Rust")),
518+
])?;
519+
520+
let result = find_documentation_path(base_path, "clap")?;
521+
assert_eq!(result, base_path.join("clap"));
522+
Ok(())
523+
}
524+
525+
}

0 commit comments

Comments
 (0)