@@ -267,8 +267,19 @@ fn process_documentation_directory(docs_path: &Path) -> Result<Vec<Document>, Do
267267 } ;
268268
269269 // Check file extension to decide processing method
270- if path. extension ( ) . map_or ( false , |ext| ext == "html" ) {
271- // Process HTML using scraper
270+ let extension = path. extension ( ) . and_then ( OsStr :: to_str) ;
271+ let path_str_for_check = path. to_string_lossy ( ) ; // For checking substrings
272+
273+ if extension == Some ( "md" ) {
274+ // Process Markdown: Use raw content
275+ if !file_content. trim ( ) . is_empty ( ) {
276+ documents. push ( Document {
277+ path : path_str,
278+ content : file_content, // Store the raw Markdown content
279+ } ) ;
280+ }
281+ } else if path_str_for_check. ends_with ( ".rs.html" ) { // Check for rust source files specifically
282+ // Process Rust source HTML view: parse with scraper and extract text via the content selector (NOT raw content)
272283 let html_document = Html :: parse_document ( & file_content) ;
273284 if let Some ( main_content_element) = html_document. select ( & content_selector) . next ( ) {
274285 let text_content: String = main_content_element
@@ -289,19 +300,31 @@ fn process_documentation_directory(docs_path: &Path) -> Result<Vec<Document>, Do
289300 } else {
290301 // eprintln!("[DEBUG] 'main-content' selector not found for HTML: {}", path.display());
291302 }
292- } else if path. extension ( ) . map_or ( false , |ext| ext == "md" ) {
293- // Process Markdown: Use raw content
294- if !file_content. trim ( ) . is_empty ( ) {
295- documents. push ( Document {
296- path : path_str,
297- content : file_content, // Store the raw Markdown content
298- } ) ;
303+ } else if extension == Some ( "html" ) { // Process other HTML using scraper
304+ // Process regular HTML using scraper
305+ let html_document = Html :: parse_document ( & file_content) ;
306+ if let Some ( main_content_element) = html_document. select ( & content_selector) . next ( ) {
307+ let text_content: String = main_content_element
308+ . text ( )
309+ . map ( |s| s. trim ( ) )
310+ . filter ( |s| !s. is_empty ( ) )
311+ . collect :: < Vec < & str > > ( )
312+ . join ( "\n " ) ;
313+
314+ if !text_content. is_empty ( ) {
315+ documents. push ( Document {
316+ path : path_str,
317+ content : text_content,
318+ } ) ;
319+ } else {
320+ // eprintln!("[DEBUG] No text content found in main section for HTML: {}", path.display());
321+ }
299322 } else {
300- eprintln ! ( "[DEBUG] Skipping empty Markdown file : {}" , path. display( ) ) ;
323+ // eprintln!("[DEBUG] 'main-content' selector not found for HTML : {}", path.display());
301324 }
302325 } else {
303- // Should not happen due to WalkDir filter, but handle defensively
304- eprintln ! ( "[WARN] Skipping file with unexpected extension: {}" , path. display( ) ) ;
326+ // Should not happen due to WalkDir filter, but handle defensively
327+ eprintln ! ( "[WARN] Skipping file with unexpected extension: {}" , path. display( ) ) ;
305328 }
306329 }
307330
0 commit comments