22import fnmatch
33import pathspec
44
5+
56def crawl_local_files (
67 directory ,
78 include_patterns = None ,
89 exclude_patterns = None ,
910 max_file_size = None ,
1011 use_relative_paths = True ,
11- progress_callback = None ,
1212):
1313 """
1414 Crawl files in a local directory with similar interface as crawl_github_files.
@@ -18,7 +18,6 @@ def crawl_local_files(
1818 exclude_patterns (set): File patterns to exclude (e.g. {"tests/*"})
1919 max_file_size (int): Maximum file size in bytes
2020 use_relative_paths (bool): Whether to use paths relative to directory
21- progress_callback (callable): Function to report progress, takes (processed, total) as arguments
2221
2322 Returns:
2423 dict: {"files": {filepath: content}}
@@ -91,28 +90,41 @@ def crawl_local_files(
9190 else :
9291 included = True
9392
93+ processed_files += 1 # Increment processed count regardless of inclusion/exclusion
94+
95+ status = "processed"
9496 if not included or excluded :
95- processed_files += 1
96- if progress_callback :
97- progress_callback (processed_files , total_files )
98- continue
97+ status = "skipped (excluded)"
98+ # Print progress for skipped files due to exclusion
99+ if total_files > 0 :
100+ percentage = (processed_files / total_files ) * 100
101+ rounded_percentage = int (percentage )
102+ print (f"\033 [92mProgress: { processed_files } /{ total_files } ({ rounded_percentage } %) { relpath } [{ status } ]\033 [0m" )
103+ continue # Skip to next file if not included or excluded
99104
100105 if max_file_size and os .path .getsize (filepath ) > max_file_size :
101- processed_files += 1
102- if progress_callback :
103- progress_callback (processed_files , total_files )
104- continue
105-
106+ status = "skipped (size limit)"
107+ # Print progress for skipped files due to size limit
108+ if total_files > 0 :
109+ percentage = (processed_files / total_files ) * 100
110+ rounded_percentage = int (percentage )
111+ print (f"\033 [92mProgress: { processed_files } /{ total_files } ({ rounded_percentage } %) { relpath } [{ status } ]\033 [0m" )
112+ continue # Skip large files
113+
114+ # --- File is being processed ---
106115 try :
107116 with open (filepath , "r" , encoding = "utf-8" ) as f :
108117 content = f .read ()
109118 files_dict [relpath ] = content
110119 except Exception as e :
111120 print (f"Warning: Could not read file { filepath } : { e } " )
121+ status = "skipped (read error)"
112122
113- processed_files += 1
114- if progress_callback :
115- progress_callback (processed_files , total_files )
123+ # --- Print progress for processed or error files ---
124+ if total_files > 0 :
125+ percentage = (processed_files / total_files ) * 100
126+ rounded_percentage = int (percentage )
127+ print (f"\033 [92mProgress: { processed_files } /{ total_files } ({ rounded_percentage } %) { relpath } [{ status } ]\033 [0m" )
116128
117129 return {"files" : files_dict }
118130
0 commit comments