Skip to content

Commit 0b2078b

Browse files
committed
Merge remote-tracking branch 'upstream/main' into crawling-progress
2 parents cd947bb + cd9e2b9 commit 0b2078b

File tree

9 files changed

+539
-236
lines changed

9 files changed

+539
-236
lines changed

.env.sample

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,5 @@
11
GEMINI_PROJECT_ID=<GEMINI_PROJECT_ID>
2-
GITHUB_TOKEN=<GITHUB_TOKEN>
2+
GEMINI_API_KEY=<GEMINI_API_KEY>
3+
GITHUB_TOKEN=<GITHUB_TOKEN>
4+
OPENROUTER_API_KEY=<OPENROUTER_API_KEY>
5+
OPENROUTER_MODEL=<OPENROUTER_MODEL>

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,11 @@ coverage/
9999
llm_cache.json
100100

101101
# Output files
102-
output/
102+
output/
103+
104+
# uv-managed files
105+
pyproject.toml
106+
uv.lock
107+
108+
docs/*.pdf
109+
docs/design-cn.md

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,16 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
7373
## 🚀 Getting Started
7474

7575
1. Clone this repository
76+
```bash
77+
git clone https://github.com/The-Pocket/PocketFlow-Tutorial-Codebase-Knowledge
78+
```
7679

77-
2. Install dependencies:
80+
2. Install dependencies:
7881
```bash
7982
pip install -r requirements.txt
8083
```
8184

82-
3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. By default, you can use the AI Studio key with this client for Gemini Pro 2.5:
85+
3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. By default, you can use the AI Studio key with this client for Gemini 2.5 Pro:
8386

8487
```python
8588
client = genai.Client(
@@ -92,7 +95,7 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
9295
python utils/call_llm.py
9396
```
9497

95-
4. Generate a complete codebase tutorial by running the main script:
98+
4. Generate a complete codebase tutorial by running the main script:
9699
```bash
97100
# Analyze a GitHub repository
98101
python main.py --repo https://github.com/username/repo --include "*.py" "*.js" --exclude "tests/*" --max-size 50000
@@ -112,6 +115,8 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
112115
- `-e, --exclude` - Files to exclude (e.g., "tests/*" "docs/*")
113116
- `-s, --max-size` - Maximum file size in bytes (default: 100KB)
114117
- `--language` - Language for the generated tutorial (default: "english")
118+
- `--max-abstractions` - Maximum number of abstractions to identify (default: 10)
119+
- `--no-cache` - Disable LLM response caching (default: caching enabled)
115120

116121
The application will crawl the repository, analyze the codebase structure, generate tutorial content in the specified language, and save the output in the specified directory (default: ./output).
117122

docs/_config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Basic site settings
2-
title: Codebase2Tutorial
2+
title: Pocket Flow
33

44
# Theme settings
55
remote_theme: just-the-docs/just-the-docs

main.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
}
1515

1616
DEFAULT_EXCLUDE_PATTERNS = {
17+
"assets/*", "data/*", "examples/*", "images/*", "public/*", "static/*", "temp/*",
18+
"docs/*",
1719
"venv/*", ".venv/*", "*test*", "tests/*", "docs/*", "examples/*", "v1/*",
18-
"dist/*", "build/*", "experimental/*", "deprecated/*",
20+
"dist/*", "build/*", "experimental/*", "deprecated/*", "misc/*",
1921
"legacy/*", ".git/*", ".github/*", ".next/*", ".vscode/*", "obj/*", "bin/*", "node_modules/*", "*.log"
2022
}
2123

@@ -36,6 +38,10 @@ def main():
3638
parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
3739
# Add language parameter for multi-language support
3840
parser.add_argument("--language", default="english", help="Language for the generated tutorial (default: english)")
41+
# Add use_cache parameter to control LLM caching
42+
parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
43+
# Add max_abstraction_num parameter to control the number of abstractions
44+
parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
3945

4046
args = parser.parse_args()
4147

@@ -61,6 +67,12 @@ def main():
6167

6268
# Add language for multi-language support
6369
"language": args.language,
70+
71+
# Add use_cache flag (inverse of no-cache flag)
72+
"use_cache": not args.no_cache,
73+
74+
# Add max_abstraction_num parameter
75+
"max_abstraction_num": args.max_abstractions,
6476

6577
# Outputs will be populated by the nodes
6678
"files": [],
@@ -73,6 +85,7 @@ def main():
7385

7486
# Display starting message with repository/directory and language
7587
print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
88+
print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")
7689

7790
# Create the flow instance
7891
tutorial_flow = create_tutorial_flow()

0 commit comments

Comments
 (0)