1+ const fs = require ( 'fs' )
2+ const path = require ( 'path' )
3+ const yaml = require ( 'js-yaml' )
4+
5+ const sidebarConfig = require ( '../documentation/sidebars.js' )
// Root URL of the published documentation; generateUrl() appends doc ids to it.
const BASE_URL = 'https://questdb.com/docs/'

// Cache of already-parsed documents, keyed by the path handed to
// extractMetadataAndContent(), so each file is read at most once per run.
const processedFiles = new Map()
9+
/**
 * Removes MDX-only syntax from a run of prose, i.e. text that is NOT inside a
 * fenced code block.
 *
 * @param {string} prose - Markdown/MDX text without fenced code blocks.
 * @returns {string} The text with import lines and self-closing components
 *   removed and heading markers stripped (heading text is kept).
 */
function stripMdxSyntax(prose) {
  return prose
    // Remove import statements
    .replace(/^import\s+.*$/gm, '')
    // Remove self-closing components (tags whose name starts with an uppercase letter)
    .replace(/<[A-Z][^>]*\/>/g, '')
    // Strip the leading '#' markers from headings but keep the heading text
    .replace(/^#{1,6}\s*(.*)$/gm, '$1')
}

/**
 * Cleans a document body for plain-text output while leaving fenced code
 * blocks untouched, so code samples keep their own `import ...` lines and
 * `# comment` lines.
 *
 * @param {string} body - Document text with the frontmatter already removed.
 * @returns {string} Cleaned, trimmed text.
 */
function cleanMarkdownBody(body) {
  const chunks = []
  let proseLines = []
  // Fence marker (e.g. '```' or '~~~~') of the code block we are inside, or null.
  let openFence = null

  const flushProse = () => {
    if (proseLines.length > 0) {
      chunks.push(stripMdxSyntax(proseLines.join('\n')))
      proseLines = []
    }
  }

  for (const line of body.split('\n')) {
    if (openFence === null) {
      // A backtick fence may not have another backtick later on the line;
      // otherwise it is inline code such as ```x``` and must stay prose.
      const opening = /^\s*(`{3,}(?!.*`)|~{3,})/.exec(line)
      if (opening) {
        flushProse()
        openFence = opening[1]
        chunks.push(line)
      } else {
        proseLines.push(line)
      }
    } else {
      chunks.push(line)
      // A closing fence uses the same character, is at least as long as the
      // opening fence, and carries nothing else on the line.
      const marker = line.trim()
      if (marker.length >= openFence.length && marker === openFence[0].repeat(marker.length)) {
        openFence = null
      }
    }
  }
  flushProse()

  return chunks
    .join('\n')
    // Collapse runs of blank lines (applied to the whole document)
    .replace(/\n\s*\n\s*\n/g, '\n\n')
    .trim()
}

/**
 * Reads a documentation file and returns its frontmatter title/description
 * together with a cleaned version of its body.
 *
 * If `filePath` does not exist, the same path with a trailing `.md` replaced
 * by `.mdx` is tried. Results (including failures) are cached in
 * `processedFiles` under the original `filePath`.
 *
 * @param {string} filePath - Path of the `.md` file to read.
 * @returns {{title: (string|null), description: (string|null), content: string}}
 *   Parsed document. When the file cannot be read or parsed, a warning is
 *   logged and `{ title: null, description: null, content: '' }` is returned.
 */
function extractMetadataAndContent(filePath) {
  if (processedFiles.has(filePath)) {
    return processedFiles.get(filePath)
  }

  let result
  try {
    let sourcePath = filePath
    if (!fs.existsSync(sourcePath)) {
      // Anchor on the extension: a plain '.md' string replace would rewrite the
      // first '.md' anywhere in the path (e.g. inside a directory name).
      sourcePath = filePath.replace(/\.md$/, '.mdx')
      if (!fs.existsSync(sourcePath)) {
        throw new Error(`File not found: ${filePath} or ${sourcePath}`)
      }
    }

    const raw = fs.readFileSync(sourcePath, 'utf8')
    const match = raw.match(/^---\s*\n([\s\S]*?)\n---\s*\n([\s\S]*)$/)

    let frontmatter = {}
    let body = raw
    if (match) {
      // NOTE(review): js-yaml 4.x `load` uses a safe schema by default; on 3.x
      // `safeLoad` is the safe variant. Confirm the installed version.
      frontmatter = yaml.load(match[1]) || {}
      body = match[2]
    }

    result = {
      title: frontmatter.title || null,
      description: frontmatter.description || null,
      content: cleanMarkdownBody(body)
    }
  } catch (error) {
    console.warn(`Warning: Could not read file ${filePath}: ${error.message}`)
    result = { title: null, description: null, content: '' }
  }

  // Failures are cached too, so a missing file is only reported once.
  processedFiles.set(filePath, result)
  return result
}
67+
/**
 * Builds the public URL for a documentation id.
 *
 * @param {string} docId - Sidebar doc id.
 * @returns {string} `BASE_URL` itself for the `introduction` doc, otherwise
 *   `BASE_URL` followed by the doc id.
 */
function generateUrl(docId) {
  // 'introduction' maps to the docs root, so nothing is appended for it.
  const suffix = docId === 'introduction' ? '' : docId
  return BASE_URL + suffix
}
74+
/**
 * Formats one llms.txt list entry: `- [title](url)`, followed by
 * `: description` when a description is present.
 *
 * @param {string} indentStr - Leading whitespace for the entry.
 * @param {string} title - Link text.
 * @param {string} url - Link target.
 * @param {string|null} [description] - Optional note appended after the link.
 * @returns {string} The entry, terminated by a newline.
 */
function formatLlmsTxtEntry(indentStr, title, url, description) {
  const note = description ? `: ${description}` : ''
  return `${indentStr}- [${title}](${url})${note}\n`
}

/**
 * Renders sidebar items as the Markdown link list used in llms.txt.
 *
 * Handled item shapes: a plain doc id string, `{ type: 'doc', id, label? }`,
 * `{ type: 'category', label, items? }` and `{ type: 'link', href, label? }`.
 * Any other item is ignored.
 *
 * @param {Array<string|Object>} items - Sidebar items to render.
 * @param {number} [indent=0] - Nesting depth of the list being rendered.
 * @param {boolean} [isTopLevel=false] - When true, categories become `##`
 *   sections instead of nested list items.
 * @returns {string} Markdown for the given items.
 */
function processForLlmsTxt(items, indent = 0, isTopLevel = false) {
  let result = ''
  // Two spaces per level: a child list must be indented to the content column
  // of its parent `- ` item to nest in Markdown.
  const indentStr = '  '.repeat(indent)

  for (const item of items) {
    const isShorthand = typeof item === 'string'

    if (isShorthand || item.type === 'doc') {
      const docId = isShorthand ? item : item.id
      const docPath = path.join('./documentation', docId + '.md')
      const { title, description } = extractMetadataAndContent(docPath)
      // An explicit sidebar label wins over the frontmatter title.
      const label = isShorthand ? null : item.label
      const displayTitle = label || title || docId
      result += formatLlmsTxtEntry(indentStr, displayTitle, generateUrl(docId), description)
    } else if (item.type === 'category') {
      const hasChildren = item.items && item.items.length > 0
      if (isTopLevel) {
        result += `\n## ${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, 0, false)
        }
      } else {
        // Emit the sub-category as a list item so that its children nest under
        // it rather than under the preceding entry.
        result += `${indentStr}- ${item.label}\n`
        if (hasChildren) {
          result += processForLlmsTxt(item.items, indent + 1, false)
        }
      }
    } else if (item.type === 'link') {
      result += formatLlmsTxtEntry(indentStr, item.label || item.href, item.href, null)
    }
  }

  return result
}
126+
/**
 * Formats one document as a Markdown section: heading, optional description
 * line, a blank line, then the content.
 *
 * @param {number} level - Requested heading level; capped at 6 because
 *   Markdown defines no deeper heading.
 * @param {string} title - Heading text.
 * @param {string|null} description - Optional description line.
 * @param {string} content - Document body.
 * @returns {string} The section, starting with a newline and ending with a blank line.
 */
function formatDocSection(level, title, description, content) {
  const marker = '#'.repeat(Math.min(level, 6))
  let section = `\n${marker} ${title}\n`
  if (description) {
    section += `**Description**: ${description}\n`
  }
  // The blank line keeps the description and the first paragraph of the
  // content from merging into a single paragraph.
  return `${section}\n${content}\n\n`
}

/**
 * Renders sidebar items with their full document content for llms-full.txt.
 *
 * Handled item shapes: a plain doc id string, `{ type: 'doc', id, label? }`
 * and `{ type: 'category', label, items? }`. Other items (e.g. links) are
 * ignored, and documents whose content is empty or whitespace are skipped.
 *
 * @param {Array<string|Object>} items - Sidebar items to render.
 * @param {number} [headerLevel=2] - Heading level of the current nesting depth.
 * @param {boolean} [isTopLevel=false] - Whether `items` is the sidebar root.
 * @returns {string} Markdown for the given items.
 */
function processForLlmsFullTxt(items, headerLevel = 2, isTopLevel = false) {
  let result = ''

  for (const item of items) {
    const isShorthand = typeof item === 'string'

    if (isShorthand || item.type === 'doc') {
      const docId = isShorthand ? item : item.id
      const docPath = path.join('./documentation', docId + '.md')
      const { title, description, content } = extractMetadataAndContent(docPath)

      if (content.trim()) {
        const label = isShorthand ? null : item.label
        // Only a top-level `doc` object sits at the current level; string
        // entries and nested docs go one level deeper.
        const level = !isShorthand && isTopLevel ? headerLevel : headerLevel + 1
        result += formatDocSection(level, label || title || docId, description, content)
      }
    } else if (item.type === 'category') {
      const hasChildren = item.items && item.items.length > 0
      if (isTopLevel) {
        result += `\n## ${item.label}\n\n`
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, 3, false)
        }
      } else {
        const marker = '#'.repeat(Math.min(headerLevel, 6))
        result += `\n${marker} ${item.label}\n\n`
        if (hasChildren) {
          result += processForLlmsFullTxt(item.items, headerLevel + 1, false)
        }
      }
    }
  }

  return result
}
183+
/**
 * Generates `./static/llms.txt` (link index) and `./static/llms-full.txt`
 * (full content) from the `docs` sidebar and logs the size of each file.
 *
 * Output paths are relative to the current working directory. Errors from
 * writing the files propagate to the caller.
 */
function generateLlmsFiles() {
  console.log('Generating llms.txt and llms-full.txt from QuestDB documentation...')

  const docs = sidebarConfig.docs

  const llmsOutput = [
    '# QuestDB Documentation',
    '',
    '## Getting Started',
    '',
    ''
  ].join('\n') + processForLlmsTxt(docs, 0, true)

  const llmsFullOutput = [
    '# QuestDB Documentation - Complete Content',
    '',
    'This file contains the complete text content of QuestDB documentation organized hierarchically.',
    '',
    '## Getting Started',
    '',
    ''
  ].join('\n') + processForLlmsFullTxt(docs, 2, true)

  fs.writeFileSync('./static/llms.txt', llmsOutput)
  fs.writeFileSync('./static/llms-full.txt', llmsFullOutput)

  // Report sizes in bytes as written to disk (UTF-8). `String.length` counts
  // UTF-16 code units and under-reports files containing non-ASCII text.
  const llmsBytes = Buffer.byteLength(llmsOutput, 'utf8')
  const llmsFullBytes = Buffer.byteLength(llmsFullOutput, 'utf8')

  console.log('✅ llms.txt generated successfully!')
  console.log(` - Size: ${(llmsBytes / 1024).toFixed(2)} KB`)

  console.log('✅ llms-full.txt generated successfully!')
  console.log(` - Size: ${(llmsFullBytes / 1024 / 1024).toFixed(2)} MB`)
}
214+
// Script entry point: run the generator and report any failure.
try {
  generateLlmsFiles()
} catch (error) {
  console.error('Error generating llms files:', error)
  // Exit non-zero so the calling npm script / CI step can detect the failure;
  // logging alone would let the process exit successfully.
  process.exitCode = 1
}
0 commit comments