Skip to content

Commit b050c02

Browse files
authored
Merge pull request #107 from epilot-dev/claude/slack-add-llms-txt-support-7aWYF
feat: add llms.txt support for LLM-friendly docs
2 parents 9bae25f + 7396d23 commit b050c02

2 files changed

Lines changed: 319 additions & 0 deletions

File tree

docusaurus.config.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,28 @@ const DOCS_URL = process.env.DOCS_URL || 'https://docs.epilot.io';
8888
},
8989
},
9090
],
91+
[
92+
require.resolve('./src/plugins/llms-txt/index.js'),
93+
{
94+
siteDescription: `epilot is a cloud-native SaaS platform (Energy XRM) designed for energy suppliers and service providers in the German energy market.
95+
96+
The platform provides:
97+
- **Entity Management**: Flexible data modeling with schemas for contacts, products, orders, opportunities, and custom entities
98+
- **Journey Builder**: Visual designer for customer onboarding journeys, self-service portals, and multi-step forms
99+
- **Workflow Engine**: Configurable workflows for business process automation with task management and SLA tracking
100+
- **Automation Engine**: Rule-based automation for entity mapping, document generation, and integrations
101+
- **Portal Framework**: Customer (ECP) and installer portals with granular permissions and self-service capabilities
102+
- **Messaging Hub**: Centralized email and messaging with templates and shared inboxes
103+
- **Integration Hub (ERP Toolkit)**: Pre-built connectors for SAP, Wilken, and other ERP/billing systems
104+
- **REST APIs & SDK**: Comprehensive APIs for all platform capabilities with TypeScript SDK
105+
- **Document Generation**: Template-based document creation with variable support
106+
- **Pricing & Product Catalog**: Flexible product management with tiered pricing and availability rules
107+
- **AI Features**: AI-powered copilot for entity management, messaging, and workflow automation
108+
- **Apps & Marketplace**: Extensibility platform for third-party and custom applications
109+
110+
This documentation covers all aspects of the epilot platform for developers, administrators, and integration partners.`,
111+
},
112+
],
91113
[
92114
require.resolve('./src/plugins/changelog/index.js'),
93115
{

src/plugins/llms-txt/index.js

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
/* eslint-disable @typescript-eslint/no-var-requires */
2+
const fs = require('fs-extra');
3+
const path = require('path');
4+
const matter = require('gray-matter');
5+
6+
const PLUGIN_NAME = 'docusaurus-plugin-llms-txt';
7+
8+
/**
 * Cleans markdown content for LLM consumption.
 * Removes MDX-specific syntax, JSX components, and other non-standard markdown.
 *
 * Fenced code blocks and inline code spans are protected behind placeholders
 * before the regex passes run, so braces and JSX-like tags that appear inside
 * code samples survive intact.
 *
 * @param {string} content - Raw markdown/MDX body (front matter already stripped).
 * @returns {string} Cleaned plain-markdown text, trimmed.
 */
function cleanMarkdownForLlm(content) {
  // Stash code regions so the MDX/JSX regexes below cannot mangle braces or
  // tags inside code samples. NUL bytes cannot occur in real markdown, so the
  // placeholder never collides with document text.
  const stash = [];
  const protect = (match) => {
    stash.push(match);
    return `\u0000LLMSTXT${stash.length - 1}\u0000`;
  };

  let cleaned = content
    .replace(/```[\s\S]*?```/g, protect) // fenced code blocks first
    .replace(/`[^`\n]+`/g, protect); // then inline code spans

  // Remove import statements
  cleaned = cleaned.replace(/^import\s+.*?(?:from\s+)?['"].*?['"];?\s*$/gm, '');

  // Remove export statements
  cleaned = cleaned.replace(/^export\s+(?:default\s+)?.*?;?\s*$/gm, '');

  // Remove JSX self-closing components like <Component />
  cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*\s*[^>]*\/>/g, '');

  // Remove JSX opening and closing tags with content
  cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>[\s\S]*?<\/[A-Z][a-zA-Z0-9]*>/g, '');

  // Remove remaining JSX tags
  cleaned = cleaned.replace(/<[A-Z][a-zA-Z0-9]*[^>]*>/g, '');
  cleaned = cleaned.replace(/<\/[A-Z][a-zA-Z0-9]*>/g, '');

  // Remove MDX expressions {expression} — safe now that code is stashed.
  cleaned = cleaned.replace(/\{[^}]+\}/g, '');

  // Clean up Docusaurus admonitions - convert to blockquotes
  cleaned = cleaned.replace(
    /^:::\s*(note|tip|info|warning|danger|caution)(?:\s+(.+?))?$/gm,
    (_, type, title) => {
      const capitalizedType = type.charAt(0).toUpperCase() + type.slice(1);
      return title ? `> **${capitalizedType}: ${title}**` : `> **${capitalizedType}**`;
    },
  );
  cleaned = cleaned.replace(/^:::$/gm, '');

  // Remove HTML comments
  cleaned = cleaned.replace(/<!--[\s\S]*?-->/g, '');

  // Remove multiple consecutive blank lines (keep max 2)
  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');

  // Restore the protected code regions verbatim.
  cleaned = cleaned.replace(/\u0000LLMSTXT(\d+)\u0000/g, (_, i) => stash[Number(i)]);

  return cleaned.trim();
}
52+
53+
/**
 * Generates plain text content from a markdown source file.
 *
 * @param {string} filePath - Absolute path to the .md/.mdx source file.
 * @returns {Promise<string|null>} Rendered text (H1 title, description,
 *   cleaned body), or null when the file does not exist.
 */
async function generatePageContent(filePath) {
  const exists = await fs.pathExists(filePath);
  if (!exists) {
    return null;
  }

  const raw = await fs.readFile(filePath, 'utf-8');
  const parsed = matter(raw);
  const frontMatter = parsed.data;

  const parts = [];

  // Prefer the explicit title, then the sidebar label, for the heading.
  const heading = frontMatter.title || frontMatter.sidebar_label;
  if (heading) {
    parts.push(`# ${heading}\n`);
  }
  if (frontMatter.description) {
    parts.push(`${frontMatter.description}\n`);
  }
  parts.push(cleanMarkdownForLlm(parsed.content));

  return parts.join('\n');
}
80+
81+
/**
 * Recursively collects all doc routes with source file paths.
 *
 * @param {Array<object>} routes - Docusaurus route tree.
 * @returns {Array<{path: string, sourceFilePath: string}>} Flat list in
 *   depth-first order (parent before children).
 */
function collectDocRoutes(routes) {
  const collected = [];

  const visit = (route) => {
    // Only routes whose metadata carries a source file are of interest.
    const sourceFilePath = route.metadata && route.metadata.sourceFilePath;
    if (sourceFilePath) {
      collected.push({ path: route.path, sourceFilePath });
    }
    (route.routes || []).forEach(visit);
  };

  routes.forEach(visit);
  return collected;
}
104+
105+
/**
 * Generates the root llms.txt content with a page index.
 *
 * Output follows the llms.txt convention (https://llmstxt.org/): an H1 title,
 * an optional summary, then link lists of the form "- [name](url): description"
 * where each url points at the per-page plain-text rendition.
 *
 * @param {object} siteConfig - Docusaurus site config (uses url, title, tagline).
 * @param {Array<{path: string, title: string, description?: string}>} items
 *   Doc pages, already sorted by path.
 * @param {string|undefined} siteDescription - Overrides siteConfig.tagline.
 * @returns {string} The complete llms.txt document.
 */
function generateRootLlmsTxt(siteConfig, items, siteDescription) {
  const siteUrl = siteConfig.url;
  const lines = [];

  lines.push(`# ${siteConfig.title}`);
  lines.push('');

  if (siteDescription) {
    lines.push(siteDescription);
    lines.push('');
  } else if (siteConfig.tagline) {
    lines.push(siteConfig.tagline);
    lines.push('');
  }

  lines.push('## Documentation Pages');
  lines.push('');

  // Group items by their first two path segments (e.g. /docs/journeys) so the
  // index reads as one section per documentation area.
  const grouped = new Map();

  for (const item of items) {
    const pathParts = item.path.split('/').filter(Boolean);
    const groupKey = pathParts.length > 1 ? `/${pathParts[0]}/${pathParts[1]}` : `/${pathParts[0] || ''}`;

    if (!grouped.has(groupKey)) {
      grouped.set(groupKey, []);
    }
    grouped.get(groupKey).push(item);
  }

  for (const [groupPath, groupItems] of grouped) {
    lines.push(`### ${groupPath}`);
    lines.push('');

    for (const item of groupItems) {
      // Link directly to the per-page llms.txt and keep the description on
      // the same line, per the "- [name](url): description" list format.
      const txtUrl = `${siteUrl}${item.path}/llms.txt`;
      const suffix = item.description ? `: ${item.description}` : '';
      lines.push(`- [${item.title}](${txtUrl})${suffix}`);
    }
    lines.push('');
  }

  lines.push('---');
  lines.push('');
  lines.push('> This file follows the llms.txt standard. See: https://llmstxt.org/');

  return lines.join('\n');
}
159+
160+
/**
161+
* @param {import('@docusaurus/types').LoadContext} context
162+
* @param {object} options
163+
*/
164+
module.exports = function pluginLlmsTxt(context, options = {}) {
165+
const { siteDescription } = options;
166+
167+
return {
168+
name: PLUGIN_NAME,
169+
170+
async postBuild({ siteConfig, routes, outDir, siteDir }) {
171+
const docRoutes = collectDocRoutes(routes);
172+
173+
if (docRoutes.length === 0) {
174+
console.warn(`[${PLUGIN_NAME}] No doc routes with source files found. Falling back to docs/ directory scan.`);
175+
}
176+
177+
// Collect all doc files from the docs/ directory as a reliable source
178+
const docsDir = path.join(siteDir, 'docs');
179+
const allDocFiles = [];
180+
181+
async function scanDir(dir, relativePath = '') {
182+
const entries = await fs.readdir(dir, { withFileTypes: true });
183+
for (const entry of entries) {
184+
const fullPath = path.join(dir, entry.name);
185+
const relPath = path.join(relativePath, entry.name);
186+
187+
if (entry.isDirectory()) {
188+
await scanDir(fullPath, relPath);
189+
} else if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
190+
allDocFiles.push({ fullPath, relativePath: relPath });
191+
}
192+
}
193+
}
194+
195+
if (await fs.pathExists(docsDir)) {
196+
await scanDir(docsDir);
197+
}
198+
199+
console.log(`[${PLUGIN_NAME}] Found ${allDocFiles.length} doc files to process.`);
200+
201+
const items = [];
202+
let successCount = 0;
203+
204+
// Generate per-page llms.txt files
205+
await Promise.all(
206+
allDocFiles.map(async ({ fullPath, relativePath }) => {
207+
try {
208+
const content = await generatePageContent(fullPath);
209+
if (!content) return;
210+
211+
const fileContent = await fs.readFile(fullPath, 'utf-8');
212+
const { data: frontMatter } = matter(fileContent);
213+
214+
// Determine the URL path for this doc
215+
// e.g. docs/journeys/journey-builder.md -> /docs/journeys/journey-builder
216+
let urlPath = relativePath
217+
.replace(/\.mdx?$/, '')
218+
.replace(/\\/g, '/');
219+
220+
// Handle index files (intro.md or index.md at directory level)
221+
if (urlPath.endsWith('/intro')) {
222+
// Keep as-is, Docusaurus maps these to the directory path or /intro
223+
}
224+
225+
const docPath = `/docs/${urlPath}`;
226+
227+
// Write llms.txt for this page
228+
const outputDir = path.join(outDir, docPath);
229+
const outputPath = path.join(outputDir, 'llms.txt');
230+
231+
await fs.ensureDir(outputDir);
232+
await fs.writeFile(outputPath, content, 'utf-8');
233+
successCount++;
234+
235+
// Collect metadata for root index
236+
const title = frontMatter.title || frontMatter.sidebar_label || urlPath.split('/').pop();
237+
items.push({
238+
path: docPath,
239+
title,
240+
description: frontMatter.description,
241+
});
242+
} catch (err) {
243+
console.error(`[${PLUGIN_NAME}] Failed to process ${relativePath}:`, err.message);
244+
}
245+
}),
246+
);
247+
248+
console.log(`[${PLUGIN_NAME}] Generated ${successCount} per-page llms.txt files.`);
249+
250+
// Sort items by path
251+
items.sort((a, b) => a.path.localeCompare(b.path));
252+
253+
// Generate root llms.txt
254+
try {
255+
const rootContent = generateRootLlmsTxt(siteConfig, items, siteDescription);
256+
const rootPath = path.join(outDir, 'llms.txt');
257+
await fs.writeFile(rootPath, rootContent, 'utf-8');
258+
console.log(`[${PLUGIN_NAME}] Generated root llms.txt with ${items.length} entries.`);
259+
} catch (err) {
260+
console.error(`[${PLUGIN_NAME}] Failed to generate root llms.txt:`, err.message);
261+
throw err;
262+
}
263+
264+
// Generate llms-full.txt with all docs concatenated
265+
try {
266+
const fullLines = [];
267+
fullLines.push(`# ${siteConfig.title} - Complete Documentation`);
268+
fullLines.push('');
269+
270+
if (siteDescription) {
271+
fullLines.push(siteDescription);
272+
} else if (siteConfig.tagline) {
273+
fullLines.push(siteConfig.tagline);
274+
}
275+
fullLines.push('');
276+
fullLines.push('---');
277+
fullLines.push('');
278+
279+
for (const { fullPath, relativePath } of allDocFiles.sort((a, b) => a.relativePath.localeCompare(b.relativePath))) {
280+
const content = await generatePageContent(fullPath);
281+
if (content) {
282+
fullLines.push(content);
283+
fullLines.push('');
284+
fullLines.push('---');
285+
fullLines.push('');
286+
}
287+
}
288+
289+
const fullPath = path.join(outDir, 'llms-full.txt');
290+
await fs.writeFile(fullPath, fullLines.join('\n'), 'utf-8');
291+
console.log(`[${PLUGIN_NAME}] Generated llms-full.txt.`);
292+
} catch (err) {
293+
console.error(`[${PLUGIN_NAME}] Failed to generate llms-full.txt:`, err.message);
294+
}
295+
},
296+
};
297+
};

0 commit comments

Comments
 (0)