Initial public push: docs cosmos v4 + AI module + framework groundwork

This is the snapshot the production landing site (nibiru-framework.com) is
deployed from. Brings together the recent splash + docs migration to the v4
"Cosmos" design system, the new in-framework AI module, and the framework
groundwork that backs the framework-reference extraction.

What lands:
- docs/: Astro + Starlight site with the v4 dark cosmic palette, GalaxyHero
  canvas constellation, Mission Control chat (wired to /api/oracle →
  api.neuronetz.ai via providers.mjs Ollama), 5-panel MMVC stage
  (Model · AI · Module · Controller · View), translated EN/DE/JA/ES/FR
  content, PWA + sitemap + llms.txt + Umami analytics.
- docs/design-system/: canonical mockup bundle (source/index-v2.html for
  splash, source/docs-system.html + preview/ for docs, SPEC.md, tokens).
- docs/scripts/extraction/framework-reference-v2.md: deep framework
  reference (~1.6k lines, file:line citations, every public factory and
  idiom) — basis for the LoRA training corpus.
- application/module/ai/: AI module with chat / embed / RAG / agent
  plugins, plus pdoQuery / httpGet / fileRead tools and Modelfile +
  smoke-test in training/.
- application/module/users/: user / ACL / form-factory traits used as the
  reference plugin pattern for the framework docs.
- application/settings/config/database/: schema + seed migrations
  including the AI module tables (200–203).
- Form factory + autogenerator changes the framework-reference-v2 covers.

Production secrets stay out: docs/.env, settings.production.ini and
ai.production.ini are all gitignored (.example files are in tree).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
stephan
2026-05-08 15:22:18 +02:00
parent a60ce90643
commit 48c839d927
662 changed files with 172811 additions and 1 deletions

View File

@@ -0,0 +1,128 @@
#!/usr/bin/env node
/**
* Export the docs as a LoRA-training-ready corpus.
*
* node scripts/build-corpus.mjs
*
* Outputs four JSONL files (plus a stats.json summary) under dist/corpus/:
* - chunks.jsonl — raw chunks (one section per line)
* - instructions.jsonl — instruction/input/output triples
* - chat.jsonl — sharegpt/chat-format messages
* - completion.jsonl — prompt/completion pairs (legacy fine-tunes)
*
* The instruction text for each chunk is derived from the section heading
* (or the page title when a chunk has no section heading) with a per-language
* template ("How do I X?", "Wie X?", "X について教えてください。").
*/
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { chunkFile, walkDocs } from './lib/chunk.mjs';
// Directory containing this script; every other path is anchored here so the
// tool does not depend on the current working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Resolve a path written relative to the script directory into an absolute one.
const fromScriptDir = (relative) => path.resolve(__dirname, relative);

// Root of the docs tree that gets walked and chunked.
const DOCS_DIR = fromScriptDir('../src/content/docs');
// Destination directory for the generated JSONL files and stats.json.
const OUT_DIR = fromScriptDir('../dist/corpus');
/**
 * System prompt per documentation language, keyed by language code.
 * Lookups in this file fall back to the `en` entry when a language has no
 * entry of its own. Frozen so the shared table cannot be modified by accident.
 */
const SYSTEM_PROMPT = Object.freeze({
  en: 'You are an expert on the Nibiru PHP framework. Answer based on the documentation, with concrete code examples and file paths where helpful.',
  de: 'Du bist Experte für das Nibiru-PHP-Framework. Antworte auf Basis der Dokumentation, mit konkreten Code-Beispielen und Dateipfaden, wo es hilft.',
  ja: 'あなたは Nibiru PHP フレームワークの専門家です。ドキュメントに基づいて、有用な箇所では具体的なコード例とファイルパスを示して回答してください。',
  es: 'Eres un experto en el framework PHP Nibiru. Responde basándote en la documentación, con ejemplos de código concretos y rutas de archivos donde sea útil.',
  fr: "Tu es expert du framework PHP Nibiru. Réponds sur la base de la documentation, avec des exemples de code concrets et des chemins de fichiers lorsque c'est utile.",
});
/**
 * Question openers per language, keyed by language code; each row holds four
 * alternatives that questionFor picks from. The `ja` row is not read by
 * questionFor, which builds Japanese questions from a fixed template instead.
 * The table and every row are frozen so the shared data cannot be modified
 * by accident.
 */
const QUESTION_PREFIX = Object.freeze({
  en: Object.freeze(['How do I', 'What is', 'Explain', 'Show me']),
  de: Object.freeze(['Wie', 'Was ist', 'Erkläre', 'Zeig mir']),
  ja: Object.freeze(['', '', 'について教えてください:', '']),
  es: Object.freeze(['¿Cómo', '¿Qué es', 'Explica', 'Muéstrame']),
  fr: Object.freeze(['Comment', "Qu'est-ce que", 'Explique', 'Montre-moi']),
});
/**
 * Build the synthetic user question for a chunk from its heading.
 *
 * The heading is the chunk's section title, or its page title when the
 * section title is empty. Japanese uses a fixed sentence template; every
 * other language picks one of its openers from QUESTION_PREFIX (English
 * openers for unknown languages), chosen by heading length modulo the number
 * of openers, followed by the lower-cased heading.
 *
 * @param {object} chunk - Reads `language`, `sectionTitle` and `pageTitle`.
 * @returns {string} The generated question text.
 */
function questionFor(chunk) {
  const lang = chunk.language || 'en';
  const heading = chunk.sectionTitle || chunk.pageTitle;

  // Japanese never consults the opener table.
  if (lang === 'ja') return `${heading} について教えてください。`;

  const openers = QUESTION_PREFIX[lang] || QUESTION_PREFIX.en;
  const opener = openers[heading.length % openers.length];
  const topic = heading.toLowerCase();

  const spaceBeforeMark = lang === 'es' || lang === 'fr';
  if (!spaceBeforeMark) return `${opener} ${topic}?`;

  // Spanish and French get a space in front of the question mark.
  // NOTE(review): the replace below swaps one space for an identical space,
  // so as written it leaves the string unchanged — confirm whether a
  // different whitespace character was intended.
  return `${opener} ${topic} ?`.replace(' ', ' ');
}
/**
 * Create directory `d`, including any missing parent directories.
 * Calling it for a directory that already exists is not an error.
 *
 * @param {string} d - Directory path to create.
 */
function ensureDir(d) {
  const options = { recursive: true };
  fs.mkdirSync(d, options);
}
/**
 * Write `items` to `filePath` as JSON Lines: one JSON.stringify'd item per
 * line, each terminated by a newline. The parent directory is created first.
 *
 * Stream failures (for example an unwritable target) reject the returned
 * promise instead of surfacing as an unhandled 'error' event, so the caller's
 * own error handling sees them.
 *
 * @param {string} filePath - Destination file; replaced if it already exists.
 * @param {Iterable<*>} items - Values to serialize, one per line.
 * @returns {Promise<void>} Resolves once the stream has closed.
 */
async function writeJsonl(filePath, items) {
  ensureDir(path.dirname(filePath));
  const stream = fs.createWriteStream(filePath, { encoding: 'utf8' });

  // Listeners are attached before any write so an early open failure is caught.
  // On failure the stream emits 'error' and then 'close'; the promise settles
  // on whichever comes first, so the later 'close' is ignored.
  const closed = new Promise((resolve, reject) => {
    stream.once('error', reject);
    stream.once('close', () => resolve());
  });

  for (const item of items) stream.write(`${JSON.stringify(item)}\n`);
  stream.end();

  await closed;
}
/**
 * Build the corpus: walk the docs tree, chunk every file, derive the four
 * training formats from the chunks, then write the JSONL files followed by a
 * stats.json summary. Progress and the final stats are logged to the console.
 *
 * @returns {Promise<void>} Resolves once every output file has been written.
 */
async function main() {
  console.log(`Walking ${DOCS_DIR}`);
  const files = walkDocs(DOCS_DIR);
  const chunks = files.flatMap((f) => chunkFile(f, DOCS_DIR));
  console.log(`Produced ${chunks.length} chunks across ${files.length} files.`);

  // Small per-chunk helpers shared by the output formats below.
  const systemPromptFor = (c) => SYSTEM_PROMPT[c.language] || SYSTEM_PROMPT.en;
  const metadataFor = (c) => ({ language: c.language, source: c.url, page: c.pageTitle });

  // Raw chunks, reduced to the fields that are exported.
  const chunksOut = chunks.map(
    ({ id, url, pageTitle, sectionTitle, language, tokens, content }) => ({
      id,
      url,
      pageTitle,
      sectionTitle,
      language,
      tokens,
      content,
    }),
  );

  // Instruction/input/output triples.
  const instructionsOut = chunks.map((c) => ({
    instruction: questionFor(c),
    input: '',
    output: c.content,
    metadata: metadataFor(c),
  }));

  // Chat-format conversations: system, user question, assistant answer.
  const chatOut = chunks.map((c) => ({
    messages: [
      { role: 'system', content: systemPromptFor(c) },
      { role: 'user', content: questionFor(c) },
      { role: 'assistant', content: c.content },
    ],
    metadata: metadataFor(c),
  }));

  // Prompt/completion pairs; the completion starts with a single space.
  const completionOut = chunks.map((c) => ({
    prompt: `${systemPromptFor(c)}\n\nQuestion: ${questionFor(c)}\n\nAnswer:`,
    completion: ` ${c.content}`,
  }));

  // Written one after another, in this order.
  const outputs = [
    ['chunks.jsonl', chunksOut],
    ['instructions.jsonl', instructionsOut],
    ['chat.jsonl', chatOut],
    ['completion.jsonl', completionOut],
  ];
  for (const [fileName, rows] of outputs) {
    await writeJsonl(path.join(OUT_DIR, fileName), rows);
  }

  const stats = {
    generatedAt: new Date().toISOString(),
    fileCount: files.length,
    chunkCount: chunks.length,
    byLanguage: {},
  };
  // Tally chunks per language code.
  for (const { language } of chunks) {
    stats.byLanguage[language] = (stats.byLanguage[language] || 0) + 1;
  }

  const statsJson = JSON.stringify(stats, null, 2);
  fs.writeFileSync(path.join(OUT_DIR, 'stats.json'), statsJson);
  console.log(`Wrote 4 JSONL files + stats.json to ${OUT_DIR}`);
  console.log(statsJson);
}
// Script entry point: run the export; on any failure log the error and exit
// with a non-zero status.
const reportAndExit = (err) => {
  console.error(err);
  process.exit(1);
};
main().catch(reportAndExit);