Initial public push: docs cosmos v4 + AI module + framework groundwork

This is the snapshot the production landing site (nibiru-framework.com) is deployed from. It brings together the recent splash + docs migration to the v4 "Cosmos" design system, the new in-framework AI module, and the framework groundwork that backs the framework-reference extraction. What lands: - docs/: Astro + Starlight site with the v4 dark cosmic palette, GalaxyHero canvas constellation, Mission Control chat (wired to /api/oracle → api.neuronetz.ai via providers.mjs Ollama), 5-panel MMVC stage (Model · AI · Module · Controller · View), translated EN/DE/JA/ES/FR content, PWA + sitemap + llms.txt + Umami analytics. - docs/design-system/: canonical mockup bundle (source/index-v2.html for splash, source/docs-system.html + preview/ for docs, SPEC.md, tokens). - docs/scripts/extraction/framework-reference-v2.md: deep framework reference (~1.6k lines, file:line citations, every public factory and idiom) — basis for the LoRA training corpus. - application/module/ai/: AI module with chat / embed / RAG / agent plugins, plus pdoQuery / httpGet / fileRead tools and Modelfile + smoke-test in training/. - application/module/users/: user / ACL / form-factory traits used as the reference plugin pattern for the framework docs. - application/settings/config/database/: schema + seed migrations including the AI module tables (200–203). - Form factory + autogenerator changes that framework-reference-v2 covers. Production secrets stay out: docs/.env, settings.production.ini and ai.production.ini are all gitignored (.example files are in tree). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-08 15:22:18 +02:00
parent a60ce90643
commit 48c839d927
662 changed files with 172811 additions and 1 deletions
--- a/docs/scripts/build-oracle-index.mjs
+++ b/docs/scripts/build-oracle-index.mjs
@@ -0,0 +1,117 @@
+#!/usr/bin/env node
+/**
+ * Build a vector index over the docs for the in-site Oracle (RAG).
+ *
+ *   node scripts/build-oracle-index.mjs
+ *
+ * Defaults to Ollama at https://api.neuronetz.ai with model nomic-embed-text.
+ * Override via env:
+ *   OLLAMA_BASE_URL=...
+ *   OLLAMA_EMBED_MODEL=...     (e.g. nomic-embed-text, mxbai-embed-large)
+ *   EMBED_PROVIDER=openai      (uses OpenAI embeddings via OPENAI_API_KEY)
+ *
+ * Output: public/oracle-index.json
+ *
+ * Soft-fail behaviour: if the embedding provider is unreachable or the model
+ * is missing, an empty index is written and the runtime endpoint will operate
+ * in chat-only (no-RAG) mode.
+ */
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { chunkFile, walkDocs } from './lib/chunk.mjs';
+import { embed, embedConfig } from './lib/providers.mjs';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const DOCS_DIR = path.resolve(__dirname, '../src/content/docs');
+const OUT_FILE = path.resolve(__dirname, '../public/oracle-index.json');
+const BATCH = 16;
+
+function embedToBase64(f32) {
+	return Buffer.from(new Float32Array(f32).buffer).toString('base64');
+}
+
+async function main() {
+	const cfg = embedConfig();
+	console.log(`Embedding provider: ${cfg.provider}`);
+	if (cfg.provider === 'ollama') {
+		console.log(`  Ollama: ${cfg.ollamaUrl}`);
+		console.log(`  Model:  ${cfg.ollamaEmbedModel}`);
+	} else if (cfg.provider === 'openai') {
+		console.log(`  OpenAI model: ${cfg.openaiEmbedModel}`);
+		if (!cfg.hasOpenAIKey) {
+			console.warn('  ⚠ OPENAI_API_KEY missing — writing empty index (chat-only mode).');
+			writeEmpty('openai-key-missing');
+			return;
+		}
+	}
+
+	console.log(`Walking ${DOCS_DIR}…`);
+	const files = walkDocs(DOCS_DIR);
+	console.log(`Found ${files.length} markdown files.`);
+
+	const chunks = files.flatMap((f) => chunkFile(f, DOCS_DIR));
+	console.log(`Produced ${chunks.length} chunks.`);
+
+	const embeddings = [];
+	try {
+		for (let i = 0; i < chunks.length; i += BATCH) {
+			const batch = chunks.slice(i, i + BATCH);
+			const inputs = batch.map((c) => `${c.pageTitle}\n${c.sectionTitle}\n\n${c.content}`);
+			const vecs = await embed(inputs);
+			for (const v of vecs) embeddings.push(v);
+			process.stdout.write(`\r  embedded ${embeddings.length}/${chunks.length}`);
+		}
+		process.stdout.write('\n');
+	} catch (err) {
+		console.error(`\n⚠ Embedding failed: ${err.message}`);
+		console.error(`  → writing empty index, Oracle will run in chat-only (no-RAG) mode.`);
+		if (cfg.provider === 'ollama') {
+			console.error(`  → To fix: pull the embedding model on your Ollama server:`);
+			console.error(`        curl ${cfg.ollamaUrl}/api/pull -d '{"name":"${cfg.ollamaEmbedModel}"}'`);
+		}
+		writeEmpty(err.message);
+		return;
+	}
+
+	const dim = embeddings[0]?.length ?? 0;
+	const out = {
+		provider: cfg.provider,
+		model: cfg.provider === 'ollama' ? cfg.ollamaEmbedModel : cfg.openaiEmbedModel,
+		dim,
+		builtAt: new Date().toISOString(),
+		chunks: chunks.map((c) => ({
+			id: c.id,
+			url: c.url,
+			pageTitle: c.pageTitle,
+			sectionTitle: c.sectionTitle,
+			language: c.language,
+			content: c.content,
+		})),
+		embeddings: embeddings.map(embedToBase64),
+	};
+
+	fs.mkdirSync(path.dirname(OUT_FILE), { recursive: true });
+	fs.writeFileSync(OUT_FILE, JSON.stringify(out));
+	const kb = (JSON.stringify(out).length / 1024).toFixed(1);
+	console.log(`✔ Wrote ${OUT_FILE} (${kb} KB, dim=${dim})`);
+}
+
+function writeEmpty(reason) {
+	fs.mkdirSync(path.dirname(OUT_FILE), { recursive: true });
+	fs.writeFileSync(
+		OUT_FILE,
+		JSON.stringify(
+			{ provider: null, model: null, dim: 0, builtAt: null, reason, chunks: [], embeddings: [] },
+			null,
+			2
+		)
+	);
+}
+
+main().catch((e) => {
+	console.error(e);
+	writeEmpty(e.message);
+	process.exit(0); // soft-fail so Docker build doesn't break the site
+});