Initial public push: docs cosmos v4 + AI module + framework groundwork
This is the snapshot the production landing site (nibiru-framework.com) is deployed from. Brings together the recent splash + docs migration to the v4 "Cosmos" design system, the new in-framework AI module, and the framework groundwork that backs the framework-reference extraction. What lands: - docs/: Astro + Starlight site with the v4 dark cosmic palette, GalaxyHero canvas constellation, Mission Control chat (wired to /api/oracle → api.neuronetz.ai via providers.mjs Ollama), 5-panel MMVC stage (Model · AI · Module · Controller · View), translated EN/DE/JA/ES/FR content, PWA + sitemap + llms.txt + Umami analytics. - docs/design-system/: canonical mockup bundle (source/index-v2.html for splash, source/docs-system.html + preview/ for docs, SPEC.md, tokens). - docs/scripts/extraction/framework-reference-v2.md: deep framework reference (~1.6k lines, file:line citations, every public factory and idiom) — basis for the LoRA training corpus. - application/module/ai/: AI module with chat / embed / RAG / agent plugins, plus pdoQuery / httpGet / fileRead tools and Modelfile + smoke-test in training/. - application/module/users/: user / ACL / form-factory traits used as the reference plugin pattern for the framework docs. - application/settings/config/database/: schema + seed migrations including the AI module tables (200–203). - Form factory + autogenerator changes that the framework-reference-v2 covers. Production secrets stay out: docs/.env, settings.production.ini and ai.production.ini are all gitignored (.example files are in tree). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
117
docs/scripts/build-oracle-index.mjs
Normal file
117
docs/scripts/build-oracle-index.mjs
Normal file
@@ -0,0 +1,117 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Build a vector index over the docs for the in-site Oracle (RAG).
|
||||
*
|
||||
* node scripts/build-oracle-index.mjs
|
||||
*
|
||||
* Defaults to Ollama at https://api.neuronetz.ai with model nomic-embed-text.
|
||||
* Override via env:
|
||||
* OLLAMA_BASE_URL=...
|
||||
* OLLAMA_EMBED_MODEL=... (e.g. nomic-embed-text, mxbai-embed-large)
|
||||
* EMBED_PROVIDER=openai (uses OpenAI embeddings via OPENAI_API_KEY)
|
||||
*
|
||||
* Output: public/oracle-index.json
|
||||
*
|
||||
* Soft-fail behaviour: if the embedding provider is unreachable or the model
|
||||
* is missing, an empty index is written and the runtime endpoint will operate
|
||||
* in chat-only (no-RAG) mode.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { chunkFile, walkDocs } from './lib/chunk.mjs';
|
||||
import { embed, embedConfig } from './lib/providers.mjs';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const DOCS_DIR = path.resolve(__dirname, '../src/content/docs');
|
||||
const OUT_FILE = path.resolve(__dirname, '../public/oracle-index.json');
|
||||
const BATCH = 16;
|
||||
|
||||
function embedToBase64(f32) {
|
||||
return Buffer.from(new Float32Array(f32).buffer).toString('base64');
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const cfg = embedConfig();
|
||||
console.log(`Embedding provider: ${cfg.provider}`);
|
||||
if (cfg.provider === 'ollama') {
|
||||
console.log(` Ollama: ${cfg.ollamaUrl}`);
|
||||
console.log(` Model: ${cfg.ollamaEmbedModel}`);
|
||||
} else if (cfg.provider === 'openai') {
|
||||
console.log(` OpenAI model: ${cfg.openaiEmbedModel}`);
|
||||
if (!cfg.hasOpenAIKey) {
|
||||
console.warn(' ⚠ OPENAI_API_KEY missing — writing empty index (chat-only mode).');
|
||||
writeEmpty('openai-key-missing');
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Walking ${DOCS_DIR}…`);
|
||||
const files = walkDocs(DOCS_DIR);
|
||||
console.log(`Found ${files.length} markdown files.`);
|
||||
|
||||
const chunks = files.flatMap((f) => chunkFile(f, DOCS_DIR));
|
||||
console.log(`Produced ${chunks.length} chunks.`);
|
||||
|
||||
const embeddings = [];
|
||||
try {
|
||||
for (let i = 0; i < chunks.length; i += BATCH) {
|
||||
const batch = chunks.slice(i, i + BATCH);
|
||||
const inputs = batch.map((c) => `${c.pageTitle}\n${c.sectionTitle}\n\n${c.content}`);
|
||||
const vecs = await embed(inputs);
|
||||
for (const v of vecs) embeddings.push(v);
|
||||
process.stdout.write(`\r embedded ${embeddings.length}/${chunks.length}`);
|
||||
}
|
||||
process.stdout.write('\n');
|
||||
} catch (err) {
|
||||
console.error(`\n⚠ Embedding failed: ${err.message}`);
|
||||
console.error(` → writing empty index, Oracle will run in chat-only (no-RAG) mode.`);
|
||||
if (cfg.provider === 'ollama') {
|
||||
console.error(` → To fix: pull the embedding model on your Ollama server:`);
|
||||
console.error(` curl ${cfg.ollamaUrl}/api/pull -d '{"name":"${cfg.ollamaEmbedModel}"}'`);
|
||||
}
|
||||
writeEmpty(err.message);
|
||||
return;
|
||||
}
|
||||
|
||||
const dim = embeddings[0]?.length ?? 0;
|
||||
const out = {
|
||||
provider: cfg.provider,
|
||||
model: cfg.provider === 'ollama' ? cfg.ollamaEmbedModel : cfg.openaiEmbedModel,
|
||||
dim,
|
||||
builtAt: new Date().toISOString(),
|
||||
chunks: chunks.map((c) => ({
|
||||
id: c.id,
|
||||
url: c.url,
|
||||
pageTitle: c.pageTitle,
|
||||
sectionTitle: c.sectionTitle,
|
||||
language: c.language,
|
||||
content: c.content,
|
||||
})),
|
||||
embeddings: embeddings.map(embedToBase64),
|
||||
};
|
||||
|
||||
fs.mkdirSync(path.dirname(OUT_FILE), { recursive: true });
|
||||
fs.writeFileSync(OUT_FILE, JSON.stringify(out));
|
||||
const kb = (JSON.stringify(out).length / 1024).toFixed(1);
|
||||
console.log(`✔ Wrote ${OUT_FILE} (${kb} KB, dim=${dim})`);
|
||||
}
|
||||
|
||||
/**
 * Write a placeholder index with no chunks and no embeddings to OUT_FILE,
 * creating the output directory first if it does not exist.
 *
 * @param {string} reason - Why the real index could not be built; stored
 *   under the `reason` key of the written JSON.
 * @returns {void}
 */
function writeEmpty(reason) {
  const placeholder = {
    provider: null,
    model: null,
    dim: 0,
    builtAt: null,
    reason,
    chunks: [],
    embeddings: [],
  };
  const outDir = path.dirname(OUT_FILE);

  fs.mkdirSync(outDir, { recursive: true });
  fs.writeFileSync(OUT_FILE, JSON.stringify(placeholder, null, 2));
}
|
||||
|
||||
// Entry point. Any error escaping main() is logged and downgraded to an
// empty index; the process still exits 0 so the surrounding build continues.
main().catch((e) => {
  console.error(e);
  try {
    // Rejections are not guaranteed to be Error objects; fall back to a
    // string so the written `reason` is never silently dropped.
    writeEmpty(e?.message ?? String(e));
  } catch (writeErr) {
    // If even the fallback file cannot be written, report it but still reach
    // the soft-fail exit below instead of dying with an unhandled rejection.
    console.error(`⚠ Could not write fallback index: ${writeErr?.message ?? writeErr}`);
  }
  process.exit(0); // soft-fail so Docker build doesn't break the site
});
|
||||
Reference in New Issue
Block a user