Initial public push: docs cosmos v4 + AI module + framework groundwork

This is the snapshot the production landing site (nibiru-framework.com) is
deployed from. Brings together the recent splash + docs migration to the v4
"Cosmos" design system, the new in-framework AI module, and the framework
groundwork that backs the framework-reference extraction.

What lands:
- docs/: Astro + Starlight site with the v4 dark cosmic palette, GalaxyHero
  canvas constellation, Mission Control chat (wired to /api/oracle →
  api.neuronetz.ai via providers.mjs Ollama), 5-panel MMVC stage
  (Model · AI · Module · Controller · View), translated EN/DE/JA/ES/FR
  content, PWA + sitemap + llms.txt + Umami analytics.
- docs/design-system/: canonical mockup bundle (source/index-v2.html for
  splash, source/docs-system.html + preview/ for docs, SPEC.md, tokens).
- docs/scripts/extraction/framework-reference-v2.md: deep framework
  reference (~1.6k lines, file:line citations, every public factory and
  idiom) — basis for the LoRA training corpus.
- application/module/ai/: AI module with chat / embed / RAG / agent
  plugins, plus pdoQuery / httpGet / fileRead tools and Modelfile +
  smoke-test in training/.
- application/module/users/: user / ACL / form-factory traits used as the
  reference plugin pattern for the framework docs.
- application/settings/config/database/: schema + seed migrations
  including the AI module tables (200–203).
- Form factory + autogenerator changes the framework-reference-v2 covers.

Production secrets stay out: docs/.env, settings.production.ini and
ai.production.ini are all gitignored (.example files are in tree).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
stephan
2026-05-08 15:22:18 +02:00
parent a60ce90643
commit 48c839d927
662 changed files with 172811 additions and 1 deletions

163
docs/scripts/lib/chunk.mjs Normal file
View File

@@ -0,0 +1,163 @@
// Markdown → chunks at H2/H3 boundaries.
// Used by both build-oracle-index.mjs (RAG) and build-corpus.mjs (LoRA training).
import fs from 'node:fs';
import path from 'node:path';
// Chunk sizing thresholds, measured in estimated tokens (see estimateTokens).
// TARGET_TOKENS: soft budget at which splitOversized flushes its paragraph buffer.
const TARGET_TOKENS = 600;
// MIN_TOKENS: sections estimated below this are merged into the preceding section by mergeSmall.
const MIN_TOKENS = 120;
// MAX_TOKENS: sections estimated above this are re-split by splitOversized.
const MAX_TOKENS = 900;
// Rough token count that needs no tokenizer.
// Characters in the kana / CJK ideograph ranges below are weighted at ~1.5
// characters per token; everything else at ~4 characters per token. The sum is
// rounded up, so any non-empty string counts as at least one token.
export function estimateTokens(text) {
  const cjkMatches = text.match(/[぀-ヿ㐀-䶿一-鿿豈-﫿]/g);
  const cjkChars = cjkMatches === null ? 0 : cjkMatches.length;
  const otherChars = text.length - cjkChars;
  return Math.ceil(cjkChars / 1.5 + otherChars / 4);
}
// Separates a leading `---` frontmatter block from the Markdown body.
//
// Only flat `key: value` lines are understood (no nesting, lists or multi-line
// values); a value loses one leading and one trailing quote character.
// Lines that do not look like `key: value` are ignored.
//
// Returns { frontmatter, body }. When the text does not start with `---`, or
// no closing `---` line is found, frontmatter is {} and body is the input.
function stripFrontmatter(md) {
  if (!md.startsWith('---')) return { frontmatter: {}, body: md };
  const end = md.indexOf('\n---', 3);
  if (end === -1) return { frontmatter: {}, body: md };

  const fm = md.slice(3, end).trim();
  // Drop the single line break that follows the closing delimiter (LF or CRLF).
  const body = md.slice(end + 4).replace(/^\r?\n/, '');

  const frontmatter = {};
  // Split on LF and CRLF alike: `.` never matches `\r`, so a stray `\r` at the
  // end of a line would otherwise make the key/value pattern fail and the
  // entry would be dropped silently.
  for (const line of fm.split(/\r?\n/)) {
    const m = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
    // trimEnd first so a closing quote followed by spaces is still stripped.
    if (m) frontmatter[m[1]] = m[2].trimEnd().replace(/^["']|["']$/g, '');
  }
  return { frontmatter, body };
}
// Turns heading text into a URL-fragment style slug.
//
// Steps: lowercase, decompose accented characters (NFKD) and drop the
// combining marks so "é" becomes "e", remove everything that is not
// [a-z0-9], whitespace or "-", trim, then collapse each whitespace run to "-".
//
// Text with no ASCII letters or digits (e.g. a purely Japanese heading)
// produces an empty string; callers must handle that case.
function slugify(s) {
  return String(s)
    .toLowerCase()
    .normalize('NFKD')
    // Combining Diacritical Marks block, written as escapes so the range stays
    // visible and survives editors / normalisers that mangle raw combining characters.
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9\s-]/g, '')
    .trim()
    .replace(/\s+/g, '-');
}
// Split body at H2/H3 boundaries; keep code fences intact.
//
// Returns an array of { heading, level, anchor, lines }. Text before the first
// heading becomes a section with heading null, level 0 and anchor null. Each
// heading line is kept as the first entry of its section's `lines`.
//
// Fence handling: a fence opened with N backticks (or tildes) is closed only by
// a line made of at least N of the same character and nothing else. A shorter
// or different marker inside the fence is treated as content, so a ````md block
// that contains a ``` example does not leak "## ..." lines out as headings.
function splitByHeadings(body) {
  const sections = [];
  let openFence = null; // marker string of the fence we are inside, or null
  let current = { heading: null, level: 0, anchor: null, lines: [] };

  const pushCurrent = () => {
    if (current.lines.length || current.heading) sections.push(current);
  };

  for (const line of body.split('\n')) {
    const fence = line.match(/^(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1];
      // Everything after the marker; trim() also discards a trailing "\r".
      const rest = line.slice(marker.length).trim();
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest === ''
      ) {
        openFence = null;
      }
    }

    if (openFence === null) {
      const h = line.match(/^(#{2,3})\s+(.+?)\s*$/);
      if (h) {
        pushCurrent();
        const title = h[2].trim();
        current = {
          heading: title,
          level: h[1].length,
          anchor: slugify(title),
          lines: [line],
        };
        continue;
      }
    }
    current.lines.push(line);
  }

  pushCurrent();
  return sections;
}
// Further split a too-large section by paragraph boundaries, preserving fences.
//
// A section estimated at MAX_TOKENS or less is returned untouched as
// [section]. Otherwise its text is cut into blocks at blank lines, except that
// blank lines inside a fenced code block do not end a block, so a fence is
// never divided between two parts. Blocks are then packed greedily: the buffer
// is flushed once adding the next block would exceed TARGET_TOKENS.
//
// Every part is a copy of the section (same heading / level / anchor) with its
// own `lines`. A single block larger than the budget is emitted as-is.
function splitOversized(section) {
  const text = section.lines.join('\n');
  if (estimateTokens(text) <= MAX_TOKENS) return [section];

  // Pass 1: group lines into blank-line separated blocks, fence-aware.
  const blocks = [];
  let block = [];
  let openFence = null; // marker string of the fence we are inside, or null
  for (const line of text.split('\n')) {
    const fence = line.match(/^(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1];
      const rest = line.slice(marker.length).trim();
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest === ''
      ) {
        openFence = null;
      }
    }
    if (openFence === null && line.trim() === '') {
      if (block.length) {
        blocks.push(block.join('\n'));
        block = [];
      }
      continue;
    }
    block.push(line);
  }
  if (block.length) blocks.push(block.join('\n'));

  // Pass 2: pack blocks into parts of roughly TARGET_TOKENS.
  const parts = [];
  let buf = [];
  let bufTokens = 0;
  const flush = () => {
    parts.push({ ...section, lines: buf.join('\n\n').split('\n') });
    buf = [];
    bufTokens = 0;
  };
  for (const b of blocks) {
    const t = estimateTokens(b);
    if (buf.length && bufTokens + t > TARGET_TOKENS) flush();
    buf.push(b);
    bufTokens += t;
  }
  if (buf.length) flush();
  return parts;
}
// Merge tiny adjacent sections so chunks don't drop below MIN_TOKENS.
//
// A section estimated below MIN_TOKENS is appended (after a blank line) to the
// section before it, which keeps its own heading / level / anchor. The merge is
// skipped when it would push the receiving section past MAX_TOKENS; without
// that cap a page made of many short sections would collapse into one chunk of
// unbounded size. A small section that cannot be merged starts a new entry,
// and later small sections can then merge into that one.
//
// The first section is always kept as-is because there is nothing before it.
// Input sections are not mutated; the result holds copies.
function mergeSmall(sections) {
  const out = [];
  let prevTokens = 0; // running estimate for the last entry of `out`
  for (const s of sections) {
    const tokens = estimateTokens(s.lines.join('\n'));
    const prev = out[out.length - 1];
    if (prev && tokens < MIN_TOKENS && prevTokens + tokens <= MAX_TOKENS) {
      prev.lines = [...prev.lines, '', ...s.lines];
      prevTokens += tokens;
    } else {
      out.push({ ...s, lines: [...s.lines] });
      prevTokens = tokens;
    }
  }
  return out;
}
// Reads one Markdown/MDX file and returns its chunks.
//
// filePath: file to read (UTF-8).
// rootDir:  docs root; the first path segment below it is taken as the
//           language, the remainder becomes the page slug.
//
// URL mapping: <lang>/<rest>.md(x) → /<lang>/<rest>/, with a trailing "index"
// segment dropped, so <lang>/index.md → /<lang>/ and <lang>/a/index.md → /<lang>/a/.
//
// Pipeline: strip frontmatter → split at H2/H3 → split oversized sections →
// merge tiny ones → drop empty sections → build one record per section.
//
// Each record: { id, language, file, url, pageTitle, pageDescription,
// sectionTitle, headingLevel, tokens, content }. Ids are unique within the file.
export function chunkFile(filePath, rootDir) {
  const raw = fs.readFileSync(filePath, 'utf8');
  const { frontmatter, body } = stripFrontmatter(raw);

  const rel = path.relative(rootDir, filePath).replace(/\\/g, '/');
  const [lang, ...rest] = rel.split('/');
  const slug = rest
    .join('/')
    .replace(/\.(md|mdx)$/, '')
    // (^|\/) so that a bare "index" (the language root page) is removed too,
    // not only ".../index".
    .replace(/(^|\/)index$/, '');
  const baseUrl = '/' + (slug ? `${lang}/${slug}/` : `${lang}/`);

  let sections = splitByHeadings(body);
  sections = sections.flatMap(splitOversized);
  sections = mergeSmall(sections);

  const pageTitle = frontmatter.title || slug || 'Untitled';
  const pageDescription = frontmatter.description || '';
  const seenIds = new Set();

  return sections
    .filter((s) => s.lines.join('\n').trim().length > 0)
    .map((s, idx) => {
      const content = s.lines.join('\n').trim();
      const sectionTitle = s.heading || pageTitle;
      const url = s.anchor && s.heading ? `${baseUrl}#${s.anchor}` : baseUrl;

      // `||` rather than `??`: an anchor can be the empty string (a heading
      // with no ASCII letters or digits), which must fall back to the
      // positional id as well.
      const baseId = `${rel}#${s.anchor || `_${idx}`}`;
      // Pieces of a split section, or repeated headings, share an anchor;
      // suffix later occurrences so every id stays unique.
      let id = baseId;
      for (let n = 2; seenIds.has(id); n += 1) id = `${baseId}-${n}`;
      seenIds.add(id);

      return {
        id,
        language: lang,
        file: rel,
        url,
        pageTitle,
        pageDescription,
        sectionTitle,
        headingLevel: s.level || 1,
        tokens: estimateTokens(content),
        content,
      };
    });
}
// Recursively lists every .md / .mdx file below docsDir.
// Returns the joined paths sorted with the default string comparison.
export function walkDocs(docsDir) {
  const collect = (dir) =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) return collect(fullPath);
      return /\.(md|mdx)$/.test(entry.name) ? [fullPath] : [];
    });

  return collect(docsDir).sort();
}

View File

@@ -0,0 +1,138 @@
// Unified provider abstraction for chat and embeddings.
// Used by build-oracle-index.mjs (build time) and src/pages/api/oracle.ts (runtime).
// Fallbacks used by llmConfig() / embedConfig() when the matching environment
// variable is not set.
// Ollama base URL (env: OLLAMA_BASE_URL).
const DEFAULT_OLLAMA_URL = 'https://api.neuronetz.ai';
// Ollama chat model (env: OLLAMA_CHAT_MODEL).
const DEFAULT_OLLAMA_CHAT = 'qwen2.5-coder:14b';
// Ollama embedding model (env: OLLAMA_EMBED_MODEL).
const DEFAULT_OLLAMA_EMBED = 'nomic-embed-text';
// Anthropic chat model (env: ANTHROPIC_MODEL).
const DEFAULT_ANTHROPIC = 'claude-haiku-4-5-20251001';
// OpenAI embedding model (env: OPENAI_EMBED_MODEL).
const DEFAULT_OPENAI_EMBED = 'text-embedding-3-small';
// Resolves chat settings from the environment on every call.
// Unset variables fall back to 'ollama' / the DEFAULT_* constants; an empty
// string counts as set. hasAnthropicKey is true only for a non-empty
// ANTHROPIC_API_KEY.
export function llmConfig() {
  const {
    LLM_PROVIDER,
    OLLAMA_BASE_URL,
    OLLAMA_CHAT_MODEL,
    ANTHROPIC_MODEL,
    ANTHROPIC_API_KEY,
  } = process.env;

  return {
    provider: LLM_PROVIDER ?? 'ollama',
    ollamaUrl: OLLAMA_BASE_URL ?? DEFAULT_OLLAMA_URL,
    ollamaChatModel: OLLAMA_CHAT_MODEL ?? DEFAULT_OLLAMA_CHAT,
    anthropicModel: ANTHROPIC_MODEL ?? DEFAULT_ANTHROPIC,
    hasAnthropicKey: Boolean(ANTHROPIC_API_KEY),
  };
}
// Resolves embedding settings from the environment on every call.
// Unset variables fall back to 'ollama' / the DEFAULT_* constants; an empty
// string counts as set. hasOpenAIKey is true only for a non-empty
// OPENAI_API_KEY.
export function embedConfig() {
  const {
    EMBED_PROVIDER,
    OLLAMA_BASE_URL,
    OLLAMA_EMBED_MODEL,
    OPENAI_EMBED_MODEL,
    OPENAI_API_KEY,
  } = process.env;

  return {
    provider: EMBED_PROVIDER ?? 'ollama',
    ollamaUrl: OLLAMA_BASE_URL ?? DEFAULT_OLLAMA_URL,
    ollamaEmbedModel: OLLAMA_EMBED_MODEL ?? DEFAULT_OLLAMA_EMBED,
    openaiEmbedModel: OPENAI_EMBED_MODEL ?? DEFAULT_OPENAI_EMBED,
    hasOpenAIKey: Boolean(OPENAI_API_KEY),
  };
}
// ---------------------------------------------------------------------------
// Embeddings
// ---------------------------------------------------------------------------
// Embeds each input with Ollama's /api/embeddings endpoint.
// The endpoint accepts one prompt per request, so inputs are sent one after
// another and the vectors are returned in input order.
// Throws on a non-OK HTTP status or when the response has no `embedding` array.
async function ollamaEmbedBatch(baseUrl, model, inputs) {
  const embedOne = async (prompt) => {
    const endpoint = `${baseUrl.replace(/\/$/, '')}/api/embeddings`;
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, prompt }),
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Ollama embeddings ${response.status}: ${detail}`);
    }
    const payload = await response.json();
    if (!Array.isArray(payload.embedding)) {
      throw new Error(`Ollama embeddings: unexpected response: ${JSON.stringify(payload).slice(0, 200)}`);
    }
    return payload.embedding;
  };

  const vectors = [];
  for (const text of inputs) {
    // Deliberately sequential: one request in flight at a time.
    vectors.push(await embedOne(text));
  }
  return vectors;
}
// Embeds all inputs in a single OpenAI embeddings request.
// The `openai` package is imported on demand, so it is only loaded when this
// provider is actually used. The API key is read from OPENAI_API_KEY.
// Returns one embedding per entry of the response's `data` array.
async function openaiEmbedBatch(model, inputs) {
  const openaiModule = await import('openai');
  const OpenAI = openaiModule.default;
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  const response = await client.embeddings.create({ model, input: inputs });
  return response.data.map(({ embedding }) => embedding);
}
// Embeds one string or an array of strings with the configured provider.
// inputs: a single value is wrapped into a one-element list.
// opts.provider: optional override of the provider from embedConfig().
// Resolves to whatever the provider-specific batch function returns.
// Throws for an unknown provider, or for 'openai' without OPENAI_API_KEY.
export async function embed(inputs, opts = {}) {
  const cfg = embedConfig();
  const provider = opts.provider ?? cfg.provider;
  const list = Array.isArray(inputs) ? inputs : [inputs];

  switch (provider) {
    case 'ollama':
      return ollamaEmbedBatch(cfg.ollamaUrl, cfg.ollamaEmbedModel, list);
    case 'openai':
      if (!cfg.hasOpenAIKey) throw new Error('OPENAI_API_KEY not set.');
      return openaiEmbedBatch(cfg.openaiEmbedModel, list);
    default:
      throw new Error(`Unknown EMBED_PROVIDER: ${provider}`);
  }
}
// ---------------------------------------------------------------------------
// Chat
// ---------------------------------------------------------------------------
// Keeps only user / assistant turns, reduced to { role, content }.
function conversationTurns(messages) {
  const turns = [];
  for (const m of messages) {
    if (m.role === 'user' || m.role === 'assistant') {
      turns.push({ role: m.role, content: m.content });
    }
  }
  return turns;
}

// Non-streaming chat completion against Ollama's /api/chat endpoint.
async function chatWithOllama(cfg, system, messages, maxTokens) {
  const endpoint = `${cfg.ollamaUrl.replace(/\/$/, '')}/api/chat`;
  const turns = conversationTurns(messages);
  // The system prompt, when given, goes first as a 'system' message.
  const ollamaMessages = system ? [{ role: 'system', content: system }, ...turns] : turns;

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      model: cfg.ollamaChatModel,
      messages: ollamaMessages,
      stream: false,
      options: { num_predict: maxTokens, temperature: 0.4 },
    }),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`Ollama chat ${response.status}: ${detail}`);
  }

  const data = await response.json();
  return {
    text: data.message?.content ?? '',
    model: cfg.ollamaChatModel,
    provider: 'ollama',
  };
}

// Chat completion through the Anthropic SDK, which is imported on demand.
async function chatWithAnthropic(cfg, system, messages, maxTokens) {
  if (!cfg.hasAnthropicKey) throw new Error('ANTHROPIC_API_KEY not set.');

  const sdk = await import('@anthropic-ai/sdk');
  const Anthropic = sdk.default;
  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

  const turns = conversationTurns(messages);
  // With no usable turns, a single empty user message is sent instead of an empty list.
  const apiMessages = turns.length ? turns : [{ role: 'user', content: '' }];

  const completion = await client.messages.create({
    model: cfg.anthropicModel,
    max_tokens: maxTokens,
    system,
    messages: apiMessages,
  });

  // Join the text parts of the reply; other part types are skipped.
  const textParts = [];
  for (const part of completion.content) {
    if (part.type === 'text') textParts.push(part.text);
  }
  return { text: textParts.join('\n'), model: cfg.anthropicModel, provider: 'anthropic' };
}

// Sends a conversation to the provider selected by llmConfig().
// system:    optional system prompt.
// messages:  array of { role, content }; only 'user' and 'assistant' roles are forwarded.
// maxTokens: generation limit (default 800).
// Resolves to { text, model, provider }.
// Throws for an unknown provider, a missing Anthropic key, or a non-OK Ollama response.
export async function chat({ system, messages, maxTokens = 800 }) {
  const cfg = llmConfig();
  switch (cfg.provider) {
    case 'ollama':
      return chatWithOllama(cfg, system, messages, maxTokens);
    case 'anthropic':
      return chatWithAnthropic(cfg, system, messages, maxTokens);
    default:
      throw new Error(`Unknown LLM_PROVIDER: ${cfg.provider}`);
  }
}