The Ollama URL was leaking via:
- prose in /en/, /de/, /ja/, /es/, /fr/ docs (oracle, deployment,
local-testing, ai/module/{overview,embed,training})
- code blocks teaching users to curl the host directly
- .env.example, Dockerfile, docker-compose.yml defaults
- providers.mjs, translate-docs.mjs, build-oracle-index.mjs defaults
- LandingScripts.astro comment
- lora-runbook.md prose + SSH host
- the GET handler at /api/oracle which echoed `ollamaUrl` back to public callers
- the "Oracle is silent" fallback message at /api/oracle POST
Replacements:
- prose: "neuronetz.ai" → "your Ollama instance"
- example URLs in code blocks: https://api.neuronetz.ai → https://your-ollama-host.example
- code-level defaults: https://api.neuronetz.ai → http://localhost:11434 (Ollama's standard local port)
- GET /api/oracle: dropped the `ollamaUrl` field; provider + model still exposed
- runbook SSH host: neuronetz@cloud.neuronetz.ai → <gpu-user>@<gpu-host>
Production chat is unaffected: docs/.env (gitignored) on the production
host still pins OLLAMA_BASE_URL=https://api.neuronetz.ai. The only
change in the running container is that the GET handler no longer
echoes the URL.
analytics.neuronetz.ai (Umami tracking) is intentionally left intact —
it's a public, brand-owned subdomain meant to be visible.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
118 lines
3.8 KiB
JavaScript
118 lines
3.8 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Build a vector index over the docs for the in-site Oracle (RAG).
|
|
*
|
|
* node scripts/build-oracle-index.mjs
|
|
*
|
|
* Defaults to Ollama at http://localhost:11434 with model nomic-embed-text.
|
|
* Override via env:
|
|
* OLLAMA_BASE_URL=...
|
|
* OLLAMA_EMBED_MODEL=... (e.g. nomic-embed-text, mxbai-embed-large)
|
|
* EMBED_PROVIDER=openai (uses OpenAI embeddings via OPENAI_API_KEY)
|
|
*
|
|
* Output: public/oracle-index.json
|
|
*
|
|
* Soft-fail behaviour: if the embedding provider is unreachable or the model
|
|
* is missing, an empty index is written and the runtime endpoint will operate
|
|
* in chat-only (no-RAG) mode.
|
|
*/
|
|
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { chunkFile, walkDocs } from './lib/chunk.mjs';
|
|
import { embed, embedConfig } from './lib/providers.mjs';
|
|
|
|
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Root of the documentation content that gets walked and chunked.
const DOCS_DIR = path.join(__dirname, '..', 'src', 'content', 'docs');
// Destination of the generated index file.
const OUT_FILE = path.join(__dirname, '..', 'public', 'oracle-index.json');
// Number of chunks handed to embed() per call.
const BATCH = 16;
|
|
|
|
/**
 * Encode a vector as base64 of its 32-bit float representation.
 *
 * The input is first converted to a Float32Array (4 bytes per element, in
 * the platform's native byte order); the bytes of that array's backing
 * buffer are then base64-encoded.
 *
 * @param {ArrayLike<number>} f32 - Vector components.
 * @returns {string} Base64 text of the packed float32 bytes.
 */
function embedToBase64(f32) {
  const packed = new Float32Array(f32);
  const bytes = Buffer.from(packed.buffer);
  return bytes.toString('base64');
}
|
|
|
|
/**
 * Build the docs vector index and write it to OUT_FILE.
 *
 * Reads the embedding configuration, walks DOCS_DIR, splits every file into
 * chunks, embeds the chunks BATCH at a time, and serialises the chunk
 * metadata together with base64-encoded vectors as a single JSON document.
 *
 * Soft-fail paths (an empty index is written and the function returns):
 *   - the provider is "openai" but no API key is configured;
 *   - embed() throws, or yields a different number of vectors than inputs.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cfg = embedConfig();
  console.log(`Embedding provider: ${cfg.provider}`);
  if (cfg.provider === 'ollama') {
    console.log(` Ollama: ${cfg.ollamaUrl}`);
    console.log(` Model: ${cfg.ollamaEmbedModel}`);
  } else if (cfg.provider === 'openai') {
    console.log(` OpenAI model: ${cfg.openaiEmbedModel}`);
    if (!cfg.hasOpenAIKey) {
      console.warn(' ⚠ OPENAI_API_KEY missing — writing empty index (chat-only mode).');
      writeEmpty('openai-key-missing');
      return;
    }
  }

  console.log(`Walking ${DOCS_DIR}…`);
  const files = walkDocs(DOCS_DIR);
  console.log(`Found ${files.length} markdown files.`);

  const chunks = files.flatMap((f) => chunkFile(f, DOCS_DIR));
  console.log(`Produced ${chunks.length} chunks.`);

  const embeddings = [];
  try {
    for (let i = 0; i < chunks.length; i += BATCH) {
      const batch = chunks.slice(i, i + BATCH);
      const inputs = batch.map((c) => `${c.pageTitle}\n${c.sectionTitle}\n\n${c.content}`);
      const vecs = await embed(inputs);

      const before = embeddings.length;
      for (const v of vecs) embeddings.push(v);

      // chunks[i] and embeddings[i] are written out as parallel arrays, so a
      // short or long response would silently pair chunks with the wrong
      // vectors. Treat it as an embedding failure instead.
      const received = embeddings.length - before;
      if (received !== batch.length) {
        throw new Error(
          `embedding provider returned ${received} vectors for ${batch.length} inputs`
        );
      }

      process.stdout.write(`\r embedded ${embeddings.length}/${chunks.length}`);
    }
    process.stdout.write('\n');
  } catch (err) {
    console.error(`\n⚠ Embedding failed: ${err.message}`);
    console.error(` → writing empty index, Oracle will run in chat-only (no-RAG) mode.`);
    if (cfg.provider === 'ollama') {
      console.error(` → To fix: pull the embedding model on your Ollama server:`);
      console.error(` curl ${cfg.ollamaUrl}/api/pull -d '{"name":"${cfg.ollamaEmbedModel}"}'`);
    }
    writeEmpty(err.message);
    return;
  }

  // Vector dimensionality is taken from the first embedding (0 when there are none).
  const dim = embeddings[0]?.length ?? 0;
  const out = {
    provider: cfg.provider,
    model: cfg.provider === 'ollama' ? cfg.ollamaEmbedModel : cfg.openaiEmbedModel,
    dim,
    builtAt: new Date().toISOString(),
    chunks: chunks.map((c) => ({
      id: c.id,
      url: c.url,
      pageTitle: c.pageTitle,
      sectionTitle: c.sectionTitle,
      language: c.language,
      content: c.content,
    })),
    embeddings: embeddings.map(embedToBase64),
  };

  // Serialise once: the index can be large, and the same string is needed
  // both for the file and for the size report.
  const json = JSON.stringify(out);
  fs.mkdirSync(path.dirname(OUT_FILE), { recursive: true });
  fs.writeFileSync(OUT_FILE, json);

  // Report bytes on disk (UTF-8), not UTF-16 code units; the two differ as
  // soon as the content contains non-ASCII text.
  const kb = (Buffer.byteLength(json, 'utf8') / 1024).toFixed(1);
  console.log(`✔ Wrote ${OUT_FILE} (${kb} KB, dim=${dim})`);
}
|
|
|
|
/**
 * Write a placeholder index to OUT_FILE containing no chunks or embeddings.
 *
 * The output directory is created if it does not exist. The file is
 * pretty-printed with a two-space indent.
 *
 * @param {string} reason - Stored under the `reason` key of the placeholder.
 */
function writeEmpty(reason) {
  const placeholder = {
    provider: null,
    model: null,
    dim: 0,
    builtAt: null,
    reason,
    chunks: [],
    embeddings: [],
  };
  const outDir = path.dirname(OUT_FILE);
  fs.mkdirSync(outDir, { recursive: true });

  const body = JSON.stringify(placeholder, null, 2);
  fs.writeFileSync(OUT_FILE, body);
}
|
|
|
|
// Entry point. Anything main() did not soft-fail on itself lands here; the
// process must still exit 0 so a failed index build never fails the build.
main().catch((e) => {
  console.error(e);
  try {
    // `e` may be a non-Error value (including null/undefined), so do not
    // assume it has a `.message`.
    writeEmpty(e?.message ?? String(e));
  } catch (writeErr) {
    // The original failure may itself have been a write to the output path,
    // in which case this write fails too. Log it rather than letting it turn
    // into an unhandled rejection with a non-zero exit code.
    console.error(`⚠ Could not write empty index: ${writeErr?.message ?? writeErr}`);
  }
  process.exit(0); // soft-fail so Docker build doesn't break the site
});
|