This is the snapshot the production landing site (nibiru-framework.com) is deployed from. Brings together the recent splash + docs migration to the v4 "Cosmos" design system, the new in-framework AI module, and the framework groundwork that backs the framework-reference extraction. What lands: - docs/: Astro + Starlight site with the v4 dark cosmic palette, GalaxyHero canvas constellation, Mission Control chat (wired to /api/oracle → api.neuronetz.ai via providers.mjs Ollama), 5-panel MMVC stage (Model · AI · Module · Controller · View), translated EN/DE/JA/ES/FR content, PWA + sitemap + llms.txt + Umami analytics. - docs/design-system/: canonical mockup bundle (source/index-v2.html for splash, source/docs-system.html + preview/ for docs, SPEC.md, tokens). - docs/scripts/extraction/framework-reference-v2.md: deep framework reference (~1.6k lines, file:line citations, every public factory and idiom) — basis for the LoRA training corpus. - application/module/ai/: AI module with chat / embed / RAG / agent plugins, plus pdoQuery / httpGet / fileRead tools and Modelfile + smoke-test in training/. - application/module/users/: user / ACL / form-factory traits used as the reference plugin pattern for the framework docs. - application/settings/config/database/: schema + seed migrations including the AI module tables (200–203). - Form factory + autogenerator changes the framework-reference-v2 covers. Production secrets stay out: docs/.env, settings.production.ini and ai.production.ini are all gitignored (.example files are in tree). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
84 lines
2.3 KiB
PHP
84 lines
2.3 KiB
PHP
<?php
|
||
namespace Nibiru\Module\Ai\Plugins;
|
||
|
||
/**
 * Embeddings — turn text into vectors through the Ollama client.
 *
 * This class only delegates to {@see Ollama}; it implements no provider
 * fallback, batching or throttling of its own.
 *
 *   $vec  = $ai->embed()->one('hello world');                  // float[]
 *   $vecs = $ai->embed()->batch(['a', 'b', 'c']);              // float[][]
 *   $sim  = \Nibiru\Module\Ai\Plugins\Embed::cosine($a, $b);   // -1..1
 */
class Embed
{
    /** Module configuration; `embed_model` is the only property read here. */
    protected \stdClass $cfg;

    /** Client whose embed() method produces the raw embedding response. */
    protected Ollama $ollama;

    /**
     * @param \stdClass $cfg Configuration object; also handed to the Ollama client.
     */
    public function __construct(\stdClass $cfg)
    {
        $this->cfg    = $cfg;
        $this->ollama = new Ollama($cfg);
    }

    /**
     * Embed a single string.
     *
     * The model name is taken from `$cfg->embed_model` and defaults to
     * `nomic-embed-text` when that property is unset or null.
     *
     * @param string $text Text to embed.
     * @return float[] Flat vector with every component cast to float.
     * @throws \RuntimeException When the response holds no array under `embedding`.
     */
    public function one(string $text): array
    {
        $model = $this->cfg->embed_model ?? 'nomic-embed-text';
        $res   = $this->ollama->embed($model, $text);

        if (!isset($res['embedding']) || !is_array($res['embedding'])) {
            throw new \RuntimeException('Ollama embed: no `embedding` in response.');
        }

        return array_map('floatval', $res['embedding']);
    }

    /**
     * Embed many strings sequentially, in input order.
     *
     * Every element is cast to string and passed to one(), so there is one
     * embed call per input. Nothing here throttles or retries, and an exception
     * from one() is not caught: the first failure aborts the whole batch.
     *
     * @param array $texts Values to embed; their keys are discarded.
     * @return float[][] One vector per input, indexed from 0.
     * @throws \RuntimeException Propagated from one().
     */
    public function batch(array $texts): array
    {
        $out = [];
        foreach ($texts as $t) {
            $out[] = $this->one((string) $t);
        }

        return $out;
    }

    /**
     * Cosine similarity of two vectors.
     *
     * Both inputs are re-indexed first, so arrays with gaps or string keys
     * (for instance the result of array_filter()) are compared positionally
     * instead of triggering undefined-key warnings and a bogus score. When the
     * lengths differ, only the leading min(count($a), count($b)) components
     * take part in the calculation.
     *
     * @param array $a Numeric components of the first vector.
     * @param array $b Numeric components of the second vector.
     * @return float Nominally within [-1, 1]; 0.0 when either vector has zero
     *               magnitude, which includes empty input.
     */
    public static function cosine(array $a, array $b): float
    {
        $a = array_values($a);
        $b = array_values($b);

        $dot = 0.0;
        $na  = 0.0;
        $nb  = 0.0;
        $len = min(count($a), count($b));

        for ($i = 0; $i < $len; $i++) {
            $dot += $a[$i] * $b[$i];
            $na  += $a[$i] * $a[$i];
            $nb  += $b[$i] * $b[$i];
        }

        $denom = sqrt($na) * sqrt($nb);

        // A zero-magnitude vector has no direction; report "no similarity"
        // rather than dividing by zero.
        return $denom === 0.0 ? 0.0 : $dot / $denom;
    }

    /**
     * Pack a vector to a base64 string for compact storage in JSON.
     *
     * Components are written with pack()'s `f` code, which the PHP manual
     * describes as a float of machine-dependent size and representation, so
     * the encoded form is only guaranteed to round-trip on hosts sharing that
     * float layout. Array keys are ignored; values are packed in iteration
     * order.
     *
     * @param array $vec Numeric components.
     * @return string Base64 text of the packed floats.
     */
    public static function pack(array $vec): string
    {
        // array_values() keeps string-keyed input from being spread as named
        // arguments, which pack() would reject.
        return base64_encode(pack('f*', ...array_values($vec)));
    }

    /**
     * Inverse of pack().
     *
     * @param string $b64 Base64 text as produced by pack().
     * @return float[] Decoded components indexed from 0, or an empty array when
     *                 the input is not valid base64 or cannot be unpacked.
     */
    public static function unpack(string $b64): array
    {
        $bin = base64_decode($b64, true);
        if ($bin === false) {
            return [];
        }

        $floats = unpack('f*', $bin);
        if ($floats === false) {
            return [];
        }

        // unpack() numbers its result from 1; callers expect a 0-based list.
        return array_values($floats);
    }
}
|