-- =============================================================================
-- ai_rag_chunk
--
-- One row per text chunk in a RAG collection. The embedding column stores
-- the vector as a base64-packed Float32Array (4 bytes/dim). Cosine search
-- is done in PHP after fetching all chunks for the collection — fine up
-- to ~10k chunks per collection. For larger sets, drop in a vector index
-- extension (pgvector, MySQL HeatWave LakeHouse vector) and update
-- Rag::search() accordingly.
--
-- Layout note: every "--" comment runs to the end of its physical line, so
-- each comment below must stay on its own line (or at the very end of one).
-- Joining these lines together comments out the DDL that follows.
-- =============================================================================
CREATE TABLE IF NOT EXISTS ai_rag_chunk (
    -- Surrogate primary key.
    ai_rag_chunk_id            INT(11)      NOT NULL AUTO_INCREMENT,

    -- Owning collection; chunk rows are removed with it (see the FK below).
    ai_rag_chunk_collection_id INT(11)      NOT NULL,

    -- The chunk text itself.
    ai_rag_chunk_text          MEDIUMTEXT   NOT NULL,

    -- Optional JSON metadata attached to the chunk.
    ai_rag_chunk_metadata      JSON         NULL,

    -- base64-packed Float32Array
    ai_rag_chunk_embedding     LONGTEXT     NOT NULL,

    -- Token count for the chunk; defaults to 0 when not supplied.
    ai_rag_chunk_token_count   INT(11)      NOT NULL DEFAULT 0,

    -- denormalised from metadata for indexing
    ai_rag_chunk_source        VARCHAR(512) NULL,

    -- Row creation time, filled in by the server on insert.
    ai_rag_chunk_created_at    TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (ai_rag_chunk_id),

    -- Supports fetching all chunks of one collection and backs the FK.
    KEY ai_rag_chunk_collection_idx (ai_rag_chunk_collection_id),

    -- NOTE(review): a full-length index on VARCHAR(512) utf8mb4 needs up to
    -- 2048 bytes per key. That fits InnoDB DYNAMIC/COMPRESSED row formats but
    -- exceeds the 767-byte limit of COMPACT/REDUNDANT — confirm the target
    -- server's default row format before deploying.
    KEY ai_rag_chunk_source_idx (ai_rag_chunk_source),

    -- Deleting a collection deletes its chunks.
    CONSTRAINT ai_rag_chunk_collection_fk
        FOREIGN KEY (ai_rag_chunk_collection_id)
        REFERENCES ai_rag_collection (ai_rag_collection_id)
        ON DELETE CASCADE
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;