scaffold: project skeleton, schema, healthz/readyz, CI
Initial project structure for neuronetz-gateway per scope-docs/SPEC.md:

- Python 3.12 / FastAPI / SQLAlchemy 2.0 (async) / Redis / Postgres stack managed by uv. Multi-stage non-root Dockerfile, prod + dev compose files (ollama service is NEVER published in either), Caddyfile + systemd unit, justfile, GitHub Actions CI (ruff, mypy --strict, pytest, bandit, pip-audit).
- Pydantic-Settings config covering every env var from SPEC §7, including the MODEL_DISCOVERY_* keys for the dynamic-discovery feature (§4.6).
- Alembic 0001_initial creates the full gateway schema (8 tables, 3 enums, notify_key_revoked() trigger), including allow_all_models on tenant_limits and key_limits for the per-tenant auto-grant toggle.
- Working /healthz, /readyz (fail-closed when deps unreachable), and a Prometheus /metrics stub. Sanitizing error handlers that attach X-Request-ID to every response and never leak upstream internals.
- SPEC + AGENT_PROMPT included under scope-docs/ (source of truth).
This commit is contained in:
44
.dockerignore
Normal file
44
.dockerignore
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Keep the build context lean and never ship secrets into an image layer.
|
||||||
|
|
||||||
|
# Secrets / local env
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
|
||||||
|
# VCS & CI
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
.github
|
||||||
|
|
||||||
|
# Python caches & build artefacts
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*.egg-info/
|
||||||
|
.eggs/
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
htmlcov/
|
||||||
|
coverage.xml
|
||||||
|
|
||||||
|
# Tests & docs are not needed in the runtime image
|
||||||
|
tests/
|
||||||
|
docs/
|
||||||
|
scope-docs/
|
||||||
|
|
||||||
|
# Editor / OS cruft
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# Compose / ops files don't belong in the image
|
||||||
|
docker-compose*.yml
|
||||||
|
ops/
|
||||||
|
# NOTE: README.md and LICENSE are intentionally NOT ignored — the build backend
|
||||||
|
# (hatchling) reads `readme`/`license` from pyproject.toml at build time.
|
||||||
63
.env.example
Normal file
63
.env.example
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# neuronetz-gateway — environment configuration (SPEC §7).
|
||||||
|
#
|
||||||
|
# Copy to `.env` and adjust. `.env` is gitignored and MUST NOT be committed.
|
||||||
|
# All values here are SAFE EXAMPLES — change every secret before any real deploy.
|
||||||
|
|
||||||
|
# ──────────────────────────── Service ────────────────────────────
|
||||||
|
GATEWAY_BIND_HOST=0.0.0.0
|
||||||
|
GATEWAY_BIND_PORT=8080
|
||||||
|
GATEWAY_LOG_LEVEL=INFO
|
||||||
|
# Log format: json|console
GATEWAY_LOG_FORMAT=json
|
||||||
|
GATEWAY_REQUEST_ID_HEADER=X-Request-ID
|
||||||
|
# Trusted proxies (for X-Forwarded-For handling)
GATEWAY_TRUSTED_PROXIES=127.0.0.1,caddy
|
||||||
|
|
||||||
|
# ──────────────────────────── Upstream ───────────────────────────
|
||||||
|
OLLAMA_BASE_URL=http://ollama:11434
|
||||||
|
OLLAMA_CONNECT_TIMEOUT_S=5
|
||||||
|
OLLAMA_READ_TIMEOUT_S=600
|
||||||
|
OLLAMA_MAX_CONNECTIONS=64
|
||||||
|
|
||||||
|
# ──────────────────────── Model discovery (§4.6) ─────────────────
|
||||||
|
MODEL_DISCOVERY_REFRESH_S=60
|
||||||
|
MODEL_DISCOVERY_CACHE_TTL_S=120
|
||||||
|
|
||||||
|
# ──────────────────────────── Database ───────────────────────────
|
||||||
|
# Compose builds DATABASE_URL from the POSTGRES_* parts below, but the gateway
|
||||||
|
# also accepts a full DATABASE_URL directly.
|
||||||
|
DATABASE_URL=postgresql+asyncpg://gateway:changeme@postgres:5432/neuronetz
|
||||||
|
DATABASE_POOL_SIZE=10
|
||||||
|
DATABASE_POOL_OVERFLOW=20
|
||||||
|
|
||||||
|
# Postgres container credentials (consumed by docker-compose).
|
||||||
|
POSTGRES_USER=gateway
|
||||||
|
POSTGRES_PASSWORD=changeme
|
||||||
|
POSTGRES_DB=neuronetz
|
||||||
|
|
||||||
|
# ──────────────────────────── Redis ──────────────────────────────
|
||||||
|
REDIS_URL=redis://redis:6379/0
|
||||||
|
REDIS_KEY_CACHE_TTL_S=60
|
||||||
|
|
||||||
|
# ────────────────── Limits (defaults; DB overrides) ──────────────
|
||||||
|
DEFAULT_RPM=60
|
||||||
|
DEFAULT_TPM=100000
|
||||||
|
DEFAULT_CONCURRENT=8
|
||||||
|
MAX_REQUEST_BODY_BYTES=262144
|
||||||
|
MAX_NUM_PREDICT=4096
|
||||||
|
|
||||||
|
# ──────────────────────────── Security ───────────────────────────
|
||||||
|
ARGON2_TIME_COST=3
|
||||||
|
ARGON2_MEMORY_COST_KIB=65536
|
||||||
|
ARGON2_PARALLELISM=4
|
||||||
|
AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN=20
|
||||||
|
|
||||||
|
# ──────────────────────────── Audit ──────────────────────────────
|
||||||
|
AUDIT_BUFFER_SIZE=1000
|
||||||
|
PROMPT_LOG_DEFAULT_RETENTION_DAYS=30
|
||||||
|
AUDIT_LOG_DEFAULT_RETENTION_DAYS=365
|
||||||
|
|
||||||
|
# ──────────────── Playground / API docs (prod-safe: OFF) ─────────
|
||||||
|
# Serve the playground HTML (owned by the docs agent) at /playground.
|
||||||
|
PLAYGROUND_ENABLED=false
|
||||||
|
PLAYGROUND_FILE=/app/playground/index.html
|
||||||
|
# Enable FastAPI's /docs + /openapi.json (default off in production).
|
||||||
|
DOCS_ENABLED=false
|
||||||
108
.github/workflows/ci.yml
vendored
Normal file
108
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
name: CI
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: ["**"]
|
||||||
|
pull_request:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
# Cancel superseded runs on the same ref.
|
||||||
|
concurrency:
|
||||||
|
group: ci-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
|
env:
|
||||||
|
PYTHON_VERSION: "3.12"
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
lint:
|
||||||
|
name: ruff
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v3
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: uv sync --extra dev
|
||||||
|
- name: ruff check
|
||||||
|
run: uv run ruff check .
|
||||||
|
|
||||||
|
typecheck:
|
||||||
|
name: mypy --strict
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v3
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: uv sync --extra dev
|
||||||
|
- name: mypy
|
||||||
|
run: uv run mypy --strict src
|
||||||
|
|
||||||
|
test:
|
||||||
|
name: pytest
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v3
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: uv sync --extra dev
|
||||||
|
# Phase 1: an empty/placeholder suite must pass. pytest exits 5 when it
|
||||||
|
# collects no tests; we treat that as success this phase. Coverage is
|
||||||
|
# reported but not gated yet (no --cov-fail-under until later phases).
|
||||||
|
- name: pytest
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
set +e
|
||||||
|
uv run pytest --cov=neuronetz_gateway --cov-report=term-missing
|
||||||
|
code=$?
|
||||||
|
if [ "$code" -eq 5 ]; then
|
||||||
|
echo "::notice::No tests collected (Phase 1) — treating as success."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
exit "$code"
|
||||||
|
|
||||||
|
bandit:
|
||||||
|
name: bandit
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v3
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: uv sync --extra dev
|
||||||
|
- name: bandit
|
||||||
|
run: uv run bandit -q -r src
|
||||||
|
|
||||||
|
pip-audit:
|
||||||
|
name: pip-audit
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
- name: Install uv
|
||||||
|
uses: astral-sh/setup-uv@v3
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
- name: Set up Python
|
||||||
|
run: uv python install ${{ env.PYTHON_VERSION }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: uv sync --extra dev
|
||||||
|
- name: pip-audit
|
||||||
|
run: uv run pip-audit
|
||||||
40
.gitignore
vendored
Normal file
40
.gitignore
vendored
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# Secrets — NEVER commit. Only .env.example is tracked.
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
!.env.example
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.egg-info/
|
||||||
|
.eggs/
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Virtualenvs / uv
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# Type / lint / test caches
|
||||||
|
.mypy_cache/
|
||||||
|
.ruff_cache/
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
htmlcov/
|
||||||
|
coverage.xml
|
||||||
|
.tox/
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
*.pid
|
||||||
|
|
||||||
|
# Editor / OS
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*~
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
97
Dockerfile
Normal file
97
Dockerfile
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
# syntax=docker/dockerfile:1.7
|
||||||
|
#
|
||||||
|
# neuronetz-gateway — multi-stage image.
|
||||||
|
#
|
||||||
|
# builder stage : installs dependencies into a self-contained virtualenv using uv.
|
||||||
|
# runtime stage : copies the venv + source, drops to a NON-ROOT user, contains
|
||||||
|
# no build tools, and runs `python -m neuronetz_gateway`.
|
||||||
|
#
|
||||||
|
# uv is copied from the official distroless image, so the build never has to
# `pip install uv`. Dependencies come from pyproject.toml (+ uv.lock if present).
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Stage 1 — builder
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
FROM python:3.12-slim AS builder
|
||||||
|
|
||||||
|
# Bring in the `uv` binary from its official image.
|
||||||
|
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||||
|
|
||||||
|
ENV UV_LINK_MODE=copy \
|
||||||
|
UV_COMPILE_BYTECODE=1 \
|
||||||
|
UV_PYTHON_DOWNLOADS=never \
|
||||||
|
# Create the project venv at a stable, copyable location.
|
||||||
|
VIRTUAL_ENV=/opt/venv \
|
||||||
|
PATH=/opt/venv/bin:$PATH
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
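# Create the target virtualenv up front so uv installs into it.
# NOTE(review): `uv sync` installs into the project environment (`.venv` unless
# UV_PROJECT_ENVIRONMENT is set), not $VIRTUAL_ENV — confirm the lockfile branch
# below actually populates /opt/venv.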
|
||||||
|
RUN uv venv /opt/venv
|
||||||
|
|
||||||
|
# Dependency layer: copy only the manifest(s) first for better caching.
|
||||||
|
# uv.lock is optional in Phase 1 — the wildcard makes COPY succeed either way.
|
||||||
|
COPY pyproject.toml ./
|
||||||
|
COPY uv.loc[k] ./
|
||||||
|
|
||||||
|
# Install dependencies. If a lockfile is present `uv sync` honours it; otherwise
|
||||||
|
# we fall back to resolving straight from pyproject.toml. Either way the build
|
||||||
|
# does NOT fail when the lock is absent.
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
if [ -f uv.lock ]; then \
|
||||||
|
uv sync --frozen --no-install-project --no-dev ; \
|
||||||
|
else \
|
||||||
|
uv pip install --python /opt/venv/bin/python -r pyproject.toml ; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Now copy the application source and install the project itself into the venv.
|
||||||
|
# README.md + LICENSE are required by the build backend (pyproject `readme`/license).
|
||||||
|
COPY README.md LICENSE ./
|
||||||
|
COPY src ./src
|
||||||
|
COPY alembi[c] ./alembic
|
||||||
|
COPY alembic.in[i] ./
|
||||||
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
uv pip install --python /opt/venv/bin/python --no-deps .
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
# Stage 2 — runtime
|
||||||
|
# ----------------------------------------------------------------------------
|
||||||
|
FROM python:3.12-slim AS runtime
|
||||||
|
|
||||||
|
# Runtime-only OS packages: curl is used by the HEALTHCHECK below and by the
# compose healthcheck.
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Non-root user.
|
||||||
|
RUN groupadd --system --gid 10001 gateway \
|
||||||
|
&& useradd --system --uid 10001 --gid gateway --home-dir /app --shell /usr/sbin/nologin gateway
|
||||||
|
|
||||||
|
ENV VIRTUAL_ENV=/opt/venv \
|
||||||
|
PATH=/opt/venv/bin:$PATH \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
GATEWAY_BIND_HOST=0.0.0.0 \
|
||||||
|
GATEWAY_BIND_PORT=8080
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy the fully-populated virtualenv and the application from the builder.
|
||||||
|
COPY --from=builder /opt/venv /opt/venv
|
||||||
|
COPY --from=builder /app/src ./src
|
||||||
|
# alembic assets are optional during early scaffolding; copy if present.
|
||||||
|
COPY --from=builder /app/alembi[c] ./alembic
|
||||||
|
COPY --from=builder /app/alembic.in[i] ./
|
||||||
|
|
||||||
|
# Drop privileges. No build tools are present in this stage.
|
||||||
|
USER gateway
|
||||||
|
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
# Liveness probe target lives at /healthz (see SPEC §6.4).
|
||||||
|
HEALTHCHECK --interval=15s --timeout=3s --start-period=20s --retries=5 \
|
||||||
|
CMD curl -fsS "http://127.0.0.1:${GATEWAY_BIND_PORT}/healthz" || exit 1
|
||||||
|
|
||||||
|
# Default command: run the server. Compose overrides this in dev to run
|
||||||
|
# `alembic upgrade head` first (see docker-compose.dev.yml).
|
||||||
|
CMD ["python", "-m", "neuronetz_gateway"]
|
||||||
202
LICENSE
Normal file
202
LICENSE
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
|
||||||
|
Apache License
|
||||||
|
Version 2.0, January 2004
|
||||||
|
http://www.apache.org/licenses/
|
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction,
|
||||||
|
and distribution as defined by Sections 1 through 9 of this document.
|
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by
|
||||||
|
the copyright owner that is granting the License.
|
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all
|
||||||
|
other entities that control, are controlled by, or are under common
|
||||||
|
control with that entity. For the purposes of this definition,
|
||||||
|
"control" means (i) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity
|
||||||
|
exercising permissions granted by this License.
|
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications,
|
||||||
|
including but not limited to software source code, documentation
|
||||||
|
source, and configuration files.
|
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical
|
||||||
|
transformation or translation of a Source form, including but
|
||||||
|
not limited to compiled object code, generated documentation,
|
||||||
|
and conversions to other media types.
|
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or
|
||||||
|
Object form, made available under the License, as indicated by a
|
||||||
|
copyright notice that is included in or attached to the work
|
||||||
|
(an example is provided in the Appendix below).
|
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object
|
||||||
|
form, that is based on (or derived from) the Work and for which the
|
||||||
|
editorial revisions, annotations, elaborations, or other modifications
|
||||||
|
represent, as a whole, an original work of authorship. For the purposes
|
||||||
|
of this License, Derivative Works shall not include works that remain
|
||||||
|
separable from, or merely link (or bind by name) to the interfaces of,
|
||||||
|
the Work and Derivative Works thereof.
|
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including
|
||||||
|
the original version of the Work and any modifications or additions
|
||||||
|
to that Work or Derivative Works thereof, that is intentionally
|
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of
|
||||||
|
the copyright owner. For the purposes of this definition, "submitted"
|
||||||
|
means any form of electronic, verbal, or written communication sent
|
||||||
|
to the Licensor or its representatives, including but not limited to
|
||||||
|
communication on electronic mailing lists, source code control systems,
|
||||||
|
and issue tracking systems that are managed by, or on behalf of, the
|
||||||
|
Licensor for the purpose of discussing and improving the Work, but
|
||||||
|
excluding communication that is conspicuously marked or otherwise
|
||||||
|
designated in writing by the copyright owner as "Not a Contribution."
|
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||||
|
on behalf of whom a Contribution has been received by Licensor and
|
||||||
|
subsequently incorporated within the Work.
|
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
copyright license to reproduce, prepare Derivative Works of,
|
||||||
|
publicly display, publicly perform, sublicense, and distribute the
|
||||||
|
Work and such Derivative Works in Source or Object form.
|
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of
|
||||||
|
this License, each Contributor hereby grants to You a perpetual,
|
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||||
|
(except as stated in this section) patent license to make, have made,
|
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||||
|
where such license applies only to those patent claims licensable
|
||||||
|
by such Contributor that are necessarily infringed by their
|
||||||
|
Contribution(s) alone or by combination of their Contribution(s)
|
||||||
|
with the Work to which such Contribution(s) was submitted. If You
|
||||||
|
institute patent litigation against any entity (including a
|
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||||
|
or a Contribution incorporated within the Work constitutes direct
|
||||||
|
or contributory patent infringement, then any patent licenses
|
||||||
|
granted to You under this License for that Work shall terminate
|
||||||
|
as of the date such litigation is filed.
|
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the
|
||||||
|
Work or Derivative Works thereof in any medium, with or without
|
||||||
|
modifications, and in Source or Object form, provided that You
|
||||||
|
meet the following conditions:
|
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or
|
||||||
|
Derivative Works a copy of this License; and
|
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices
|
||||||
|
stating that You changed the files; and
|
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works
|
||||||
|
that You distribute, all copyright, patent, trademark, and
|
||||||
|
attribution notices from the Source form of the Work,
|
||||||
|
excluding those notices that do not pertain to any part of
|
||||||
|
the Derivative Works; and
|
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its
|
||||||
|
distribution, then any Derivative Works that You distribute must
|
||||||
|
include a readable copy of the attribution notices contained
|
||||||
|
within such NOTICE file, excluding those notices that do not
|
||||||
|
pertain to any part of the Derivative Works, in at least one
|
||||||
|
of the following places: within a NOTICE text file distributed
|
||||||
|
as part of the Derivative Works; within the Source form or
|
||||||
|
documentation, if provided along with the Derivative Works; or,
|
||||||
|
within a display generated by the Derivative Works, if and
|
||||||
|
wherever such third-party notices normally appear. The contents
|
||||||
|
of the NOTICE file are for informational purposes only and
|
||||||
|
do not modify the License. You may add Your own attribution
|
||||||
|
notices within Derivative Works that You distribute, alongside
|
||||||
|
or as an addendum to the NOTICE text from the Work, provided
|
||||||
|
that such additional attribution notices cannot be construed
|
||||||
|
as modifying the License.
|
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and
|
||||||
|
may provide additional or different license terms and conditions
|
||||||
|
for use, reproduction, or distribution of Your modifications, or
|
||||||
|
for any such Derivative Works as a whole, provided Your use,
|
||||||
|
reproduction, and distribution of the Work otherwise complies with
|
||||||
|
the conditions stated in this License.
|
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||||
|
any Contribution intentionally submitted for inclusion in the Work
|
||||||
|
by You to the Licensor shall be under the terms and conditions of
|
||||||
|
this License, without any additional terms or conditions.
|
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify
|
||||||
|
the terms of any separate license agreement you may have executed
|
||||||
|
with Licensor regarding such Contributions.
|
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade
|
||||||
|
names, trademarks, service marks, or product names of the Licensor,
|
||||||
|
except as required for reasonable and customary use in describing the
|
||||||
|
origin of the Work and reproducing the content of the NOTICE file.
|
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||||
|
agreed to in writing, Licensor provides the Work (and each
|
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||||
|
implied, including, without limitation, any warranties or conditions
|
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||||
|
appropriateness of using or redistributing the Work and assume any
|
||||||
|
risks associated with Your exercise of permissions under this License.
|
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory,
|
||||||
|
whether in tort (including negligence), contract, or otherwise,
|
||||||
|
unless required by applicable law (such as deliberate and grossly
|
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be
|
||||||
|
liable to You for damages, including any direct, indirect, special,
|
||||||
|
incidental, or consequential damages of any character arising as a
|
||||||
|
result of this License or out of the use or inability to use the
|
||||||
|
Work (including but not limited to damages for loss of goodwill,
|
||||||
|
work stoppage, computer failure or malfunction, or any and all
|
||||||
|
other commercial damages or losses), even if such Contributor
|
||||||
|
has been advised of the possibility of such damages.
|
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing
|
||||||
|
the Work or Derivative Works thereof, You may choose to offer,
|
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||||
|
or other liability obligations and/or rights consistent with this
|
||||||
|
License. However, in accepting such obligations, You may act only
|
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf
|
||||||
|
of any other Contributor, and only if You agree to indemnify,
|
||||||
|
defend, and hold each Contributor harmless for any liability
|
||||||
|
incurred by, or claims asserted against, such Contributor by reason
|
||||||
|
of your accepting any such warranty or additional liability.
|
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS
|
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work.
|
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following
|
||||||
|
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||||
|
replaced with your own identifying information. (Don't include
|
||||||
|
the brackets!) The text should be enclosed in the appropriate
|
||||||
|
comment syntax for the file format. We also recommend that a
|
||||||
|
file or class name and description of purpose be included on the
|
||||||
|
same "printed page" as the copyright notice for easier
|
||||||
|
identification within third-party archives.
|
||||||
|
|
||||||
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
92
README.md
Normal file
92
README.md
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
# neuronetz-gateway
|
||||||
|
|
||||||
|
A secure, multi-tenant API gateway in front of an [Ollama](https://github.com/ollama/ollama)
|
||||||
|
instance. It is the hot path of the Neuronetz API: every request to the models flows
|
||||||
|
through here, authenticated, rate-limited, budgeted, and audited.
|
||||||
|
|
||||||
|
**The Ollama backend is never reachable from the public internet.** It is bound to an
|
||||||
|
internal Docker network with no published ports. All access is via this gateway, behind
|
||||||
|
TLS terminated by Caddy.
|
||||||
|
|
||||||
|
> Status: **v0.1.0 — in development.** See [`scope-docs/SPEC.md`](scope-docs/SPEC.md) for
|
||||||
|
> the full specification and [`scope-docs/AGENT_PROMPT.md`](scope-docs/AGENT_PROMPT.md) for
|
||||||
|
> the phased build plan. `SPEC.md` is the source of truth.
|
||||||
|
|
||||||
|
## What it does
|
||||||
|
|
||||||
|
- **Auth** — API keys as Bearer tokens, stored as Argon2id hashes, verified in constant time.
|
||||||
|
- **Multi-tenant** — tenants own keys; limits and budgets inherit tenant → key.
|
||||||
|
- **Rate limiting** — per-key and per-tenant RPM / TPM / concurrent connections.
|
||||||
|
- **Budgets** — daily / monthly / total token budgets, enforced fail-closed.
|
||||||
|
- **Dual API surface** — native Ollama (`/api/*`) and OpenAI-compatible (`/v1/*`), both streaming.
|
||||||
|
- **Hard-blocked mutations** — `/api/pull`, `/api/push`, `/api/create`, `/api/copy`,
|
||||||
|
`/api/delete`, `/api/blobs/*` always return 403. Not configurable.
|
||||||
|
- **Audit log** — always-on request metadata; opt-in, TTL'd prompt logging per key.
|
||||||
|
|
||||||
|
Administration (dashboards, tenant self-service) lives in a separate service,
|
||||||
|
`neuronetz-console`; it is **not** part of this repository.
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Internet ──TLS──> Caddy ──HTTP──> gateway ──┬──> Postgres (keys, budgets, audit)
                                            ├──> Redis (key cache, rate limits)
                                            └──> Ollama (internal network only)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quickstart (dev)
|
||||||
|
|
||||||
|
Requires Docker + Docker Compose. The dev stack runs Postgres, Redis, and the gateway —
|
||||||
|
**no Caddy and no Ollama** (so `/readyz` reports 503 until a real Ollama backend is wired
|
||||||
|
in; that is expected).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone <repo> neuronetz-gateway && cd neuronetz-gateway
|
||||||
|
cp .env.example .env # adjust if you like; defaults work for local dev
|
||||||
|
docker compose -f docker-compose.dev.yml up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
The gateway runs `alembic upgrade head` on startup, then serves on `http://localhost:8080`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -i http://localhost:8080/healthz # -> 200 {"status":"ok"}
|
||||||
|
curl -i http://localhost:8080/readyz # -> 503 (no Ollama backend in the dev stack)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Production
|
||||||
|
|
||||||
|
`docker-compose.yml` brings up the full stack — Caddy (TLS via Let's Encrypt for
|
||||||
|
`api.neuronetz.ai`), the gateway, Postgres, Redis, and Ollama. The `ollama` service has
|
||||||
|
**no `ports:` mapping** and is reachable only on the internal Docker network. See
|
||||||
|
[`docs/DEPLOYMENT.md`](docs/DEPLOYMENT.md) (added in a later phase) and
|
||||||
|
[`ops/caddy/Caddyfile.example`](ops/caddy/Caddyfile.example).
|
||||||
|
|
||||||
|
## Managing tenants and keys
|
||||||
|
|
||||||
|
Use the bootstrap CLI (Typer). Keys have the form `nz_<prefix><secret>`; the full key is
|
||||||
|
printed exactly once at creation and only its Argon2id hash is stored.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
neuronetz-gateway create-tenant --name acme
|
||||||
|
neuronetz-gateway create-key --tenant acme --name prod-server-1
|
||||||
|
neuronetz-gateway list-keys --tenant acme
|
||||||
|
neuronetz-gateway revoke-key --prefix nz_abc12345
|
||||||
|
```
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
just dev # run the dev stack
|
||||||
|
just test # pytest + coverage
|
||||||
|
just lint # ruff
|
||||||
|
just typecheck # mypy --strict
|
||||||
|
just migrate # alembic upgrade head
|
||||||
|
```
|
||||||
|
|
||||||
|
Tooling: Python 3.12, `uv`, FastAPI + uvicorn, SQLAlchemy 2.0 (async) + asyncpg, Redis,
|
||||||
|
httpx, structlog, Pydantic. Lint/type/security gates: ruff, mypy `--strict`, bandit,
|
||||||
|
pip-audit.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
Apache 2.0 — see [`LICENSE`](LICENSE). Owner: Stephan Berbig / Neuronetz.
|
||||||
49
alembic.ini
Normal file
49
alembic.ini
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
# Alembic configuration for neuronetz-gateway.
|
||||||
|
# The database URL is read from the DATABASE_URL environment variable in
|
||||||
|
# alembic/env.py (do not hardcode credentials here).
|
||||||
|
|
||||||
|
[alembic]
|
||||||
|
script_location = alembic
|
||||||
|
prepend_sys_path = src
|
||||||
|
version_path_separator = os
|
||||||
|
# version_locations defaults to alembic/versions
|
||||||
|
|
||||||
|
# DATABASE_URL is injected at runtime; this placeholder is never used directly.
|
||||||
|
sqlalchemy.url = driver://user:pass@localhost/dbname
|
||||||
|
|
||||||
|
[post_write_hooks]
|
||||||
|
# (none)
|
||||||
|
|
||||||
|
[loggers]
|
||||||
|
keys = root,sqlalchemy,alembic
|
||||||
|
|
||||||
|
[handlers]
|
||||||
|
keys = console
|
||||||
|
|
||||||
|
[formatters]
|
||||||
|
keys = generic
|
||||||
|
|
||||||
|
[logger_root]
|
||||||
|
level = WARNING
|
||||||
|
handlers = console
|
||||||
|
qualname =
|
||||||
|
|
||||||
|
[logger_sqlalchemy]
|
||||||
|
level = WARNING
|
||||||
|
handlers =
|
||||||
|
qualname = sqlalchemy.engine
|
||||||
|
|
||||||
|
[logger_alembic]
|
||||||
|
level = INFO
|
||||||
|
handlers =
|
||||||
|
qualname = alembic
|
||||||
|
|
||||||
|
[handler_console]
|
||||||
|
class = StreamHandler
|
||||||
|
args = (sys.stderr,)
|
||||||
|
level = NOTSET
|
||||||
|
formatter = generic
|
||||||
|
|
||||||
|
[formatter_generic]
|
||||||
|
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||||
|
datefmt = %H:%M:%S
|
||||||
97
alembic/env.py
Normal file
97
alembic/env.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
"""Alembic environment for neuronetz-gateway (async engine).
|
||||||
|
|
||||||
|
Reads ``DATABASE_URL`` from the environment (the same value the app uses,
|
||||||
|
``postgresql+asyncpg://...``). Ensures schema ``gateway`` exists and pins the
|
||||||
|
Alembic version table into that schema so migration bookkeeping never collides
|
||||||
|
with the ``console`` schema in the shared database.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from logging.config import fileConfig
|
||||||
|
|
||||||
|
from alembic import context
|
||||||
|
from sqlalchemy import pool, text
|
||||||
|
from sqlalchemy.engine import Connection
|
||||||
|
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||||
|
|
||||||
|
from neuronetz_gateway.config import get_settings
|
||||||
|
from neuronetz_gateway.db.models import GATEWAY_SCHEMA, Base
|
||||||
|
|
||||||
|
# Alembic Config object; exposes the values of the .ini file in use.
config = context.config

# Metadata of ``Base`` (neuronetz_gateway.db.models), handed to Alembic as the
# migration target in the configure() calls below.
target_metadata = Base.metadata

# Set up Python logging from the .ini file, but only when Alembic has one.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)
|
||||||
|
|
||||||
|
|
||||||
|
def _database_url() -> str:
    """Return the database URL used for migrations.

    ``DATABASE_URL`` from the process environment wins; when it is unset or
    empty, the value from ``get_settings().database_url`` is used instead.
    """
    env_url = os.environ.get("DATABASE_URL")
    if env_url:
        return env_url
    return get_settings().database_url
|
||||||
|
|
||||||
|
|
||||||
|
def _configure_context(connection: Connection) -> None:
    """Bind *connection* to the Alembic context for an online migration run.

    The version table is named ``alembic_version`` and is placed in
    ``GATEWAY_SCHEMA``. ``include_schemas`` and ``compare_type`` are both
    switched on.
    """
    context.configure(
        connection=connection,
        target_metadata=target_metadata,
        # Version bookkeeping lives inside the gateway schema.
        version_table_schema=GATEWAY_SCHEMA,
        version_table="alembic_version",
        compare_type=True,
        include_schemas=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode (emit SQL without a DBAPI connection).

    The context is configured from the URL alone, with literal binds so the
    output is a self-contained SQL script. The version table is
    ``alembic_version`` inside ``GATEWAY_SCHEMA``.
    """
    context.configure(
        url=_database_url(),
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
        version_table="alembic_version",
        version_table_schema=GATEWAY_SCHEMA,
        include_schemas=True,
    )
    with context.begin_transaction():
        # The version table is schema-qualified, so the script must create the
        # schema before Alembic's CREATE TABLE for it. The online path does the
        # same thing in _do_run_migrations().
        context.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{GATEWAY_SCHEMA}"'))
        context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
def _do_run_migrations(connection: Connection) -> None:
    """Create the gateway schema if it is missing, then apply the migrations.

    The ``CREATE SCHEMA`` statement is committed in a transaction of its own
    before Alembic is configured. With SQLAlchemy 2.0, ``execute()`` implicitly
    begins a transaction; leaving it open would make Alembic's
    ``begin_transaction()`` regard the connection as caller-managed and turn
    into a no-op that never commits, so the migration (and the schema) would be
    rolled back when the connection closes. Committing first hands Alembic a
    clean connection whose transaction it owns and commits itself.
    """
    create_schema = text(f'CREATE SCHEMA IF NOT EXISTS "{GATEWAY_SCHEMA}"')
    connection.execute(create_schema)
    connection.commit()  # end the auto-begun transaction before Alembic starts

    _configure_context(connection)
    with context.begin_transaction():
        context.run_migrations()
|
||||||
|
|
||||||
|
|
||||||
|
async def run_migrations_online() -> None:
    """Run migrations in 'online' mode using an async engine.

    The engine is built from the ini section with ``sqlalchemy.url`` replaced
    by :func:`_database_url` and uses ``NullPool``. The migration itself runs
    synchronously on the async connection through ``run_sync``.
    """
    configuration = config.get_section(config.config_ini_section) or {}
    configuration["sqlalchemy.url"] = _database_url()
    connectable = async_engine_from_config(
        configuration,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    try:
        async with connectable.connect() as connection:
            await connection.run_sync(_do_run_migrations)
    finally:
        # Dispose of the engine even when a migration raises.
        await connectable.dispose()
|
||||||
|
|
||||||
|
|
||||||
|
# Pick the migration runner according to the mode Alembic reports.
if not context.is_offline_mode():
    asyncio.run(run_migrations_online())
else:
    run_migrations_offline()
|
||||||
342
alembic/versions/0001_initial.py
Normal file
342
alembic/versions/0001_initial.py
Normal file
@@ -0,0 +1,342 @@
|
|||||||
|
"""initial gateway schema
|
||||||
|
|
||||||
|
Creates schema ``gateway``, the three enum types, all tables and indexes, and
|
||||||
|
the ``notify_key_revoked()`` function plus ``trg_notify_key_revoked`` trigger,
|
||||||
|
matching SPEC §5 verbatim in structure.
|
||||||
|
|
||||||
|
Revision ID: 0001_initial
|
||||||
|
Revises:
|
||||||
|
Create Date: 2026-05-22
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
from sqlalchemy.dialects import postgresql
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "0001_initial"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Name of the PostgreSQL schema that holds every object created below.
SCHEMA = "gateway"

# upgrade() creates the enum types with raw SQL. These objects are only used as
# column types, so create_type=False keeps create_table() from emitting a second
# CREATE TYPE for them.
_key_status = postgresql.ENUM(
    "active",
    "disabled",
    "revoked",
    name="key_status",
    schema=SCHEMA,
    create_type=False,
)
_tenant_status = postgresql.ENUM(
    "active",
    "suspended",
    "closed",
    name="tenant_status",
    schema=SCHEMA,
    create_type=False,
)
_budget_period = postgresql.ENUM(
    "day",
    "month",
    "total",
    name="budget_period",
    schema=SCHEMA,
    create_type=False,
)
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Build the ``gateway`` schema: enum types, tables, indexes and NOTIFY trigger.

    Objects are created in dependency order, so every foreign-key target
    exists before the table that references it.
    """
    op.execute(f'CREATE SCHEMA IF NOT EXISTS "{SCHEMA}"')

    # --- Enum types (SPEC §5) ---
    for create_enum_sql in (
        "CREATE TYPE gateway.key_status AS ENUM ('active', 'disabled', 'revoked')",
        "CREATE TYPE gateway.tenant_status AS ENUM ('active', 'suspended', 'closed')",
        "CREATE TYPE gateway.budget_period AS ENUM ('day', 'month', 'total')",
    ):
        op.execute(create_enum_sql)

    # --- tenants ---
    tenants_columns = [
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("gen_random_uuid()"),
        ),
        sa.Column("name", sa.Text(), nullable=False, unique=True),
        sa.Column(
            "status", _tenant_status, nullable=False, server_default=sa.text("'active'")
        ),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column(
            "metadata",
            postgresql.JSONB(),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
    ]
    op.create_table("tenants", *tenants_columns, schema=SCHEMA)

    # --- tenant_limits (one row per tenant; the tenant id is the primary key) ---
    tenant_limits_columns = [
        sa.Column(
            "tenant_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey(f"{SCHEMA}.tenants.id", ondelete="CASCADE"),
            primary_key=True,
        ),
        sa.Column("rpm", sa.Integer(), nullable=False, server_default=sa.text("60")),
        sa.Column("tpm", sa.Integer(), nullable=False, server_default=sa.text("100000")),
        sa.Column("concurrent", sa.Integer(), nullable=False, server_default=sa.text("8")),
        sa.Column("tokens_daily", sa.BigInteger(), nullable=True),
        sa.Column("tokens_monthly", sa.BigInteger(), nullable=True),
        sa.Column("tokens_total", sa.BigInteger(), nullable=True),
        sa.Column(
            "allowed_models",
            postgresql.ARRAY(sa.Text()),
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        sa.Column(
            "allow_all_models",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        sa.Column(
            "log_prompts_default",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        sa.Column(
            "prompt_retention_days", sa.Integer(), nullable=False, server_default=sa.text("30")
        ),
        sa.Column(
            "audit_retention_days", sa.Integer(), nullable=False, server_default=sa.text("365")
        ),
    ]
    op.create_table("tenant_limits", *tenant_limits_columns, schema=SCHEMA)

    # --- api_keys ---
    api_keys_columns = [
        sa.Column(
            "id",
            postgresql.UUID(as_uuid=True),
            primary_key=True,
            server_default=sa.text("gen_random_uuid()"),
        ),
        sa.Column(
            "tenant_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey(f"{SCHEMA}.tenants.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("prefix", sa.Text(), nullable=False, unique=True),
        sa.Column("key_hash", sa.Text(), nullable=False),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("status", _key_status, nullable=False, server_default=sa.text("'active'")),
        sa.Column(
            "scopes",
            postgresql.ARRAY(sa.Text()),
            nullable=False,
            server_default=sa.text("'{chat,embeddings}'"),
        ),
        sa.Column(
            "created_at",
            postgresql.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column("last_used_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column("expires_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
        sa.Column("log_prompts", sa.Boolean(), nullable=True),
        sa.Column(
            "metadata",
            postgresql.JSONB(),
            nullable=False,
            server_default=sa.text("'{}'::jsonb"),
        ),
    ]
    op.create_table("api_keys", *api_keys_columns, schema=SCHEMA)
    # Partial index: covers only rows whose status is 'active'.
    op.create_index(
        "idx_api_keys_prefix",
        "api_keys",
        ["prefix"],
        schema=SCHEMA,
        postgresql_where=sa.text("status = 'active'"),
    )
    op.create_index("idx_api_keys_tenant", "api_keys", ["tenant_id"], schema=SCHEMA)

    # --- key_limits (every limit column is nullable) ---
    key_limits_columns = [
        sa.Column(
            "key_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey(f"{SCHEMA}.api_keys.id", ondelete="CASCADE"),
            primary_key=True,
        ),
        sa.Column("rpm", sa.Integer(), nullable=True),
        sa.Column("tpm", sa.Integer(), nullable=True),
        sa.Column("concurrent", sa.Integer(), nullable=True),
        sa.Column("tokens_daily", sa.BigInteger(), nullable=True),
        sa.Column("tokens_monthly", sa.BigInteger(), nullable=True),
        sa.Column("tokens_total", sa.BigInteger(), nullable=True),
        sa.Column("allowed_models", postgresql.ARRAY(sa.Text()), nullable=True),
        sa.Column("allow_all_models", sa.Boolean(), nullable=True),
    ]
    op.create_table("key_limits", *key_limits_columns, schema=SCHEMA)

    # --- budget_usage (composite primary key: key_id, period, period_start) ---
    budget_usage_columns = [
        sa.Column(
            "key_id",
            postgresql.UUID(as_uuid=True),
            sa.ForeignKey(f"{SCHEMA}.api_keys.id", ondelete="CASCADE"),
            primary_key=True,
            nullable=False,
        ),
        sa.Column("period", _budget_period, primary_key=True, nullable=False),
        sa.Column(
            "period_start",
            postgresql.TIMESTAMP(timezone=True),
            primary_key=True,
            nullable=False,
        ),
        sa.Column("tokens_in", sa.BigInteger(), nullable=False, server_default=sa.text("0")),
        sa.Column("tokens_out", sa.BigInteger(), nullable=False, server_default=sa.text("0")),
        sa.Column("requests", sa.BigInteger(), nullable=False, server_default=sa.text("0")),
    ]
    op.create_table("budget_usage", *budget_usage_columns, schema=SCHEMA)
    op.create_index(
        "idx_budget_usage_period",
        "budget_usage",
        ["period", "period_start"],
        schema=SCHEMA,
    )

    # --- audit_log ---
    audit_log_columns = [
        sa.Column("id", sa.BigInteger(), sa.Identity(always=False), primary_key=True),
        sa.Column(
            "ts",
            postgresql.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column("request_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("tenant_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("key_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("key_prefix", sa.Text(), nullable=True),
        sa.Column("method", sa.Text(), nullable=False),
        sa.Column("path", sa.Text(), nullable=False),
        sa.Column("model", sa.Text(), nullable=True),
        sa.Column("tokens_in", sa.Integer(), nullable=True),
        sa.Column("tokens_out", sa.Integer(), nullable=True),
        sa.Column("latency_ms", sa.Integer(), nullable=True),
        sa.Column("status", sa.Integer(), nullable=False),
        sa.Column("client_ip", postgresql.INET(), nullable=True),
        sa.Column("user_agent", sa.Text(), nullable=True),
        sa.Column("error_code", sa.Text(), nullable=True),
    ]
    op.create_table("audit_log", *audit_log_columns, schema=SCHEMA)
    for index_name, index_columns in (
        ("idx_audit_ts", ["ts"]),
        ("idx_audit_tenant_ts", ["tenant_id", "ts"]),
        ("idx_audit_key_ts", ["key_id", "ts"]),
    ):
        op.create_index(index_name, "audit_log", index_columns, schema=SCHEMA)

    # --- prompt_log ---
    prompt_log_columns = [
        sa.Column("id", sa.BigInteger(), sa.Identity(always=False), primary_key=True),
        sa.Column(
            "audit_id",
            sa.BigInteger(),
            sa.ForeignKey(f"{SCHEMA}.audit_log.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column(
            "ts",
            postgresql.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column("key_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("request_body", postgresql.JSONB(), nullable=False),
        sa.Column("response_text", sa.Text(), nullable=True),
        sa.Column("retention_until", postgresql.TIMESTAMP(timezone=True), nullable=False),
    ]
    op.create_table("prompt_log", *prompt_log_columns, schema=SCHEMA)
    op.create_index(
        "idx_prompt_log_retention", "prompt_log", ["retention_until"], schema=SCHEMA
    )

    # --- revocations ---
    revocations_columns = [
        sa.Column("id", sa.BigInteger(), sa.Identity(always=False), primary_key=True),
        sa.Column("key_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column(
            "ts",
            postgresql.TIMESTAMP(timezone=True),
            nullable=False,
            server_default=sa.text("now()"),
        ),
        sa.Column("reason", sa.Text(), nullable=True),
        sa.Column("processed_at", postgresql.TIMESTAMP(timezone=True), nullable=True),
    ]
    op.create_table("revocations", *revocations_columns, schema=SCHEMA)

    # --- NOTIFY trigger on revocation insert (SPEC §5) ---
    # The function publishes the revoked key id on the 'key_revoked' channel.
    notify_function_sql = """
        CREATE OR REPLACE FUNCTION gateway.notify_key_revoked() RETURNS trigger AS $$
        BEGIN
            PERFORM pg_notify('key_revoked', NEW.key_id::text);
            RETURN NEW;
        END;
        $$ LANGUAGE plpgsql;
        """
    notify_trigger_sql = """
        CREATE TRIGGER trg_notify_key_revoked
        AFTER INSERT ON gateway.revocations
        FOR EACH ROW EXECUTE FUNCTION gateway.notify_key_revoked();
        """
    op.execute(notify_function_sql)
    op.execute(notify_trigger_sql)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop every object created by :func:`upgrade`, in reverse dependency order.

    The ``gateway`` schema itself is deliberately left in place. ``env.py``
    keeps Alembic's ``alembic_version`` table inside that schema, so the schema
    is never empty while migrations are tracked: a plain ``DROP SCHEMA`` would
    fail and roll back the whole downgrade, and ``DROP SCHEMA ... CASCADE``
    would remove the version table Alembic still has to update afterwards.
    """
    op.execute("DROP TRIGGER IF EXISTS trg_notify_key_revoked ON gateway.revocations")
    op.execute("DROP FUNCTION IF EXISTS gateway.notify_key_revoked()")

    # Created by upgrade(); drop it once its trigger and function are gone.
    op.drop_table("revocations", schema=SCHEMA)

    op.drop_index("idx_prompt_log_retention", table_name="prompt_log", schema=SCHEMA)
    op.drop_table("prompt_log", schema=SCHEMA)

    op.drop_index("idx_audit_key_ts", table_name="audit_log", schema=SCHEMA)
    op.drop_index("idx_audit_tenant_ts", table_name="audit_log", schema=SCHEMA)
    op.drop_index("idx_audit_ts", table_name="audit_log", schema=SCHEMA)
    op.drop_table("audit_log", schema=SCHEMA)

    op.drop_index("idx_budget_usage_period", table_name="budget_usage", schema=SCHEMA)
    op.drop_table("budget_usage", schema=SCHEMA)

    op.drop_table("key_limits", schema=SCHEMA)

    op.drop_index("idx_api_keys_tenant", table_name="api_keys", schema=SCHEMA)
    op.drop_index("idx_api_keys_prefix", table_name="api_keys", schema=SCHEMA)
    op.drop_table("api_keys", schema=SCHEMA)

    op.drop_table("tenant_limits", schema=SCHEMA)
    op.drop_table("tenants", schema=SCHEMA)

    # Enum types can only go once no column uses them any more.
    op.execute("DROP TYPE IF EXISTS gateway.budget_period")
    op.execute("DROP TYPE IF EXISTS gateway.tenant_status")
    op.execute("DROP TYPE IF EXISTS gateway.key_status")
|
||||||
101
docker-compose.dev.yml
Normal file
101
docker-compose.dev.yml
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# neuronetz-gateway — DEV stack (postgres + redis + gateway only).
|
||||||
|
#
|
||||||
|
# Deliberately differs from the production stack:
|
||||||
|
# * NO caddy — the gateway is published directly on localhost:8080.
|
||||||
|
# * NO ollama — Phase 1 expects /readyz to return 503 *because* there is no
|
||||||
|
# Ollama backend yet. This is the intended exit-criterion state.
|
||||||
|
#
|
||||||
|
# Bring it up with:
|
||||||
|
# docker compose -f docker-compose.dev.yml up --build
|
||||||
|
#
|
||||||
|
# Then:
|
||||||
|
# curl -i http://localhost:8080/healthz # -> 200
|
||||||
|
# curl -i http://localhost:8080/readyz # -> 503 (no Ollama)
|
||||||
|
#
|
||||||
|
# The gateway container runs `alembic upgrade head` and then starts the server.
|
||||||
|
|
||||||
|
services:
|
||||||
|
gateway:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "127.0.0.1:8080:8080"
|
||||||
|
environment:
|
||||||
|
GATEWAY_BIND_HOST: 0.0.0.0
|
||||||
|
GATEWAY_BIND_PORT: "8080"
|
||||||
|
GATEWAY_LOG_LEVEL: ${GATEWAY_LOG_LEVEL:-INFO}
|
||||||
|
GATEWAY_LOG_FORMAT: ${GATEWAY_LOG_FORMAT:-console}
|
||||||
|
GATEWAY_REQUEST_ID_HEADER: ${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}
|
||||||
|
GATEWAY_TRUSTED_PROXIES: ${GATEWAY_TRUSTED_PROXIES:-127.0.0.1}
|
||||||
|
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-gateway}@postgres:5432/${POSTGRES_DB:-neuronetz}
|
||||||
|
DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-10}
|
||||||
|
DATABASE_POOL_OVERFLOW: ${DATABASE_POOL_OVERFLOW:-20}
|
||||||
|
REDIS_URL: redis://redis:6379/0
|
||||||
|
REDIS_KEY_CACHE_TTL_S: ${REDIS_KEY_CACHE_TTL_S:-60}
|
||||||
|
# No Ollama in the dev stack — point at the (absent) service name so the
|
||||||
|
# readiness check fails closed with 503, exactly as Phase 1 expects.
|
||||||
|
OLLAMA_BASE_URL: ${OLLAMA_BASE_URL:-http://ollama:11434}
|
||||||
|
OLLAMA_CONNECT_TIMEOUT_S: ${OLLAMA_CONNECT_TIMEOUT_S:-5}
|
||||||
|
OLLAMA_READ_TIMEOUT_S: ${OLLAMA_READ_TIMEOUT_S:-600}
|
||||||
|
OLLAMA_MAX_CONNECTIONS: ${OLLAMA_MAX_CONNECTIONS:-64}
|
||||||
|
DEFAULT_RPM: ${DEFAULT_RPM:-60}
|
||||||
|
DEFAULT_TPM: ${DEFAULT_TPM:-100000}
|
||||||
|
DEFAULT_CONCURRENT: ${DEFAULT_CONCURRENT:-8}
|
||||||
|
MAX_REQUEST_BODY_BYTES: ${MAX_REQUEST_BODY_BYTES:-262144}
|
||||||
|
MAX_NUM_PREDICT: ${MAX_NUM_PREDICT:-4096}
|
||||||
|
ARGON2_TIME_COST: ${ARGON2_TIME_COST:-3}
|
||||||
|
ARGON2_MEMORY_COST_KIB: ${ARGON2_MEMORY_COST_KIB:-65536}
|
||||||
|
ARGON2_PARALLELISM: ${ARGON2_PARALLELISM:-4}
|
||||||
|
AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: ${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}
|
||||||
|
AUDIT_BUFFER_SIZE: ${AUDIT_BUFFER_SIZE:-1000}
|
||||||
|
PROMPT_LOG_DEFAULT_RETENTION_DAYS: ${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}
|
||||||
|
AUDIT_LOG_DEFAULT_RETENTION_DAYS: ${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
# Run migrations, then start the server.
|
||||||
|
command: ["sh", "-c", "alembic upgrade head && exec python -m neuronetz_gateway"]
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8080/healthz"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-gateway}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-gateway}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-neuronetz}
|
||||||
|
ports:
|
||||||
|
# Exposed on localhost for dev convenience (psql, migrations from host).
|
||||||
|
- "127.0.0.1:5432:5432"
|
||||||
|
volumes:
|
||||||
|
- postgres_dev_data:/var/lib/postgresql/data
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
command: ["redis-server", "--save", "", "--appendonly", "no"]
|
||||||
|
ports:
|
||||||
|
# Exposed on localhost for dev convenience (redis-cli from host).
|
||||||
|
- "127.0.0.1:6379:6379"
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_dev_data:
|
||||||
152
docker-compose.yml
Normal file
152
docker-compose.yml
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
# neuronetz-gateway — FULL production stack (SPEC §4.1).
|
||||||
|
#
|
||||||
|
# Internet ──TLS──▶ caddy ──HTTP/1.1 internal──▶ gateway ──▶ postgres / redis / ollama
|
||||||
|
#
|
||||||
|
# Only Caddy publishes ports to the host. The gateway is reachable solely through
|
||||||
|
# Caddy on the internal network. Postgres, Redis and (critically) Ollama are NOT
|
||||||
|
# published to the host at all.
|
||||||
|
#
|
||||||
|
# ┌─────────────────────────────────────────────────────────────────────────┐
|
||||||
|
# │ SECURITY NON-NEGOTIABLE: │
|
||||||
|
# │ The `ollama` service has NO `ports:` mapping and MUST NEVER get one. │
|
||||||
|
# │ Ollama is reachable only on the internal Docker network via the │
|
||||||
|
# │ service name `ollama:11434`. Publishing it would re-open the exact │
|
||||||
|
# │ unauthenticated exposure this whole project exists to close. │
|
||||||
|
# └─────────────────────────────────────────────────────────────────────────┘
|
||||||
|
#
|
||||||
|
# Copy `.env.example` to `.env` and adjust before running:
|
||||||
|
# docker compose up -d --build
|
||||||
|
|
||||||
|
services:
|
||||||
|
caddy:
|
||||||
|
image: caddy:2-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
gateway:
|
||||||
|
condition: service_healthy
|
||||||
|
ports:
|
||||||
|
- "80:80"
|
||||||
|
- "443:443"
|
||||||
|
- "443:443/udp" # HTTP/3
|
||||||
|
volumes:
|
||||||
|
- ./ops/caddy/Caddyfile.example:/etc/caddy/Caddyfile:ro
|
||||||
|
- caddy_data:/data
|
||||||
|
- caddy_config:/config
|
||||||
|
networks:
|
||||||
|
- edge
|
||||||
|
- internal
|
||||||
|
|
||||||
|
gateway:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
restart: unless-stopped
|
||||||
|
# NOTE: deliberately NO `ports:` — the gateway is internal-only and is
|
||||||
|
# reached exclusively through Caddy.
|
||||||
|
expose:
|
||||||
|
- "8080"
|
||||||
|
environment:
|
||||||
|
GATEWAY_BIND_HOST: 0.0.0.0
|
||||||
|
GATEWAY_BIND_PORT: "8080"
|
||||||
|
GATEWAY_LOG_LEVEL: ${GATEWAY_LOG_LEVEL:-INFO}
|
||||||
|
GATEWAY_LOG_FORMAT: ${GATEWAY_LOG_FORMAT:-json}
|
||||||
|
GATEWAY_REQUEST_ID_HEADER: ${GATEWAY_REQUEST_ID_HEADER:-X-Request-ID}
|
||||||
|
GATEWAY_TRUSTED_PROXIES: ${GATEWAY_TRUSTED_PROXIES:-127.0.0.1,caddy}
|
||||||
|
# Service-name addressing on the internal network.
|
||||||
|
DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-gateway}:${POSTGRES_PASSWORD:-changeme}@postgres:5432/${POSTGRES_DB:-neuronetz}
|
||||||
|
DATABASE_POOL_SIZE: ${DATABASE_POOL_SIZE:-10}
|
||||||
|
DATABASE_POOL_OVERFLOW: ${DATABASE_POOL_OVERFLOW:-20}
|
||||||
|
REDIS_URL: redis://redis:6379/0
|
||||||
|
REDIS_KEY_CACHE_TTL_S: ${REDIS_KEY_CACHE_TTL_S:-60}
|
||||||
|
OLLAMA_BASE_URL: http://ollama:11434
|
||||||
|
OLLAMA_CONNECT_TIMEOUT_S: ${OLLAMA_CONNECT_TIMEOUT_S:-5}
|
||||||
|
OLLAMA_READ_TIMEOUT_S: ${OLLAMA_READ_TIMEOUT_S:-600}
|
||||||
|
OLLAMA_MAX_CONNECTIONS: ${OLLAMA_MAX_CONNECTIONS:-64}
|
||||||
|
DEFAULT_RPM: ${DEFAULT_RPM:-60}
|
||||||
|
DEFAULT_TPM: ${DEFAULT_TPM:-100000}
|
||||||
|
DEFAULT_CONCURRENT: ${DEFAULT_CONCURRENT:-8}
|
||||||
|
MAX_REQUEST_BODY_BYTES: ${MAX_REQUEST_BODY_BYTES:-262144}
|
||||||
|
MAX_NUM_PREDICT: ${MAX_NUM_PREDICT:-4096}
|
||||||
|
ARGON2_TIME_COST: ${ARGON2_TIME_COST:-3}
|
||||||
|
ARGON2_MEMORY_COST_KIB: ${ARGON2_MEMORY_COST_KIB:-65536}
|
||||||
|
ARGON2_PARALLELISM: ${ARGON2_PARALLELISM:-4}
|
||||||
|
AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN: ${AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN:-20}
|
||||||
|
AUDIT_BUFFER_SIZE: ${AUDIT_BUFFER_SIZE:-1000}
|
||||||
|
PROMPT_LOG_DEFAULT_RETENTION_DAYS: ${PROMPT_LOG_DEFAULT_RETENTION_DAYS:-30}
|
||||||
|
AUDIT_LOG_DEFAULT_RETENTION_DAYS: ${AUDIT_LOG_DEFAULT_RETENTION_DAYS:-365}
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
ollama:
|
||||||
|
condition: service_started
|
||||||
|
# Apply migrations, then start the server.
|
||||||
|
command: ["sh", "-c", "alembic upgrade head && exec python -m neuronetz_gateway"]
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8080/healthz"]
|
||||||
|
interval: 15s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
networks:
|
||||||
|
- internal
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
image: postgres:16-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER:-gateway}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB:-neuronetz}
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data
|
||||||
|
# No `ports:` — Postgres is internal-only.
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-gateway} -d ${POSTGRES_DB:-neuronetz}"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
networks:
|
||||||
|
- internal
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
restart: unless-stopped
|
||||||
|
command: ["redis-server", "--save", "", "--appendonly", "no"]
|
||||||
|
# No `ports:` — Redis is internal-only.
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 10
|
||||||
|
networks:
|
||||||
|
- internal
|
||||||
|
|
||||||
|
# ───────────────────────────────────────────────────────────────────────────
|
||||||
|
# Ollama — INTERNAL NETWORK ONLY. DO NOT ADD A `ports:` MAPPING.
|
||||||
|
# Reachable only as `http://ollama:11434` from the gateway container.
|
||||||
|
# ───────────────────────────────────────────────────────────────────────────
|
||||||
|
ollama:
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
restart: unless-stopped
|
||||||
|
# !!! NO `ports:` — never publish Ollama to the host or the internet. !!!
|
||||||
|
volumes:
|
||||||
|
- ollama_data:/root/.ollama
|
||||||
|
networks:
|
||||||
|
- internal
|
||||||
|
|
||||||
|
networks:
|
||||||
|
# Public-facing network: only Caddy is attached alongside `internal`.
|
||||||
|
edge:
|
||||||
|
driver: bridge
|
||||||
|
# Private network for inter-service traffic. `internal: false` keeps outbound connectivity, so isolation relies on the services here publishing no host ports.
|
||||||
|
internal:
|
||||||
|
driver: bridge
|
||||||
|
internal: false
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
postgres_data:
|
||||||
|
ollama_data:
|
||||||
|
caddy_data:
|
||||||
|
caddy_config:
|
||||||
60
justfile
Normal file
60
justfile
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# neuronetz-gateway — task runner.
|
||||||
|
#
|
||||||
|
# Requires `just` (https://github.com/casey/just) and `uv`
|
||||||
|
# (https://github.com/astral-sh/uv) on the host.
|
||||||
|
#
|
||||||
|
# just # list available targets
|
||||||
|
# just dev # run postgres + redis + gateway locally (dev stack)
|
||||||
|
# just test # run the test suite with coverage
|
||||||
|
# just lint # ruff check
|
||||||
|
# just typecheck # mypy --strict
|
||||||
|
# just migrate # apply alembic migrations against DATABASE_URL
|
||||||
|
|
||||||
|
set dotenv-load := true
|
||||||
|
|
||||||
|
# uv runs commands inside the project's managed environment.
|
||||||
|
uv := "uv"
|
||||||
|
|
||||||
|
# Show the list of targets (default).
|
||||||
|
default:
|
||||||
|
@just --list
|
||||||
|
|
||||||
|
# Sync dependencies into the local uv-managed virtualenv (incl. dev extras).
|
||||||
|
install:
|
||||||
|
{{uv}} sync --extra dev
|
||||||
|
|
||||||
|
# Run the dev stack: postgres + redis + gateway (no caddy, no ollama).
|
||||||
|
dev:
|
||||||
|
docker compose -f docker-compose.dev.yml up --build
|
||||||
|
|
||||||
|
# Run the test suite with coverage.
|
||||||
|
test:
|
||||||
|
{{uv}} run pytest
|
||||||
|
|
||||||
|
# Lint with ruff.
|
||||||
|
lint:
|
||||||
|
{{uv}} run ruff check .
|
||||||
|
|
||||||
|
# Static type checking (strict).
|
||||||
|
typecheck:
|
||||||
|
{{uv}} run mypy --strict src
|
||||||
|
|
||||||
|
# Apply database migrations to head.
|
||||||
|
migrate:
|
||||||
|
{{uv}} run alembic upgrade head
|
||||||
|
|
||||||
|
# Security lint.
|
||||||
|
bandit:
|
||||||
|
{{uv}} run bandit -q -r src
|
||||||
|
|
||||||
|
# Dependency vulnerability audit.
|
||||||
|
audit:
|
||||||
|
{{uv}} run pip-audit
|
||||||
|
|
||||||
|
# Bring the FULL production stack up (caddy + gateway + postgres + redis + ollama).
|
||||||
|
compose-up:
|
||||||
|
docker compose up -d --build
|
||||||
|
|
||||||
|
# Tear the production stack down.
|
||||||
|
compose-down:
|
||||||
|
docker compose down
|
||||||
59
ops/caddy/Caddyfile.example
Normal file
59
ops/caddy/Caddyfile.example
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# neuronetz-gateway — Caddy reverse proxy (SPEC §4.1, §6.5).
|
||||||
|
#
|
||||||
|
# Caddy is the only public-facing component. It terminates TLS (HTTP/2 + HTTP/3),
|
||||||
|
# obtains a Let's Encrypt certificate for api.neuronetz.ai automatically, applies
|
||||||
|
# security headers, and reverse-proxies to the internal-only gateway:8080.
|
||||||
|
#
|
||||||
|
# Copy this file to `Caddyfile` and edit the site address / admin email.
|
||||||
|
# The production docker-compose.yml mounts it at /etc/caddy/Caddyfile.
|
||||||
|
{
|
||||||
|
# Email for Let's Encrypt account + expiry notices. Replace before deploy.
|
||||||
|
email ops@neuronetz.ai
|
||||||
|
}
|
||||||
|
|
||||||
|
api.neuronetz.ai {
|
||||||
|
# --- Reverse proxy to the internal gateway ---
|
||||||
|
# `gateway` is the Docker service name on the internal network; it is never
|
||||||
|
# published to the host. Caddy forwards plain HTTP/1.1 to it.
|
||||||
|
reverse_proxy gateway:8080
|
||||||
|
|
||||||
|
# --- Security headers ---
|
||||||
|
header {
|
||||||
|
# HSTS: force HTTPS for two years, include subdomains, allow preload.
|
||||||
|
Strict-Transport-Security "max-age=63072000; includeSubDomains; preload"
|
||||||
|
# Disable MIME sniffing.
|
||||||
|
X-Content-Type-Options "nosniff"
|
||||||
|
# Clickjacking defense (API has no UI, deny framing outright).
|
||||||
|
X-Frame-Options "DENY"
|
||||||
|
# Conservative referrer policy.
|
||||||
|
Referrer-Policy "no-referrer"
|
||||||
|
# Strip server-identifying headers so we don't advertise the stack.
|
||||||
|
-Server
|
||||||
|
-X-Powered-By
|
||||||
|
}
|
||||||
|
|
||||||
|
# Structured access logs to stdout (collected by the container runtime).
|
||||||
|
log {
|
||||||
|
output stdout
|
||||||
|
format json
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
# DEV / LOCAL note:
|
||||||
|
#
|
||||||
|
# For local testing without a public domain or real certificate, replace the
|
||||||
|
# site block above with a localhost block that uses Caddy's internal self-signed
|
||||||
|
# CA (no Let's Encrypt round-trip):
|
||||||
|
#
|
||||||
|
# localhost {
|
||||||
|
# tls internal
|
||||||
|
# reverse_proxy gateway:8080
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# Caddy will install its local root CA; trust it or pass `-k` to curl. Note the
|
||||||
|
# Phase 1 *dev* compose stack (docker-compose.dev.yml) ships WITHOUT Caddy and
|
||||||
|
# exposes the gateway directly on localhost:8080 — this file is for the full
|
||||||
|
# production stack only.
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
58
ops/systemd/neuronetz-gateway.service
Normal file
58
ops/systemd/neuronetz-gateway.service
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# neuronetz-gateway — systemd unit for non-Compose deployments.
|
||||||
|
#
|
||||||
|
# Assumes the project is installed into a virtualenv at /opt/neuronetz-gateway/venv
|
||||||
|
# (e.g. `uv venv /opt/neuronetz-gateway/venv && uv pip install ...`) and that
|
||||||
|
# configuration lives in /etc/neuronetz-gateway/gateway.env (same keys as
|
||||||
|
# .env.example). Postgres, Redis and Ollama are reached over the network/loopback
|
||||||
|
# per that env file — Ollama must remain bound to localhost / a private network
|
||||||
|
# and must never be exposed publicly.
|
||||||
|
#
|
||||||
|
# Install:
|
||||||
|
# sudo cp neuronetz-gateway.service /etc/systemd/system/
|
||||||
|
# sudo systemctl daemon-reload
|
||||||
|
# sudo systemctl enable --now neuronetz-gateway
|
||||||
|
|
||||||
|
[Unit]
|
||||||
|
Description=neuronetz-gateway — secure API gateway in front of Ollama
|
||||||
|
Documentation=https://github.com/neuronetz/neuronetz-gateway
|
||||||
|
After=network-online.target
|
||||||
|
Wants=network-online.target
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=simple
|
||||||
|
|
||||||
|
# Dedicated unprivileged service account (create with: useradd --system gateway).
|
||||||
|
User=gateway
|
||||||
|
Group=gateway
|
||||||
|
|
||||||
|
WorkingDirectory=/opt/neuronetz-gateway
|
||||||
|
EnvironmentFile=/etc/neuronetz-gateway/gateway.env
|
||||||
|
|
||||||
|
# Apply migrations before starting (idempotent; no-op when already at head).
|
||||||
|
ExecStartPre=/opt/neuronetz-gateway/venv/bin/alembic upgrade head
|
||||||
|
ExecStart=/opt/neuronetz-gateway/venv/bin/python -m neuronetz_gateway
|
||||||
|
|
||||||
|
Restart=on-failure
|
||||||
|
RestartSec=5
|
||||||
|
TimeoutStopSec=30
|
||||||
|
|
||||||
|
# --- Hardening ---
|
||||||
|
NoNewPrivileges=true
|
||||||
|
ProtectSystem=strict
|
||||||
|
ProtectHome=true
|
||||||
|
PrivateTmp=true
|
||||||
|
PrivateDevices=true
|
||||||
|
ProtectKernelTunables=true
|
||||||
|
ProtectKernelModules=true
|
||||||
|
ProtectControlGroups=true
|
||||||
|
RestrictNamespaces=true
|
||||||
|
RestrictRealtime=true
|
||||||
|
RestrictSUIDSGID=true
|
||||||
|
LockPersonality=true
|
||||||
|
MemoryDenyWriteExecute=true
|
||||||
|
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
|
||||||
|
# Allow writing only where the app legitimately needs to (none by default).
|
||||||
|
ReadWritePaths=
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
94
pyproject.toml
Normal file
94
pyproject.toml
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
[project]
|
||||||
|
name = "neuronetz-gateway"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "Secure multi-tenant API gateway in front of Ollama for the Neuronetz platform."
|
||||||
|
readme = "README.md"
|
||||||
|
license = { text = "Apache-2.0" }
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
authors = [{ name = "Neuronetz", email = "ops@neuronetz.ai" }]
|
||||||
|
dependencies = [
|
||||||
|
"fastapi>=0.115",
|
||||||
|
"uvicorn[standard]>=0.30",
|
||||||
|
"httpx>=0.27",
|
||||||
|
"sqlalchemy[asyncio]>=2.0",
|
||||||
|
"asyncpg>=0.29",
|
||||||
|
"redis[hiredis]>=5.0",
|
||||||
|
"structlog>=24.1",
|
||||||
|
"pydantic>=2.9",
|
||||||
|
"pydantic-settings>=2.4",
|
||||||
|
"argon2-cffi>=23.1",
|
||||||
|
"typer>=0.12",
|
||||||
|
"prometheus-client>=0.20",
|
||||||
|
"alembic>=1.13",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
neuronetz-gateway = "neuronetz_gateway.cli.manage:app"
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"ruff>=0.6",
|
||||||
|
"mypy>=1.11",
|
||||||
|
"bandit>=1.7",
|
||||||
|
"pip-audit>=2.7",
|
||||||
|
"pytest>=8.3",
|
||||||
|
"pytest-asyncio>=0.24",
|
||||||
|
"pytest-cov>=5.0",
|
||||||
|
"testcontainers>=4.8",
|
||||||
|
"respx>=0.21",
|
||||||
|
"locust>=2.31",
|
||||||
|
]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[tool.hatch.build.targets.wheel]
|
||||||
|
packages = ["src/neuronetz_gateway"]
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
target-version = "py312"
|
||||||
|
line-length = 100
|
||||||
|
src = ["src", "tests"]
|
||||||
|
|
||||||
|
[tool.ruff.lint]
|
||||||
|
select = ["E", "F", "I", "B", "UP", "S", "ASYNC"]
|
||||||
|
|
||||||
|
[tool.ruff.lint.per-file-ignores]
|
||||||
|
# Tests may use assert and bind to all interfaces in fixtures.
|
||||||
|
"tests/**" = ["S101", "S104"]
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.12"
|
||||||
|
strict = true
|
||||||
|
mypy_path = "src"
|
||||||
|
plugins = ["pydantic.mypy"]
|
||||||
|
namespace_packages = true
|
||||||
|
explicit_package_bases = true
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
# argon2 ships types but some transitive deps may not; keep strictness elsewhere.
|
||||||
|
# asyncpg ships no stubs/py.typed marker; it is used in revocation.py only.
|
||||||
|
module = ["testcontainers.*", "locust.*", "asyncpg", "asyncpg.*"]
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
testpaths = ["tests"]
|
||||||
|
pythonpath = ["src"]
|
||||||
|
addopts = "--cov=neuronetz_gateway --cov-report=term-missing"
|
||||||
|
|
||||||
|
[tool.coverage.run]
|
||||||
|
source = ["src/neuronetz_gateway"]
|
||||||
|
branch = true
|
||||||
|
omit = [
|
||||||
|
"src/neuronetz_gateway/__main__.py",
|
||||||
|
"src/neuronetz_gateway/cli/*",
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.coverage.report]
|
||||||
|
# Phase 1: coverage is reported but non-blocking. Later phases set fail_under.
|
||||||
|
show_missing = true
|
||||||
|
|
||||||
|
[tool.bandit]
|
||||||
|
exclude_dirs = ["tests"]
|
||||||
121
scope-docs/AGENT_PROMPT.md
Normal file
121
scope-docs/AGENT_PROMPT.md
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
# Build Order: neuronetz-gateway v0.1.0
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
The Ollama instance at `https://api.neuronetz.ai` is currently exposed without authentication. This is a security incident waiting to happen. Your job is to build the gateway that closes that gap and forms the commercial API surface of the Neuronetz AI platform.
|
||||||
|
|
||||||
|
The full specification is in **`SPEC.md`** in this repository. Read it before writing any code. It is the source of truth; if anything below conflicts with it, SPEC.md wins.
|
||||||
|
|
||||||
|
## Mission
|
||||||
|
|
||||||
|
Implement `neuronetz-gateway` per SPEC.md to a state that satisfies **§12 Acceptance Criteria**. Nothing less ships.
|
||||||
|
|
||||||
|
## Non-Negotiables
|
||||||
|
|
||||||
|
These are hard constraints. Violating any of them is a build failure regardless of feature completeness.
|
||||||
|
|
||||||
|
1. **Fail closed, always.** If a security or budgeting check cannot be performed (Redis down, DB unreachable, ambiguous state), deny the request. Never default to allow.
|
||||||
|
2. **Ollama never reachable from outside the Docker internal network.** No `ports:` mapping for the ollama service in any compose file shipped with the project. Document this prominently.
|
||||||
|
3. **No secrets in code, no secrets in logs, no secrets in errors.** Argon2id for key storage. Constant-time comparison only. Keys printed exactly once at creation.
|
||||||
|
4. **No reflected upstream errors.** Ollama errors are sanitized at the gateway boundary. Map to generic 4xx/5xx with a request ID.
|
||||||
|
5. **Mutating Ollama endpoints (`/api/pull`, `/api/push`, `/api/create`, `/api/copy`, `/api/delete`, `/api/blobs/*`) are hard-blocked.** Not configurable. Not behind a feature flag. Blocked.
|
||||||
|
6. **Streaming integrity.** Token counting and audit writes happen **after** stream close, never on the hot path. Time-to-first-byte must not be degraded by gateway bookkeeping.
|
||||||
|
7. **`mypy --strict` and `ruff check` clean before any PR is opened.** No `# type: ignore` without an inline justification comment.
|
||||||
|
8. **Test coverage targets (§9) are a gate, not a goal.** 100% on `auth/`, `ratelimit/`, `budget/`. CI fails below threshold.
|
||||||
|
9. **Apache 2.0 license file present from commit one.** No GPL dependencies.
|
||||||
|
10. **The bootstrap CLI must work before the first manual `curl`.** No "I'll create a key by hand in the DB just to test it" — if the CLI can't create a key, fix the CLI first.
|
||||||
|
|
||||||
|
## Phasing
|
||||||
|
|
||||||
|
Five phases. Each phase has an explicit exit criterion. **Do not start phase N+1 until phase N's exit criterion is verifiably met.** PM/Control: enforce this.
|
||||||
|
|
||||||
|
### Phase 1 — Scaffold
|
||||||
|
|
||||||
|
- Repo layout per SPEC §8
|
||||||
|
- `pyproject.toml`, `uv.lock`, Dockerfile, docker-compose.yml, docker-compose.dev.yml, .env.example, README, LICENSE
|
||||||
|
- Alembic configured; migration `0001_initial.py` creates schema `gateway` and all tables per SPEC §5
|
||||||
|
- `make` or `just` targets: `dev`, `test`, `lint`, `typecheck`, `migrate`, `compose-up`, `compose-down`
|
||||||
|
- CI workflow runs: ruff, mypy, pytest, bandit, pip-audit
|
||||||
|
- **Exit criterion:** `docker compose -f docker-compose.dev.yml up` brings up postgres + redis + a stub gateway that responds 200 on `/healthz` and 503 on `/readyz` (because no Ollama yet). Migrations apply cleanly. CI is green on an empty test suite.
|
||||||
|
|
||||||
|
### Phase 2 — Core proxy + auth
|
||||||
|
|
||||||
|
- Bootstrap CLI (`create-tenant`, `create-key`, `list-keys`, `revoke-key`) working end-to-end
|
||||||
|
- Argon2id hashing module with unit tests covering: hash, verify, constant-time behavior, rehash-on-parameter-change
|
||||||
|
- Auth middleware: Bearer extraction, prefix lookup, hash verify, Redis cache with TTL
|
||||||
|
- Ollama proxy for `/api/chat` and `/api/generate` — both streamed (NDJSON) and non-streamed
|
||||||
|
- Endpoint allowlist enforced
|
||||||
|
- **Model discovery (SPEC §4.6):** background poll of Ollama `/api/tags`, cached in Redis + in-process, fail-closed when unavailable
|
||||||
|
- Model allowlist enforced per-tenant via the **effective set** (allow_all → all discovered; else `allowed_models ∩ discovered`); key-level `allow_all_models` overrides tenant
|
||||||
|
- Error handler: sanitized responses, request ID in every error
|
||||||
|
- Audit log writer (buffered, async)
|
||||||
|
- Mock Ollama in `tests/integration/mock_ollama.py` (no real model required for CI)
|
||||||
|
- **Exit criterion:** A key created via CLI can call `/api/chat` and `/api/generate` through Caddy → gateway → mock Ollama, streaming works, audit rows land in Postgres with correct token counts, `/api/pull` returns 403, no-auth returns 401, wrong-key returns 401. Model discovery populates from the (mock) Ollama `/api/tags`; `/api/tags` returns the tenant's effective set; an `allow_all_models` tenant sees all discovered models, a default-deny tenant sees only `allowed ∩ discovered`, and a non-effective model returns 403; discovery-unavailable fails closed. Integration tests cover all of the above.
|
||||||
|
|
||||||
|
### Phase 3 — Rate limit + budget + OpenAI-compat
|
||||||
|
|
||||||
|
- Sliding window rate limit (Redis Lua script) — per-key RPM, per-tenant RPM, per-key TPM
|
||||||
|
- Concurrency semaphore (Redis-backed) with TTL guard
|
||||||
|
- Token budget counters in Redis with Postgres ledger reconciliation on period rollover
|
||||||
|
- OpenAI-compatibility layer: `/v1/chat/completions`, `/v1/completions`, `/v1/embeddings`, `/v1/models` with full SSE streaming and `data: [DONE]` terminator
|
||||||
|
- Schema translation tests with golden fixtures (request in OpenAI → expected Ollama request; response from Ollama → expected OpenAI response)
|
||||||
|
- Rate-limit and budget response headers per SPEC §6.5
|
||||||
|
- **Exit criterion:** Locust test (100 concurrent users, 5 min) shows correct 429 behavior at the limit, correct token accounting, p99 gateway overhead < 25 ms. OpenAI Python SDK pointed at `/v1` successfully completes streaming chat. Killing Redis mid-test produces 503 (fail closed), not 200.
|
||||||
|
|
||||||
|
### Phase 4 — Audit, prompt log, revocation
|
||||||
|
|
||||||
|
- Prompt log (opt-in per key, TTL) with daily sweeper task
|
||||||
|
- Audit log retention sweeper (TTL per tenant config)
|
||||||
|
- Buffered audit writer with ring-buffer overflow → deny-mode behavior
|
||||||
|
- Revocation flow: console (simulated via direct INSERT in tests) writes `gateway.revocations` → NOTIFY → gateway evicts Redis cache → next request with revoked key returns 401 within 1 second
|
||||||
|
- Prometheus `/metrics` (loopback only) with: `gateway_requests_total{tenant,model,status}`, `gateway_tokens_total{tenant,model,direction}`, `gateway_request_duration_seconds{tenant,model}` (histogram)
|
||||||
|
- `/readyz` checks DB + Redis + Ollama all reachable
|
||||||
|
- Circuit breaker on Ollama failures
|
||||||
|
- **Exit criterion:** Revocation E2E test green. Prompt log retention TTL works (use freeze-time to simulate). Metrics scrape returns valid Prometheus exposition. `/readyz` flips to 503 when any dependency is down.
|
||||||
|
|
||||||
|
### Phase 5 — Harden, document, release
|
||||||
|
|
||||||
|
- `docs/ARCHITECTURE.md`, `docs/DEPLOYMENT.md`, `docs/API.md`, `docs/THREAT_MODEL.md`, `docs/OPERATIONS.md` complete
|
||||||
|
- Caddyfile example with Let's Encrypt for `api.neuronetz.ai` and security headers (HSTS, X-Content-Type-Options, no Server header, no X-Powered-By)
|
||||||
|
- Systemd unit file for non-Compose deployments
|
||||||
|
- Multi-stage Dockerfile with non-root user, distroless or `python:3.12-slim` final stage, no build tools in final image
|
||||||
|
- `pip-audit` and `bandit` clean in CI
|
||||||
|
- Image scan (Trivy or Grype) clean of HIGH/CRITICAL
|
||||||
|
- Tag `v0.1.0`, build and push image, GitHub release with changelog
|
||||||
|
- **Exit criterion:** Every box in SPEC §12 checked, signed off by Control. Image runnable from a fresh host with only docker + a `.env`. README quickstart works for someone who has never seen the repo.
|
||||||
|
|
||||||
|
## Agent Role Assignments
|
||||||
|
|
||||||
|
For the multi-agent orchestrator (Fritz/UI-UX/DevOps/QA/Control/Timo/PM):
|
||||||
|
|
||||||
|
| Agent | Owns |
|
||||||
|
|---|---|
|
||||||
|
| **Backend / Fritz** | All Python code under `src/neuronetz_gateway/`, Alembic migrations, CLI. Primary author. |
|
||||||
|
| **DevOps** | Dockerfile, docker-compose.yml(s), Caddyfile, systemd unit, CI workflows, image scanning, release tagging. |
|
||||||
|
| **QA** | All tests under `tests/`. Owns coverage gate. Writes the locust scenarios. Verifies acceptance criteria at each phase exit. |
|
||||||
|
| **UI-UX** | Not active this project (no UI surface here). Console project will pick this up. |
|
||||||
|
| **Control / Timo** | Enforces phase gates. Refuses to advance a phase whose exit criterion isn't met. Runs the acceptance checklist at end of Phase 5. |
|
||||||
|
| **PM** | Tracks the phase progression, opens YouTrack tickets per phase, runs daily standups against this prompt, surfaces blockers. |
|
||||||
|
|
||||||
|
## Working Agreements
|
||||||
|
|
||||||
|
- **Branch per phase.** `phase-1-scaffold`, `phase-2-proxy-auth`, etc. Merge to `main` only after phase exit criterion is verified.
|
||||||
|
- **PRs are reviewed against SPEC.md.** "Does this match the spec? If not, is SPEC.md wrong or is the PR wrong?" — that's the review question.
|
||||||
|
- **SPEC changes are explicit.** If a phase reveals a spec mistake, amend SPEC.md in a separate PR before changing the implementation. Never drift silently.
|
||||||
|
- **Commit messages reference the section.** e.g. `auth: implement argon2id verify per SPEC §5, §9`.
|
||||||
|
- **No TODOs in main.** If something is deferred, it becomes a tracked issue, not a code comment.
|
||||||
|
- **Open questions (SPEC §13) are resolved in writing.** Decision goes in SPEC.md, not in a Slack message that gets lost.
|
||||||
|
|
||||||
|
## What "Done" Looks Like
|
||||||
|
|
||||||
|
A fresh clone, a fresh host, a domain pointing at it, and a `.env` file. `docker compose up`. Five minutes later, `curl -H "Authorization: Bearer nz_..." https://api.neuronetz.ai/v1/chat/completions -d '...'` streams a response. The Ollama port is not open. The audit log has a row. The budget counter decremented. The metrics endpoint shows the request. The locust suite passes. The threat model document explains every defense.
|
||||||
|
|
||||||
|
When all of that is true and SPEC §12 is fully ticked, ship v0.1.0.
|
||||||
|
|
||||||
|
## When You Get Stuck
|
||||||
|
|
||||||
|
- **Ambiguity in the spec → ask, don't guess.** Open a question in the PM channel; if resolved, amend SPEC.md.
|
||||||
|
- **Conflict between speed and correctness → correctness wins.** This is security infrastructure. We do not ship "good enough."
|
||||||
|
- **Conflict between scope creep and v0.1.0 → defer.** New ideas go in a follow-up issue. v0.1.0 ships per spec.
|
||||||
|
|
||||||
|
Start with Phase 1. Read SPEC.md first.
|
||||||
593
scope-docs/SPEC.md
Normal file
593
scope-docs/SPEC.md
Normal file
@@ -0,0 +1,593 @@
|
|||||||
|
# neuronetz-gateway — SPEC.md
|
||||||
|
|
||||||
|
**Project:** `neuronetz-gateway`
|
||||||
|
**Version:** 0.1.0 (target)
|
||||||
|
**Status:** Specification — not yet implemented
|
||||||
|
**License:** Apache 2.0
|
||||||
|
**Owner:** Stephan Berbig / Neuronetz
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Purpose
|
||||||
|
|
||||||
|
A secure, multi-tenant API gateway in front of an Ollama instance currently exposed at `https://api.neuronetz.ai`. The Ollama endpoint must never be reachable directly from the public internet again. All access flows through this gateway.
|
||||||
|
|
||||||
|
The gateway is the **hot path** of the Neuronetz API. A separate service (`neuronetz-console`, built on the Nibiru PHP framework) handles administration, dashboards, and tenant self-service. This SPEC covers only the gateway.
|
||||||
|
|
||||||
|
## 2. Scope
|
||||||
|
|
||||||
|
### In scope (v0.1.0)
|
||||||
|
|
||||||
|
- Authentication via API keys (Bearer tokens)
|
||||||
|
- Multi-tenant data model (tenants → keys, with inheritance)
|
||||||
|
- Per-key and per-tenant rate limiting (RPM, TPM, concurrent)
|
||||||
|
- Per-key and per-tenant token budgets (daily, monthly, total)
|
||||||
|
- Streaming and non-streaming proxy to Ollama
|
||||||
|
- Dual API surface: native Ollama (`/api/*`) and OpenAI-compatible (`/v1/*`)
|
||||||
|
- Endpoint allowlist (block all model-mutating Ollama endpoints)
|
||||||
|
- **Dynamic model discovery** from the Ollama backend — the live set of installed models is queried, cached, and auto-refreshed; nothing about the model list is hand-maintained
|
||||||
|
- Model allowlist (per-tenant override), **default-deny, resolved against the live discovered set** (stale/typo'd entries never resolve)
|
||||||
|
- **Per-tenant `allow_all_models` toggle** — opt-in: a flagged tenant may use any currently-installed model, so models newly pulled into Ollama are auto-granted on the next discovery refresh
|
||||||
|
- Request size limits, response size limits, timeouts
|
||||||
|
- Token counting from Ollama responses (precise, not heuristic)
|
||||||
|
- Audit log (always-on metadata)
|
||||||
|
- Prompt log (opt-in per key, TTL'd retention)
|
||||||
|
- Bootstrap CLI: create tenants, keys, set budgets
|
||||||
|
- Health and readiness endpoints
|
||||||
|
- Docker Compose deployment (gateway + caddy + postgres + redis + ollama)
|
||||||
|
- Caddy as TLS terminator (Let's Encrypt for `api.neuronetz.ai`)
|
||||||
|
|
||||||
|
### Out of scope (v0.1.0, document as future)
|
||||||
|
|
||||||
|
- Web admin UI (lives in `neuronetz-console`, separate repo)
|
||||||
|
- Billing / Stripe integration (budgets only, no money yet)
|
||||||
|
- Multi-region / HA / k8s
|
||||||
|
- Content moderation / prompt-injection filtering
|
||||||
|
- Response caching
|
||||||
|
- Multi-backend routing (one Ollama; pluggable backend interface stays for later)
|
||||||
|
- Webhook notifications
|
||||||
|
- SSO / OAuth2 for admin
|
||||||
|
|
||||||
|
## 3. Threat Model (abbreviated)
|
||||||
|
|
||||||
|
| Threat | Mitigation |
|
||||||
|
|---|---|
|
||||||
|
| Internet scanners hitting Ollama directly | Ollama bound to internal Docker network; never published |
|
||||||
|
| Unauthenticated API abuse | Mandatory Bearer token; fail-closed on auth errors |
|
||||||
|
| API key brute force | Argon2id hashing; constant-time compare; rate limit on auth failures per source IP |
|
||||||
|
| GPU/token exhaustion (cost attack) | Per-key TPM + token budget; per-tenant ceiling; concurrent connection cap |
|
||||||
|
| Resource exhaustion via large payloads | Request body size limit (default 256 KiB); `num_predict` cap (default 4096) |
|
||||||
|
| Model enumeration / training-data exfil via uncommon models | Model allowlist; default-deny. `allow_all_models` is **opt-in per tenant and audited**. Discovery only ever exposes models actually installed on the backend; `/api/tags` and `/v1/models` never reveal models outside the tenant's effective set; "not allowed" and "doesn't exist" return the same generic response |
|
||||||
|
| Discovery backend unreachable | Fail-closed: an empty/stale-expired discovered set means no model resolves, so requests are denied — never "allow because we couldn't list models" |
|
||||||
|
| Ollama mutation (model pull/delete) by attacker | Endpoint allowlist; mutating endpoints (`/api/pull`, `/api/push`, `/api/create`, `/api/copy`, `/api/delete`) hard-blocked at the gateway |
|
||||||
|
| Information disclosure via error messages | Sanitize upstream errors; never proxy Ollama internals to client |
|
||||||
|
| Audit log tampering | Append-only at app layer; DB role separation; optional WAL archiving |
|
||||||
|
| Prompt data leakage | Prompt logging off by default; opt-in per key; TTL'd; redaction hook |
|
||||||
|
| Redis outage causing "fail open" | Fail-closed: if rate-limit/budget backend is unavailable, deny |
|
||||||
|
| Compromised admin token | Admin token lives in `neuronetz-console`, not in gateway; gateway has no admin endpoints |
|
||||||
|
|
||||||
|
## 4. Architecture
|
||||||
|
|
||||||
|
### 4.1 Component diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
Internet
|
||||||
|
│ TLS
|
||||||
|
▼
|
||||||
|
┌──────────────────────┐
|
||||||
|
│ Caddy (sidecar) │ Let's Encrypt for api.neuronetz.ai
|
||||||
|
│ - TLS termination │ HSTS, security headers
|
||||||
|
│ - HTTP/2, HTTP/3 │
|
||||||
|
└──────────┬───────────┘
|
||||||
|
│ HTTP/1.1 internal
|
||||||
|
┌──────────▼───────────┐
|
||||||
|
│ neuronetz-gateway │ FastAPI + uvicorn
|
||||||
|
│ - authn │
|
||||||
|
│ - rate limit │
|
||||||
|
│ - budget check │
|
||||||
|
│ - proxy + stream │
|
||||||
|
│ - token count │
|
||||||
|
│ - audit write │
|
||||||
|
└──┬────────┬──────┬───┘
|
||||||
|
│ │ │
|
||||||
|
┌──────▼──┐ ┌──▼───┐ │
|
||||||
|
│Postgres │ │Redis │ │
|
||||||
|
│ schema: │ │ keys │ │
|
||||||
|
│ gateway │ │bucket│ │
|
||||||
|
└─────────┘ └──────┘ │
|
||||||
|
│ internal network only
|
||||||
|
┌──────▼──────┐
|
||||||
|
│ Ollama │
|
||||||
|
│ 127.0.0.1 │
|
||||||
|
└─────────────┘
|
||||||
|
|
||||||
|
Same Compose stack also hosts (separate from this SPEC):
|
||||||
|
- neuronetz-console (PHP/Nibiru) → reads schema `console`, reads schema `gateway` (SELECT)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 Database schemas
|
||||||
|
|
||||||
|
**Single Postgres instance, two schemas:**
|
||||||
|
|
||||||
|
- `gateway` — owned by the gateway service; gateway role has full DDL
|
||||||
|
- `console` — owned by `neuronetz-console` (out of scope here); console role has full DDL
|
||||||
|
- Both services connect with their own role. Cross-schema access is explicit GRANT.
|
||||||
|
|
||||||
|
**Console role gets `SELECT` on all `gateway.*` tables.** Console writes go only to `console.*` tables. If the console needs to mutate gateway state (e.g. revoke a key), it does so by writing to a `gateway.revocations` outbox table that the gateway tails (see §4.5).
|
||||||
|
|
||||||
|
### 4.3 Request lifecycle
|
||||||
|
|
||||||
|
1. Caddy terminates TLS, forwards to gateway on internal port.
|
||||||
|
2. Gateway middleware extracts `Authorization: Bearer <key>`.
|
||||||
|
3. The key prefix (first 12 chars) is used as the Redis cache key. On a cache miss, look up `gateway.api_keys` by prefix; verify the full key with argon2id `verify`; cache the resolved key metadata in Redis (TTL 60s).
|
||||||
|
4. Rate limit check (sliding window in Redis, Lua-atomic) — per-key RPM + per-tenant RPM.
|
||||||
|
5. Budget check (Redis counter for current period; Postgres ledger is source of truth on reset).
|
||||||
|
6. Concurrent-connection semaphore (Redis `INCR` with TTL).
|
||||||
|
7. Model allowlist check. Resolve the **effective model set** for the key:
|
||||||
|
`allow_all := key.allow_all_models ?? tenant.allow_all_models`;
|
||||||
|
`effective := discovered` if `allow_all` else `(key.allowed_models ?? tenant.allowed_models) ∩ discovered`,
|
||||||
|
where `discovered` is the cached live model set from discovery (§4.6). The request's
|
||||||
|
`model` must be in `effective`, else a generic 403 with no disclosure of whether the
|
||||||
|
model exists but is unpermitted vs. is not installed.
|
||||||
|
8. Endpoint allowlist check.
|
||||||
|
9. Request body validation (size, schema, `num_predict` cap).
|
||||||
|
10. If OpenAI-compat path, translate request to Ollama schema.
|
||||||
|
11. Open httpx async stream to Ollama.
|
||||||
|
12. Stream response back to client, accumulating final `prompt_eval_count` + `eval_count`.
|
||||||
|
13. On stream close: write `gateway.audit_log` row; decrement budget; release semaphore; if prompt logging enabled, write `gateway.prompt_log` row.
|
||||||
|
14. On any failure: sanitized error to client, audit row with status code, semaphore released.
|
||||||
|
|
||||||
|
### 4.4 Failure modes (fail-closed)
|
||||||
|
|
||||||
|
| Subsystem | If down | Behavior |
|
||||||
|
|---|---|---|
|
||||||
|
| Postgres (read) | Key lookup fails | 503 with retry-after; no requests proxied |
|
||||||
|
| Postgres (write) | Audit write fails | Request still succeeds; the audit row is buffered in an in-memory ring (max 1000) and drained on recovery; if the buffer fills, switch to deny mode |
|
||||||
|
| Redis | Rate limit / budget unavailable | 503 — fail closed. Never "allow because we can't check." |
|
||||||
|
| Ollama | Upstream unreachable | 502 with retry-after; circuit breaker opens after 5 consecutive failures, half-open after 30s |
|
||||||
|
| Caddy | Not a gateway concern | — |
|
||||||
|
|
||||||
|
### 4.5 Cache invalidation (key revocation)
|
||||||
|
|
||||||
|
Console can revoke a key by inserting into `gateway.revocations(key_id, ts, reason)`. Gateway has a background task (`asyncio.create_task` in lifespan) that:
|
||||||
|
- LISTENs on Postgres channel `key_revoked` (gateway emits NOTIFY on its own write path; console emits via INSERT trigger)
|
||||||
|
- On notification, evicts the Redis cache entry for that key's prefix
|
||||||
|
- This makes revocation effectively immediate (≤ Redis RTT) without cross-service HTTP
|
||||||
|
|
||||||
|
### 4.6 Model discovery
|
||||||
|
|
||||||
|
The set of usable models is **never hand-maintained**; it is extracted live from the
|
||||||
|
Ollama backend.
|
||||||
|
|
||||||
|
- A background task (started in lifespan, like the revocation listener) polls Ollama
|
||||||
|
`GET /api/tags` every `MODEL_DISCOVERY_REFRESH_S` seconds.
|
||||||
|
- The parsed model set (names + sanitized metadata: family, parameter size, quantization,
|
||||||
|
size bytes, modified-at) is cached in Redis under `gateway:models:discovered` with TTL
|
||||||
|
`MODEL_DISCOVERY_CACHE_TTL_S`, and held in-process for hot reads on the request path.
|
||||||
|
- On startup an initial fetch runs; if Ollama is unreachable the discovered set is empty.
|
||||||
|
- **Fail-closed:** if the discovered set is empty or its cache has expired and cannot be
|
||||||
|
refreshed, no model resolves and requests are denied (consistent with default-deny).
|
||||||
|
Discovery never opens access on failure.
|
||||||
|
- "Auto-grant": because the effective set (§4.3 step 7) intersects with `discovered` (or
|
||||||
|
*is* `discovered` when `allow_all_models`), a model pulled into Ollama out-of-band
|
||||||
|
becomes usable to `allow_all` tenants on the next refresh — no per-tenant config change.
|
||||||
|
- Discovery is **read-only** against Ollama and uses only the allowlisted `/api/tags`
|
||||||
|
endpoint; it never triggers a model pull.
|
||||||
|
|
||||||
|
## 5. Data Model (schema `gateway`)
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE SCHEMA gateway;
|
||||||
|
|
||||||
|
CREATE TYPE gateway.key_status AS ENUM ('active', 'disabled', 'revoked');
|
||||||
|
CREATE TYPE gateway.tenant_status AS ENUM ('active', 'suspended', 'closed');
|
||||||
|
CREATE TYPE gateway.budget_period AS ENUM ('day', 'month', 'total');
|
||||||
|
|
||||||
|
CREATE TABLE gateway.tenants (
|
||||||
|
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
name text NOT NULL UNIQUE,
|
||||||
|
status gateway.tenant_status NOT NULL DEFAULT 'active',
|
||||||
|
created_at timestamptz NOT NULL DEFAULT now(),
|
||||||
|
metadata jsonb NOT NULL DEFAULT '{}'::jsonb
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.tenant_limits (
|
||||||
|
tenant_id uuid PRIMARY KEY REFERENCES gateway.tenants(id) ON DELETE CASCADE,
|
||||||
|
rpm integer NOT NULL DEFAULT 60,
|
||||||
|
tpm integer NOT NULL DEFAULT 100000,
|
||||||
|
concurrent integer NOT NULL DEFAULT 8,
|
||||||
|
tokens_daily bigint,
|
||||||
|
tokens_monthly bigint,
|
||||||
|
tokens_total bigint,
|
||||||
|
allowed_models text[] NOT NULL DEFAULT '{}',
|
||||||
|
allow_all_models boolean NOT NULL DEFAULT false, -- opt-in: allow any installed model
|
||||||
|
log_prompts_default boolean NOT NULL DEFAULT false,
|
||||||
|
prompt_retention_days integer NOT NULL DEFAULT 30,
|
||||||
|
audit_retention_days integer NOT NULL DEFAULT 365
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.api_keys (
|
||||||
|
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
|
tenant_id uuid NOT NULL REFERENCES gateway.tenants(id) ON DELETE CASCADE,
|
||||||
|
prefix text NOT NULL UNIQUE, -- first 12 chars, indexed
|
||||||
|
key_hash text NOT NULL, -- argon2id
|
||||||
|
name text NOT NULL,
|
||||||
|
status gateway.key_status NOT NULL DEFAULT 'active',
|
||||||
|
scopes text[] NOT NULL DEFAULT '{chat,embeddings}',
|
||||||
|
created_at timestamptz NOT NULL DEFAULT now(),
|
||||||
|
last_used_at timestamptz,
|
||||||
|
expires_at timestamptz,
|
||||||
|
log_prompts boolean, -- NULL = inherit from tenant
|
||||||
|
metadata jsonb NOT NULL DEFAULT '{}'::jsonb
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_api_keys_prefix ON gateway.api_keys(prefix) WHERE status = 'active';
|
||||||
|
CREATE INDEX idx_api_keys_tenant ON gateway.api_keys(tenant_id);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.key_limits (
|
||||||
|
key_id uuid PRIMARY KEY REFERENCES gateway.api_keys(id) ON DELETE CASCADE,
|
||||||
|
rpm integer, -- NULL = inherit tenant
|
||||||
|
tpm integer,
|
||||||
|
concurrent integer,
|
||||||
|
tokens_daily bigint,
|
||||||
|
tokens_monthly bigint,
|
||||||
|
tokens_total bigint,
|
||||||
|
allowed_models text[], -- NULL = inherit tenant
|
||||||
|
allow_all_models boolean -- NULL = inherit tenant
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.budget_usage (
|
||||||
|
key_id uuid NOT NULL REFERENCES gateway.api_keys(id) ON DELETE CASCADE,
|
||||||
|
period gateway.budget_period NOT NULL,
|
||||||
|
period_start timestamptz NOT NULL,
|
||||||
|
tokens_in bigint NOT NULL DEFAULT 0,
|
||||||
|
tokens_out bigint NOT NULL DEFAULT 0,
|
||||||
|
requests bigint NOT NULL DEFAULT 0,
|
||||||
|
PRIMARY KEY (key_id, period, period_start)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_budget_usage_period ON gateway.budget_usage(period, period_start);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.audit_log (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
ts timestamptz NOT NULL DEFAULT now(),
|
||||||
|
request_id uuid NOT NULL,
|
||||||
|
tenant_id uuid, -- nullable for auth-failed rows
|
||||||
|
key_id uuid,
|
||||||
|
key_prefix text, -- denormalized for forensic queries
|
||||||
|
method text NOT NULL,
|
||||||
|
path text NOT NULL,
|
||||||
|
model text,
|
||||||
|
tokens_in integer,
|
||||||
|
tokens_out integer,
|
||||||
|
latency_ms integer,
|
||||||
|
status integer NOT NULL,
|
||||||
|
client_ip inet,
|
||||||
|
user_agent text,
|
||||||
|
error_code text
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_audit_ts ON gateway.audit_log(ts);
|
||||||
|
CREATE INDEX idx_audit_tenant_ts ON gateway.audit_log(tenant_id, ts);
|
||||||
|
CREATE INDEX idx_audit_key_ts ON gateway.audit_log(key_id, ts);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.prompt_log (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
audit_id bigint NOT NULL REFERENCES gateway.audit_log(id) ON DELETE CASCADE,
|
||||||
|
ts timestamptz NOT NULL DEFAULT now(),
|
||||||
|
key_id uuid NOT NULL,
|
||||||
|
request_body jsonb NOT NULL,
|
||||||
|
response_text text,
|
||||||
|
retention_until timestamptz NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_prompt_log_retention ON gateway.prompt_log(retention_until);
|
||||||
|
|
||||||
|
CREATE TABLE gateway.revocations (
|
||||||
|
id bigserial PRIMARY KEY,
|
||||||
|
key_id uuid NOT NULL,
|
||||||
|
ts timestamptz NOT NULL DEFAULT now(),
|
||||||
|
reason text,
|
||||||
|
processed_at timestamptz
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Trigger to NOTIFY on revocation insert
|
||||||
|
CREATE OR REPLACE FUNCTION gateway.notify_key_revoked() RETURNS trigger AS $$
|
||||||
|
BEGIN
|
||||||
|
PERFORM pg_notify('key_revoked', NEW.key_id::text);
|
||||||
|
RETURN NEW;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE TRIGGER trg_notify_key_revoked
|
||||||
|
AFTER INSERT ON gateway.revocations
|
||||||
|
FOR EACH ROW EXECUTE FUNCTION gateway.notify_key_revoked();
|
||||||
|
|
||||||
|
-- Grants for console role (created in console SPEC, referenced here)
|
||||||
|
-- GRANT USAGE ON SCHEMA gateway TO console_role;
|
||||||
|
-- GRANT SELECT ON ALL TABLES IN SCHEMA gateway TO console_role;
|
||||||
|
-- GRANT INSERT ON gateway.revocations TO console_role;
|
||||||
|
```
|
||||||
|
|
||||||
|
## 6. API Surface
|
||||||
|
|
||||||
|
### 6.1 Native Ollama passthrough (allowlisted)
|
||||||
|
|
||||||
|
| Path | Method | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| `/api/chat` | POST | Streamed (NDJSON) and non-streamed |
|
||||||
|
| `/api/generate` | POST | Streamed (NDJSON) and non-streamed |
|
||||||
|
| `/api/embeddings` | POST | Non-streamed |
|
||||||
|
| `/api/embed` | POST | Newer Ollama embeddings endpoint |
|
||||||
|
| `/api/tags` | GET | Returns the tenant's **effective** model set (live-discovered ∩ allowed, or *all* discovered when `allow_all_models`). Sourced from discovery (§4.6), never a static list |
|
||||||
|
| `/api/show` | POST | Allowed only for models in the tenant's effective set; returns sanitized model info (no system prompts, no template) |
|
||||||
|
| `/api/ps` | GET | **Blocked** — leaks loaded models |
|
||||||
|
| `/api/version` | GET | Returns gateway version, not Ollama version |
|
||||||
|
|
||||||
|
### 6.2 Hard-blocked Ollama endpoints (always 403)
|
||||||
|
|
||||||
|
`/api/pull`, `/api/push`, `/api/create`, `/api/copy`, `/api/delete`, `/api/blobs/*`
|
||||||
|
|
||||||
|
### 6.3 OpenAI-compatible
|
||||||
|
|
||||||
|
| Path | Method | Maps to |
|
||||||
|
|---|---|---|
|
||||||
|
| `/v1/chat/completions` | POST | `/api/chat` |
|
||||||
|
| `/v1/completions` | POST | `/api/generate` |
|
||||||
|
| `/v1/embeddings` | POST | `/api/embed` |
|
||||||
|
| `/v1/models` | GET | `/api/tags` (the tenant's effective discovered set), in OpenAI model-list format |
|
||||||
|
|
||||||
|
Translation must preserve streaming. SSE (`data: {...}\n\n`) for OpenAI-compat; NDJSON for native.
|
||||||
|
|
||||||
|
### 6.4 Gateway endpoints
|
||||||
|
|
||||||
|
| Path | Method | Auth | Purpose |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `/healthz` | GET | none | Liveness — process responsive |
|
||||||
|
| `/readyz` | GET | none | Readiness — DB + Redis + Ollama all reachable |
|
||||||
|
| `/metrics` | GET | none (loopback only) | Prometheus exposition (counters, histograms) |
|
||||||
|
|
||||||
|
No admin endpoints. Admin lives in `neuronetz-console`.
|
||||||
|
|
||||||
|
### 6.5 Response headers
|
||||||
|
|
||||||
|
Every proxied response carries:
|
||||||
|
- `X-Request-ID: <uuid>`
|
||||||
|
- `X-RateLimit-Limit-Requests: <n>`
|
||||||
|
- `X-RateLimit-Remaining-Requests: <n>`
|
||||||
|
- `X-RateLimit-Limit-Tokens: <n>`
|
||||||
|
- `X-RateLimit-Remaining-Tokens: <n>`
|
||||||
|
- `X-Budget-Period: day|month|total`
|
||||||
|
- `X-Budget-Tokens-Remaining: <n>`
|
||||||
|
|
||||||
|
429 responses additionally carry `Retry-After: <seconds>`.
|
||||||
|
|
||||||
|
## 7. Configuration
|
||||||
|
|
||||||
|
All via environment variables, validated by Pydantic Settings on boot. Boot fails loudly on invalid config.
|
||||||
|
|
||||||
|
```
|
||||||
|
# Service
|
||||||
|
GATEWAY_BIND_HOST=0.0.0.0
|
||||||
|
GATEWAY_BIND_PORT=8080
|
||||||
|
GATEWAY_LOG_LEVEL=INFO
|
||||||
|
GATEWAY_LOG_FORMAT=json # json|console
|
||||||
|
GATEWAY_REQUEST_ID_HEADER=X-Request-ID
|
||||||
|
GATEWAY_TRUSTED_PROXIES=127.0.0.1,caddy # for X-Forwarded-For
|
||||||
|
|
||||||
|
# Upstream
|
||||||
|
OLLAMA_BASE_URL=http://ollama:11434
|
||||||
|
OLLAMA_CONNECT_TIMEOUT_S=5
|
||||||
|
OLLAMA_READ_TIMEOUT_S=600
|
||||||
|
OLLAMA_MAX_CONNECTIONS=64
|
||||||
|
|
||||||
|
# Model discovery (§4.6)
|
||||||
|
MODEL_DISCOVERY_REFRESH_S=60 # how often to re-query Ollama /api/tags
|
||||||
|
MODEL_DISCOVERY_CACHE_TTL_S=120 # Redis cache TTL for the discovered model set
|
||||||
|
|
||||||
|
# Database
|
||||||
|
DATABASE_URL=postgresql+asyncpg://gateway:...@postgres:5432/neuronetz
|
||||||
|
DATABASE_POOL_SIZE=10
|
||||||
|
DATABASE_POOL_OVERFLOW=20
|
||||||
|
|
||||||
|
# Redis
|
||||||
|
REDIS_URL=redis://redis:6379/0
|
||||||
|
REDIS_KEY_CACHE_TTL_S=60
|
||||||
|
|
||||||
|
# Limits (defaults; per-tenant/key overrides in DB)
|
||||||
|
DEFAULT_RPM=60
|
||||||
|
DEFAULT_TPM=100000
|
||||||
|
DEFAULT_CONCURRENT=8
|
||||||
|
MAX_REQUEST_BODY_BYTES=262144
|
||||||
|
MAX_NUM_PREDICT=4096
|
||||||
|
|
||||||
|
# Security
|
||||||
|
ARGON2_TIME_COST=3
|
||||||
|
ARGON2_MEMORY_COST_KIB=65536
|
||||||
|
ARGON2_PARALLELISM=4
|
||||||
|
AUTH_FAILURE_RATE_LIMIT_PER_IP_PER_MIN=20
|
||||||
|
|
||||||
|
# Audit
|
||||||
|
AUDIT_BUFFER_SIZE=1000
|
||||||
|
PROMPT_LOG_DEFAULT_RETENTION_DAYS=30
|
||||||
|
AUDIT_LOG_DEFAULT_RETENTION_DAYS=365
|
||||||
|
```
|
||||||
|
|
||||||
|
## 8. Repository Layout
|
||||||
|
|
||||||
|
```
|
||||||
|
neuronetz-gateway/
|
||||||
|
├── pyproject.toml # uv-managed, ruff, mypy --strict, pytest
|
||||||
|
├── README.md
|
||||||
|
├── LICENSE # Apache 2.0
|
||||||
|
├── docker-compose.yml # full stack incl. console placeholder
|
||||||
|
├── docker-compose.dev.yml # without caddy, gateway exposed on localhost
|
||||||
|
├── Dockerfile # multi-stage, python:3.12-slim base
|
||||||
|
├── .env.example
|
||||||
|
├── .dockerignore
|
||||||
|
├── .gitignore
|
||||||
|
├── alembic.ini
|
||||||
|
├── alembic/
|
||||||
|
│ ├── env.py
|
||||||
|
│ └── versions/
|
||||||
|
│ └── 0001_initial.py # creates schema `gateway` and all tables
|
||||||
|
├── ops/
|
||||||
|
│ ├── caddy/
|
||||||
|
│ │ └── Caddyfile.example
|
||||||
|
│ └── systemd/
|
||||||
|
│ └── neuronetz-gateway.service
|
||||||
|
├── src/neuronetz_gateway/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── __main__.py # uvicorn entry
|
||||||
|
│ ├── app.py # FastAPI factory
|
||||||
|
│ ├── config.py # Pydantic Settings
|
||||||
|
│ ├── deps.py # DI providers
|
||||||
|
│ ├── lifespan.py # startup/shutdown, NOTIFY listener
|
||||||
|
│ ├── errors.py # exception types, handlers, sanitization
|
||||||
|
│ ├── auth/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── hashing.py # argon2id wrapper
|
||||||
|
│ │ ├── keys.py # key generation, prefix, verify
|
||||||
|
│ │ └── middleware.py
|
||||||
|
│ ├── ratelimit/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── sliding_window.py # Redis Lua script
|
||||||
|
│ │ └── concurrency.py # semaphore via Redis
|
||||||
|
│ ├── budget/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── counter.py # Redis period counters
|
||||||
|
│ │ └── ledger.py # Postgres reconciliation
|
||||||
|
│ ├── proxy/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── ollama.py # httpx streaming client
|
||||||
|
│ │ ├── translate.py # OpenAI <-> Ollama schemas
|
||||||
|
│ │ ├── token_counter.py # parse usage from stream
|
||||||
|
│ │ ├── discovery.py # live model discovery from Ollama /api/tags (§4.6)
|
||||||
|
│ │ └── allowlist.py # effective-set resolution (allow_all / allowed ∩ discovered)
|
||||||
|
│ ├── routes/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── ollama_native.py
|
||||||
|
│ │ ├── openai_compat.py
|
||||||
|
│ │ └── health.py
|
||||||
|
│ ├── db/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── session.py
|
||||||
|
│ │ ├── models.py # SQLAlchemy 2.0
|
||||||
|
│ │ └── repositories.py
|
||||||
|
│ ├── audit/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── writer.py # buffered async writer
|
||||||
|
│ │ └── prompt_log.py
|
||||||
|
│ ├── observability/
|
||||||
|
│ │ ├── __init__.py
|
||||||
|
│ │ ├── logging.py # structlog config
|
||||||
|
│ │ └── metrics.py # prometheus
|
||||||
|
│ └── cli/
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ └── manage.py # typer: create-tenant, create-key, ...
|
||||||
|
├── tests/
|
||||||
|
│ ├── conftest.py # testcontainers fixtures
|
||||||
|
│ ├── unit/
|
||||||
|
│ │ ├── test_hashing.py
|
||||||
|
│ │ ├── test_translate.py
|
||||||
|
│ │ ├── test_token_counter.py
|
||||||
|
│ │ ├── test_discovery.py
|
||||||
|
│ │ ├── test_allowlist.py
|
||||||
|
│ │ └── test_sliding_window.py
|
||||||
|
│ ├── integration/
|
||||||
|
│ │ ├── test_auth_flow.py
|
||||||
|
│ │ ├── test_rate_limit.py
|
||||||
|
│ │ ├── test_budget.py
|
||||||
|
│ │ ├── test_proxy_stream.py
|
||||||
|
│ │ ├── test_openai_compat.py
|
||||||
|
│ │ ├── test_revocation.py
|
||||||
|
│ │ └── mock_ollama.py # FastAPI mock with NDJSON/SSE
|
||||||
|
│ └── load/
|
||||||
|
│ └── locustfile.py
|
||||||
|
└── docs/
|
||||||
|
├── ARCHITECTURE.md
|
||||||
|
├── DEPLOYMENT.md
|
||||||
|
├── API.md
|
||||||
|
├── THREAT_MODEL.md
|
||||||
|
└── OPERATIONS.md # runbook: revoke key, rotate, check usage
|
||||||
|
```
|
||||||
|
|
||||||
|
## 9. Non-Functional Requirements
|
||||||
|
|
||||||
|
- **Performance:** p50 overhead < 5 ms over direct Ollama call (auth + ratelimit + audit); p99 < 25 ms (excluding upstream latency)
|
||||||
|
- **Streaming:** Time-to-first-byte must not be degraded by gateway logic — audit write happens **after** stream close
|
||||||
|
- **Memory:** Steady-state RSS < 200 MiB per gateway worker under 100 concurrent streams
|
||||||
|
- **Concurrency:** Handle 200 concurrent connections per worker; 4 workers per instance default
|
||||||
|
- **Test coverage:** ≥ 85% line coverage on `src/neuronetz_gateway/` excluding `__main__` and CLI; 100% on `auth/`, `ratelimit/`, `budget/`
|
||||||
|
- **Security:** No `eval`, no `exec`, no shell-out, no `pickle`. Bandit clean. `pip-audit` clean on every CI run.
|
||||||
|
- **Type safety:** `mypy --strict` clean
|
||||||
|
- **Lint:** `ruff check` clean with project ruleset (E, F, I, B, UP, S, ASYNC)
|
||||||
|
|
||||||
|
## 10. Tooling
|
||||||
|
|
||||||
|
- Python 3.12
|
||||||
|
- `uv` for dependency management (pyproject.toml + uv.lock)
|
||||||
|
- FastAPI ≥ 0.115, uvicorn[standard], httpx ≥ 0.27, SQLAlchemy 2.0 (async), asyncpg, redis ≥ 5.0 (with hiredis), structlog, pydantic ≥ 2.9, pydantic-settings, argon2-cffi, typer, prometheus-client
|
||||||
|
- Test: pytest, pytest-asyncio, pytest-cov, testcontainers, httpx (test client), respx (mock), locust
|
||||||
|
- Lint/format: ruff, mypy --strict, bandit, pip-audit
|
||||||
|
- CI: GitHub Actions workflow (lint, type, test with coverage, build image, push on tag)
|
||||||
|
|
||||||
|
## 11. Bootstrap CLI (Typer)
|
||||||
|
|
||||||
|
```
|
||||||
|
neuronetz-gateway create-tenant --name "acme" [--rpm 60] [--tpm 100000]
|
||||||
|
neuronetz-gateway create-key --tenant acme --name "prod-server-1" [--scopes chat,embeddings]
|
||||||
|
neuronetz-gateway revoke-key --prefix nz_abc12345
|
||||||
|
neuronetz-gateway list-keys --tenant acme
|
||||||
|
neuronetz-gateway show-usage --tenant acme [--period day|month|total]
|
||||||
|
neuronetz-gateway set-budget --key nz_abc12345 --daily 1000000 --monthly 30000000
|
||||||
|
neuronetz-gateway set-models --tenant acme --models llama3.1:8b,mistral:7b
|
||||||
|
neuronetz-gateway set-models --tenant acme --allow-all # opt into allow_all_models
|
||||||
|
neuronetz-gateway set-models --tenant acme --no-allow-all # back to explicit allowlist
|
||||||
|
neuronetz-gateway list-models [--tenant acme] # show live-discovered models
|
||||||
|
# (and the tenant's effective set)
|
||||||
|
```
|
||||||
|
|
||||||
|
`create-tenant` accepts `--allow-all-models / --no-allow-all-models` (default off).
|
||||||
|
`list-models` reads the discovery cache (§4.6); with `--tenant` it also shows that tenant's
|
||||||
|
resolved effective set.
|
||||||
|
|
||||||
|
Key format: `nz_<12-char-prefix><32-char-random>`. Prefix is stored; full key is hashed (argon2id). On creation, the full key is printed exactly once.
|
||||||
|
|
||||||
|
## 12. Acceptance Criteria
|
||||||
|
|
||||||
|
The build is "done" when every box below is checked. The orchestrator must verify each before declaring v0.1.0.
|
||||||
|
|
||||||
|
- [ ] `docker compose up` from a clean checkout produces a running stack with TLS via Caddy (self-signed in dev, Let's Encrypt-ready in prod).
|
||||||
|
- [ ] CLI creates tenant and key; printed key successfully authenticates an `/api/chat` call.
|
||||||
|
- [ ] Unauthenticated request returns 401 with no Ollama details leaked.
|
||||||
|
- [ ] Request to `/api/pull` returns 403 with generic error message.
|
||||||
|
- [ ] Streaming `/api/chat` works end-to-end; the first byte arrives within Ollama's own TTFB plus less than 10 ms of gateway overhead.
|
||||||
|
- [ ] Streaming `/v1/chat/completions` returns valid SSE with `data: [DONE]` terminator.
|
||||||
|
- [ ] Token counts in audit log match Ollama's reported `prompt_eval_count` + `eval_count` exactly.
|
||||||
|
- [ ] `/api/tags` and `/v1/models` reflect the **live** Ollama model set (discovery, §4.6): an `allow_all_models` tenant sees every installed model and a newly-pulled model appears within one refresh interval; a default-deny tenant sees only `allowed_models ∩ discovered`; a request for a model outside the effective set returns a generic 403; with discovery unavailable, requests fail closed (deny), not open.
|
||||||
|
- [ ] Rate limit triggers at configured RPM with `Retry-After` header.
|
||||||
|
- [ ] Token budget enforces and blocks at zero remaining with descriptive error.
|
||||||
|
- [ ] Redis outage causes 503 (fail-closed), not 200.
|
||||||
|
- [ ] Revocation via `INSERT INTO gateway.revocations` evicts Redis cache within 1 second.
|
||||||
|
- [ ] `mypy --strict`, `ruff check`, `bandit`, `pip-audit` all clean in CI.
|
||||||
|
- [ ] Test coverage ≥ 85% overall, 100% in `auth/`, `ratelimit/`, `budget/`.
|
||||||
|
- [ ] `docs/THREAT_MODEL.md`, `docs/DEPLOYMENT.md`, `docs/OPERATIONS.md` present and accurate.
|
||||||
|
- [ ] Load test (locust): 100 concurrent users sustained 5 minutes, p99 gateway overhead < 25 ms, zero 5xx outside induced failures.
|
||||||
|
|
||||||
|
## 13. Open Questions (decide during build)
|
||||||
|
|
||||||
|
1. Embedding cost accounting — Ollama doesn't return `eval_count` for embeddings. Decision: charge based on `prompt_eval_count` only; document as such.
|
||||||
|
2. SSE vs NDJSON heuristic for OpenAI-compat — always SSE per OpenAI spec. NDJSON only on native `/api/*`.
|
||||||
|
3. Prometheus cardinality — do not label by `key_id` (too many series); label by `tenant_id` only; per-key data lives in Postgres.
|
||||||
|
4. **Model discovery source** — the live model list is `GET /api/tags` on the Ollama backend; there is no separate registry. Cached in Redis + in-process, refreshed every `MODEL_DISCOVERY_REFRESH_S`.
|
||||||
|
5. **Discovery failure is fail-closed** — empty/expired discovered set ⇒ no model resolves ⇒ deny. Discovery never opens access on error.
|
||||||
|
6. **No existence disclosure** — a model that is installed-but-unpermitted and a model that is not installed both return the same generic response, to prevent enumeration.
|
||||||
|
7. **`allow_all_models` precedence** — key-level `allow_all_models` (when non-NULL) overrides the tenant flag; otherwise the tenant flag applies. Same NULL-inherits-tenant rule as the other key limits.
|
||||||
|
|
||||||
|
## 14. References
|
||||||
|
|
||||||
|
- Ollama API: https://github.com/ollama/ollama/blob/main/docs/api.md
|
||||||
|
- OpenAI Chat Completions: https://platform.openai.com/docs/api-reference/chat
|
||||||
|
- Nibiru (sibling console project): https://nibiru-framework.com
|
||||||
|
- Argon2 RFC 9106
|
||||||
7
src/neuronetz_gateway/__init__.py
Normal file
7
src/neuronetz_gateway/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
"""neuronetz-gateway: secure multi-tenant API gateway in front of Ollama."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
__version__ = "0.1.0"
|
||||||
|
|
||||||
|
__all__ = ["__version__"]
|
||||||
28
src/neuronetz_gateway/__main__.py
Normal file
28
src/neuronetz_gateway/__main__.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""Uvicorn entry point: ``python -m neuronetz_gateway``.
|
||||||
|
|
||||||
|
Binds the app to ``GATEWAY_BIND_HOST``:``GATEWAY_BIND_PORT`` (default
|
||||||
|
0.0.0.0:8080). The factory string is passed to uvicorn so the app is built in
|
||||||
|
the worker process.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uvicorn
|
||||||
|
|
||||||
|
from neuronetz_gateway.config import get_settings
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Start uvicorn for the gateway on the configured host and port.

    The application is passed as an import string together with
    ``factory=True``, so uvicorn calls ``create_app`` itself rather than
    receiving an already-built app object.
    """
    cfg = get_settings()
    bind_host = cfg.gateway_bind_host
    bind_port = cfg.gateway_bind_port
    # The configured level (default "INFO") is lower-cased before being
    # handed to uvicorn.
    uvicorn_level = cfg.gateway_log_level.lower()

    uvicorn.run(
        "neuronetz_gateway.app:create_app",
        factory=True,
        host=bind_host,
        port=bind_port,
        log_level=uvicorn_level,
    )


if __name__ == "__main__":
    main()
|
||||||
111
src/neuronetz_gateway/app.py
Normal file
111
src/neuronetz_gateway/app.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
"""FastAPI application factory.
|
||||||
|
|
||||||
|
``create_app()`` is the shared contract entry point: other agents (DevOps, QA)
|
||||||
|
import and serve this. It configures logging, installs the request-id and auth
|
||||||
|
middleware, registers the sanitizing exception handlers, mounts routers, and
|
||||||
|
binds the lifespan that manages backend handles + background tasks.
|
||||||
|
|
||||||
|
Production safety: FastAPI's ``/docs`` + ``/openapi.json`` are disabled by
|
||||||
|
default (enabled only via ``DOCS_ENABLED``). The ``/playground`` route is served
|
||||||
|
only when ``PLAYGROUND_ENABLED`` is true and ``PLAYGROUND_FILE`` exists.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from fastapi import FastAPI, Request
|
||||||
|
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
||||||
|
from starlette.responses import HTMLResponse, Response
|
||||||
|
from starlette.types import ASGIApp
|
||||||
|
|
||||||
|
from neuronetz_gateway import __version__
|
||||||
|
from neuronetz_gateway.auth.middleware import AuthMiddleware
|
||||||
|
from neuronetz_gateway.config import Settings, get_settings
|
||||||
|
from neuronetz_gateway.errors import register_exception_handlers
|
||||||
|
from neuronetz_gateway.lifespan import lifespan
|
||||||
|
from neuronetz_gateway.observability.logging import configure_logging
|
||||||
|
from neuronetz_gateway.routes import health, ollama_native, openai_compat
|
||||||
|
|
||||||
|
|
||||||
|
class RequestIDMiddleware(BaseHTTPMiddleware):
    """Assign/propagate a request id and expose it on ``request.state``.

    An inbound id (read from the configured header) is reused only when it
    parses as a UUID, in which case it is normalised to canonical hyphenated
    form. A missing, empty or malformed value is discarded and a fresh UUID4
    is minted instead. This keeps the id compatible with the ``uuid`` audit
    column and stops arbitrary client-supplied text from being echoed back or
    written to logs.

    The chosen id is stored on ``request.state.request_id`` before the rest of
    the stack runs and is set on the response under the same header name.

    NOTE(review): the sender is not checked against the trusted-proxy list
    here; only the *shape* of the value is validated.
    """

    def __init__(self, app: ASGIApp, header_name: str) -> None:
        """Wrap *app* and remember which header carries the request id."""
        super().__init__(app)
        self._header = header_name

    @staticmethod
    def _valid_inbound_id(raw: str | None) -> str | None:
        """Return *raw* as a canonical UUID string, or ``None`` if unusable."""
        if not raw:
            return None
        try:
            return str(uuid.UUID(raw.strip()))
        except ValueError:
            # Not a UUID: never trust or echo free-form client input.
            return None

    async def dispatch(
        self, request: Request, call_next: RequestResponseEndpoint
    ) -> Response:
        """Attach a request id to the request state and to the response."""
        inbound = self._valid_inbound_id(request.headers.get(self._header))
        request_id = inbound or str(uuid.uuid4())
        request.state.request_id = request_id
        response = await call_next(request)
        response.headers[self._header] = request_id
        return response
|
||||||
|
|
||||||
|
|
||||||
|
def _register_playground(app: FastAPI, cfg: Settings) -> None:
    """Register the ``/playground`` route that serves a single HTML file.

    The route is excluded from the OpenAPI schema. On every request the file
    named by ``cfg.playground_file`` is read in a worker thread via
    ``asyncio.to_thread`` so the blocking read does not run on the event loop.
    If the path is not a regular file, the response is a plain 404.
    """
    import asyncio as _asyncio

    def _read_asset(location: str) -> str | None:
        # None signals "nothing to serve"; the handler maps it to a 404.
        asset = Path(location)
        return asset.read_text(encoding="utf-8") if asset.is_file() else None

    @app.get("/playground", include_in_schema=False)
    async def playground() -> Response:
        html = await _asyncio.to_thread(_read_asset, cfg.playground_file)
        if html is not None:
            return HTMLResponse(html)
        return Response(status_code=404, content="Not found")
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(settings: Settings | None = None) -> FastAPI:
    """Assemble the FastAPI application.

    Uses *settings* when given, otherwise the cached ``get_settings()`` value.
    Logging is configured first; then the app is created with the interactive
    docs endpoints enabled only when ``docs_enabled`` is set, middleware and
    exception handlers are installed, routers are mounted, and the optional
    playground route is registered.
    """
    cfg = settings or get_settings()
    configure_logging(level=cfg.gateway_log_level, fmt=cfg.gateway_log_format)

    # All three documentation endpoints are switched by the same flag.
    expose_docs = cfg.docs_enabled
    app = FastAPI(
        title="neuronetz-gateway",
        version=__version__,
        lifespan=lifespan,
        docs_url="/docs" if expose_docs else None,
        redoc_url="/redoc" if expose_docs else None,
        openapi_url="/openapi.json" if expose_docs else None,
    )

    # Some test setups reach the auth middleware before lifespan has run, so
    # the settings are published here as well; lifespan sets the same value.
    app.state.settings = cfg

    # Order matters: the middleware added last wraps outermost. Auth is added
    # first and RequestID second, so RequestID runs around Auth and a request
    # id already exists when Auth emits its sanitized 401.
    app.add_middleware(AuthMiddleware)
    app.add_middleware(RequestIDMiddleware, header_name=cfg.gateway_request_id_header)

    register_exception_handlers(app)

    # Router registration order is kept as-is.
    for module in (health, openai_compat, ollama_native):
        app.include_router(module.router)

    if cfg.playground_enabled:
        _register_playground(app, cfg)

    return app


__all__ = ["RequestIDMiddleware", "create_app"]
|
||||||
86
src/neuronetz_gateway/config.py
Normal file
86
src/neuronetz_gateway/config.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
"""Application configuration via Pydantic Settings v2.
|
||||||
|
|
||||||
|
Reads every environment variable documented in SPEC §7 with the documented
|
||||||
|
defaults. Boot fails loudly (ValidationError) on invalid config.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
from pydantic import Field
|
||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Typed runtime configuration for the gateway.

    Values are loaded by pydantic-settings; a ``.env`` file (UTF-8) is also
    consulted, matching is case-insensitive, and unknown keys are ignored.
    Each field carries the default used when nothing overrides it.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
        case_sensitive=False,
    )

    # ------------------------------------------------------------------ Service
    gateway_bind_host: str = Field(default="0.0.0.0")  # noqa: S104 - bind-all is intended in container
    gateway_bind_port: int = Field(default=8080)
    gateway_log_level: str = Field(default="INFO")
    gateway_log_format: str = Field(default="json")  # json|console
    gateway_request_id_header: str = Field(default="X-Request-ID")
    # Comma-separated; see ``trusted_proxies_list`` for the parsed form.
    gateway_trusted_proxies: str = Field(default="127.0.0.1,caddy")

    # -------------------------------------------------------- Upstream (Ollama)
    ollama_base_url: str = Field(default="http://ollama:11434")
    ollama_connect_timeout_s: int = Field(default=5)
    ollama_read_timeout_s: int = Field(default=600)
    ollama_max_connections: int = Field(default=64)

    # ---------------------------------------------- Model discovery (SPEC §4.6)
    model_discovery_refresh_s: int = Field(default=60)
    model_discovery_cache_ttl_s: int = Field(default=120)

    # ----------------------------------------------------------------- Database
    database_url: str = Field(
        default="postgresql+asyncpg://gateway:gateway@postgres:5432/neuronetz",
    )
    database_pool_size: int = Field(default=10)
    database_pool_overflow: int = Field(default=20)

    # -------------------------------------------------------------------- Redis
    redis_url: str = Field(default="redis://redis:6379/0")
    redis_key_cache_ttl_s: int = Field(default=60)

    # ------------------------------------------------------------------- Limits
    default_rpm: int = Field(default=60)
    default_tpm: int = Field(default=100_000)
    default_concurrent: int = Field(default=8)
    max_request_body_bytes: int = Field(default=262_144)
    max_num_predict: int = Field(default=4096)

    # ----------------------------------------------------------------- Security
    argon2_time_cost: int = Field(default=3)
    argon2_memory_cost_kib: int = Field(default=65_536)
    argon2_parallelism: int = Field(default=4)
    auth_failure_rate_limit_per_ip_per_min: int = Field(default=20)

    # -------------------------------------------------------------------- Audit
    audit_buffer_size: int = Field(default=1000)
    prompt_log_default_retention_days: int = Field(default=30)
    audit_log_default_retention_days: int = Field(default=365)

    # ------------------------- Playground / docs (prod-safe defaults: both OFF)
    playground_enabled: bool = Field(default=False)
    playground_file: str = Field(default="/app/playground/index.html")
    docs_enabled: bool = Field(default=False)

    @property
    def trusted_proxies_list(self) -> list[str]:
        """Return ``gateway_trusted_proxies`` split on commas.

        Each entry is stripped of surrounding whitespace and empty entries
        are dropped.
        """
        stripped = (entry.strip() for entry in self.gateway_trusted_proxies.split(","))
        return [host for host in stripped if host]
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Construct ``Settings`` from the environment once and reuse the result.

    The function takes no arguments, so ``lru_cache(maxsize=1)`` memoises the
    single instance: later calls return the same object without rebuilding it.
    """
    settings = Settings()
    return settings
|
||||||
3
src/neuronetz_gateway/db/__init__.py
Normal file
3
src/neuronetz_gateway/db/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Database access layer: SQLAlchemy models, session factory, repositories."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
292
src/neuronetz_gateway/db/models.py
Normal file
292
src/neuronetz_gateway/db/models.py
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
"""SQLAlchemy 2.0 (async) ORM models for schema ``gateway`` per SPEC §5.
|
||||||
|
|
||||||
|
These mirror the migration in ``alembic/versions/0001_initial.py`` exactly.
|
||||||
|
The migration is the authoritative DDL; these models are for application use.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import enum
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from sqlalchemy import (
|
||||||
|
BigInteger,
|
||||||
|
Boolean,
|
||||||
|
ForeignKey,
|
||||||
|
Integer,
|
||||||
|
MetaData,
|
||||||
|
String,
|
||||||
|
Text,
|
||||||
|
text,
|
||||||
|
)
|
||||||
|
from sqlalchemy.dialects.postgresql import ARRAY, ENUM, INET, JSONB, TIMESTAMP, UUID
|
||||||
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||||
|
|
||||||
|
# Name of the Postgres schema that holds every table defined in this module.
GATEWAY_SCHEMA = "gateway"

# Deterministic constraint/index name templates, so that names produced by
# Alembic autogenerate match names written in ad-hoc DDL.
_NAMING_CONVENTION = {
    "pk": "pk_%(table_name)s",
    "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
    "uq": "uq_%(table_name)s_%(column_0_name)s",
    "ck": "ck_%(table_name)s_%(constraint_name)s",
    "ix": "ix_%(column_0_label)s",
}
|
||||||
|
|
||||||
|
|
||||||
|
class Base(DeclarativeBase):
    """Common declarative base for the ORM models of the ``gateway`` schema.

    The shared ``MetaData`` sets ``GATEWAY_SCHEMA`` as the default schema and
    applies ``_NAMING_CONVENTION`` to constraint and index names.
    """

    metadata = MetaData(
        naming_convention=_NAMING_CONVENTION,
        schema=GATEWAY_SCHEMA,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class KeyStatus(enum.StrEnum):
|
||||||
|
"""Lifecycle states for an API key (SPEC §5 ``gateway.key_status``)."""
|
||||||
|
|
||||||
|
active = "active"
|
||||||
|
disabled = "disabled"
|
||||||
|
revoked = "revoked"
|
||||||
|
|
||||||
|
|
||||||
|
class TenantStatus(enum.StrEnum):
|
||||||
|
"""Lifecycle states for a tenant (SPEC §5 ``gateway.tenant_status``)."""
|
||||||
|
|
||||||
|
active = "active"
|
||||||
|
suspended = "suspended"
|
||||||
|
closed = "closed"
|
||||||
|
|
||||||
|
|
||||||
|
class BudgetPeriod(enum.StrEnum):
|
||||||
|
"""Budget accounting periods (SPEC §5 ``gateway.budget_period``)."""
|
||||||
|
|
||||||
|
day = "day"
|
||||||
|
month = "month"
|
||||||
|
total = "total"
|
||||||
|
|
||||||
|
|
||||||
|
# The Postgres enum types referenced here are created by the migration, so
# ``create_type=False`` keeps SQLAlchemy from issuing CREATE TYPE again at runtime.
_key_status_enum = ENUM(
    KeyStatus,
    name="key_status",
    schema=GATEWAY_SCHEMA,
    create_type=False,
)
_tenant_status_enum = ENUM(
    TenantStatus,
    name="tenant_status",
    schema=GATEWAY_SCHEMA,
    create_type=False,
)
_budget_period_enum = ENUM(
    BudgetPeriod,
    name="budget_period",
    schema=GATEWAY_SCHEMA,
    create_type=False,
)
|
||||||
|
|
||||||
|
|
||||||
|
class Tenant(Base):
    """Top-level ownership and isolation boundary (table ``tenants``)."""

    __tablename__ = "tenants"

    # Primary key, generated by the database.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        server_default=text("gen_random_uuid()"),
        primary_key=True,
    )
    # Unique tenant name.
    name: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
    status: Mapped[TenantStatus] = mapped_column(
        _tenant_status_enum,
        server_default=text("'active'"),
        nullable=False,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        server_default=text("now()"),
        nullable=False,
    )
    # Stored in column "metadata"; the Python attribute uses a different name
    # because ``metadata`` is already taken on the declarative base.
    tenant_metadata: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        server_default=text("'{}'::jsonb"),
        nullable=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TenantLimit(Base):
    """Default limits and retention policy for one tenant (table ``tenant_limits``)."""

    __tablename__ = "tenant_limits"

    # One row per tenant: the foreign key doubles as the primary key and the
    # row is removed together with its tenant.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Rate / concurrency limits with server-side defaults.
    rpm: Mapped[int] = mapped_column(
        Integer, server_default=text("60"), nullable=False
    )
    tpm: Mapped[int] = mapped_column(
        Integer, server_default=text("100000"), nullable=False
    )
    concurrent: Mapped[int] = mapped_column(
        Integer, server_default=text("8"), nullable=False
    )

    # Token budgets per period; nullable.
    tokens_daily: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    tokens_monthly: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    tokens_total: Mapped[int | None] = mapped_column(BigInteger, nullable=True)

    # Explicit model allowlist; defaults to an empty array.
    allowed_models: Mapped[list[str]] = mapped_column(
        ARRAY(Text),
        server_default=text("'{}'"),
        nullable=False,
    )
    # When true, the tenant may use ANY model currently installed on the Ollama
    # backend (resolved live via model discovery). When false (default), access
    # is default-deny and restricted to ``allowed_models`` intersected with the
    # live set.
    allow_all_models: Mapped[bool] = mapped_column(
        Boolean,
        server_default=text("false"),
        nullable=False,
    )

    # Prompt logging default and retention windows (days).
    log_prompts_default: Mapped[bool] = mapped_column(
        Boolean,
        server_default=text("false"),
        nullable=False,
    )
    prompt_retention_days: Mapped[int] = mapped_column(
        Integer,
        server_default=text("30"),
        nullable=False,
    )
    audit_retention_days: Mapped[int] = mapped_column(
        Integer,
        server_default=text("365"),
        nullable=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ApiKey(Base):
    """API key owned by a tenant (table ``api_keys``).

    The full key is never stored; the row holds ``prefix`` and ``key_hash``.
    """

    __tablename__ = "api_keys"

    # Primary key, generated by the database.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        server_default=text("gen_random_uuid()"),
        primary_key=True,
    )
    # Owning tenant; keys are removed together with their tenant.
    tenant_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("tenants.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Unique key prefix.
    prefix: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
    key_hash: Mapped[str] = mapped_column(Text, nullable=False)
    name: Mapped[str] = mapped_column(Text, nullable=False)
    status: Mapped[KeyStatus] = mapped_column(
        _key_status_enum,
        server_default=text("'active'"),
        nullable=False,
    )
    # Defaults to the two scopes ``chat`` and ``embeddings``.
    scopes: Mapped[list[str]] = mapped_column(
        ARRAY(Text),
        server_default=text("'{chat,embeddings}'"),
        nullable=False,
    )
    created_at: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        server_default=text("now()"),
        nullable=False,
    )
    last_used_at: Mapped[datetime.datetime | None] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=True,
    )
    expires_at: Mapped[datetime.datetime | None] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=True,
    )
    # Nullable per-key prompt-logging flag.
    log_prompts: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
    # Stored in column "metadata"; the Python attribute uses a different name
    # because ``metadata`` is already taken on the declarative base.
    key_metadata: Mapped[dict[str, object]] = mapped_column(
        "metadata",
        JSONB,
        server_default=text("'{}'::jsonb"),
        nullable=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class KeyLimit(Base):
    """Per-key limit overrides (table ``key_limits``).

    Every limit column is nullable; a NULL means "inherit the tenant value".
    """

    __tablename__ = "key_limits"

    # One row per key: the foreign key doubles as the primary key and the row
    # is removed together with its key.
    key_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("api_keys.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Rate / concurrency overrides.
    rpm: Mapped[int | None] = mapped_column(Integer, nullable=True)
    tpm: Mapped[int | None] = mapped_column(Integer, nullable=True)
    concurrent: Mapped[int | None] = mapped_column(Integer, nullable=True)

    # Token budget overrides per period.
    tokens_daily: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    tokens_monthly: Mapped[int | None] = mapped_column(BigInteger, nullable=True)
    tokens_total: Mapped[int | None] = mapped_column(BigInteger, nullable=True)

    # Model allowlist override.
    allowed_models: Mapped[list[str] | None] = mapped_column(
        ARRAY(Text),
        nullable=True,
    )
    # NULL = inherit tenant's allow_all_models; otherwise overrides it for this key.
    allow_all_models: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
class BudgetUsage(Base):
    """Token and request counters (table ``budget_usage``).

    The composite primary key is ``(key_id, period, period_start)``.
    """

    __tablename__ = "budget_usage"

    # --- composite primary key ---
    key_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("api_keys.id", ondelete="CASCADE"),
        primary_key=True,
    )
    period: Mapped[BudgetPeriod] = mapped_column(
        _budget_period_enum,
        primary_key=True,
    )
    period_start: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        primary_key=True,
    )

    # --- counters, all starting at zero ---
    tokens_in: Mapped[int] = mapped_column(
        BigInteger, server_default=text("0"), nullable=False
    )
    tokens_out: Mapped[int] = mapped_column(
        BigInteger, server_default=text("0"), nullable=False
    )
    requests: Mapped[int] = mapped_column(
        BigInteger, server_default=text("0"), nullable=False
    )
|
||||||
|
|
||||||
|
|
||||||
|
class AuditLog(Base):
    """Always-on, append-only log of request metadata (table ``audit_log``)."""

    __tablename__ = "audit_log"

    id: Mapped[int] = mapped_column(
        BigInteger,
        autoincrement=True,
        primary_key=True,
    )
    ts: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        server_default=text("now()"),
        nullable=False,
    )
    request_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)

    # Caller identity. These are plain nullable columns: no ForeignKey is
    # declared on them here.
    tenant_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), nullable=True
    )
    key_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), nullable=True
    )
    key_prefix: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Request description.
    method: Mapped[str] = mapped_column(Text, nullable=False)
    path: Mapped[str] = mapped_column(Text, nullable=False)
    model: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Outcome and accounting.
    tokens_in: Mapped[int | None] = mapped_column(Integer, nullable=True)
    tokens_out: Mapped[int | None] = mapped_column(Integer, nullable=True)
    latency_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
    status: Mapped[int] = mapped_column(Integer, nullable=False)

    # Client details and error classification.
    client_ip: Mapped[str | None] = mapped_column(INET, nullable=True)
    user_agent: Mapped[str | None] = mapped_column(Text, nullable=True)
    error_code: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||||
|
|
||||||
|
|
||||||
|
class PromptLog(Base):
    """Opt-in capture of request/response bodies with a TTL (table ``prompt_log``)."""

    __tablename__ = "prompt_log"

    id: Mapped[int] = mapped_column(
        BigInteger,
        autoincrement=True,
        primary_key=True,
    )
    # Audit row this capture belongs to; deleted together with that row.
    audit_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("audit_log.id", ondelete="CASCADE"),
        nullable=False,
    )
    ts: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        server_default=text("now()"),
        nullable=False,
    )
    key_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
    request_body: Mapped[dict[str, object]] = mapped_column(JSONB, nullable=False)
    response_text: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Required on every row; there is no server-side default.
    retention_until: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class Revocation(Base):
    """Outbox row written by the console (or the gateway) to revoke a key.

    An ``AFTER INSERT`` trigger fires ``pg_notify('key_revoked', key_id)``.
    """

    __tablename__ = "revocations"

    id: Mapped[int] = mapped_column(
        BigInteger,
        autoincrement=True,
        primary_key=True,
    )
    # The key being revoked (no ForeignKey is declared here).
    key_id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), nullable=False)
    ts: Mapped[datetime.datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        server_default=text("now()"),
        nullable=False,
    )
    reason: Mapped[str | None] = mapped_column(String, nullable=True)
    processed_at: Mapped[datetime.datetime | None] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"GATEWAY_SCHEMA",
|
||||||
|
"ApiKey",
|
||||||
|
"AuditLog",
|
||||||
|
"Base",
|
||||||
|
"BudgetPeriod",
|
||||||
|
"BudgetUsage",
|
||||||
|
"KeyLimit",
|
||||||
|
"KeyStatus",
|
||||||
|
"PromptLog",
|
||||||
|
"Revocation",
|
||||||
|
"Tenant",
|
||||||
|
"TenantLimit",
|
||||||
|
"TenantStatus",
|
||||||
|
]
|
||||||
53
src/neuronetz_gateway/db/session.py
Normal file
53
src/neuronetz_gateway/db/session.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
"""Async SQLAlchemy engine and session factory construction.
|
||||||
|
|
||||||
|
Phase 1 provides the wiring only; the lifespan owns the engine instance and
|
||||||
|
stores it on ``app.state``. Business-logic callers should depend on the
|
||||||
|
session factory via ``deps.py``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
from sqlalchemy.ext.asyncio import (
|
||||||
|
AsyncEngine,
|
||||||
|
AsyncSession,
|
||||||
|
async_sessionmaker,
|
||||||
|
create_async_engine,
|
||||||
|
)
|
||||||
|
|
||||||
|
from neuronetz_gateway.config import Settings
|
||||||
|
|
||||||
|
|
||||||
|
def create_engine(settings: Settings) -> AsyncEngine:
    """Create the pooled async SQLAlchemy engine described by ``settings``.

    Reads ``database_url``, ``database_pool_size`` and ``database_pool_overflow``
    from ``settings``; ``pool_pre_ping`` is always enabled.
    """
    engine_options = {
        "pool_size": settings.database_pool_size,
        "max_overflow": settings.database_pool_overflow,
        "pool_pre_ping": True,
        "future": True,
    }
    return create_async_engine(settings.database_url, **engine_options)
|
||||||
|
|
||||||
|
|
||||||
|
def create_session_factory(engine: AsyncEngine) -> async_sessionmaker[AsyncSession]:
    """Return an ``async_sessionmaker`` bound to ``engine``.

    Sessions are ``AsyncSession`` instances created with
    ``expire_on_commit=False``.
    """
    session_factory = async_sessionmaker(
        engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    return session_factory
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def session_scope(
    factory: async_sessionmaker[AsyncSession],
) -> AsyncIterator[AsyncSession]:
    """Yield a session from ``factory`` inside a commit-or-rollback scope.

    The session is committed after the caller's block finishes normally. If an
    ``Exception`` is raised by the caller's block or by the commit itself, the
    session is rolled back and the exception is re-raised.
    """
    async with factory() as db:
        try:
            yield db
            # The commit stays inside the ``try`` so that a failing commit
            # also goes through the rollback below.
            await db.commit()
        except Exception:
            await db.rollback()
            raise
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["create_engine", "create_session_factory", "session_scope"]
|
||||||
180
src/neuronetz_gateway/deps.py
Normal file
180
src/neuronetz_gateway/deps.py
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"""FastAPI dependency-injection providers.
|
||||||
|
|
||||||
|
Exposes typed accessors for the handles placed on ``app.state`` by the lifespan
|
||||||
|
(Redis, the upstream httpx client, the DB session factory, the discovery cache)
|
||||||
|
plus the request principal and the proxy client.
|
||||||
|
|
||||||
|
QA override contract
|
||||||
|
--------------------
|
||||||
|
Routes obtain the upstream proxy via :func:`get_ollama_client`. Tests override
|
||||||
|
the *Ollama backend* by overriding this provider::
|
||||||
|
|
||||||
|
from neuronetz_gateway.deps import get_ollama_client
|
||||||
|
from neuronetz_gateway.proxy.ollama import OllamaClient
|
||||||
|
import httpx
|
||||||
|
from tests.integration.mock_ollama import create_mock_ollama
|
||||||
|
|
||||||
|
transport = httpx.ASGITransport(app=create_mock_ollama())
|
||||||
|
mock_http = httpx.AsyncClient(transport=transport, base_url="http://ollama")
|
||||||
|
app.dependency_overrides[get_ollama_client] = lambda: OllamaClient(mock_http)
|
||||||
|
|
||||||
|
Because ``get_ollama_client`` returns a fully-built :class:`OllamaClient`, an
|
||||||
|
override needs no access to ``app.state`` and can point at the in-process mock.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from typing import Annotated
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import redis.asyncio as redis
|
||||||
|
from fastapi import Depends, Request
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||||
|
|
||||||
|
from neuronetz_gateway.audit.writer import AuditWriter
|
||||||
|
from neuronetz_gateway.auth.principal import Principal
|
||||||
|
from neuronetz_gateway.budget.counter import BudgetCounter
|
||||||
|
from neuronetz_gateway.config import Settings, get_settings
|
||||||
|
from neuronetz_gateway.errors import AuthenticationError, DependencyUnavailableError
|
||||||
|
from neuronetz_gateway.proxy.discovery import DiscoveryCache
|
||||||
|
from neuronetz_gateway.proxy.ollama import OllamaClient
|
||||||
|
from neuronetz_gateway.proxy.pipeline import Pipeline
|
||||||
|
from neuronetz_gateway.ratelimit.concurrency import ConcurrencyLimiter
|
||||||
|
from neuronetz_gateway.ratelimit.sliding_window import SlidingWindowLimiter
|
||||||
|
|
||||||
|
|
||||||
|
def get_config() -> Settings:
    """Dependency provider returning the result of ``get_settings()``."""
    settings = get_settings()
    return settings
|
||||||
|
|
||||||
|
|
||||||
|
def get_redis(request: Request) -> redis.Redis:
    """Return the Redis client stored at ``app.state.redis``.

    Raises:
        DependencyUnavailableError: if the attribute is missing or ``None``.
    """
    handle: redis.Redis | None = getattr(request.app.state, "redis", None)
    if handle is not None:
        return handle
    raise DependencyUnavailableError(internal_detail="redis client not initialised")
|
||||||
|
|
||||||
|
|
||||||
|
def get_http_client(request: Request) -> httpx.AsyncClient:
    """Return the httpx client stored at ``app.state.http_client``.

    Raises:
        DependencyUnavailableError: if the attribute is missing or ``None``.
    """
    handle: httpx.AsyncClient | None = getattr(request.app.state, "http_client", None)
    if handle is not None:
        return handle
    raise DependencyUnavailableError(internal_detail="http client not initialised")
|
||||||
|
|
||||||
|
|
||||||
|
def get_ollama_client(request: Request) -> OllamaClient:
    """Build an ``OllamaClient`` around the shared httpx client.

    This provider is the override target used by tests to swap the Ollama
    backend.
    """
    http_client = get_http_client(request)
    return OllamaClient(http_client)
|
||||||
|
|
||||||
|
|
||||||
|
def get_discovery_cache(request: Request) -> DiscoveryCache:
    """Return the discovery cache stored at ``app.state.discovery_cache``.

    Raises:
        DependencyUnavailableError: if the attribute is missing or ``None``.
    """
    handle: DiscoveryCache | None = getattr(request.app.state, "discovery_cache", None)
    if handle is not None:
        return handle
    raise DependencyUnavailableError(internal_detail="discovery cache not initialised")
|
||||||
|
|
||||||
|
|
||||||
|
def get_principal(request: Request) -> Principal:
    """Return the principal stored at ``request.state.principal``.

    The auth middleware is expected to attach it before routing. A missing
    principal on a non-exempt route is treated as a programming error and
    fails closed.

    Raises:
        AuthenticationError: if the attribute is missing or ``None``.
    """
    who: Principal | None = getattr(request.state, "principal", None)
    if who is not None:
        return who
    raise AuthenticationError(internal_detail="principal missing on authenticated route")
|
||||||
|
|
||||||
|
|
||||||
|
def get_audit_writer(request: Request) -> AuditWriter:
    """Return the audit writer stored at ``app.state.audit_writer``.

    Raises:
        DependencyUnavailableError: if the attribute is missing or ``None``.
    """
    handle: AuditWriter | None = getattr(request.app.state, "audit_writer", None)
    if handle is not None:
        return handle
    raise DependencyUnavailableError(internal_detail="audit writer not initialised")
|
||||||
|
|
||||||
|
|
||||||
|
def get_pipeline(
    request: Request,
    principal: Annotated[Principal, Depends(get_principal)],
    settings: Annotated[Settings, Depends(get_config)],
    ollama: Annotated[OllamaClient, Depends(get_ollama_client)],
    discovery: Annotated[DiscoveryCache, Depends(get_discovery_cache)],
    redis_client: Annotated[redis.Redis, Depends(get_redis)],
    audit: Annotated[AuditWriter, Depends(get_audit_writer)],
) -> Pipeline:
    """Build the per-request enforcement + proxy ``Pipeline``.

    The pipeline owns all hot-path checks (rate limit, budget, concurrency,
    model/endpoint allowlist) and the streaming-with-bookkeeping contract.
    Audit deny-mode flips this to fail closed at the route layer.

    The rate limiter, concurrency limiter and budget counter are created fresh
    for each call, all on top of the same injected Redis client.
    """
    # Read straight from app.state: unlike ``_get_sessionmaker`` this does not
    # raise when the factory is absent, so ``None`` is passed through as-is.
    db_factory: async_sessionmaker[AsyncSession] | None = getattr(
        request.app.state, "db_sessionmaker", None
    )

    rate_limiter = SlidingWindowLimiter(redis_client)
    concurrency_limiter = ConcurrencyLimiter(redis_client)
    budget_counter = BudgetCounter(redis_client)

    return Pipeline(
        request=request,
        principal=principal,
        settings=settings,
        ollama=ollama,
        discovery=discovery,
        rate_limiter=rate_limiter,
        concurrency=concurrency_limiter,
        budget=budget_counter,
        audit=audit,
        sessionmaker=db_factory,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _get_sessionmaker(request: Request) -> async_sessionmaker[AsyncSession]:
    """Return the session factory stored at ``app.state.db_sessionmaker``.

    Raises:
        DependencyUnavailableError: if the attribute is missing or ``None``.
    """
    db_factory: async_sessionmaker[AsyncSession] | None = getattr(
        request.app.state, "db_sessionmaker", None
    )
    if db_factory is not None:
        return db_factory
    raise DependencyUnavailableError(internal_detail="db session factory not initialised")
|
||||||
|
|
||||||
|
|
||||||
|
async def get_db_session(request: Request) -> AsyncIterator[AsyncSession]:
    """Yield one async DB session for the duration of the request.

    The session comes from the app's session factory and is used as an async
    context manager, so it is exited once the dependency is torn down. No
    commit is issued here.
    """
    session_factory = _get_sessionmaker(request)
    async with session_factory() as db:
        yield db
|
||||||
|
|
||||||
|
|
||||||
|
# Ready-made ``Annotated`` aliases so route signatures can declare a dependency
# by type alone, e.g. ``async def route(settings: ConfigDep): ...``.
ConfigDep = Annotated[Settings, Depends(get_config)]
RedisDep = Annotated[redis.Redis, Depends(get_redis)]
HttpClientDep = Annotated[httpx.AsyncClient, Depends(get_http_client)]
OllamaClientDep = Annotated[OllamaClient, Depends(get_ollama_client)]
DiscoveryCacheDep = Annotated[DiscoveryCache, Depends(get_discovery_cache)]
PrincipalDep = Annotated[Principal, Depends(get_principal)]
AuditWriterDep = Annotated[AuditWriter, Depends(get_audit_writer)]
PipelineDep = Annotated[Pipeline, Depends(get_pipeline)]
DbSessionDep = Annotated[AsyncSession, Depends(get_db_session)]
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"AuditWriterDep",
|
||||||
|
"ConfigDep",
|
||||||
|
"DbSessionDep",
|
||||||
|
"DiscoveryCacheDep",
|
||||||
|
"HttpClientDep",
|
||||||
|
"OllamaClientDep",
|
||||||
|
"PipelineDep",
|
||||||
|
"PrincipalDep",
|
||||||
|
"RedisDep",
|
||||||
|
"get_audit_writer",
|
||||||
|
"get_config",
|
||||||
|
"get_db_session",
|
||||||
|
"get_discovery_cache",
|
||||||
|
"get_http_client",
|
||||||
|
"get_ollama_client",
|
||||||
|
"get_pipeline",
|
||||||
|
"get_principal",
|
||||||
|
"get_redis",
|
||||||
|
]
|
||||||
179
src/neuronetz_gateway/errors.py
Normal file
179
src/neuronetz_gateway/errors.py
Normal file
@@ -0,0 +1,179 @@
|
|||||||
|
"""Exception types and FastAPI exception handlers.
|
||||||
|
|
||||||
|
Hard rule (SPEC §3, AGENT_PROMPT non-negotiable #4): never leak upstream or
|
||||||
|
internal error details to the client. Every error response is a generic,
|
||||||
|
sanitized JSON body carrying only a stable ``error.code``, a safe message, and
|
||||||
|
the request id. Detailed context is logged server-side, never returned.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from fastapi import FastAPI, Request, status
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
|
||||||
|
from neuronetz_gateway.observability.logging import get_logger
|
||||||
|
|
||||||
|
_log = get_logger("errors")
|
||||||
|
|
||||||
|
|
||||||
|
class GatewayError(Exception):
    """Base class for gateway errors that map to a sanitized HTTP response.

    ``message`` MUST be safe to return to clients. Anything sensitive belongs
    in ``internal_detail`` which is logged but never serialized to the client.

    Subclasses override the class attributes ``status_code``, ``code`` and
    ``message`` to define their default response.
    """

    status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR
    code: str = "internal_error"
    message: str = "An internal error occurred."

    def __init__(self, message: str | None = None, *, internal_detail: str | None = None) -> None:
        """Initialise the error.

        Args:
            message: Client-safe message overriding the class default. ``None``
                or an empty string keeps the class default.
            internal_detail: Server-side-only context; never sent to clients.
        """
        # Use a single rule for both the exception args and the attribute that
        # the handler serialises: an empty override falls back to the class
        # default, so ``str(exc)`` and ``exc.message`` always agree and the
        # client never receives a blank message.
        if message:
            self.message = message
        super().__init__(self.message)
        self.internal_detail = internal_detail
|
||||||
|
|
||||||
|
|
||||||
|
class AuthenticationError(GatewayError):
    """Credentials are missing or invalid: 401 with a generic message, no detail."""

    code = "unauthorized"
    message = "Authentication required."
    status_code = status.HTTP_401_UNAUTHORIZED
|
||||||
|
|
||||||
|
|
||||||
|
class AuthorizationError(GatewayError):
    """Caller is authenticated but the scope/model/endpoint is denied: 403."""

    code = "forbidden"
    message = "This request is not permitted."
    status_code = status.HTTP_403_FORBIDDEN
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimitError(GatewayError):
    """Rate limit exceeded: 429.

    Carries an optional ``retry_after`` value that the exception handler turns
    into a ``Retry-After`` header when it is not ``None``.
    """

    code = "rate_limited"
    message = "Rate limit exceeded."
    status_code = status.HTTP_429_TOO_MANY_REQUESTS

    def __init__(
        self,
        message: str | None = None,
        *,
        retry_after: int | None = None,
        internal_detail: str | None = None,
    ) -> None:
        """Store ``retry_after`` and delegate the rest to ``GatewayError``."""
        self.retry_after = retry_after
        super().__init__(message, internal_detail=internal_detail)
|
||||||
|
|
||||||
|
|
||||||
|
class BudgetExceededError(GatewayError):
    """The token budget for the active period is used up: 429."""

    code = "budget_exceeded"
    message = "Token budget exhausted for the current period."
    status_code = status.HTTP_429_TOO_MANY_REQUESTS
|
||||||
|
|
||||||
|
|
||||||
|
class RequestTooLargeError(GatewayError):
    """The request body is larger than the configured limit: 413."""

    code = "request_too_large"
    message = "Request body is too large."
    status_code = status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamUnavailableError(GatewayError):
    """Ollama (or another dependency) cannot be reached: 502, fail closed."""

    code = "upstream_unavailable"
    message = "The upstream service is temporarily unavailable."
    status_code = status.HTTP_502_BAD_GATEWAY
|
||||||
|
|
||||||
|
|
||||||
|
class DependencyUnavailableError(GatewayError):
    """A required backend (DB/Redis) is not available: 503, fail closed."""

    code = "service_unavailable"
    message = "The service is temporarily unavailable."
    status_code = status.HTTP_503_SERVICE_UNAVAILABLE
|
||||||
|
|
||||||
|
|
||||||
|
def _request_id(request: Request) -> str:
    """Return ``request.state.request_id`` as a string.

    Returns ``""`` when the attribute is missing or falsy.
    """
    rid = getattr(request.state, "request_id", None)
    if not rid:
        return ""
    return str(rid)
|
||||||
|
|
||||||
|
|
||||||
|
def _error_response(
    request: Request,
    *,
    status_code: int,
    code: str,
    message: str,
    extra_headers: dict[str, str] | None = None,
) -> JSONResponse:
    """Build the sanitized JSON error response.

    The body is ``{"error": {"code", "message", "request_id"}}``. The
    ``X-Request-ID`` header is set only when a request id is available;
    ``extra_headers`` are merged in afterwards and therefore take precedence.
    """
    request_id = _request_id(request)

    headers: dict[str, str] = {}
    if request_id:
        headers["X-Request-ID"] = request_id
    if extra_headers:
        headers.update(extra_headers)

    payload = {"error": {"code": code, "message": message, "request_id": request_id}}
    return JSONResponse(status_code=status_code, content=payload, headers=headers)
|
||||||
|
|
||||||
|
|
||||||
|
async def _gateway_error_handler(request: Request, exc: GatewayError) -> JSONResponse:
    """Turn a ``GatewayError`` into its sanitized JSON response.

    ``internal_detail`` is logged (when present) and never put in the
    response. A ``RateLimitError`` with a non-``None`` ``retry_after`` also
    gets a ``Retry-After`` header.
    """
    if exc.internal_detail:
        _log.warning(
            "gateway_error",
            code=exc.code,
            status_code=exc.status_code,
            internal_detail=exc.internal_detail,
        )

    retry_headers: dict[str, str] | None = (
        {"Retry-After": str(exc.retry_after)}
        if isinstance(exc, RateLimitError) and exc.retry_after is not None
        else None
    )

    return _error_response(
        request,
        status_code=exc.status_code,
        code=exc.code,
        message=exc.message,
        extra_headers=retry_headers,
    )
|
||||||
|
|
||||||
|
|
||||||
|
async def _unhandled_error_handler(request: Request, exc: Exception) -> JSONResponse:
    """Catch-all handler: log the exception, answer with a generic 500.

    Nothing from ``exc`` is included in the response.
    """
    _log.error("unhandled_exception", exc_info=exc)
    generic_500 = _error_response(
        request,
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        code="internal_error",
        message="An internal error occurred.",
    )
    return generic_500
|
||||||
|
|
||||||
|
|
||||||
|
def register_exception_handlers(app: FastAPI) -> None:
    """Register the sanitizing exception handlers on ``app``.

    ``GatewayError`` (and its subclasses) go to ``_gateway_error_handler``;
    every other ``Exception`` goes to ``_unhandled_error_handler``.
    """
    # The GatewayError handler is annotated with the narrower ``GatewayError``
    # parameter type rather than ``Exception``, so the arg-type check is
    # silenced for that one registration instead of wrapping it in a cast.
    app.add_exception_handler(GatewayError, _gateway_error_handler)  # type: ignore[arg-type]  # handler typed for GatewayError subclass
    app.add_exception_handler(Exception, _unhandled_error_handler)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"AuthenticationError",
|
||||||
|
"AuthorizationError",
|
||||||
|
"BudgetExceededError",
|
||||||
|
"DependencyUnavailableError",
|
||||||
|
"GatewayError",
|
||||||
|
"RateLimitError",
|
||||||
|
"RequestTooLargeError",
|
||||||
|
"UpstreamUnavailableError",
|
||||||
|
"register_exception_handlers",
|
||||||
|
]
|
||||||
131
src/neuronetz_gateway/lifespan.py
Normal file
131
src/neuronetz_gateway/lifespan.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
"""Application lifespan: connect/dispose backends and run background tasks.
|
||||||
|
|
||||||
|
Startup connects Postgres + Redis + the upstream httpx client, builds the
|
||||||
|
argon2 hasher and the buffered audit writer, and launches the background tasks:
|
||||||
|
the model-discovery poller (SPEC §4.6) and the Postgres revocation NOTIFY
|
||||||
|
listener (SPEC §4.5). Connection failures are tolerated so ``/healthz`` always
|
||||||
|
serves; ``/readyz`` reports true readiness. All handles live on ``app.state``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import contextlib
|
||||||
|
from collections.abc import AsyncIterator
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import redis.asyncio as redis
|
||||||
|
|
||||||
|
from neuronetz_gateway.audit.writer import AuditWriter
|
||||||
|
from neuronetz_gateway.auth.hashing import build_hasher
|
||||||
|
from neuronetz_gateway.config import Settings, get_settings
|
||||||
|
from neuronetz_gateway.db.session import create_engine, create_session_factory
|
||||||
|
from neuronetz_gateway.observability.logging import get_logger
|
||||||
|
from neuronetz_gateway.proxy.discovery import DiscoveryCache, discovery_loop
|
||||||
|
from neuronetz_gateway.revocation import revocation_listener
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from fastapi import FastAPI
|
||||||
|
|
||||||
|
_log = get_logger("lifespan")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_http_client(settings: Settings) -> httpx.AsyncClient:
    """Build the shared ``httpx.AsyncClient`` pointed at the Ollama base URL.

    The connect timeout setting is reused for pool acquisition and the read
    timeout setting is reused for writes. The connection cap comes from
    ``settings.ollama_max_connections``.
    """
    connect_s = settings.ollama_connect_timeout_s
    read_s = settings.ollama_read_timeout_s
    return httpx.AsyncClient(
        base_url=settings.ollama_base_url,
        timeout=httpx.Timeout(connect=connect_s, read=read_s, write=read_s, pool=connect_s),
        limits=httpx.Limits(max_connections=settings.ollama_max_connections),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Bring up every backend before serving and tear them down afterwards.

    All handles created here are stored on ``app.state``. A failure while
    building the database engine or the Redis client is logged and leaves the
    corresponding handle as ``None`` instead of aborting startup.
    """
    cfg: Settings = get_settings()
    state = app.state
    state.settings = cfg
    state.hasher = build_hasher(cfg)
    state.discovery_cache = DiscoveryCache()
    background: list[asyncio.Task[None]] = []

    # Postgres: on any failure both the engine and the session factory are None.
    try:
        db_engine = create_engine(cfg)
        state.db_engine = db_engine
        state.db_sessionmaker = create_session_factory(db_engine)
    except Exception as exc:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("db_engine_init_failed", error=str(exc))
        state.db_engine = None
        state.db_sessionmaker = None

    # Redis client.
    try:
        state.redis = redis.from_url(cfg.redis_url, decode_responses=True)
    except Exception as exc:  # noqa: BLE001 - tolerate so /healthz still serves
        _log.error("redis_init_failed", error=str(exc))
        state.redis = None

    state.http_client = _build_http_client(cfg)

    writer = AuditWriter(cfg.audit_buffer_size, state.db_sessionmaker)
    writer.start()
    state.audit_writer = writer

    # Background tasks (cancelled on shutdown). The discovery poller always
    # runs; the revocation listener only when both Redis and the DB are set.
    poller = asyncio.create_task(
        discovery_loop(state.http_client, state.redis, state.discovery_cache, cfg)
    )
    background.append(poller)
    if not (state.redis is None or state.db_sessionmaker is None):
        listener = asyncio.create_task(
            revocation_listener(cfg, state.redis, state.db_sessionmaker)
        )
        background.append(listener)
    state.background_tasks = background

    _log.info("gateway_startup_complete")
    try:
        yield
    finally:
        await _shutdown(app, background, writer)
|
||||||
|
|
||||||
|
|
||||||
|
async def _shutdown(
    app: FastAPI, tasks: list[asyncio.Task[None]], audit_writer: AuditWriter
) -> None:
    """Cancel background tasks and dispose of all backend handles.

    Teardown is best-effort: every step is isolated so that one failure
    cannot prevent the remaining handles from being released.

    Args:
        app: Application whose ``app.state`` may hold ``http_client``,
            ``redis`` and ``db_engine`` (each may be missing or ``None``).
        tasks: Background tasks to cancel and then await.
        audit_writer: Writer that is stopped once the tasks have finished.
    """
    for task in tasks:
        task.cancel()
    for task in tasks:
        try:
            await task
        except asyncio.CancelledError:
            # Expected result of the cancel() issued above.
            pass
        except Exception as exc:  # noqa: BLE001 - shutdown must keep going
            # Awaiting a task that already crashed re-raises its stored
            # exception; log it instead of letting it abort the cleanup below.
            _log.error("background_task_failed", task=task.get_name(), error=str(exc))

    with contextlib.suppress(Exception):
        await audit_writer.stop()

    http_client: httpx.AsyncClient | None = getattr(app.state, "http_client", None)
    if http_client is not None:
        with contextlib.suppress(Exception):
            await http_client.aclose()

    redis_client = getattr(app.state, "redis", None)
    if redis_client is not None:
        with contextlib.suppress(Exception):
            await redis_client.aclose()

    engine = getattr(app.state, "db_engine", None)
    if engine is not None:
        with contextlib.suppress(Exception):
            await engine.dispose()

    _log.info("gateway_shutdown_complete")
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["lifespan"]
|
||||||
3
src/neuronetz_gateway/observability/__init__.py
Normal file
3
src/neuronetz_gateway/observability/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Observability: structured logging and Prometheus metrics."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
48
src/neuronetz_gateway/observability/logging.py
Normal file
48
src/neuronetz_gateway/observability/logging.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""structlog configuration.
|
||||||
|
|
||||||
|
Renders JSON in production (``GATEWAY_LOG_FORMAT=json``) and a human-friendly
|
||||||
|
console format in development. No secrets are ever logged; processors here
|
||||||
|
must not introduce any.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import structlog
|
||||||
|
|
||||||
|
|
||||||
|
def configure_logging(level: str = "INFO", fmt: str = "json") -> None:
    """Configure stdlib logging and structlog according to settings.

    Args:
        level: Name of a stdlib logging level (case-insensitive). Anything
            that does not resolve to a numeric level falls back to ``INFO``.
        fmt: ``"console"`` selects the console renderer; any other value
            selects JSON output.
    """
    candidate = getattr(logging, level.upper(), None)
    # Accept only numeric levels: some upper-cased names (e.g. "BASIC_FORMAT")
    # resolve to non-level attributes of the logging module, which would make
    # basicConfig() raise instead of falling back to INFO.
    log_level = candidate if isinstance(candidate, int) else logging.INFO
    logging.basicConfig(format="%(message)s", level=log_level)

    shared_processors: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso", utc=True),
        structlog.processors.StackInfoRenderer(),
    ]

    processors: list[structlog.types.Processor]
    if fmt == "console":
        # ConsoleRenderer renders ``exc_info`` itself; pre-formatting it with
        # format_exc_info would bypass the renderer's exception formatter.
        processors = [*shared_processors, structlog.dev.ConsoleRenderer()]
    else:
        # JSON output needs the traceback flattened to a string first.
        processors = [
            *shared_processors,
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(log_level),
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_logger(name: str | None = None) -> Any:  # noqa: ANN401 - structlog returns a dynamic proxy
    """Fetch a structlog logger, forwarding the optional ``name`` unchanged."""
    logger = structlog.get_logger(name)
    return logger
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["configure_logging", "get_logger"]
|
||||||
3
src/neuronetz_gateway/routes/__init__.py
Normal file
3
src/neuronetz_gateway/routes/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""HTTP route modules: health, native Ollama passthrough, OpenAI-compat."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
114
src/neuronetz_gateway/routes/health.py
Normal file
114
src/neuronetz_gateway/routes/health.py
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
"""Health, readiness, and metrics endpoints (SPEC §6.4).
|
||||||
|
|
||||||
|
- ``GET /healthz`` : liveness — always 200 if the process can respond.
|
||||||
|
- ``GET /readyz`` : readiness — 200 only if Postgres + Redis + Ollama are all
|
||||||
|
reachable; otherwise 503 with which dependencies are down.
|
||||||
|
In Phase 1 dev there is no Ollama, so 503 is expected.
|
||||||
|
- ``GET /metrics`` : Prometheus exposition. (Loopback-only IP check deferred.)
|
||||||
|
|
||||||
|
None of these endpoints require auth and none leak secrets or internal detail.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Awaitable
|
||||||
|
from typing import Literal, cast
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import redis.asyncio as redis
|
||||||
|
from fastapi import APIRouter, Request, Response, status
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy import text
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||||
|
|
||||||
|
from neuronetz_gateway.observability.logging import get_logger
|
||||||
|
from neuronetz_gateway.observability.metrics import CONTENT_TYPE_LATEST, render_latest
|
||||||
|
|
||||||
|
router = APIRouter(tags=["health"])
|
||||||
|
_log = get_logger("health")
|
||||||
|
|
||||||
|
|
||||||
|
class HealthResponse(BaseModel):
    """Liveness response body."""

    # The only permitted value is the literal "ok"; it is also the default, so
    # the model can be constructed without arguments.
    status: Literal["ok"] = "ok"
|
||||||
|
|
||||||
|
|
||||||
|
class ReadyResponse(BaseModel):
    """Readiness response body. ``checks`` maps dependency -> reachable bool."""

    # Overall verdict; restricted to exactly these two literals.
    status: Literal["ready", "not_ready"]
    # Per-dependency result keyed by dependency name.
    checks: dict[str, bool]
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/healthz", response_model=HealthResponse, status_code=status.HTTP_200_OK)
async def healthz() -> HealthResponse:
    """Liveness probe — always returns 200 while the process is responsive."""
    # No dependency is consulted here; the default-constructed body is returned.
    liveness = HealthResponse()
    return liveness
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_postgres(app_state: object) -> bool:
    """Report whether ``SELECT 1`` can be executed through the session factory.

    Returns ``False`` when ``app_state`` has no ``db_sessionmaker`` (or it is
    ``None``) and when opening the session or running the query raises.
    """
    session_factory: async_sessionmaker[AsyncSession] | None = getattr(
        app_state, "db_sessionmaker", None
    )
    if session_factory is None:
        return False
    try:
        async with session_factory() as db:
            await db.execute(text("SELECT 1"))
    except Exception as exc:  # noqa: BLE001 - any failure means not ready
        _log.warning("readyz_postgres_unreachable", error=str(exc))
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_redis(app_state: object) -> bool:
    """Report whether the Redis client on ``app_state`` answers ``PING``.

    Returns ``False`` when there is no ``redis`` attribute (or it is ``None``)
    and when the ping raises; otherwise the truthiness of the ping result.
    """
    redis_client: redis.Redis | None = getattr(app_state, "redis", None)
    if redis_client is None:
        return False
    try:
        # The stubs type ping() as Awaitable[bool] | bool because sync and
        # async clients share them; the cast tells mypy it is awaitable here.
        alive = bool(await cast("Awaitable[bool]", redis_client.ping()))
    except Exception as exc:  # noqa: BLE001 - any failure means not ready
        _log.warning("readyz_redis_unreachable", error=str(exc))
        return False
    else:
        return alive
|
||||||
|
|
||||||
|
|
||||||
|
async def _check_ollama(app_state: object) -> bool:
    """Report whether a ``GET /`` through the shared HTTP client succeeds.

    Any response with a status code below 500 counts as reachable. Returns
    ``False`` when there is no ``http_client`` attribute (or it is ``None``)
    and when the request raises.
    """
    upstream: httpx.AsyncClient | None = getattr(app_state, "http_client", None)
    if upstream is None:
        return False
    try:
        reply = await upstream.get("/")
        reachable = reply.status_code < 500
    except Exception as exc:  # noqa: BLE001 - any failure means not ready
        _log.warning("readyz_ollama_unreachable", error=str(exc))
        return False
    else:
        return reachable
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/readyz", response_model=ReadyResponse)
async def readyz(request: Request, response: Response) -> ReadyResponse:
    """Readiness probe — 200 only if every dependency is reachable, else 503."""
    state = request.app.state
    # Probes run one after another, in this fixed order.
    postgres_ok = await _check_postgres(state)
    redis_ok = await _check_redis(state)
    ollama_ok = await _check_ollama(state)
    checks = {"postgres": postgres_ok, "redis": redis_ok, "ollama": ollama_ok}

    if postgres_ok and redis_ok and ollama_ok:
        return ReadyResponse(status="ready", checks=checks)

    # At least one dependency is down: report 503 along with the per-check map.
    response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    return ReadyResponse(status="not_ready", checks=checks)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/metrics")
async def metrics() -> Response:
    """Prometheus exposition. Loopback-only enforcement is deferred to Phase 4."""
    # Render the current metrics and serve them with the exposition content type.
    payload = render_latest()
    return Response(content=payload, media_type=CONTENT_TYPE_LATEST)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["router"]
|
||||||
Reference in New Issue
Block a user