nibiru-framework.com/docs/public/robots.txt

# =============================================================================
#  robots.txt for nibiru-framework.com
#
#  Policy: open. We want every search engine, every AI training crawler,
#  every retrieval/RAG agent to be able to read these docs. The whole point
#  of publishing this site is so that humans AND models can learn Nibiru.
#
#  The wildcard rule below allows everything except the /api/ endpoint;
#  search engines and AI-specific bots are also listed explicitly so their
#  operators can verify they are welcome here.
# =============================================================================

# ── Crawl rules ─────────────────────────────────────────────────────────────
# How to read (and extend) the groups below:
#
#   * A crawler obeys ONLY the most specific group that names it; it does not
#     additionally inherit the rules of the `User-agent: *` group. The /api/
#     exclusion is therefore repeated in every group, not just the wildcard.
#   * `Disallow: /api/` is written before `Allow: /` so that first-match
#     parsers reach the same answer as longest-match (RFC 9309) parsers.
#   * There are no blank lines inside a group: some parsers treat a blank
#     line as the end of the group and would drop any rule that follows it.
#   * /api/ is the SSR API endpoint — it's not content. Note that robots.txt
#     only stops crawling; it is not a noindex directive.
#
# When adding a new bot, append its User-agent line to the matching group
# (or copy a whole group, including both rules).

# ── Search engines ──────────────────────────────────────────────────────────
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Yandexbot
User-agent: Baiduspider
Disallow: /api/
Allow: /

# ── AI training / search crawlers — explicitly welcomed ─────────────────────
# OpenAI
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
Disallow: /api/
Allow: /

# Anthropic
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
Disallow: /api/
Allow: /

# Google AI training
User-agent: Google-Extended
Disallow: /api/
Allow: /

# Apple (AI training + regular Applebot)
User-agent: Applebot-Extended
User-agent: Applebot
Disallow: /api/
Allow: /

# Meta
User-agent: meta-externalagent
User-agent: FacebookBot
Disallow: /api/
Allow: /

# Perplexity
User-agent: PerplexityBot
User-agent: Perplexity-User
Disallow: /api/
Allow: /

# Other AI / LLM crawlers
User-agent: YouBot
User-agent: Bytespider
User-agent: Amazonbot
User-agent: Diffbot
User-agent: cohere-ai
User-agent: cohere-training-data-crawler
User-agent: Mistral-AI-User
User-agent: omgili
User-agent: omgilibot
Disallow: /api/
Allow: /

# Common Crawl — the dataset most LLMs train on
User-agent: CCBot
Disallow: /api/
Allow: /

# Internet Archive
User-agent: ia_archiver
Disallow: /api/
Allow: /

# ── Default policy: allow everything except the API endpoint ───────────────
User-agent: *
Disallow: /api/
Allow: /

# ── Sitemaps ───────────────────────────────────────────────────────────────
# Astro generates /sitemap-index.xml automatically, along with one child
# sitemap-0.xml; /sitemap.xml is a 301 redirect to that index. Both URLs are
# advertised so that a crawler probing either path ends up at the same
# content. Bing's Webmaster Tools and IndexNow tend to look for /sitemap.xml
# literally.
Sitemap: https://nibiru-framework.com/sitemap-index.xml
Sitemap: https://nibiru-framework.com/sitemap.xml