- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
96 lines
3.0 KiB
Python
Executable File
96 lines
3.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Query Grace's RAG knowledge base.
|
|
|
|
Usage (from exec):
|
|
python3 ~/scripts/rag-query.py "how do I configure reverse_proxy in Caddy"
|
|
python3 ~/scripts/rag-query.py "OPNsense unbound API add host" --top 5
|
|
python3 ~/scripts/rag-query.py "proxmox create LXC" --source "Proxmox VE API"
|
|
|
|
Returns relevant doc chunks with source + URL — use instead of web_fetch for
|
|
known homelab documentation.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
import urllib.request
|
|
from typing import Optional
|
|
|
|
QDRANT_URL = "http://localhost:6333"
|
|
OLLAMA_URL = "http://192.168.20.142:11434"
|
|
EMBED_MODEL = "nomic-embed-text"
|
|
COLLECTION = "homelab_docs"
|
|
|
|
|
|
def embed_query(text: str) -> Optional[list[float]]:
|
|
body = json.dumps({"model": EMBED_MODEL, "prompt": text}).encode()
|
|
req = urllib.request.Request(
|
|
f"{OLLAMA_URL}/api/embeddings", data=body,
|
|
headers={"Content-Type": "application/json"}
|
|
)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=15) as r:
|
|
return json.loads(r.read())["embedding"]
|
|
except Exception as e:
|
|
print(f"Embed error: {e}", file=sys.stderr)
|
|
return None
|
|
|
|
|
|
def search(query_vector: list[float], top: int = 5, source_filter: Optional[str] = None) -> list[dict]:
|
|
payload: dict = {
|
|
"vector": query_vector,
|
|
"limit": top,
|
|
"with_payload": True,
|
|
}
|
|
if source_filter:
|
|
payload["filter"] = {
|
|
"must": [{"key": "source", "match": {"value": source_filter}}]
|
|
}
|
|
|
|
body = json.dumps(payload).encode()
|
|
req = urllib.request.Request(
|
|
f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
|
|
data=body,
|
|
headers={"Content-Type": "application/json"}
|
|
)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=15) as r:
|
|
return json.loads(r.read()).get("result", [])
|
|
except Exception as e:
|
|
print(f"Search error: {e}", file=sys.stderr)
|
|
return []
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Query Grace's RAG knowledge base")
|
|
parser.add_argument("query", help="Natural language question")
|
|
parser.add_argument("--top", type=int, default=5, help="Number of results (default 5)")
|
|
parser.add_argument("--source", type=str, default=None, help="Filter by source name")
|
|
args = parser.parse_args()
|
|
|
|
vector = embed_query(args.query)
|
|
if not vector:
|
|
print("Failed to embed query")
|
|
sys.exit(1)
|
|
|
|
results = search(vector, top=args.top, source_filter=args.source)
|
|
|
|
if not results:
|
|
print("No results found. Has the RAG been ingested yet?")
|
|
print("Run: python3 ~/scripts/rag-ingest.py")
|
|
sys.exit(0)
|
|
|
|
for i, r in enumerate(results, 1):
|
|
p = r.get("payload", {})
|
|
score = r.get("score", 0)
|
|
print(f"\n{'='*60}")
|
|
print(f"[{i}] {p.get('source','')} — {p.get('section','')}")
|
|
print(f" Score: {score:.3f} | {p.get('url','')}")
|
|
print(f"{'-'*60}")
|
|
print(p.get("text", "")[:800])
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|