feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
95
rag-query.py
Executable file
95
rag-query.py
Executable file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Query Grace's RAG knowledge base.
|
||||
|
||||
Usage (from exec):
|
||||
python3 ~/scripts/rag-query.py "how do I configure reverse_proxy in Caddy"
|
||||
python3 ~/scripts/rag-query.py "OPNsense unbound API add host" --top 5
|
||||
python3 ~/scripts/rag-query.py "proxmox create LXC" --source "Proxmox VE API"
|
||||
|
||||
Returns relevant doc chunks with source + URL — use instead of web_fetch for
|
||||
known homelab documentation.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
|
||||
# Qdrant vector store endpoint (local instance).
QDRANT_URL = "http://localhost:6333"
# Ollama server used to embed the query text (LAN host).
OLLAMA_URL = "http://192.168.20.142:11434"
# Embedding model name passed to Ollama.
# NOTE(review): presumably must match the model used by rag-ingest.py so
# query and document vectors share a space — confirm against that script.
EMBED_MODEL = "nomic-embed-text"
# Qdrant collection holding the ingested homelab documentation chunks.
COLLECTION = "homelab_docs"
|
||||
|
||||
|
||||
def embed_query(text: str) -> Optional[list[float]]:
    """Embed *text* using the Ollama embeddings endpoint.

    Returns the embedding vector on success. On any failure (network,
    timeout, unexpected response shape) the error is printed to stderr
    and None is returned so the caller can degrade gracefully.
    """
    payload = {"model": EMBED_MODEL, "prompt": text}
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            return json.loads(response.read())["embedding"]
    except Exception as e:  # best-effort boundary: report and signal failure
        print(f"Embed error: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def search(query_vector: list[float], top: int = 5, source_filter: Optional[str] = None) -> list[dict]:
    """Run a vector similarity search against the Qdrant collection.

    Args:
        query_vector: Embedding of the user's question.
        top: Maximum number of hits to return.
        source_filter: If given, restrict hits to payloads whose
            ``source`` field equals this value.

    Returns:
        The list of scored points from Qdrant, or ``[]`` on any error
        (the error is printed to stderr).
    """
    search_body: dict = {"vector": query_vector, "limit": top, "with_payload": True}
    if source_filter:
        # Qdrant filter DSL: exact match on the chunk's "source" payload key.
        match_clause = {"key": "source", "match": {"value": source_filter}}
        search_body["filter"] = {"must": [match_clause]}

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=json.dumps(search_body).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            parsed = json.loads(response.read())
            return parsed.get("result", [])
    except Exception as e:  # best-effort boundary: report and return no hits
        print(f"Search error: {e}", file=sys.stderr)
        return []
|
||||
|
||||
|
||||
def main():
    """CLI entry point: embed the question, search Qdrant, print hits.

    Exits with status 1 when the query cannot be embedded; exits 0 with
    a hint to run the ingest script when the search returns nothing.
    """
    parser = argparse.ArgumentParser(description="Query Grace's RAG knowledge base")
    parser.add_argument("query", help="Natural language question")
    parser.add_argument("--top", type=int, default=5, help="Number of results (default 5)")
    parser.add_argument("--source", type=str, default=None, help="Filter by source name")
    args = parser.parse_args()

    vector = embed_query(args.query)
    if not vector:
        print("Failed to embed query")
        sys.exit(1)

    hits = search(vector, top=args.top, source_filter=args.source)
    if not hits:
        # Empty result most likely means the collection was never populated.
        print("No results found. Has the RAG been ingested yet?")
        print("Run: python3 ~/scripts/rag-ingest.py")
        sys.exit(0)

    rule = "=" * 60
    dash = "-" * 60
    for rank, hit in enumerate(hits, 1):
        payload = hit.get("payload", {})
        similarity = hit.get("score", 0)
        # Header: source + section, then score and canonical URL.
        print(f"\n{rule}")
        print(f"[{rank}] {payload.get('source','')} — {payload.get('section','')}")
        print(f" Score: {similarity:.3f} | {payload.get('url','')}")
        print(f"{dash}")
        # Truncate chunk text to keep output token-friendly for the agent.
        print(payload.get("text", "")[:800])
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user