- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
96 lines
3.0 KiB
Python
Executable File
96 lines
3.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Query Grace's RAG knowledge base.
|
|
|
|
Usage (from exec):
|
|
python3 ~/scripts/rag-query.py "how do I configure reverse_proxy in Caddy"
|
|
python3 ~/scripts/rag-query.py "OPNsense unbound API add host" --top 5
|
|
python3 ~/scripts/rag-query.py "proxmox create LXC" --source "Proxmox VE API"
|
|
|
|
Returns relevant doc chunks with source + URL — use instead of web_fetch for
|
|
known homelab documentation.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import sys
|
|
import urllib.request
|
|
from typing import Optional
|
|
|
|
QDRANT_URL = "http://localhost:6333"
|
|
OLLAMA_URL = "http://192.168.20.142:11434"
|
|
EMBED_MODEL = "nomic-embed-text"
|
|
COLLECTION = "homelab_docs"
|
|
|
|
|
|
def embed_query(text: str) -> Optional[list[float]]:
|
|
body = json.dumps({"model": EMBED_MODEL, "prompt": text}).encode()
|
|
req = urllib.request.Request(
|
|
f"{OLLAMA_URL}/api/embeddings", data=body,
|
|
headers={"Content-Type": "application/json"}
|
|
)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=15) as r:
|
|
return json.loads(r.read())["embedding"]
|
|
except Exception as e:
|
|
print(f"Embed error: {e}", file=sys.stderr)
|
|
return None
|
|
|
|
|
|
def search(query_vector: list[float], top: int = 5, source_filter: Optional[str] = None) -> list[dict]:
|
|
payload: dict = {
|
|
"vector": query_vector,
|
|
"limit": top,
|
|
"with_payload": True,
|
|
}
|
|
if source_filter:
|
|
payload["filter"] = {
|
|
"must": [{"key": "source", "match": {"value": source_filter}}]
|
|
}
|
|
|
|
body = json.dumps(payload).encode()
|
|
req = urllib.request.Request(
|
|
f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
|
|
data=body,
|
|
headers={"Content-Type": "application/json"}
|
|
)
|
|
try:
|
|
with urllib.request.urlopen(req, timeout=15) as r:
|
|
return json.loads(r.read()).get("result", [])
|
|
except Exception as e:
|
|
print(f"Search error: {e}", file=sys.stderr)
|
|
return []
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(description="Query Grace's RAG knowledge base")
|
|
parser.add_argument("query", help="Natural language question")
|
|
parser.add_argument("--top", type=int, default=5, help="Number of results (default 5)")
|
|
parser.add_argument("--source", type=str, default=None, help="Filter by source name")
|
|
args = parser.parse_args()
|
|
|
|
vector = embed_query(args.query)
|
|
if not vector:
|
|
print("Failed to embed query")
|
|
sys.exit(1)
|
|
|
|
results = search(vector, top=args.top, source_filter=args.source)
|
|
|
|
if not results:
|
|
print("No results found. Has the RAG been ingested yet?")
|
|
print("Run: python3 ~/scripts/rag-ingest.py")
|
|
sys.exit(0)
|
|
|
|
for i, r in enumerate(results, 1):
|
|
p = r.get("payload", {})
|
|
score = r.get("score", 0)
|
|
print(f"\n{'='*60}")
|
|
print(f"[{i}] {p.get('source','')} — {p.get('section','')}")
|
|
print(f" Score: {score:.3f} | {p.get('url','')}")
|
|
print(f"{'-'*60}")
|
|
print(p.get("text", "")[:800])
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|