feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
This commit is contained in:
95
rag-query.py
Executable file
95
rag-query.py
Executable file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Query Grace's RAG knowledge base.
|
||||
|
||||
Usage (from exec):
|
||||
python3 ~/scripts/rag-query.py "how do I configure reverse_proxy in Caddy"
|
||||
python3 ~/scripts/rag-query.py "OPNsense unbound API add host" --top 5
|
||||
python3 ~/scripts/rag-query.py "proxmox create LXC" --source "Proxmox VE API"
|
||||
|
||||
Returns relevant doc chunks with source + URL — use instead of web_fetch for
|
||||
known homelab documentation.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import urllib.request
|
||||
from typing import Optional
|
||||
|
||||
# Qdrant vector store endpoint (local instance).
QDRANT_URL = "http://localhost:6333"
# Ollama server used to embed the query text (LAN host).
OLLAMA_URL = "http://192.168.20.142:11434"
# Embedding model name passed to Ollama.
# NOTE(review): presumably must match the model used by rag-ingest.py so
# query and document vectors share a space — confirm against that script.
EMBED_MODEL = "nomic-embed-text"
# Qdrant collection holding the ingested homelab documentation chunks.
COLLECTION = "homelab_docs"
|
||||
|
||||
|
||||
def embed_query(text: str) -> Optional[list[float]]:
    """Embed *text* using the Ollama embeddings endpoint.

    Returns the embedding vector on success. On any failure (network,
    timeout, unexpected response shape) the error is printed to stderr
    and None is returned so the caller can degrade gracefully.
    """
    payload = {"model": EMBED_MODEL, "prompt": text}
    request = urllib.request.Request(
        f"{OLLAMA_URL}/api/embeddings",
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            return json.loads(response.read())["embedding"]
    except Exception as e:  # best-effort boundary: report and signal failure
        print(f"Embed error: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def search(query_vector: list[float], top: int = 5, source_filter: Optional[str] = None) -> list[dict]:
    """Run a vector similarity search against the Qdrant collection.

    Args:
        query_vector: Embedding of the user's question.
        top: Maximum number of hits to return.
        source_filter: If given, restrict hits to payloads whose
            ``source`` field equals this value.

    Returns:
        The list of scored points from Qdrant, or ``[]`` on any error
        (the error is printed to stderr).
    """
    search_body: dict = {"vector": query_vector, "limit": top, "with_payload": True}
    if source_filter:
        # Qdrant filter DSL: exact match on the chunk's "source" payload key.
        match_clause = {"key": "source", "match": {"value": source_filter}}
        search_body["filter"] = {"must": [match_clause]}

    request = urllib.request.Request(
        f"{QDRANT_URL}/collections/{COLLECTION}/points/search",
        data=json.dumps(search_body).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            parsed = json.loads(response.read())
            return parsed.get("result", [])
    except Exception as e:  # best-effort boundary: report and return no hits
        print(f"Search error: {e}", file=sys.stderr)
        return []
|
||||
|
||||
|
||||
def main():
    """CLI entry point: embed the question, search Qdrant, print hits.

    Exits with status 1 when the query cannot be embedded; exits 0 with
    a hint to run the ingest script when the search returns nothing.
    """
    parser = argparse.ArgumentParser(description="Query Grace's RAG knowledge base")
    parser.add_argument("query", help="Natural language question")
    parser.add_argument("--top", type=int, default=5, help="Number of results (default 5)")
    parser.add_argument("--source", type=str, default=None, help="Filter by source name")
    args = parser.parse_args()

    vector = embed_query(args.query)
    if not vector:
        print("Failed to embed query")
        sys.exit(1)

    hits = search(vector, top=args.top, source_filter=args.source)
    if not hits:
        # Empty result most likely means the collection was never populated.
        print("No results found. Has the RAG been ingested yet?")
        print("Run: python3 ~/scripts/rag-ingest.py")
        sys.exit(0)

    rule = "=" * 60
    dash = "-" * 60
    for rank, hit in enumerate(hits, 1):
        payload = hit.get("payload", {})
        similarity = hit.get("score", 0)
        # Header: source + section, then score and canonical URL.
        print(f"\n{rule}")
        print(f"[{rank}] {payload.get('source','')} — {payload.get('section','')}")
        print(f" Score: {similarity:.3f} | {payload.get('url','')}")
        print(f"{dash}")
        # Truncate chunk text to keep output token-friendly for the agent.
        print(payload.get("text", "")[:800])
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user