Files
grace-scripts/infra-audit.py
Grace 014ec8bd5c feat: initial import of all helper scripts from ~/scripts/
- Training data pipeline: convert, export, extract, load-to-db
- Infra tooling: infra-audit, infra-gitea-link
- RAG pipeline: rag-ingest, rag-query
- Fine-tuning: finetune-lora, overnight-qwen3, install-unsloth
- Transcripts: export-transcripts
- Updated README with script index and token reduction strategy
2026-03-16 22:32:48 -07:00

496 lines
24 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Grace Homelab Infrastructure Audit
Queries Proxmox, TrueNAS, OPNsense, and each VM/LXC for full system state.
Writes everything to MongoDB homelab_infra on DB VM (192.168.20.87).
Usage:
python3 ~/scripts/infra-audit.py # full audit
python3 ~/scripts/infra-audit.py --update proxmox
python3 ~/scripts/infra-audit.py --update truenas
python3 ~/scripts/infra-audit.py --update opnsense
python3 ~/scripts/infra-audit.py --update services
python3 ~/scripts/infra-audit.py --query "what disk backs /mnt/ai-storage"
python3 ~/scripts/infra-audit.py --dump # print full DB as JSON
GRACE RULE: Before any infrastructure work, run:
python3 ~/scripts/infra-audit.py --query "<question>"
After any infrastructure change, run:
python3 ~/scripts/infra-audit.py --update <component>
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.error
import ssl
from datetime import datetime, timezone
# ── Config ───────────────────────────────────────────────────────────────────
# MongoDB on the DB VM that receives all audit documents (db: homelab_infra).
MONGO_HOST = "192.168.20.87"
MONGO_PORT = 27017
MONGO_DB = "homelab_infra"
# Base URLs of the audited systems. TLS verification is disabled via the
# shared CTX context further down, so self-signed certs on these hosts work.
PROXMOX_HOST = "https://192.168.20.135:8006"
TRUENAS_HOST = "https://truenas.home.local"
OPNSENSE_HOST = "https://router.home.local:8443"
# Load creds from Infisical at runtime
def get_creds():
    """Pull all needed credentials from Infisical.

    Reads the universal-auth client id/secret from the local identity env
    file, exchanges them for an access token, then fetches each secret by
    name. Returns an empty dict on any failure so callers can proceed
    best-effort (the audits use `creds.get(..., "")`).
    """
    try:
        identity_file = os.path.expanduser("~/.infisical-identities/grace-ai.env")
        env_vars = {}
        with open(identity_file) as fh:
            for raw in fh:
                raw = raw.strip()
                # Skip blanks and comments; accept KEY=VALUE with optional quotes.
                if not raw or raw.startswith('#') or '=' not in raw:
                    continue
                key, _, value = raw.partition('=')
                env_vars[key.strip()] = value.strip().strip('"')
        # Exchange the client credentials for a short-lived access token.
        login_body = json.dumps({
            "clientId": env_vars["INFISICAL_UNIVERSAL_AUTH_CLIENT_ID"],
            "clientSecret": env_vars["INFISICAL_UNIVERSAL_AUTH_CLIENT_SECRET"]
        }).encode()
        login_req = urllib.request.Request(
            "http://infisical.home.local/api/v1/auth/universal-auth/login",
            data=login_body, headers={"Content-Type": "application/json"}
        )
        with urllib.request.urlopen(login_req, timeout=10) as resp:
            token = json.loads(resp.read())["accessToken"]

        def fetch_secret(secret_name):
            # Raw secret lookup in the fixed workspace / dev environment / root path.
            secret_req = urllib.request.Request(
                f"http://infisical.home.local/api/v3/secrets/raw/{secret_name}"
                f"?workspaceId=80f319a5-9a0c-4cd2-911d-9c59fa515929&environment=dev&secretPath=/",
                headers={"Authorization": f"Bearer {token}"}
            )
            with urllib.request.urlopen(secret_req, timeout=10) as resp:
                return json.loads(resp.read())["secret"]["secretValue"]

        # Local alias -> Infisical secret name.
        wanted = {
            "proxmox_token_id": "PROXMOX_TOKEN_ID",
            "proxmox_token_secret": "PROXMOX_TOKEN_SECRET",
            "truenas_api_key": "TRUENAS_API_KEY",
            "opnsense_api_key": "OPNSENSE_API_KEY",
            "opnsense_api_secret": "OPNSENSE_API_SECRET",
        }
        return {alias: fetch_secret(name) for alias, name in wanted.items()}
    except Exception as e:
        print(f"Cred fetch error: {e}")
        return {}
# ── HTTP helpers ──────────────────────────────────────────────────────────────
# Shared TLS context with certificate verification disabled — the audited
# hosts use internal/self-signed certs.
CTX = ssl.create_default_context()
CTX.check_hostname = False
CTX.verify_mode = ssl.CERT_NONE


def http_get(url, headers=None, auth=None, timeout=15):
    """GET *url* and return the decoded JSON body, or None on any failure.

    *auth*, when given, is a (user, password) pair sent as HTTP Basic auth.
    Errors are printed, never raised — callers treat None as "unreachable".
    """
    try:
        request = urllib.request.Request(url, headers=headers or {})
        if auth:
            import base64
            encoded = base64.b64encode(f"{auth[0]}:{auth[1]}".encode()).decode()
            request.add_header("Authorization", f"Basic {encoded}")
        with urllib.request.urlopen(request, timeout=timeout, context=CTX) as resp:
            return json.loads(resp.read())
    except Exception as e:
        print(f" GET error {url}: {e}")
        return None
# ── MongoDB helpers ───────────────────────────────────────────────────────────
# Lazily-created client, shared by every helper call in this process.
_mongo_client = None


def get_mongo():
    """Return the homelab_infra database handle, connecting on first use."""
    global _mongo_client
    if _mongo_client is not None:
        return _mongo_client[MONGO_DB]
    # First call: import here so the script can still run --query/--dump paths
    # on machines without pymongo installed until Mongo is actually needed.
    import pymongo
    _mongo_client = pymongo.MongoClient(MONGO_HOST, MONGO_PORT, serverSelectionTimeoutMS=5000)
    return _mongo_client[MONGO_DB]
def mongo_upsert(collection, doc, key_field="name"):
    """Upsert *doc* into *collection*, matching on *key_field*.

    Returns True on success, False (after printing the error) on any failure.
    """
    try:
        selector = {key_field: doc[key_field]}
        get_mongo()[collection].update_one(selector, {"$set": doc}, upsert=True)
        return True
    except Exception as e:
        print(f" Mongo error: {e}")
        return False
def mongo_query_all(collection):
    """Return every document in *collection* (Mongo _id stripped), or [] on error."""
    try:
        cursor = get_mongo()[collection].find({}, {"_id": 0})
        return list(cursor)
    except Exception as e:
        print(f" Mongo query error: {e}")
        return []
# ── Proxmox audit ─────────────────────────────────────────────────────────────
def audit_proxmox(creds):
    """Snapshot all Proxmox nodes and guests into the nodes/vms collections."""
    print("\n[Proxmox] Auditing...")
    token_id = creds.get("proxmox_token_id", "")
    token_secret = creds.get("proxmox_token_secret", "")
    auth_header = {"Authorization": f"PVEAPIToken={token_id}={token_secret}"}
    # A single cluster/resources call returns every node, VM and LXC at once.
    cluster = http_get(f"{PROXMOX_HOST}/api2/json/cluster/resources", headers=auth_header)
    if not cluster:
        print(" Failed to reach Proxmox")
        return
    node_docs = {}
    guest_docs = []
    for item in cluster.get("data", []):
        kind = item.get("type")
        if kind == "node":
            node_docs[item["node"]] = {
                "name": item["node"],
                "type": "proxmox_node",
                "status": item.get("status"),
                "cpu": item.get("cpu"),
                "maxcpu": item.get("maxcpu"),
                "mem": item.get("mem"),
                "maxmem": item.get("maxmem"),
                "disk": item.get("disk"),
                "maxdisk": item.get("maxdisk"),
                "uptime": item.get("uptime"),
                "audited_at": datetime.now(timezone.utc).isoformat(),
            }
        elif kind in ("qemu", "lxc"):
            guest_docs.append({
                "name": item.get("name", f"vm-{item.get('vmid')}"),
                "type": kind,
                "vmid": item.get("vmid"),
                "node": item.get("node"),
                "status": item.get("status"),
                "cpu": item.get("cpu"),
                "maxcpu": item.get("maxcpu"),
                "mem": item.get("mem"),
                "maxmem": item.get("maxmem"),
                "disk": item.get("disk"),
                "maxdisk": item.get("maxdisk"),
                "uptime": item.get("uptime"),
                "template": item.get("template", 0),
                "audited_at": datetime.now(timezone.utc).isoformat(),
            })
    # Attach each node's storage listing (per-node API call).
    for node_name, node in node_docs.items():
        listing = http_get(f"{PROXMOX_HOST}/api2/json/nodes/{node_name}/storage", headers=auth_header)
        if listing:
            node["storage"] = listing.get("data", [])
    # Persist everything.
    for node in node_docs.values():
        mongo_upsert("nodes", node)
        print(f" Node: {node['name']} ({node.get('status')})")
    for vm in guest_docs:
        mongo_upsert("vms", vm)
        print(f" {vm['type'].upper()}: {vm['name']} (vmid={vm['vmid']}, node={vm['node']}, status={vm['status']})")
    print(f" Saved {len(node_docs)} nodes, {len(guest_docs)} VMs/LXCs")
# ── TrueNAS audit ─────────────────────────────────────────────────────────────
def audit_truenas(creds):
    """Snapshot TrueNAS pools, datasets, disks and NFS shares into `storage`."""
    print("\n[TrueNAS] Auditing...")
    api_key = creds.get("truenas_api_key", "")
    headers = {"Authorization": f"Bearer {api_key}"}

    def _unwrap(field):
        # TrueNAS frequently wraps values as {"value": ...}; pass through otherwise.
        return field.get("value") if isinstance(field, dict) else field

    # Pools
    pools = http_get(f"{TRUENAS_HOST}/api/v2.0/pool", headers=headers)
    if pools:
        for pool in pools:
            pool_doc = {
                "name": pool.get("name"),
                "type": "truenas_pool",
                "status": pool.get("status"),
                "size": pool.get("size"),
                "free": pool.get("free"),
                "allocated": pool.get("allocated"),
                "topology": pool.get("topology", {}),
                "autotrim": pool.get("autotrim", {}).get("value"),
                "audited_at": datetime.now(timezone.utc).isoformat(),
            }
            mongo_upsert("storage", pool_doc)
            print(f" Pool: {pool_doc['name']} ({pool_doc['status']})")
    # Datasets (NFS exports, mount points)
    datasets = http_get(f"{TRUENAS_HOST}/api/v2.0/pool/dataset?limit=50", headers=headers)
    if datasets:
        for ds in datasets:
            full_name = ds.get("name", "")
            compression = ds.get("compression")
            ds_doc = {
                "name": full_name,
                "type": "truenas_dataset",
                # The pool is the first path component of the dataset name.
                "pool": full_name.split("/")[0] if "/" in full_name else full_name,
                "mountpoint": _unwrap(ds.get("mountpoint")),
                "used": _unwrap(ds.get("used")),
                "available": _unwrap(ds.get("available")),
                # Unlike the fields above, a bare (non-dict) compression value
                # is recorded as None, matching the original behavior.
                "compression": _unwrap(compression) if isinstance(compression, dict) else None,
                "audited_at": datetime.now(timezone.utc).isoformat(),
            }
            mongo_upsert("storage", ds_doc)
        print(f" {len(datasets)} datasets saved")
    # Disks — critical for knowing SSD vs HDD
    disks = http_get(f"{TRUENAS_HOST}/api/v2.0/disk?limit=50", headers=headers)
    if disks:
        for disk in disks:
            rpm = disk.get("rotationrate")
            if rpm == 0:
                medium = "SSD"  # non-rotational
            elif rpm:
                medium = f"HDD_{rpm}rpm"
            else:
                medium = "unknown"  # rotation rate missing/None
            disk_doc = {
                "name": disk.get("name"),
                "type": "truenas_disk",
                "serial": disk.get("serial"),
                "model": disk.get("model"),
                "size": disk.get("size"),
                "rotationrate": rpm,
                "disk_type": medium,
                "description": disk.get("description"),
                "audited_at": datetime.now(timezone.utc).isoformat(),
            }
            mongo_upsert("storage", disk_doc)
        print(f" {len(disks)} disks saved")
    # NFS shares — what's exported where
    nfs = http_get(f"{TRUENAS_HOST}/api/v2.0/sharing/nfs", headers=headers)
    if nfs:
        for share in nfs:
            share_doc = {
                "name": share.get("path", "").replace("/", "_"),
                "type": "nfs_share",
                "path": share.get("path"),
                "networks": share.get("networks", []),
                "hosts": share.get("hosts", []),
                "enabled": share.get("enabled"),
                "audited_at": datetime.now(timezone.utc).isoformat(),
            }
            mongo_upsert("storage", share_doc)
        print(f" {len(nfs)} NFS shares saved")
# ── OPNsense audit ────────────────────────────────────────────────────────────
def audit_opnsense(creds):
    """Snapshot OPNsense firmware info, Unbound DNS overrides and interfaces."""
    print("\n[OPNsense] Auditing...")
    basic_auth = (creds.get("opnsense_api_key", ""), creds.get("opnsense_api_secret", ""))
    # Firmware/version info
    firmware = http_get(f"{OPNSENSE_HOST}/api/core/firmware/info", auth=basic_auth)
    if firmware:
        router_doc = {
            "name": "opnsense",
            "type": "router",
            "ip": "router.home.local",
            "version": firmware.get("product_version"),
            "arch": firmware.get("product_arch"),
            "last_check": firmware.get("last_check"),
            "audited_at": datetime.now(timezone.utc).isoformat(),
        }
        mongo_upsert("services", router_doc)
        print(f" OPNsense {router_doc.get('version')}")
    # Unbound DNS hosts
    dns_hosts = http_get(f"{OPNSENSE_HOST}/api/unbound/host/searchhost", auth=basic_auth)
    if dns_hosts:
        host_rows = dns_hosts.get("rows", [])
        mongo_upsert("network", {
            "name": "unbound_dns_hosts",
            "type": "dns_config",
            "entries": host_rows,
            "count": len(host_rows),
            "audited_at": datetime.now(timezone.utc).isoformat(),
        })
        print(f" {len(host_rows)} DNS host entries saved")
    # Interfaces
    interfaces = http_get(f"{OPNSENSE_HOST}/api/interfaces/overview/interfacesInfo", auth=basic_auth)
    if interfaces:
        iface_rows = interfaces.get("rows", [])
        mongo_upsert("network", {
            "name": "opnsense_interfaces",
            "type": "network_interfaces",
            "interfaces": iface_rows,
            "audited_at": datetime.now(timezone.utc).isoformat(),
        })
        print(f" {len(iface_rows)} interfaces saved")
# ── Services audit ────────────────────────────────────────────────────────────
def audit_services():
    """Document known services with their locations, ports, and storage."""
    print("\n[Services] Recording known service map...")
    # Hand-maintained service map, grouped by host for readability.
    # Concatenation order below matches the original flat list.
    grace_vm = [
        {"name": "assistant-llm", "host": "grace-vm", "ip": "192.168.20.142", "port": 8000, "type": "ai_inference", "container": "docker", "storage": "/home/grace/models", "notes": "Qwen3-8B-Q4_K_M, GPU 0 (GTX 1080 Ti)"},
        {"name": "memory-engine", "host": "grace-vm", "ip": "192.168.20.142", "port": 11434, "type": "ai_embeddings", "container": "docker", "storage": "/opt/ollama-memory/models", "notes": "Ollama CPU-only, nomic-embed-text + phi3:mini"},
        {"name": "qdrant", "host": "grace-vm", "ip": "192.168.20.142", "port": 6333, "type": "vector_db", "container": "docker", "storage": "/mnt/ai-storage/qdrant/storage", "notes": "Vector DB for RAG + mem0. NFS-backed (TrueNAS HDD+SSD cache)"},
        {"name": "open-webui", "host": "grace-vm", "ip": "192.168.20.142", "port": 3000, "type": "ai_ui", "container": "docker", "storage": None},
        {"name": "searxng", "host": "grace-vm", "ip": "192.168.20.142", "port": 8080, "type": "search", "container": "docker", "storage": None},
    ]
    db_vm = [
        {"name": "postgresql", "host": "db-vm", "ip": "192.168.20.87", "port": 5432, "type": "database", "container": "native", "storage": "/srv/databases/postgresql", "notes": "grace_training DB for transcripts"},
        {"name": "mysql", "host": "db-vm", "ip": "192.168.20.87", "port": 3306, "type": "database", "container": "native", "storage": "/var/lib/mysql"},
        {"name": "mongodb", "host": "db-vm", "ip": "192.168.20.87", "port": 27017, "type": "database", "container": "native", "storage": "/srv/databases/mongodb", "notes": "homelab_infra — this audit DB"},
        {"name": "influxdb", "host": "db-vm", "ip": "192.168.20.87", "port": 8086, "type": "timeseries_db", "container": "native", "storage": None},
        {"name": "pgadmin", "host": "db-vm", "ip": "192.168.20.87", "port": 5050, "type": "db_ui", "container": "docker", "storage": None},
    ]
    infra = [
        {"name": "proxmox-homelab", "host": "proxmox", "ip": "192.168.20.135", "port": 8006, "type": "hypervisor", "container": "native", "storage": None, "notes": "Primary Proxmox node — homelab"},
        {"name": "proxmox-router", "host": "proxmox", "ip": "192.168.20.2", "port": 8006, "type": "hypervisor", "container": "native", "storage": None, "notes": "Router Proxmox node — hosts Caddy LXC 130"},
        {"name": "caddy", "host": "lxc-130", "ip": "192.168.20.130", "port": 443, "type": "reverse_proxy", "container": "native", "storage": "/etc/caddy/Caddyfile", "notes": "Custom INWX build, on ROUTER node not homelab node"},
        {"name": "gitea", "host": "lxc-115", "ip": "192.168.20.132", "port": 3000, "type": "git", "container": "native", "storage": None, "notes": "Port 3000 not 80. Use 192.168.20.132:3000 for remotes"},
        {"name": "matrix-synapse", "host": "lxc-126", "ip": "192.168.20.127", "port": 8008, "type": "chat", "container": "native", "storage": None, "notes": "Proxied via Caddy as matrix.maxwellburton.com"},
        {"name": "element", "host": "lxc-126", "ip": "192.168.20.127", "port": 8009, "type": "chat_ui", "container": "native", "storage": None, "notes": "Proxied via Caddy as chat.maxwellburton.com"},
        {"name": "nextcloud", "host": "vm", "ip": "192.168.20.125", "port": 11000, "type": "files", "container": "native", "storage": None, "notes": "Proxied via Caddy as drive.maxwellburton.com"},
        {"name": "grafana", "host": "lxc-120", "ip": "192.168.20.120", "port": 3000, "type": "monitoring", "container": "native", "storage": None},
        {"name": "prometheus", "host": "lxc-123", "ip": None, "port": 9090, "type": "monitoring", "container": "native", "storage": None},
        {"name": "opnsense", "host": "router", "ip": "router.home.local", "port": 8443, "type": "firewall", "container": "native", "storage": None, "notes": "API at router.home.local:8443 NOT 192.168.20.1 (HAProxy)"},
        {"name": "truenas", "host": "vm-102", "ip": "192.168.20.228", "port": 443, "type": "nas", "container": "native", "storage": None, "notes": "Tank pool: 4x HDD mirrors + SSD L2ARC cache"},
        {"name": "infisical", "host": "lxc", "ip": "infisical.home.local", "port": 80, "type": "secrets", "container": "native", "storage": None},
        {"name": "joplin-server", "host": "lxc-111", "ip": "192.168.20.x", "port": 22300, "type": "notes", "container": "native", "storage": None},
        {"name": "n8n", "host": "lxc-105", "ip": None, "port": 5678, "type": "automation", "container": "native", "storage": None},
    ]
    services = grace_vm + db_vm + infra
    for svc in services:
        svc["audited_at"] = datetime.now(timezone.utc).isoformat()
        mongo_upsert("services", svc)
    print(f" {len(services)} services documented")
# ── Storage topology ──────────────────────────────────────────────────────────
def audit_storage_topology():
    """Document the full storage topology — what backs what."""
    print("\n[Storage Topology] Recording...")
    # Hand-maintained mapping of VM mounts to their physical backing stores.
    topology = [
        {
            "name": "grace-vm-root",
            "type": "storage_mapping",
            "vm": "grace-vm (192.168.20.142)",
            "mount": "/",
            "device": "/dev/sda (180GB)",
            "backed_by": "local-lvm on tower Proxmox node",
            "medium": "SSD",
            "use": "OS only",
        },
        {
            "name": "grace-vm-ai-storage",
            "type": "storage_mapping",
            "vm": "grace-vm (192.168.20.142)",
            "mount": "/mnt/ai-storage",
            "device": "/dev/sdb (1TB)",
            "backed_by": "TrueNAS Tank pool dataset vm-ai-storage, NFS via Proxmox passthrough",
            "medium": "HDD with SSD L2ARC cache (NOT pure SSD)",
            "use": "AI models, training data, Qdrant vectors, ChromaDB",
            "notes": "Tank pool is HDD mirrors + SSD cache. Do NOT run latency-sensitive DBs here if possible.",
        },
        {
            "name": "grace-vm-shared-documents",
            "type": "storage_mapping",
            "vm": "grace-vm (192.168.20.142)",
            "mount": "/mnt/shared_documents",
            "device": "NFS 192.168.20.228:/mnt/Tank/systems/shared_documents",
            "backed_by": "TrueNAS Tank pool, HDD with SSD L2ARC cache",
            "medium": "HDD with SSD L2ARC cache",
            "use": ".md files, docs, memory files ONLY. Not for apps or DBs.",
        },
        {
            "name": "db-vm-databases",
            "type": "storage_mapping",
            "vm": "db-vm (192.168.20.87)",
            "mount": "/srv/databases",
            "device": "/dev/sdb (1TB)",
            "backed_by": "Proxmox local-lvm or TrueNAS dataset vm-db-server-dev",
            "medium": "unknown — needs verification",
            "use": "PostgreSQL, MongoDB, InfluxDB data",
        },
        {
            "name": "truenas-tank-pool",
            "type": "storage_hardware",
            "pool": "Tank",
            "topology": "2x MIRROR vdevs (4 HDDs total)",
            "cache": "SSD L2ARC (read cache)",
            "medium": "HDD primary + SSD cache",
            "notes": "NFS exports: vm-ai-storage, shared_documents, backup-drive, immich_storage, joplinapp-storage",
        },
    ]
    for mapping in topology:
        mongo_upsert("storage", mapping)
    print(f" {len(topology)} storage mappings documented")
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """CLI entry point.

    Modes (mutually exclusive, checked in this order):
      --dump               print every collection as JSON (truncated)
      --query "<text>"     keyword search across all collections
      --update <component> re-audit one of proxmox|truenas|opnsense|services|storage
      (no flags)           full audit of everything
    """
    parser = argparse.ArgumentParser(description="Grace Homelab Infrastructure Audit")
    parser.add_argument("--update", type=str, help="Update specific component: proxmox|truenas|opnsense|services|storage")
    parser.add_argument("--dump", action="store_true", help="Dump all collections as JSON")
    parser.add_argument("--query", type=str, help="Natural language summary query (prints relevant docs)")
    args = parser.parse_args()

    if args.dump:
        # Each collection is truncated to 3000 chars to keep output readable.
        for collection in ["nodes", "vms", "services", "storage", "network"]:
            docs = mongo_query_all(collection)
            print(f"\n=== {collection.upper()} ({len(docs)} docs) ===")
            print(json.dumps(docs, indent=2, default=str)[:3000])
        return

    if args.query:
        # Simple keyword search across all collections.
        # (Fix: removed an unused `import subprocess` that sat here.)
        query_lower = args.query.lower()
        # Only words longer than 3 chars count as keywords, so a query of
        # short words matches nothing.
        keywords = [w for w in query_lower.split() if len(w) > 3]
        for collection in ["nodes", "vms", "services", "storage", "network"]:
            docs = mongo_query_all(collection)
            for doc in docs:
                doc_str = json.dumps(doc, default=str).lower()
                if any(kw in doc_str for kw in keywords):
                    print(f"\n[{collection}] {doc.get('name','?')}")
                    # Print only the high-signal fields
                    for k in ["type","ip","port","mount","medium","backed_by","notes","status","use"]:
                        if k in doc and doc[k]:
                            print(f" {k}: {doc[k]}")
        return

    creds = get_creds()
    if args.update:
        component = args.update.lower()
        if component == "proxmox":
            audit_proxmox(creds)
        elif component == "truenas":
            audit_truenas(creds)
        elif component == "opnsense":
            audit_opnsense(creds)
        elif component == "services":
            audit_services()
        elif component == "storage":
            audit_storage_topology()
        else:
            print(f"Unknown component: {component}")
        return

    # Full audit
    print("=== Grace Homelab Full Infrastructure Audit ===")
    print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    audit_proxmox(creds)
    audit_truenas(creds)
    audit_opnsense(creds)
    audit_services()
    audit_storage_topology()
    print("\n=== Audit complete ===")
    print("Query with: python3 ~/scripts/infra-audit.py --query '<question>'")


if __name__ == "__main__":
    main()