"""Semantic search over a FAISS index built by the ingest step.

Loads a FAISS index plus a parallel JSON metadata list from ``env["INDEX_DIR"]``,
embeds the query with a sentence-transformers model, and returns the top-k
metadata records (optionally post-filtered by geo / categories) with a
``"score"`` field attached.
"""

import json
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np
from sentence_transformers import SentenceTransformer


def load_index(env: Dict):
    """Load the FAISS index and its metadata list from ``env["INDEX_DIR"]``.

    Args:
        env: Config mapping; must contain ``"INDEX_DIR"`` (path to the
            directory holding ``faiss.index`` and ``meta.json``).

    Returns:
        Tuple of ``(faiss_index, metas)`` where ``metas`` is a list of dicts
        parallel to the index rows.

    Raises:
        RuntimeError: If the index file does not exist yet.
    """
    import faiss  # local import: faiss is only needed when an index exists

    index_dir = Path(env["INDEX_DIR"])
    index_path = index_dir / "faiss.index"
    meta_path = index_dir / "meta.json"
    if not index_path.exists():
        raise RuntimeError("Index not found. Run ingest first.")
    index = faiss.read_index(str(index_path))
    # read_text + loads avoids leaking a file handle (original used bare open()).
    metas = json.loads(meta_path.read_text())
    return index, metas


@lru_cache(maxsize=1)
def _get_model() -> SentenceTransformer:
    """Load the embedding model once and cache it (model load is expensive)."""
    return SentenceTransformer("all-MiniLM-L6-v2")


def embed(texts: List[str]) -> np.ndarray:
    """Embed *texts* into L2-normalized vectors.

    Normalized embeddings mean inner-product search is equivalent to cosine
    similarity, matching how the index is expected to be queried.
    """
    # Model is cached across calls instead of being re-loaded per query.
    return _get_model().encode(texts, convert_to_numpy=True, normalize_embeddings=True)


def search(q: str, env: Dict, top_k: int = 15, filters: Optional[Dict] = None) -> List[Dict]:
    """Run a semantic search and return scored, optionally filtered metadata.

    Args:
        q: Natural-language query string.
        env: Config mapping passed through to :func:`load_index`.
        top_k: Number of nearest neighbors to retrieve from FAISS
            (before post-filtering, so fewer results may be returned).
        filters: Optional post-filters. Supported keys:
            ``"geo"`` — iterable of allowed geo values;
            ``"categories"`` — iterable that must intersect the record's
            ``categories`` list.

    Returns:
        List of metadata dicts (copies, so the loaded metadata is not
        mutated), each with a float ``"score"`` added.
    """
    index, metas = load_index(env)
    qv = embed([q])
    scores, idxs = index.search(qv, top_k)

    results: List[Dict] = []
    for score, idx in zip(scores[0], idxs[0]):
        if idx == -1:
            # FAISS pads with -1 when fewer than top_k neighbors exist.
            continue
        m = metas[idx]
        if filters:
            if "geo" in filters and filters["geo"] and m.get("geo") not in filters["geo"]:
                continue
            if "categories" in filters and filters["categories"]:
                if not set(filters["categories"]).intersection(set(m.get("categories", []))):
                    continue
        # Copy before annotating so we don't mutate the shared metas entries.
        m = dict(m)
        m["score"] = float(score)
        results.append(m)
    return results