Spaces:
Runtime error
Runtime error
Commit
·
796e66c
1
Parent(s):
b363844
feat: add state-level HTML/PDF adapters and updated capacity filters
Browse files- .gitignore +7 -0
- app/ingest.py +294 -14
- config/sources.yaml +148 -1
- project-plan-rag.rtf +569 -0
.gitignore
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.venv/
|
| 2 |
__pycache__/
|
| 3 |
*.pyc
|
|
|
|
| 1 |
+
|
| 2 |
+
venv/
|
| 3 |
+
*.pyc
|
| 4 |
+
__pycache__/
|
| 5 |
+
.DS_Store
|
| 6 |
+
Makefile.old
|
| 7 |
+
start-up-project.txt
|
| 8 |
.venv/
|
| 9 |
__pycache__/
|
| 10 |
*.pyc
|
app/ingest.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
from __future__ import annotations
|
| 3 |
import json
|
| 4 |
from pathlib import Path
|
| 5 |
-
from typing import Dict, List, Any
|
| 6 |
|
| 7 |
import yaml
|
| 8 |
import numpy as np
|
|
@@ -11,6 +11,12 @@ from sentence_transformers import SentenceTransformer
|
|
| 11 |
from app.paths import DOCSTORE_DIR, INDEX_DIR
|
| 12 |
from .normalize import normalize # ← central normalizer
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# -------------------- Config --------------------
|
| 16 |
|
|
@@ -19,6 +25,65 @@ def load_config(cfg_path: str) -> Dict:
|
|
| 19 |
return yaml.safe_load(f)
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# -------------------- Grants.gov collector --------------------
|
| 23 |
|
| 24 |
def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
|
@@ -39,6 +104,194 @@ def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
|
| 39 |
return [h for h in hits if isinstance(h, dict)]
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# -------------------- Write docstore & build index --------------------
|
| 43 |
|
| 44 |
def _save_docstore(recs: List[Dict[str, Any]]) -> str:
|
|
@@ -55,7 +308,6 @@ def _build_index_from_docstore() -> int:
|
|
| 55 |
if not ds_path.exists():
|
| 56 |
raise RuntimeError("Docstore not found. Run ingest first.")
|
| 57 |
|
| 58 |
-
# Load records → texts + metas
|
| 59 |
texts: List[str] = []
|
| 60 |
metas: List[Dict[str, Any]] = []
|
| 61 |
with ds_path.open("r", encoding="utf-8") as f:
|
|
@@ -85,16 +337,15 @@ def _build_index_from_docstore() -> int:
|
|
| 85 |
print(f"[index] Rows loaded from docstore: {len(texts)}")
|
| 86 |
|
| 87 |
if not texts:
|
| 88 |
-
|
| 89 |
-
(INDEX_DIR).mkdir(parents=True, exist_ok=True)
|
| 90 |
(INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
|
| 91 |
print("[index] No texts to embed. Wrote empty meta.json.")
|
| 92 |
return 0
|
| 93 |
|
| 94 |
-
# Embed (CPU default;
|
| 95 |
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 96 |
model.max_seq_length = 256
|
| 97 |
-
batch = max(8, min(32, len(texts)))
|
| 98 |
emb = model.encode(
|
| 99 |
texts,
|
| 100 |
convert_to_numpy=True,
|
|
@@ -117,22 +368,32 @@ def _build_index_from_docstore() -> int:
|
|
| 117 |
return len(texts)
|
| 118 |
|
| 119 |
|
| 120 |
-
# --------------------
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
| 123 |
"""
|
| 124 |
-
Reads config, fetches from enabled sources, normalizes
|
| 125 |
-
|
|
|
|
| 126 |
"""
|
| 127 |
cfg = load_config(cfg_path)
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
all_rows: List[Dict[str, Any]] = []
|
| 130 |
for entry in cfg.get("sources", []):
|
| 131 |
if not entry.get("enabled"):
|
| 132 |
continue
|
| 133 |
|
| 134 |
name = entry.get("name", "<source>")
|
| 135 |
-
geo
|
| 136 |
cats = entry.get("categories") or []
|
| 137 |
static = {"geo": geo, "categories": cats}
|
| 138 |
|
|
@@ -143,20 +404,37 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
|
| 143 |
raw_hits = _collect_from_grantsgov_api(entry)
|
| 144 |
rows = [normalize("grants_gov", h, static) for h in raw_hits]
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
elif typ == "local_sample":
|
| 147 |
p = Path(entry["path"]).expanduser()
|
| 148 |
blob = json.loads(p.read_text(encoding="utf-8"))
|
| 149 |
items = blob.get("opportunities") or []
|
| 150 |
rows = [normalize("local_sample", op, static) for op in items]
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
print(f"[collect] {name} → {len(rows)} rows")
|
| 157 |
all_rows.extend(rows)
|
| 158 |
|
| 159 |
-
# ---- DEDUPE (id → url → title) ----
|
| 160 |
seen, unique = set(), []
|
| 161 |
for r in all_rows:
|
| 162 |
key = r.get("id") or r.get("url") or r.get("title")
|
|
@@ -172,6 +450,8 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
|
| 172 |
return path, n
|
| 173 |
|
| 174 |
|
|
|
|
|
|
|
| 175 |
if __name__ == "__main__":
|
| 176 |
import argparse
|
| 177 |
ap = argparse.ArgumentParser()
|
|
|
|
| 2 |
from __future__ import annotations
|
| 3 |
import json
|
| 4 |
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Any, Tuple, Optional
|
| 6 |
|
| 7 |
import yaml
|
| 8 |
import numpy as np
|
|
|
|
| 11 |
from app.paths import DOCSTORE_DIR, INDEX_DIR
|
| 12 |
from .normalize import normalize # ← central normalizer
|
| 13 |
|
| 14 |
+
import re
|
| 15 |
+
import time
|
| 16 |
+
import hashlib
|
| 17 |
+
import requests
|
| 18 |
+
from bs4 import BeautifulSoup
|
| 19 |
+
|
| 20 |
|
| 21 |
# -------------------- Config --------------------
|
| 22 |
|
|
|
|
| 25 |
return yaml.safe_load(f)
|
| 26 |
|
| 27 |
|
| 28 |
+
# -------------------- Capacity / Geo Filters (config-driven) --------------------
|
| 29 |
+
# controls live in config/sources.yaml:
|
| 30 |
+
# filters:
|
| 31 |
+
# capacity_only: true
|
| 32 |
+
# pa_md_only: false
|
| 33 |
+
|
| 34 |
+
# Case-insensitive phrases that mark a record as organizational
# capacity-building; consulted by _is_capacity_building_text.
_INCLUDE_PATTERNS = [
    re.compile(source, flags=re.IGNORECASE)
    for source in (
        r"\bcapacity(?:[-\s]?building)?\b",
        r"\btechnical\s+assistance\b",
        r"\bTA\b",
        r"\borganizational\s+(capacity|effectiveness|development|readiness|stabilization)\b",
        r"\borganization(?:al)?\s+infrastructure\b",
        r"\bback[-\s]?office\b|\bbackbone\s+organization\b",
        r"\bgovernance\b|\bboard\s+development\b|\bboard\s+training\b",
        r"\bpre[-\s]?development\b|\bpredevelopment\b|\bplanning\s+grant\b",
        r"\bdata\s+systems?\b|\bCRM\b|\bcase\s+management\b",
        r"\b(staff|workforce)\s+capacity\b|\bhire\s+(?:staff|positions?)\b",
        r"\bscal(?:e|ing)\s+capacity\b|\bexpand\s+capacity\b",
        r"\bnonprofit\b|\bfaith[-\s]?based\b|\bcommunity[-\s]?based\b",
    )
]
|
| 48 |
+
|
| 49 |
+
# Phrases that disqualify a record even when an include pattern also matches.
# Several entries carry a negative lookahead ("... unless the text goes on to
# mention X"). The filter text is built by joining record fields with "\n",
# so re.S is required: without it "." stops at the first newline and a
# qualifier on a later line (e.g. in the synopsis) could never rescue a term
# found in the title.
_EXCLUDE_PATTERNS = [re.compile(p, re.I | re.S) for p in [
    r"\bteaching\s+assistant\b|\bTAs\b",
    r"\bbench\s+capacity\b|\bmanufacturing\s+capacity\b(?!.*organiz)",
    r"\bclinical\s+trial\b|\blaboratory\s+capacity\b(?!.*community)",
    r"\b(postsecondary|university|college)\b(?!.*community\s+partner)",
    r"\bconstruction\b(?!.*(admin|organiz|back[-\s]?office|governance|systems))",
]]
|
| 56 |
+
|
| 57 |
+
# Case-insensitive place names / abbreviations that hint at Pennsylvania or
# Maryland; consulted by _is_pa_md_text.
_PA_MD_HINTS = re.compile(
    r"\b(" + "|".join((
        # Pennsylvania
        "Pennsylvania",
        r"PA\b",
        "Harrisburg",
        "Philadelphia",
        "Allegheny",
        r"Montgomery County\b",
        "Pittsburgh",
        "Scranton",
        "Erie",
        # Maryland
        "Maryland",
        r"MD\b",
        "Annapolis",
        "Baltimore",
        r"Prince\s+George'?s",
        r"Howard County\b",
    )) + r")\b",
    flags=re.IGNORECASE,
)
|
| 64 |
+
|
| 65 |
+
def _doc_text_from_row(rec: Dict[str, Any]) -> str:
    """
    Build the text that the capacity / geo filters are matched against.

    Joins, in order, title, synopsis (falling back to summary), agency,
    eligibility, categories and geo with newlines, then strips the result.
    A missing or falsy field contributes an empty line.

    List/tuple values (categories, or a multi-state geo) are space-joined and
    any other non-string value is stringified, so an oddly typed field no
    longer raises TypeError inside str.join.
    """
    def _as_text(value: Any) -> str:
        # Normalise one field to a plain string.
        if not value:
            return ""
        if isinstance(value, (list, tuple)):
            return " ".join(str(item) for item in value if item is not None)
        return str(value)

    fields = (
        rec.get("title"),
        rec.get("synopsis") or rec.get("summary"),
        rec.get("agency"),
        rec.get("eligibility"),
        rec.get("categories"),
        rec.get("geo"),
    )
    return "\n".join(_as_text(field) for field in fields).strip()
|
| 73 |
+
|
| 74 |
+
def _is_capacity_building_text(text: str) -> bool:
    """
    Decide whether *text* describes a capacity-building opportunity.

    Empty text is rejected. Any exclude-pattern hit vetoes the text;
    otherwise a single include-pattern hit accepts it.
    """
    if not text:
        return False
    for veto in _EXCLUDE_PATTERNS:
        if veto.search(text):
            return False
    for wanted in _INCLUDE_PATTERNS:
        if wanted.search(text):
            return True
    return False
|
| 80 |
+
|
| 81 |
+
def _is_pa_md_text(text: str) -> bool:
    """Return True when *text* is non-empty and contains a PA/MD place hint."""
    return _PA_MD_HINTS.search(text) is not None if text else False
|
| 85 |
+
|
| 86 |
+
|
| 87 |
# -------------------- Grants.gov collector --------------------
|
| 88 |
|
| 89 |
def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
|
|
|
| 104 |
return [h for h in hits if isinstance(h, dict)]
|
| 105 |
|
| 106 |
|
| 107 |
+
# -------------------- NEW: Generic HTML / PDF collectors --------------------
|
| 108 |
+
|
| 109 |
+
# Headers sent with every request issued through _http_get.
_USER_AGENT = "grants-rag/1.0 (+https://example.local) requests"
_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
_HTTP_HEADERS = {"User-Agent": _USER_AGENT, "Accept": _ACCEPT}
|
| 113 |
+
|
| 114 |
+
def _http_get(url: str, timeout: int = 20) -> Optional[requests.Response]:
    """
    GET *url* with the shared crawler headers.

    Returns the response only when the status code is exactly 200 and the
    body is non-empty. Any other status, an empty body, or a
    requests.RequestException yields None.
    """
    try:
        resp = requests.get(url, headers=_HTTP_HEADERS, timeout=timeout)
        usable = resp.status_code == 200 and bool(resp.content)
    except requests.RequestException:
        return None
    return resp if usable else None
|
| 122 |
+
|
| 123 |
+
def _soup(html: str) -> BeautifulSoup:
    """
    Parse *html* into a BeautifulSoup tree.

    Prefers the lxml parser for robustness and falls back to the standard
    library's "html.parser" when lxml is not installed, instead of letting
    bs4's FeatureNotFound abort the whole ingest run.
    """
    from bs4 import FeatureNotFound  # local import: only needed for the fallback

    try:
        return BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        return BeautifulSoup(html, "html.parser")
|
| 126 |
+
|
| 127 |
+
def _text_from_soup(s: BeautifulSoup, selectors: Optional[List[str]] = None) -> Tuple[str, str]:
    """
    Returns (title, text). Uses selectors if provided;
    falls back to common content containers, then to <body>.

    title is the stripped <title> string ("" when absent). text is the
    visible text of every selected node, one line per text fragment, with
    nodes separated by a blank line.

    Each element contributes once: an element matched by several selectors,
    or matched inside another matched element (e.g. <article> within
    <main>), is not emitted again, because the outer element's text already
    contains it.
    """
    title = s.title.string.strip() if s.title and s.title.string else ""

    fallbacks = ("main", "article", "#content", ".content", "[role='main']")
    nodes = []
    # Caller-supplied selectors win; the generic containers are tried only
    # when those matched nothing.
    for group in (selectors or (), fallbacks):
        for css in group:
            nodes.extend(s.select(css) or [])
        if nodes:
            break
    if not nodes and s.body is not None:
        nodes = [s.body]

    # De-duplicate by identity rather than equality (bs4 documents tag
    # equality as structural, so distinct look-alike elements would merge).
    picked = {}
    for node in nodes:
        if node is not None:
            picked.setdefault(id(node), node)

    parts: List[str] = []
    for node in picked.values():
        # Skip a node whose ancestor was also selected: its text is already
        # part of that ancestor's get_text() output.
        if any(id(parent) in picked for parent in getattr(node, "parents", ())):
            continue
        txt = node.get_text(separator="\n", strip=True)
        if txt:
            parts.append(txt)
    body = "\n\n".join(parts).strip()
    return title, body
|
| 153 |
+
|
| 154 |
+
def _make_id(*fields: str) -> str:
    """
    Return a SHA-1 hex digest over the non-empty *fields*.

    Each non-empty field is UTF-8 encoded (unencodable characters ignored)
    and followed by a "|" separator; empty fields are skipped entirely.
    """
    digest = hashlib.sha1()
    for piece in filter(None, fields):
        digest.update(piece.encode("utf-8", "ignore") + b"|")
    return digest.hexdigest()
|
| 161 |
+
|
| 162 |
+
def _normalize_web_record(
    source_name: str,
    url: str,
    title: str,
    body: str,
    static: Dict[str, Any],
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Shape a fetched page/PDF like normalize() output so downstream code is unchanged.

    The id comes from extra["id"] when present, otherwise from a hash of the
    url plus the title (or the first 160 characters of the body when the
    title is empty). The title falls back to extra["title"], then to the url.
    The synopsis is the body clipped to 2000 characters; geo and categories
    are copied from *static*; the remaining fields are read from *extra*.
    """
    meta = extra or {}
    record_id = meta.get("id") or _make_id(url, title or body[:160])
    display_title = title or meta.get("title") or url
    return {
        "id": record_id,
        "title": display_title,
        "synopsis": body[:2000],  # clip; embeddings use title+synopsis later
        "summary": None,
        "url": url,
        "source": source_name,
        "geo": static.get("geo"),
        "categories": static.get("categories"),
        "agency": meta.get("agency", ""),
        "eligibility": meta.get("eligibility", ""),
        "deadline": meta.get("deadline"),
        "program_number": meta.get("program_number"),
        "posted_date": meta.get("posted_date"),
    }
|
| 189 |
+
|
| 190 |
+
def _collect_from_http_html(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Fetch a landing page (and optionally the pages it links to) as records.

    Supports types: 'web_page' and 'http_html'
    Config keys supported:
      - url (str)
      - parse: { follow_links: bool, link_selectors: [..], content_selectors: [..] }
        ('extract' is accepted as an alias for 'parse')
      - crawl: { schedule: "...", max_depth: int }  # max_depth 0/None = only landing

    The landing page always yields one record. Links are followed only when
    follow_links is true, link_selectors is non-empty and max_depth > 0.
    When following links:
      - relative hrefs are resolved against the page they were found on and
        "#fragment" suffixes are dropped, so the same page is fetched once;
      - links whose path ends in ".pdf" are handed to _collect_from_http_pdf
        instead of being decoded as HTML;
      - responses with a non-HTML/XML/plain-text Content-Type are skipped;
      - at most 40 links are taken per page and at most 200 records are
        returned in total.

    Returns [] when url is missing or the landing page cannot be fetched.
    """
    from urllib.parse import unquote, urldefrag, urljoin, urlsplit  # local: block stays self-contained

    max_links_per_page = 40  # polite cap
    max_rows = 200

    url = entry.get("url")
    if not url:
        return []
    r = _http_get(url)
    if not r:
        return []

    s = _soup(r.text)
    parse = entry.get("parse", {}) or entry.get("extract", {}) or {}
    content_selectors = parse.get("content_selectors") or []
    title, body = _text_from_soup(s, content_selectors)

    rows = [_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None})]

    # follow links?
    follow = bool(parse.get("follow_links"))
    link_selectors = parse.get("link_selectors") or []
    crawl = entry.get("crawl", {}) or {}
    max_depth = int(crawl.get("max_depth", 0) or 0)
    if not (follow and link_selectors and max_depth > 0):
        return rows

    def _enq_links(soup: BeautifulSoup, base: str) -> List[str]:
        # Collect unique absolute http(s) links matched by link_selectors.
        out, seen = [], set()
        for sel in link_selectors:
            for a in soup.select(sel) or []:
                href = a.get("href")
                if not href:
                    continue
                absolute = urldefrag(urljoin(base, href.strip())).url
                if not absolute.startswith(("http://", "https://")):
                    continue  # mailto:, javascript:, tel:, ...
                if absolute not in seen:
                    seen.add(absolute)
                    out.append(absolute)
        return out[:max_links_per_page]

    visited = {url}
    # Resolve against the final URL when the response exposes one (redirects).
    frontier = _enq_links(s, getattr(r, "url", None) or url)
    depth = 1
    while frontier and depth <= max_depth and len(rows) < max_rows:
        next_frontier: List[str] = []
        for link in frontier:
            if len(rows) >= max_rows:
                break  # enforce the cap inside a level, not only between levels
            if link in visited:
                continue
            visited.add(link)

            if urlsplit(link).path.lower().endswith(".pdf"):
                # PDF bytes decoded as HTML only produce junk text; use the
                # PDF adapter and title the record after the file name.
                pdf_name = unquote(urlsplit(link).path.rsplit("/", 1)[-1]) or None
                try:
                    rows.extend(_collect_from_http_pdf({"url": link, "name": pdf_name}, source_name, static))
                except Exception as exc:  # one bad PDF must not abort the crawl
                    print(f"[collect] {source_name}: skipped PDF {link}: {exc}")
                time.sleep(0.1)  # gentle
                continue

            rr = _http_get(link)
            if not rr:
                continue
            ctype = (rr.headers.get("Content-Type") or "").lower()
            if ctype and not any(kind in ctype for kind in ("html", "xml", "text/plain")):
                continue  # images, archives, office documents, ...
            ss = _soup(rr.text)
            t2, b2 = _text_from_soup(ss, content_selectors)
            if b2:
                rows.append(_normalize_web_record(source_name, link, t2, b2, static, extra={"posted_date": None}))
            if depth < max_depth:
                next_frontier.extend(_enq_links(ss, getattr(rr, "url", None) or link))
            time.sleep(0.1)  # gentle
        frontier = next_frontier
        depth += 1

    return rows[:max_rows]
|
| 259 |
+
|
| 260 |
+
def _collect_from_http_pdf(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Fetch one PDF and return it as a single record (or [] when nothing usable).

    type: 'http_pdf'
    keys:
      - url (single PDF fetch)
      - name (optional; used as the record title, default "PDF Document")

    'url_patterns' is not implemented by this adapter: an entry that sets
    only url_patterns is reported and skipped.

    Returns [] when url is missing, pdfminer is not installed, the download
    fails, the text cannot be extracted, or the extracted text is blank.
    Extraction errors are reported and swallowed so one malformed PDF cannot
    abort the whole ingest run.
    """
    import io  # local import: only this adapter needs an in-memory buffer

    url = entry.get("url")
    if not url:
        if entry.get("url_patterns"):
            print(f"[collect] {source_name}: 'url_patterns' is not supported by http_pdf; set 'url'. Skipping.")
        return []

    try:
        from pdfminer.high_level import extract_text  # lazy import
    except ImportError:
        print(f"[collect] {source_name}: pdfminer is not installed; skipping {url}")
        return []

    r = _http_get(url, timeout=40)
    if not r:
        return []

    try:
        # extract_text accepts a file-like object, so parse from memory
        # instead of writing a temporary file into the docstore directory.
        body = extract_text(io.BytesIO(r.content)) or ""
    except Exception as exc:  # pdfminer raises assorted error types on malformed files
        print(f"[collect] {source_name}: could not extract text from {url}: {exc}")
        return []

    if not body.strip():
        return []
    title = entry.get("name") or "PDF Document"
    return [_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None})]
|
| 293 |
+
|
| 294 |
+
|
| 295 |
# -------------------- Write docstore & build index --------------------
|
| 296 |
|
| 297 |
def _save_docstore(recs: List[Dict[str, Any]]) -> str:
|
|
|
|
| 308 |
if not ds_path.exists():
|
| 309 |
raise RuntimeError("Docstore not found. Run ingest first.")
|
| 310 |
|
|
|
|
| 311 |
texts: List[str] = []
|
| 312 |
metas: List[Dict[str, Any]] = []
|
| 313 |
with ds_path.open("r", encoding="utf-8") as f:
|
|
|
|
| 337 |
print(f"[index] Rows loaded from docstore: {len(texts)}")
|
| 338 |
|
| 339 |
if not texts:
|
| 340 |
+
INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 341 |
(INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
|
| 342 |
print("[index] No texts to embed. Wrote empty meta.json.")
|
| 343 |
return 0
|
| 344 |
|
| 345 |
+
# Embed (CPU default; portable)
|
| 346 |
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 347 |
model.max_seq_length = 256
|
| 348 |
+
batch = max(8, min(32, len(texts)))
|
| 349 |
emb = model.encode(
|
| 350 |
texts,
|
| 351 |
convert_to_numpy=True,
|
|
|
|
| 368 |
return len(texts)
|
| 369 |
|
| 370 |
|
| 371 |
+
# -------------------- Public API: ingest --------------------
|
| 372 |
+
|
| 373 |
+
__all__ = ["ingest"]
|
| 374 |
|
| 375 |
def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
| 376 |
"""
|
| 377 |
+
Reads config, fetches from enabled sources via adapters, normalizes to a single schema,
|
| 378 |
+
applies filters (capacity / PA-MD), dedupes, writes docstore, and builds the FAISS index.
|
| 379 |
+
Returns (docstore_path, n_indexed).
|
| 380 |
"""
|
| 381 |
cfg = load_config(cfg_path)
|
| 382 |
|
| 383 |
+
# ---- Filters from config ----
|
| 384 |
+
f_cfg = (cfg or {}).get("filters", {}) or {}
|
| 385 |
+
capacity_only = bool(f_cfg.get("capacity_only", True))
|
| 386 |
+
pa_md_only = bool(f_cfg.get("pa_md_only", False))
|
| 387 |
+
print(f"[filters] capacity_only = {'TRUE' if capacity_only else 'FALSE'}")
|
| 388 |
+
print(f"[filters] pa_md_only = {'TRUE' if pa_md_only else 'FALSE'}")
|
| 389 |
+
|
| 390 |
all_rows: List[Dict[str, Any]] = []
|
| 391 |
for entry in cfg.get("sources", []):
|
| 392 |
if not entry.get("enabled"):
|
| 393 |
continue
|
| 394 |
|
| 395 |
name = entry.get("name", "<source>")
|
| 396 |
+
geo = entry.get("geo") or "US"
|
| 397 |
cats = entry.get("categories") or []
|
| 398 |
static = {"geo": geo, "categories": cats}
|
| 399 |
|
|
|
|
| 404 |
raw_hits = _collect_from_grantsgov_api(entry)
|
| 405 |
rows = [normalize("grants_gov", h, static) for h in raw_hits]
|
| 406 |
|
| 407 |
+
elif typ in ("web_page", "http_html"):
|
| 408 |
+
rows = _collect_from_http_html(entry, name, static)
|
| 409 |
+
|
| 410 |
+
elif typ == "http_pdf":
|
| 411 |
+
rows = _collect_from_http_pdf(entry, name, static)
|
| 412 |
+
|
| 413 |
elif typ == "local_sample":
|
| 414 |
p = Path(entry["path"]).expanduser()
|
| 415 |
blob = json.loads(p.read_text(encoding="utf-8"))
|
| 416 |
items = blob.get("opportunities") or []
|
| 417 |
rows = [normalize("local_sample", op, static) for op in items]
|
| 418 |
|
| 419 |
+
# Unknown types => skip silently
|
| 420 |
+
|
| 421 |
+
# ---- Apply capacity / geo filters BEFORE collecting ----
|
| 422 |
+
if rows and (capacity_only or pa_md_only):
|
| 423 |
+
filtered = []
|
| 424 |
+
for r in rows:
|
| 425 |
+
t = _doc_text_from_row(r)
|
| 426 |
+
if capacity_only and not _is_capacity_building_text(t):
|
| 427 |
+
continue
|
| 428 |
+
if pa_md_only and not _is_pa_md_text(t):
|
| 429 |
+
continue
|
| 430 |
+
filtered.append(r)
|
| 431 |
+
print(f"[filter] {name}: kept {len(filtered)}/{len(rows)} after filters")
|
| 432 |
+
rows = filtered
|
| 433 |
|
| 434 |
print(f"[collect] {name} → {len(rows)} rows")
|
| 435 |
all_rows.extend(rows)
|
| 436 |
|
| 437 |
+
# ---- DEDUPE (by id → url → title) ----
|
| 438 |
seen, unique = set(), []
|
| 439 |
for r in all_rows:
|
| 440 |
key = r.get("id") or r.get("url") or r.get("title")
|
|
|
|
| 450 |
return path, n
|
| 451 |
|
| 452 |
|
| 453 |
+
# -------------------- CLI --------------------
|
| 454 |
+
|
| 455 |
if __name__ == "__main__":
|
| 456 |
import argparse
|
| 457 |
ap = argparse.ArgumentParser()
|
config/sources.yaml
CHANGED
|
@@ -1,4 +1,8 @@
|
|
| 1 |
# Minimal, valid config — v6.3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
sources:
|
| 3 |
# ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
|
| 4 |
|
|
@@ -83,6 +87,8 @@ sources:
|
|
| 83 |
sortBy: "openDate|desc"
|
| 84 |
|
| 85 |
# ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
|
|
|
|
|
|
|
| 86 |
|
| 87 |
- name: "Maryland MTA — Grants (incl. 5310)"
|
| 88 |
type: web_page
|
|
@@ -156,8 +162,149 @@ sources:
|
|
| 156 |
mode: "article"
|
| 157 |
keep_links: true
|
| 158 |
|
| 159 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
- name: "State 5310 Listings (curated JSON)"
|
| 162 |
type: json_static
|
| 163 |
enabled: false # set to true once you generate the file below
|
|
|
|
| 1 |
# Minimal, valid config — v6.3
|
| 2 |
+
filters:
|
| 3 |
+
capacity_only: true # keep only capacity-building items
|
| 4 |
+
pa_md_only: false # set to true to restrict index to PA/MD
|
| 5 |
+
|
| 6 |
sources:
|
| 7 |
# ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
|
| 8 |
|
|
|
|
| 87 |
sortBy: "openDate|desc"
|
| 88 |
|
| 89 |
# ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
|
| 90 |
+
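# NOTE: These sources use the http_html / web_page / http_pdf adapters implemented in app/ingest.py.
|
| 91 |
+
# They are kept here (enabled); set enabled: false for any source your runtime cannot fetch. The http_pdf adapter reads a single `url`, so entries that only set `url_patterns` produce no rows.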
|
| 92 |
|
| 93 |
- name: "Maryland MTA — Grants (incl. 5310)"
|
| 94 |
type: web_page
|
|
|
|
| 162 |
mode: "article"
|
| 163 |
keep_links: true
|
| 164 |
|
| 165 |
+
# --- Pennsylvania: PCA (state arts) ---
|
| 166 |
+
- name: "PA Creative Industries – Capacity Building (landing)"
|
| 167 |
+
type: http_html
|
| 168 |
+
enabled: true
|
| 169 |
+
url: "https://www.pa.gov/agencies/coa/grants-and-loans/capacity-building-programs.html"
|
| 170 |
+
geo: "PA"
|
| 171 |
+
categories: ["capacity_building"]
|
| 172 |
+
parse:
|
| 173 |
+
follow_links: true
|
| 174 |
+
link_selectors:
|
| 175 |
+
- "a[href*='capacity']"
|
| 176 |
+
- "a[href*='strategies-for-success']"
|
| 177 |
+
- "a[href$='.pdf']"
|
| 178 |
+
content_selectors:
|
| 179 |
+
- "main"
|
| 180 |
+
- "article"
|
| 181 |
+
- ".content"
|
| 182 |
+
|
| 183 |
+
- name: "PA Creative Industries – Creative Sector Flex Fund"
|
| 184 |
+
type: http_html
|
| 185 |
+
enabled: true
|
| 186 |
+
url: "https://www.pa.gov/agencies/coa/grants-and-loans/creative-sector-flex-fund.html"
|
| 187 |
+
geo: "PA"
|
| 188 |
+
categories: ["capacity_building"]
|
| 189 |
+
parse:
|
| 190 |
+
follow_links: true
|
| 191 |
+
link_selectors:
|
| 192 |
+
- "a[href$='.pdf']"
|
| 193 |
+
- "a[href*='guidelines']"
|
| 194 |
+
- "a[href*='apply']"
|
| 195 |
+
content_selectors:
|
| 196 |
+
- "main"
|
| 197 |
+
- "article"
|
| 198 |
+
- ".content"
|
| 199 |
|
| 200 |
+
# --- Pennsylvania: PCCD (eGrants announcements & PDFs) ---
|
| 201 |
+
- name: "PCCD – Funding Announcements (eGrants)"
|
| 202 |
+
type: http_html
|
| 203 |
+
enabled: true
|
| 204 |
+
url: "https://www.pccd.pa.gov/Funding/Pages/default.aspx"
|
| 205 |
+
geo: "PA"
|
| 206 |
+
categories: ["capacity_building", "public_safety", "youth"]
|
| 207 |
+
parse:
|
| 208 |
+
follow_links: true
|
| 209 |
+
link_selectors:
|
| 210 |
+
- "a[href*='Funding-Announcement']"
|
| 211 |
+
- "a[href$='.pdf']"
|
| 212 |
+
- "a[href*='CJAB']"
|
| 213 |
+
- "a[href*='VIP']"
|
| 214 |
+
- "a[href*='CCVI']"
|
| 215 |
+
- "a[href*='BOOST']"
|
| 216 |
+
content_selectors:
|
| 217 |
+
- "main"
|
| 218 |
+
- "article"
|
| 219 |
+
- ".ms-rtestate-field"
|
| 220 |
+
|
| 221 |
+
- name: "PCCD – PDFs (deep fetch)"
|
| 222 |
+
type: http_pdf
|
| 223 |
+
enabled: true
|
| 224 |
+
url_patterns:
|
| 225 |
+
- "https://www.pccd.pa.gov/*/*.pdf"
|
| 226 |
+
geo: "PA"
|
| 227 |
+
categories: ["capacity_building"]
|
| 228 |
+
|
| 229 |
+
# --- Maryland: OneStop (statewide grant listings with 'capacity' search) ---
|
| 230 |
+
- name: "Maryland OneStop – Capacity search"
|
| 231 |
+
type: http_html
|
| 232 |
+
enabled: true
|
| 233 |
+
url: "https://onestop.md.gov/search?query=capacity"
|
| 234 |
+
geo: "MD"
|
| 235 |
+
categories: ["capacity_building"]
|
| 236 |
+
parse:
|
| 237 |
+
follow_links: true
|
| 238 |
+
link_selectors:
|
| 239 |
+
- "a[href*='/forms/']"
|
| 240 |
+
- "a[href*='/search/']"
|
| 241 |
+
content_selectors:
|
| 242 |
+
- "main"
|
| 243 |
+
- "article"
|
| 244 |
+
- "[role='main']"
|
| 245 |
+
|
| 246 |
+
# --- Maryland: DHCD (housing/community programs & press) ---
|
| 247 |
+
- name: "MD DHCD – Programs (grants & loans index)"
|
| 248 |
+
type: http_html
|
| 249 |
+
enabled: true
|
| 250 |
+
url: "https://dhcd.maryland.gov/Pages/Programs.aspx"
|
| 251 |
+
geo: "MD"
|
| 252 |
+
categories: ["capacity_building", "housing", "community_development"]
|
| 253 |
+
parse:
|
| 254 |
+
follow_links: true
|
| 255 |
+
link_selectors:
|
| 256 |
+
- "a[href*='Programs']"
|
| 257 |
+
- "a[href$='.pdf']"
|
| 258 |
+
- "a[href*='Trust']"
|
| 259 |
+
content_selectors:
|
| 260 |
+
- "#content"
|
| 261 |
+
- "main"
|
| 262 |
+
- "article"
|
| 263 |
+
|
| 264 |
+
- name: "MD DHCD – Press/Notices (watch for NOFOs)"
|
| 265 |
+
type: http_html
|
| 266 |
+
enabled: true
|
| 267 |
+
url: "https://dhcd.maryland.gov/Pages/PressReleases.aspx"
|
| 268 |
+
geo: "MD"
|
| 269 |
+
categories: ["capacity_building"]
|
| 270 |
+
parse:
|
| 271 |
+
follow_links: true
|
| 272 |
+
link_selectors:
|
| 273 |
+
- "a[href$='.pdf']"
|
| 274 |
+
- "a[href*='Notice']"
|
| 275 |
+
- "a[href*='Funding']"
|
| 276 |
+
content_selectors:
|
| 277 |
+
- "#content"
|
| 278 |
+
- "main"
|
| 279 |
+
- "article"
|
| 280 |
+
|
| 281 |
+
# --- Maryland: Chesapeake Bay Trust (recurring capacity-building RFPs) ---
|
| 282 |
+
- name: "Chesapeake Bay Trust – Capacity Building Initiative (CBI)"
|
| 283 |
+
type: http_html
|
| 284 |
+
enabled: true
|
| 285 |
+
url: "https://cbtrust.org/grants/capacity-building/"
|
| 286 |
+
geo: "MD"
|
| 287 |
+
categories: ["capacity_building", "environment", "community_health"]
|
| 288 |
+
parse:
|
| 289 |
+
follow_links: true
|
| 290 |
+
link_selectors:
|
| 291 |
+
- "a[href$='.pdf']"
|
| 292 |
+
- "a[href*='Request-for-Proposals']"
|
| 293 |
+
- "a[href*='RFP']"
|
| 294 |
+
content_selectors:
|
| 295 |
+
- "main"
|
| 296 |
+
- "article"
|
| 297 |
+
- ".entry-content"
|
| 298 |
+
|
| 299 |
+
- name: "CB Trust – PDFs (deep fetch)"
|
| 300 |
+
type: http_pdf
|
| 301 |
+
enabled: true
|
| 302 |
+
url_patterns:
|
| 303 |
+
- "https://cbtrust.org/*/*.pdf"
|
| 304 |
+
geo: "MD"
|
| 305 |
+
categories: ["capacity_building"]
|
| 306 |
+
|
| 307 |
+
# ---------- OPTIONAL: Curated JSON (enable after you generate it) ----------
|
| 308 |
- name: "State 5310 Listings (curated JSON)"
|
| 309 |
type: json_static
|
| 310 |
enabled: false # set to true once you generate the file below
|
project-plan-rag.rtf
ADDED
|
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{\rtf1\ansi\ansicpg1252\cocoartf2822
|
| 2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\froman\fcharset0 Times-Bold;\f1\froman\fcharset0 Times-Roman;\f2\fmodern\fcharset0 Courier;
|
| 3 |
+
\f3\froman\fcharset0 TimesNewRomanPSMT;\f4\fnil\fcharset0 AppleColorEmoji;\f5\froman\fcharset0 Times-Italic;
|
| 4 |
+
\f6\fnil\fcharset0 Menlo-Regular;\f7\fnil\fcharset0 HelveticaNeue;}
|
| 5 |
+
{\colortbl;\red255\green255\blue255;\red0\green0\blue0;\red0\green0\blue233;\red109\green109\blue109;
|
| 6 |
+
\red109\green109\blue109;\red0\green0\blue0;}
|
| 7 |
+
{\*\expandedcolortbl;;\cssrgb\c0\c0\c0;\cssrgb\c0\c0\c93333;\cssrgb\c50196\c50196\c50196;
|
| 8 |
+
\cssrgb\c50196\c50196\c50196;\cssrgb\c0\c0\c0\c84706;}
|
| 9 |
+
{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid1\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}
|
| 10 |
+
{\list\listtemplateid2\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid101\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid102\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listname ;}\listid2}
|
| 11 |
+
{\list\listtemplateid3\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid201\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid3}
|
| 12 |
+
{\list\listtemplateid4\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid301\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid302\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{square\}}{\leveltext\leveltemplateid303\'01\uc0\u9642 ;}{\levelnumbers;}\fi-360\li2160\lin2160 }{\listname ;}\listid4}
|
| 13 |
+
{\list\listtemplateid5\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid401\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid5}
|
| 14 |
+
{\list\listtemplateid6\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid501\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid6}}
|
| 15 |
+
{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}{\listoverride\listid2\listoverridecount0\ls2}{\listoverride\listid3\listoverridecount0\ls3}{\listoverride\listid4\listoverridecount0\ls4}{\listoverride\listid5\listoverridecount0\ls5}{\listoverride\listid6\listoverridecount0\ls6}}
|
| 16 |
+
\margl1440\margr1440\vieww31340\viewh19300\viewkind0
|
| 17 |
+
\deftab720
|
| 18 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 19 |
+
|
| 20 |
+
\f0\b\fs36 \cf0 \expnd0\expndtw0\kerning0
|
| 21 |
+
Best practices & features to include\
|
| 22 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 23 |
+
|
| 24 |
+
\f1\b0\fs24 \cf0 From the literature + what you\'92ve built already, here are features that improve quality & usability. {\field{\*\fldinst{HYPERLINK "https://www.funraise.org/blog/grant-management-software-for-nonprofits?utm_source=chatgpt.com"}}{\fldrslt \cf3 \ul \ulc3 NetSuite+3Funraise+3Fluxx+3}}\
|
| 25 |
+
|
| 26 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrt\brdrnil \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 27 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 28 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 29 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 30 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
| 31 |
+
|
| 32 |
+
\f0\b \cf0 Feature\cell
|
| 33 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
| 34 |
+
\cf0 Why it matters\cell
|
| 35 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
| 36 |
+
\cf0 How to implement / what to watch out for\cell \row
|
| 37 |
+
|
| 38 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 39 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 40 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 41 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 42 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 43 |
+
\cf0 Opportunity filters
|
| 44 |
+
\f1\b0 (keywords, geography, type, capacity-building etc.)\cell
|
| 45 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 46 |
+
\cf0 Helps users narrow to what matters, reduces overload.\cell
|
| 47 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 48 |
+
\cf0 You have keyword filters already. Also include date, state, amount range, \'93open vs closed\'94 status.\cell \row
|
| 49 |
+
|
| 50 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 51 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 52 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 53 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 54 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 55 |
+
|
| 56 |
+
\f0\b \cf0 Deadline alerts / reminders
|
| 57 |
+
\f1\b0 \cell
|
| 58 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 59 |
+
\cf0 Prevents missed grants caused by deadlines passing unnoticed.\cell
|
| 60 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 61 |
+
\cf0 Track
|
| 62 |
+
\f2\fs26 deadline
|
| 63 |
+
\f1\fs24 (if parsed), then show upcoming ones. Allow export to calendar or reminders.\cell \row
|
| 64 |
+
|
| 65 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 66 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 67 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 68 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 69 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 70 |
+
|
| 71 |
+
\f0\b \cf0 Document / PDF fetching + parsing
|
| 72 |
+
\f1\b0 \cell
|
| 73 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 74 |
+
\cf0 Many state sources have PDFs, RFPs etc. Users want details, not just summary.\cell
|
| 75 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 76 |
+
\cf0 Use PDF adapter + follow-links. Flag PDFs clearly in UI.\cell \row
|
| 77 |
+
|
| 78 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 79 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 80 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 81 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 82 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 83 |
+
|
| 84 |
+
\f0\b \cf0 Staleness / expiration detection
|
| 85 |
+
\f1\b0 \cell
|
| 86 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 87 |
+
\cf0 Opportunities with expired deadlines clutter the feed.\cell
|
| 88 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 89 |
+
\cf0 Tag items with \'93deadline passed\'94 or \'93pending\'94 etc. Maybe auto-hide old ones after some time.\cell \row
|
| 90 |
+
|
| 91 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 92 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 93 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 94 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 95 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 96 |
+
|
| 97 |
+
\f0\b \cf0 User feedback / manual review / save items
|
| 98 |
+
\f1\b0 \cell
|
| 99 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 100 |
+
\cf0 Users can flag false positives, save promising ones.\cell
|
| 101 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 102 |
+
\cf0 Add \'93bookmark / save\'94 or \'93dismiss\'94 features. Could feed into machine learning or heuristics over time.\cell \row
|
| 103 |
+
|
| 104 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 105 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 106 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 107 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 108 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 109 |
+
|
| 110 |
+
\f0\b \cf0 Dashboard / Analytics
|
| 111 |
+
\f1\b0 \cell
|
| 112 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 113 |
+
\cf0 Helps see grant volume by type, deadlines, states, etc.\cell
|
| 114 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 115 |
+
\cf0 Simple charts like # grants by month, # capacity-building grants vs total, etc.\cell \row
|
| 116 |
+
|
| 117 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 118 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 119 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 120 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 121 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 122 |
+
|
| 123 |
+
\f0\b \cf0 Good defaults / simple UI
|
| 124 |
+
\f1\b0 \cell
|
| 125 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 126 |
+
\cf0 Avoid cognitive overload \'97 show only essentials, allow advanced filtering if needed.\cell
|
| 127 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 128 |
+
\cf0 E.g. show title, deadline, source, eligibility; hide long descriptions by default.\cell \row
|
| 129 |
+
|
| 130 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
| 131 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 132 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 133 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 134 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 135 |
+
|
| 136 |
+
\f0\b \cf0 Mobile-friendly / responsive
|
| 137 |
+
\f1\b0 \cell
|
| 138 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 139 |
+
\cf0 Many will check on phones or tablets.\cell
|
| 140 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 141 |
+
\cf0 If web UI, ensure collapsible fields, simple menus.\cell \row
|
| 142 |
+
|
| 143 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrt\brdrnil \trbrdrr\brdrnil
|
| 144 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
| 145 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
| 146 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
| 147 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 148 |
+
|
| 149 |
+
\f0\b \cf0 Integrations
|
| 150 |
+
\f1\b0 \cell
|
| 151 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 152 |
+
\cf0 E.g. calendar, Slack/email alerts.\cell
|
| 153 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
| 154 |
+
\cf0 Helps push notifications rather than only manual checking.\cell \lastrow\row
|
| 155 |
+
\pard\pardeftab720\partightenfactor0
|
| 156 |
+
\cf4 \
|
| 157 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 158 |
+
|
| 159 |
+
\f0\b\fs36 \cf0 What to avoid / limit to keep usability\
|
| 160 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 161 |
+
|
| 162 |
+
\f1\b0\fs24 \cf0 These things often add data/noise or complexity, so either avoid or gate them behind \'93advanced\'94 toggles.\
|
| 163 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 164 |
+
\ls1\ilvl0
|
| 165 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 166 |
+
Over-broad scraping
|
| 167 |
+
\f1\b0 : pulling every \'93policy\'94, \'93program info\'94, \'93newsletter\'94, etc. just because capacity keywords appear once.\
|
| 168 |
+
\ls1\ilvl0
|
| 169 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 170 |
+
Too much automation without oversight
|
| 171 |
+
\f1\b0 : e.g. deadlines that silently go missing when PDF text parsing fails.\
|
| 172 |
+
\ls1\ilvl0
|
| 173 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 174 |
+
Overwhelming metadata fields
|
| 175 |
+
\f1\b0 : e.g. \'93agency budget history\'94, \'93application score weights\'94, etc., unless users request them.\
|
| 176 |
+
\ls1\ilvl0
|
| 177 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 178 |
+
Frequent UI changes or too many fields
|
| 179 |
+
\f1\b0 \'97 keep interface consistent.\
|
| 180 |
+
\ls1\ilvl0
|
| 181 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 182 |
+
Large batch of false positives
|
| 183 |
+
\f1\b0 \'97 this ruins trust quickly.\
|
| 184 |
+
\pard\pardeftab720\partightenfactor0
|
| 185 |
+
\cf4 \
|
| 186 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 187 |
+
|
| 188 |
+
\f0\b\fs36 \cf0 What to build next in your tool\
|
| 189 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 190 |
+
|
| 191 |
+
\f1\b0\fs24 \cf0 Here are prioritized improvements/next features for your tool to make it more powerful while preserving clarity:\
|
| 192 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 193 |
+
\ls2\ilvl0
|
| 194 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
| 195 |
+
Deadline extraction / detection
|
| 196 |
+
\f1\b0 \
|
| 197 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 198 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 199 |
+
\f3 \uc0\u9702
|
| 200 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 201 |
+
From Grants.gov API: likely available.\
|
| 202 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 203 |
+
\f3 \uc0\u9702
|
| 204 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 205 |
+
From scraped state pages / PDFs: attempt to parse \'93deadline\'94, \'93closing date\'94. If missing, mark as \'93TBD\'94.\
|
| 206 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 207 |
+
\f3 \uc0\u9702
|
| 208 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 209 |
+
UI: highlight upcoming deadlines (\'93Due in next 30 days\'94).\
|
| 210 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 211 |
+
\ls2\ilvl0
|
| 212 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
| 213 |
+
\'93Open / Active\'94 flag
|
| 214 |
+
\f1\b0 \
|
| 215 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 216 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 217 |
+
\f3 \uc0\u9702
|
| 218 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 219 |
+
If RFPs are active or you can detect \'93application now open\'94 in text.\
|
| 220 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 221 |
+
\f3 \uc0\u9702
|
| 222 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 223 |
+
If not, mark as \'93reference / program\'94 so users know it\'92s structural info, not a live call.\
|
| 224 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 225 |
+
\ls2\ilvl0
|
| 226 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
| 227 |
+
Bookmark / dismiss / feedback
|
| 228 |
+
\f1\b0 \
|
| 229 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 230 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 231 |
+
\f3 \uc0\u9702
|
| 232 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 233 |
+
Users should be able to mark \'93this is useful\'94 or \'93not relevant\'94 to train future filtering.\
|
| 234 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 235 |
+
\f3 \uc0\u9702
|
| 236 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 237 |
+
Possibly store local tags (e.g. \'93my state\'94, \'93my priority\'94).\
|
| 238 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 239 |
+
\ls2\ilvl0
|
| 240 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
| 241 |
+
Improved link / PDF following
|
| 242 |
+
\f1\b0 \
|
| 243 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 244 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 245 |
+
\f3 \uc0\u9702
|
| 246 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 247 |
+
As suggested earlier.\
|
| 248 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 249 |
+
\f3 \uc0\u9702
|
| 250 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 251 |
+
Make sure link selectors are fine-tuned for each state source.\
|
| 252 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 253 |
+
\ls2\ilvl0
|
| 254 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
| 255 |
+
Expired / historical items archive
|
| 256 |
+
\f1\b0 \
|
| 257 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 258 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 259 |
+
\f3 \uc0\u9702
|
| 260 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 261 |
+
Hide automatically unless explicitly requested. Keeps main view clean.\
|
| 262 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 263 |
+
\ls2\ilvl0
|
| 264 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 6 }\expnd0\expndtw0\kerning0
|
| 265 |
+
Search + sort + filter UI in front end
|
| 266 |
+
\f1\b0 \
|
| 267 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 268 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 269 |
+
\f3 \uc0\u9702
|
| 270 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 271 |
+
Filters: State, source type (federal/state), capacity vs other, keyword.\
|
| 272 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 273 |
+
\f3 \uc0\u9702
|
| 274 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 275 |
+
Sort by deadline, date posted, amount (if available).\
|
| 276 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 277 |
+
\ls2\ilvl0
|
| 278 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 7 }\expnd0\expndtw0\kerning0
|
| 279 |
+
Notifications / reminders
|
| 280 |
+
\f1\b0 \
|
| 281 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 282 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 283 |
+
\f3 \uc0\u9702
|
| 284 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 285 |
+
Let users get alerts (email / Slack / calendar) for items that match their saved filters and have upcoming deadlines.\
|
| 286 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 287 |
+
\ls2\ilvl0
|
| 288 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 8 }\expnd0\expndtw0\kerning0
|
| 289 |
+
Confidence score or matching hint
|
| 290 |
+
\f1\b0 \
|
| 291 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 292 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 293 |
+
\f3 \uc0\u9702
|
| 294 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 295 |
+
For scraped items, display \'93match strength\'94 (how many capacity keywords matched, whether in title vs body). Helps users see which items are likely relevant.\
|
| 296 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 297 |
+
\ls2\ilvl0
|
| 298 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 9 }\expnd0\expndtw0\kerning0
|
| 299 |
+
Performance / scheduling
|
| 300 |
+
\f1\b0 \
|
| 301 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 302 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 303 |
+
\f3 \uc0\u9702
|
| 304 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 305 |
+
Check feeds regularly, avoid stale caches.\
|
| 306 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 307 |
+
\f3 \uc0\u9702
|
| 308 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 309 |
+
Maybe incremental ingest (only new items) rather than full crawl all the time.\
|
| 310 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 311 |
+
\ls2\ilvl0
|
| 312 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 10 }\expnd0\expndtw0\kerning0
|
| 313 |
+
Testing & feedback
|
| 314 |
+
\f1\b0 \
|
| 315 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 316 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
| 317 |
+
\f3 \uc0\u9702
|
| 318 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 319 |
+
Ask actual users (your team) to test beta versions and tell you what\'92s too much / too little.\
|
| 320 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
| 321 |
+
\f3 \uc0\u9702
|
| 322 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 323 |
+
Adjust based on real use.\
|
| 324 |
+
\pard\pardeftab720\partightenfactor0
|
| 325 |
+
\cf4 \
|
| 326 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 327 |
+
|
| 328 |
+
\f0\b\fs36 \cf0 Your roadmap can look like this\
|
| 329 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 330 |
+
|
| 331 |
+
\f1\b0\fs24 \cf0 Here\'92s a potential plan for the next few sprints:\
|
| 332 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 333 |
+
\ls3\ilvl0
|
| 334 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 335 |
+
Sprint 1
|
| 336 |
+
\f1\b0 (now): Add deadline extraction + \'93active\'94 flag + improved link following for state sources.\
|
| 337 |
+
\ls3\ilvl0
|
| 338 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 339 |
+
Sprint 2
|
| 340 |
+
\f1\b0 : Bookmark/dismiss features + advanced filtering UI + sort by deadline.\
|
| 341 |
+
\ls3\ilvl0
|
| 342 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 343 |
+
Sprint 3
|
| 344 |
+
\f1\b0 : Notifications/reminders + mobile UI polish + confidence scoring.\
|
| 345 |
+
\ls3\ilvl0
|
| 346 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 347 |
+
Sprint 4
|
| 348 |
+
\f1\b0 : Incorporate JS-rendered sources (Playwright) + archive/historical mode.\
|
| 349 |
+
\pard\tx720\pardeftab720\sa240\partightenfactor0
|
| 350 |
+
\cf0 \
|
| 351 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 352 |
+
\cf0 \outl0\strokewidth0 \strokec2 Yes\'97exactly. We can treat the roadmap I outlined earlier as
|
| 353 |
+
\f0\b four sprints
|
| 354 |
+
\f1\b0 .\uc0\u8232 Here\'92s a clear
|
| 355 |
+
\f0\b Sprint 1 plan
|
| 356 |
+
\f1\b0 (2\'963 weeks is typical) so we can start delivering value quickly while keeping the other sprints in view.\
|
| 357 |
+
\pard\pardeftab720\partightenfactor0
|
| 358 |
+
\cf5 \strokec5 \
|
| 359 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 360 |
+
|
| 361 |
+
\f4\fs36 \cf0 \strokec2 \uc0\u55356 \u57281
|
| 362 |
+
\f0\b Sprint 1 \'96 \'93Actionable Deadlines & Better State Data\'94\
|
| 363 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 364 |
+
|
| 365 |
+
\fs24 \cf0 Goal:
|
| 366 |
+
\f1\b0 Surface
|
| 367 |
+
\f5\i current
|
| 368 |
+
\f1\i0 and
|
| 369 |
+
\f5\i timely
|
| 370 |
+
\f1\i0 grant opportunities\'97especially from PA & MD\'97without flooding users.\
|
| 371 |
+
\pard\pardeftab720\sa280\partightenfactor0
|
| 372 |
+
|
| 373 |
+
\f0\b\fs28 \cf0 Deliverables\
|
| 374 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 375 |
+
\ls4\ilvl0
|
| 376 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
| 377 |
+
\outl0\strokewidth0 \strokec2 Deadline Extraction & \'93Active\'94 Flag
|
| 378 |
+
\f1\b0 \
|
| 379 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 380 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 381 |
+
\f3 \uc0\u9702
|
| 382 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 383 |
+
\outl0\strokewidth0 \strokec2 Parse
|
| 384 |
+
\f2\fs26 deadline
|
| 385 |
+
\f1\fs24 /
|
| 386 |
+
\f2\fs26 closing date
|
| 387 |
+
\f1\fs24 text from Grants.gov (API gives it directly).\
|
| 388 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 389 |
+
\f3 \uc0\u9702
|
| 390 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 391 |
+
\outl0\strokewidth0 \strokec2 Add a lightweight NLP/regex extractor for state HTML/PDFs (\'93Deadline\'94, \'93Applications due\'94, etc.).\
|
| 392 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 393 |
+
\f3 \uc0\u9702
|
| 394 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 395 |
+
\outl0\strokewidth0 \strokec2 In the index, add fields:
|
| 396 |
+
\f2\fs26 deadline_date
|
| 397 |
+
\f1\fs24 ,
|
| 398 |
+
\f2\fs26 is_active
|
| 399 |
+
\f1\fs24 (true if deadline >= today or marked open/TBD).\
|
| 400 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 401 |
+
\ls4\ilvl0
|
| 402 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
| 403 |
+
\outl0\strokewidth0 \strokec2 UI Updates in the Dashboard
|
| 404 |
+
\f1\b0 \
|
| 405 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 406 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 407 |
+
\f3 \uc0\u9702
|
| 408 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 409 |
+
\outl0\strokewidth0 \strokec2 Show deadlines and days-to-close (already in the mock).\
|
| 410 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 411 |
+
\f3 \uc0\u9702
|
| 412 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 413 |
+
\outl0\strokewidth0 \strokec2 Add a small
|
| 414 |
+
\f0\b \'93Active Only\'94
|
| 415 |
+
\f1\b0 toggle to hide expired or guideline-only records.\
|
| 416 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 417 |
+
\ls4\ilvl0
|
| 418 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
| 419 |
+
\outl0\strokewidth0 \strokec2 Link-Following Enhancements
|
| 420 |
+
\f1\b0 \
|
| 421 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 422 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 423 |
+
\f3 \uc0\u9702
|
| 424 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 425 |
+
\outl0\strokewidth0 \strokec2 Update YAML for
|
| 426 |
+
\f0\b MD DHCD Press
|
| 427 |
+
\f1\b0 ,
|
| 428 |
+
\f0\b MTA
|
| 429 |
+
\f1\b0 ,
|
| 430 |
+
\f0\b PennDOT
|
| 431 |
+
\f1\b0 , etc. to:\
|
| 432 |
+
\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160\sa240\partightenfactor0
|
| 433 |
+
\ls4\ilvl2
|
| 434 |
+
\f2\fs26 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 435 |
+
\f6 \uc0\u9642
|
| 436 |
+
\f2 }\expnd0\expndtw0\kerning0
|
| 437 |
+
\outl0\strokewidth0 \strokec2 parse.follow_links: true
|
| 438 |
+
\f1\fs24 \
|
| 439 |
+
\ls4\ilvl2
|
| 440 |
+
\f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 441 |
+
\f6 \uc0\u9642
|
| 442 |
+
\f2 }\expnd0\expndtw0\kerning0
|
| 443 |
+
\outl0\strokewidth0 \strokec2 crawl.max_depth: 1
|
| 444 |
+
\f1\fs24 \
|
| 445 |
+
\ls4\ilvl2
|
| 446 |
+
\f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 447 |
+
\f6 \uc0\u9642
|
| 448 |
+
\f2 }\expnd0\expndtw0\kerning0
|
| 449 |
+
\outl0\strokewidth0 \strokec2 link_selectors
|
| 450 |
+
\f1\fs24 for PDFs / NOFO pages\
|
| 451 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 452 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 453 |
+
\f3 \uc0\u9702
|
| 454 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 455 |
+
\outl0\strokewidth0 \strokec2 Ensure adapter fetches and normalizes these linked pages.\
|
| 456 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 457 |
+
\ls4\ilvl0
|
| 458 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
| 459 |
+
\outl0\strokewidth0 \strokec2 Improved Capacity Filter
|
| 460 |
+
\f1\b0 \
|
| 461 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 462 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 463 |
+
\f3 \uc0\u9702
|
| 464 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 465 |
+
\outl0\strokewidth0 \strokec2 Add a few transportation-related capacity phrases (\'93service capacity\'94, \'93provider capacity\'94) to capture more relevant 5310-type opportunities.\
|
| 466 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 467 |
+
\ls4\ilvl0
|
| 468 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
| 469 |
+
\outl0\strokewidth0 \strokec2 Basic Quality Checks
|
| 470 |
+
\f1\b0 \
|
| 471 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
| 472 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 473 |
+
\f3 \uc0\u9702
|
| 474 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 475 |
+
\outl0\strokewidth0 \strokec2 Logging: show counts of items skipped due to expired deadlines or missing text.\
|
| 476 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
| 477 |
+
\f3 \uc0\u9702
|
| 478 |
+
\f1 }\expnd0\expndtw0\kerning0
|
| 479 |
+
\outl0\strokewidth0 \strokec2 Quick manual review with your team to confirm that the new records are genuinely grant opportunities.\
|
| 480 |
+
\pard\pardeftab720\partightenfactor0
|
| 481 |
+
\cf5 \strokec5 \
|
| 482 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 483 |
+
|
| 484 |
+
\f4\fs36 \cf0 \strokec2 \uc0\u55357 \u56621
|
| 485 |
+
\f0\b Sprint 2\'964 Preview\
|
| 486 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 487 |
+
\ls5\ilvl0
|
| 488 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 489 |
+
\outl0\strokewidth0 \strokec2 Sprint 2:
|
| 490 |
+
\f1\b0 Bookmarks & Dismiss, Advanced Filtering UI, Sorting by deadline/confidence.\
|
| 491 |
+
\ls5\ilvl0
|
| 492 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 493 |
+
\outl0\strokewidth0 \strokec2 Sprint 3:
|
| 494 |
+
\f1\b0 Notifications (email/Slack/ICS calendar) + mobile UI polish + confidence scoring display.\
|
| 495 |
+
\ls5\ilvl0
|
| 496 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
| 497 |
+
\outl0\strokewidth0 \strokec2 Sprint 4:
|
| 498 |
+
\f1\b0 Full
|
| 499 |
+
\f0\b JS-rendered
|
| 500 |
+
\f1\b0 source support (Playwright for MD OneStop etc.) and historical archive mode.\
|
| 501 |
+
\pard\pardeftab720\partightenfactor0
|
| 502 |
+
\cf5 \strokec5 \
|
| 503 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
| 504 |
+
|
| 505 |
+
\f0\b\fs36 \cf0 \strokec2 Next Steps to Kick Off Sprint 1\
|
| 506 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 507 |
+
\ls6\ilvl0
|
| 508 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
| 509 |
+
\outl0\strokewidth0 \strokec2 Confirm dev environment
|
| 510 |
+
\f1\b0 \'96 you already have
|
| 511 |
+
\f2\fs26 grants-rag
|
| 512 |
+
\f1\fs24 running locally.\
|
| 513 |
+
\ls6\ilvl0
|
| 514 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
| 515 |
+
\outl0\strokewidth0 \strokec2 Create a new branch
|
| 516 |
+
\f1\b0 :\uc0\u8232
|
| 517 |
+
\f2\fs26 \uc0\u8232 \u8232 \u8232 git checkout -b sprint1-deadline-active-flag\
|
| 518 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0
|
| 519 |
+
\ls6\ilvl0\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
| 520 |
+
\outl0\strokewidth0 \strokec2 \uc0\u8232 \u8232
|
| 521 |
+
\f1\fs24 \
|
| 522 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
| 523 |
+
\ls6\ilvl0
|
| 524 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
| 525 |
+
\outl0\strokewidth0 \strokec2 Add extraction utilities
|
| 526 |
+
\f1\b0 \uc0\u8232
|
| 527 |
+
\f5\i Regex for state HTML/PDFs
|
| 528 |
+
\f1\i0 and update
|
| 529 |
+
\f2\fs26 _normalize_web_record
|
| 530 |
+
\f1\fs24 to accept
|
| 531 |
+
\f2\fs26 deadline_date
|
| 532 |
+
\f1\fs24 and
|
| 533 |
+
\f2\fs26 is_active
|
| 534 |
+
\f1\fs24 .\
|
| 535 |
+
\ls6\ilvl0
|
| 536 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
| 537 |
+
\outl0\strokewidth0 \strokec2 UI
|
| 538 |
+
\f1\b0 : enable the
|
| 539 |
+
\f0\b Active Only
|
| 540 |
+
\f1\b0 toggle (mock is already designed for it).\
|
| 541 |
+
\pard\pardeftab720\partightenfactor0
|
| 542 |
+
\cf5 \strokec5 \
|
| 543 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 544 |
+
|
| 545 |
+
\f0\b \cf0 \strokec2 Timeline suggestion:
|
| 546 |
+
\f1\b0 2 weeks dev + 1 week QA / user feedback.\uc0\u8232 Once Sprint 1 is merged, we\'92ll have a tool that surfaces only
|
| 547 |
+
\f5\i active
|
| 548 |
+
\f1\i0 capacity-building grants with clear deadlines\'97exactly the balance of \'93rich data without overload\'94 we\'92re after.\
|
| 549 |
+
\pard\pardeftab720\partightenfactor0
|
| 550 |
+
\cf0 \
|
| 551 |
+
\
|
| 552 |
+
\
|
| 553 |
+
\
|
| 554 |
+
\
|
| 555 |
+
\
|
| 556 |
+
\
|
| 557 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
| 558 |
+
\cf0 \
|
| 559 |
+
\pard\pardeftab720\partightenfactor0
|
| 560 |
+
\cf0 \
|
| 561 |
+
\pard\pardeftab720\qc\partightenfactor0
|
| 562 |
+
|
| 563 |
+
\f7\fs22 \cf6 \strokec6 \
|
| 564 |
+
\pard\pardeftab720\partightenfactor0
|
| 565 |
+
|
| 566 |
+
\f1\fs24 \cf0 \strokec2 ChatGPT can make mistakes. Check important info.\
|
| 567 |
+
\pard\tx720\pardeftab720\sa240\partightenfactor0
|
| 568 |
+
\cf0 \outl0\strokewidth0 \
|
| 569 |
+
}
|