Update app.py
app.py CHANGED
@@ -56,9 +56,13 @@ from sklearn.metrics.pairwise import cosine_similarity
 from concurrent.futures import ThreadPoolExecutor
 
 # --- Setup Logging ---
-logging.basicConfig(
-
-
+logging.basicConfig(
+    level=logging.INFO,
+    format='[%(asctime)s] [%(levelname)s] [%(threadName)s] %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S',
+    stream=sys.stdout,
+    force=True
+)
 
 try:
     import pvporcupine
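Note on the logging change: force=True (available since Python 3.8) removes any handlers already attached to the root logger before applying this configuration, so it takes effect even if an imported library called basicConfig first, and %(threadName)s tags each record with the emitting thread. A minimal standalone sketch of the same configuration, using only the standard library:

import logging
import sys

# Mirrors the configuration added above: INFO level, thread name in the
# format (useful with the bootstrap worker threads), stdout as the stream,
# and force=True to replace any previously installed handlers.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] [%(levelname)s] [%(threadName)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stdout,
    force=True,
)

logging.info("logging configured")
# -> [2025-01-01 12:00:00] [INFO] [MainThread] logging configured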
@@ -1298,7 +1302,7 @@ class Hive:
         # This can be manually overridden by setting HIVE_USE_HF_INFERENCE.
         is_hf_space = "SPACE_ID" in os.environ
         use_remote_default = is_hf_space
-
+        print(f"[Hive] Detected Hugging Face Space: {is_hf_space}. Defaulting to remote inference: {use_remote_default}.")
         # Check for manual override from environment variable
         if "HIVE_USE_HF_INFERENCE" in os.environ:
             use_remote = CFG["HIVE_USE_HF_INFERENCE"]
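The detection logic here is a default-plus-override pattern: SPACE_ID is set in the environment of every Hugging Face Space, and an explicit HIVE_USE_HF_INFERENCE beats the detected default. A sketch of the same decision as a standalone function; CFG's parsing isn't shown in this diff, so the override is read straight from the environment here:

import os

def resolve_use_remote() -> bool:
    # Spaces set SPACE_ID, so default to remote inference when it is present.
    use_remote = "SPACE_ID" in os.environ
    # An explicit HIVE_USE_HF_INFERENCE always overrides the detected default.
    override = os.environ.get("HIVE_USE_HF_INFERENCE")
    if override is not None:
        use_remote = override.strip().lower() in ("1", "true", "yes")
    return use_remote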
@@ -1306,7 +1310,7 @@ class Hive:
             use_remote = use_remote_default
 
         if use_remote:
-            print("[Hive] Using remote Hugging Face Inference endpoint.")
+            print("[Hive] Using remote Hugging Face Inference endpoint.", flush=True)
             from huggingface_hub import InferenceClient; endpoint = CFG["HIVE_HF_ENDPOINT"] or None; token = CFG["HF_READ_TOKEN"] or os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or None
             self.client = InferenceClient(model=self.model_id if endpoint is None else None, token=token, timeout=60, base_url=endpoint) # type: ignore
             def _remote_pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, **kw):
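For reference, the client construction follows the usual huggingface_hub pattern: pass base_url to address a dedicated endpoint, or a model id (and no base_url) for serverless inference. A minimal sketch; the model id is a placeholder, not the one this app uses:

import os
from huggingface_hub import InferenceClient

endpoint = os.getenv("HIVE_HF_ENDPOINT") or None
token = os.getenv("HF_TOKEN") or None

# With a dedicated endpoint, base_url addresses it and no model id is needed;
# otherwise the model id routes the request to serverless inference.
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.2" if endpoint is None else None,
    token=token,
    timeout=60,
    base_url=endpoint,
)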
@@ -1323,7 +1327,7 @@ class Hive:
             self.model = None # No local model needed
             self.stopping_criteria = None # Not used with InferenceClient
         else:
-            print("[Hive] Using local LLM for inference.")
+            print("[Hive] Using local LLM for inference.", flush=True)
             self.tok = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=trust, chat_template=None)
             if self.tok.pad_token is None:
                 self.tok.pad_token = self.tok.eos_token
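On the local path, most decoder-only tokenizers ship with an EOS token but no pad token, and the hunk above reuses EOS for padding. The same fallback in isolation, with a placeholder model id:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")  # placeholder model id
# Decoder-only tokenizers often define eos_token but not pad_token; batching
# and generation utilities expect one, so fall back to EOS.
if tok.pad_token is None:
    tok.pad_token = tok.eos_token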
@@ -1454,7 +1458,7 @@ class Hive:
         if hasattr(self, 'client') and self.client: # Remote Inference
             stop_sequences = ["</s>", "Assistant:"] + [self.tok.decode(st) for st in self.stop_tokens]
             try:
-                for token in self.client.text_generation(prompt, max_new_tokens=int(max_new_tokens), temperature=float(temperature), do_sample=True, stop_sequences=stop_sequences, stream=True):
+                for token in self.client.text_generation(prompt, max_new_tokens=int(max_new_tokens), temperature=float(temperature), do_sample=True, stop_sequences=stop_sequences, stream=True): # type: ignore
                     yield token
             except Exception as e:
                 print(f"[ModelBridge] Remote inference stream failed: {e}")
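With stream=True (and the default details=False), text_generation returns an iterator of plain string chunks, which is what lets the loop above re-yield tokens as they arrive. The same call as a free-standing generator, assuming the client from the earlier sketch:

def stream_completion(client, prompt: str, max_new_tokens: int = 256):
    # stream=True turns the call into a generator of text chunks;
    # stop_sequences ends generation when any of the strings is produced.
    for chunk in client.text_generation(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        do_sample=True,
        stop_sequences=["</s>", "Assistant:"],
        stream=True,
    ):
        yield chunk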
@@ -1671,7 +1675,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
         current_history.append({"role": "assistant", "content": ""})
         try:
-            # The dialogue manager needs the full history to maintain context.
+            # The dialogue manager needs the full history to maintain context. # type: ignore
             for chunk in hive_instance.dialogue_manager.process_turn(current_history, current_user_id, effective_role, session_id):
                 if chunk["type"] == "token":
                     current_history[-1]["content"] += chunk["content"]
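The surrounding handler is the standard Gradio streaming shape: append an empty assistant message, then re-yield the whole history as token chunks accumulate, so the chat component re-renders a progressively longer reply. A stripped-down sketch of that shape; fake_token_source is a stand-in for dialogue_manager.process_turn:

def fake_token_source():
    # Stand-in for process_turn(...), which yields chunk dicts.
    for tok in ["Hel", "lo", "!"]:
        yield {"type": "token", "content": tok}

def chat_fn(message, history):
    history = history + [{"role": "user", "content": message},
                         {"role": "assistant", "content": ""}]
    for chunk in fake_token_source():
        if chunk["type"] == "token":
            history[-1]["content"] += chunk["content"]
            yield history  # each yield re-renders the chat with the longer reply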
@@ -1679,7 +1683,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
             # After the stream is complete, re-enable the textbox.
             yield current_history, gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']}", interactive=True)
         except Exception as e:
-            error_msg = f"Error in DialogueManager: {e}"
+            error_msg = f"Error in DialogueManager: {e}" # type: ignore
             print(f"[ERROR] {error_msg}")
             current_history[-1]["content"] = f"An error occurred: {error_msg}"
             yield current_history, gr.Textbox(value="", interactive=True)
@@ -1704,7 +1708,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
     def do_get_vocab_word(uid, request: gr.Request):
         hive_instance = get_hive_instance() # type: ignore
-        if hive_instance.lite_mode: return "Vocabulary features are disabled in Lite Mode."
+        if hive_instance.lite_mode: return "Vocabulary features are disabled in Lite Mode." # type: ignore
         current_user_id = uid or request.session_hash
         log_path = os.path.join(CFG["HIVE_HOME"], "users", "conversations", f"{current_user_id}.jsonl")
         if not os.path.exists(log_path): return "No conversation history to find words from."
@@ -1720,7 +1724,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
     def wait_for_memory_features():
         """Waits for the full Hive core and enables memory-related UI features."""
-        bootstrap_instance.hive_ready.wait()
+        bootstrap_instance.hive_ready.wait() # type: ignore
         hive_instance = get_hive_instance() # Ensure the UI's HIVE_INSTANCE is updated to full
         return (
             "✅ **Full Hive Core is Ready.** Advanced features are now online.",
@@ -1742,7 +1746,7 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
     demo.load(wait_for_memory_features, None, [core_status, summary_output, msg, summary_btn, vocab_output, vocab_btn, progress_output, online_now, ingest_now_btn, mem_compress_btn, hotpatch_apply, propose_btn, test_btn, apply_btn, network_status_md])
     def wait_for_lite_core():
         """Waits for the lite Hive core and enables basic chat."""
-        bootstrap_instance.lite_core_ready.wait()
+        bootstrap_instance.lite_core_ready.wait() # type: ignore
         return gr.Textbox(placeholder=f"Talk to {CFG['AGENT_NAME']} (Lite Mode)", interactive=True)
 
     demo.load(wait_for_lite_core, None, [msg])
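All of the wait_for_* helpers gate UI features on the same primitive: a demo.load callback blocks on a threading.Event that a background initializer sets once its models are loaded, then returns re-enabled components. A self-contained sketch of the pattern; the class and attribute names are illustrative, not the app's actual Bootstrap API:

import threading
import time

class Bootstrap:
    def __init__(self):
        self.lite_core_ready = threading.Event()
        threading.Thread(target=self._init, daemon=True).start()

    def _init(self):
        time.sleep(2)  # stand-in for loading models
        self.lite_core_ready.set()  # unblocks every waiter at once

bootstrap = Bootstrap()

def wait_for_lite_core():
    bootstrap.lite_core_ready.wait()  # returns immediately if already set
    return "ready"  # the app returns an interactive gr.Textbox here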
@@ -1752,12 +1756,12 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
         global HIVE_INSTANCE
 
         # If the full hive is ready, ensure we are using it, and it's a valid instance.
-        if bootstrap_instance.hive_ready.is_set():
+        if bootstrap_instance.hive_ready.is_set(): # type: ignore
             if bootstrap_instance.hive_instance is not None and (HIVE_INSTANCE is None or HIVE_INSTANCE.lite_mode):
                 HIVE_INSTANCE = bootstrap_instance.hive_instance
                 print("[UI] Full Hive instance attached.")
             return HIVE_INSTANCE
-
+        # type: ignore
         # Otherwise, use the lite instance.
         if HIVE_INSTANCE is None:
             if bootstrap_instance.lite_core_ready.is_set() and bootstrap_instance.hive_lite_instance is not None:
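get_hive_instance is effectively a two-tier fallback: prefer the full instance once hive_ready is set (upgrading in place if the UI still holds the lite one), otherwise attach the lite instance when it exists. The same logic as a pure function over hypothetical arguments:

def pick_instance(bootstrap, current):
    # Prefer the full instance once it is ready; upgrade in place if the
    # caller is still holding the lite one.
    if bootstrap.hive_ready.is_set():
        if bootstrap.hive_instance is not None and (current is None or current.lite_mode):
            current = bootstrap.hive_instance
        return current
    # Otherwise fall back to the lite instance once it is available.
    if current is None and bootstrap.lite_core_ready.is_set():
        current = bootstrap.hive_lite_instance
    return current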
@@ -1772,8 +1776,8 @@ def launch_ui(bootstrap_instance: "Bootstrap"):
 
     def wait_for_voice_features(request: gr.Request):
         """Waits for ASR/TTS models and enables voice-related UI elements."""
-        bootstrap_instance.voice_ready.wait()
-        bootstrap_instance.hive_ready.wait() # Also wait for full core for voice features
+        bootstrap_instance.voice_ready.wait() # type: ignore
+        bootstrap_instance.hive_ready.wait() # Also wait for full core for voice features # type: ignore
         hive_instance = get_hive_instance(bootstrap_instance)
 
         voice_ready = not hive_instance.lite_mode and hasattr(hive_instance, 'asr_service') and hasattr(hive_instance, 'tts_service')