Spaces:
Sleeping
Sleeping
| # inference.py | |
| import os | |
| import csv | |
| import random | |
| import torch | |
| import pandas as pd | |
| import psutil | |
| import platform | |
| import GPUtil | |
| import torch.nn.functional as F | |
| from transformers import AutoTokenizer | |
| from evo_model import EvoTransformerV22 | |
| from evo_architecture import ( | |
| build_model_from_config, | |
| mutate_genome, | |
| log_genome, | |
| save_best_genome, | |
| load_best_genome | |
| ) | |
| import openai | |
# OpenAI credentials: read from the OPENAI_API_KEY environment variable; the
# literal placeholder "sk-..." is used when the variable is not set.
# NOTE(review): get_gpt_response() builds its own openai.OpenAI() client --
# confirm that client actually consults this module-level key.
openai.api_key = os.environ.get("OPENAI_API_KEY", "sk-...")

# Tokenizer and compute device shared by inference and retraining.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Active genome: the persisted best config and the model built from it,
# moved to the selected device and switched to evaluation mode.
current_config = load_best_genome()
model = build_model_from_config(current_config).to(device)
model.eval()

# CSV file that retrain_from_feedback_csv() reads user votes from.
FEEDBACK_LOG = "feedback_log.csv"
# Evo prediction
def evo_chat_predict(history, question, options):
    """Score each answer option with the Evo model and return the best one.

    Every option is paired with the question as "<question> <option>", the
    pairs are tokenized as one batch, and the token ids are passed through
    the module-level ``model``. The sigmoid of each logit is used as that
    option's score. Only ``input_ids`` are given to the model; the attention
    mask produced by the tokenizer is not used.

    Args:
        history: Accepted for interface compatibility; not used here.
        question: Question text prepended to every option.
        options: Non-empty, indexable sequence of candidate answer strings.

    Returns:
        dict with keys "answer" (the highest-scoring option), "confidence"
        (its score rounded to 3 decimals), "reasoning" (every option with
        its score, joined by " vs "), and "context_used" (the question).

    Raises:
        ValueError: If ``options`` is empty.
    """
    if not options:
        raise ValueError("options must contain at least one candidate answer")

    inputs = [f"{question} {opt}" for opt in options]
    enc = tokenizer(
        inputs,
        padding=True,
        truncation=True,
        max_length=128,
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():
        logits = model(enc["input_ids"])

    # reshape(-1) rather than squeeze(): squeeze() collapses a single-option
    # batch to a 0-d tensor, and tolist() would then return a bare float that
    # cannot be indexed.
    scores = torch.sigmoid(logits).reshape(-1)
    best_idx = int(torch.argmax(scores))
    probs = scores.tolist()

    # Report every option, not just the first two.
    reasoning = " vs ".join(f"{opt}: {p:.3f}" for opt, p in zip(options, probs))
    return {
        "answer": options[best_idx],
        "confidence": round(probs[best_idx], 3),
        "reasoning": reasoning,
        "context_used": question,
    }
# GPT response
def get_gpt_response(prompt):
    """Send ``prompt`` to gpt-3.5-turbo as a single user message.

    Returns the stripped text of the first choice. Any exception raised while
    creating the client or performing the request is caught and returned as
    the string "(GPT Error) <exception>" instead of propagating.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    try:
        completion = openai.OpenAI().chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as err:
        return f"(GPT Error) {err}"
# Genome stats
def get_model_config():
    """Return a summary of the active genome held in ``current_config``.

    The architecture fields ("num_layers", "num_heads", "ffn_dim",
    "memory_enabled") fall back to "?" when absent from the config;
    "accuracy" falls back to "N/A".
    """
    architecture_keys = ("num_layers", "num_heads", "ffn_dim", "memory_enabled")
    summary = {key: current_config.get(key, "?") for key in architecture_keys}
    summary["accuracy"] = current_config.get("accuracy", "N/A")
    return summary
# System info
def get_system_stats():
    """Collect a snapshot of host CPU, RAM and (first) GPU usage.

    GPU details are best-effort: if GPUtil reports no GPUs, or querying it
    fails, the GPU fields fall back to "N/A" / 0.

    Returns:
        dict with keys "device", "cpu_usage_percent", "memory_used_gb",
        "memory_total_gb", "gpu_name", "gpu_memory_used_gb",
        "gpu_memory_total_gb" and "platform".
    """
    mem = psutil.virtual_memory()
    # Called without an interval; see the psutil docs for how the first
    # reading of cpu_percent() should be interpreted.
    cpu = psutil.cpu_percent()

    gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0
    try:
        gpus = GPUtil.getGPUs()
        if gpus:
            gpu = gpus[0]
            gpu_name = gpu.name
            # Divided by 1024 to express the values in GB
            # (assumes GPUtil reports MB -- confirm against GPUtil docs).
            gpu_mem_used = round(gpu.memoryUsed / 1024, 2)
            gpu_mem_total = round(gpu.memoryTotal / 1024, 2)
    except Exception:
        # Deliberate best-effort: GPU probing must never break the stats
        # call. Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate. Reset all fields in case the failure
        # happened after some were already assigned.
        gpu_name, gpu_mem_used, gpu_mem_total = "N/A", 0, 0

    return {
        "device": device.type,
        "cpu_usage_percent": cpu,
        "memory_used_gb": round(mem.used / 1024**3, 2),
        "memory_total_gb": round(mem.total / 1024**3, 2),
        "gpu_name": gpu_name,
        "gpu_memory_used_gb": gpu_mem_used,
        "gpu_memory_total_gb": gpu_mem_total,
        "platform": platform.platform(),
    }
# Evo retrain logic
def retrain_from_feedback_csv():
    """Mutate the current genome and fine-tune a new model on logged votes.

    Reads ``FEEDBACK_LOG`` and keeps rows whose "vote" is "Evo" or "GPT".
    Each row becomes one training example: the text is
    "<question> <option1> <option2>" and the label is 1 for "Evo", 0 for
    "GPT". A model built from a mutated copy of ``current_config`` is trained
    for 3 epochs (Adam, lr=1e-4, one example per optimizer step).

    The module-level ``current_config`` and ``model`` are replaced only after
    training has finished, so a failure part-way through leaves the
    previously served model in place.

    Returns:
        A human-readable status string: a warning when there is nothing to
        train on, otherwise a success message with the number of examples.
    """
    global current_config, model

    if not os.path.exists(FEEDBACK_LOG):
        return "⚠️ No feedback log found."

    no_votes_msg = "⚠️ No usable feedback data. Please vote on Evo or GPT."
    try:
        df = pd.read_csv(FEEDBACK_LOG)
    except pd.errors.EmptyDataError:
        # A zero-byte log (not even a header row) makes read_csv raise
        # instead of returning an empty frame.
        return no_votes_msg

    if df.empty or "vote" not in df.columns:
        return no_votes_msg
    df = df[df["vote"].isin(["Evo", "GPT"])]
    if df.empty:
        return no_votes_msg

    # Without the text columns there is nothing to build examples from.
    if not {"question", "option1", "option2"}.issubset(df.columns):
        return "⚠️ No usable feedback data."

    data = [
        (f"{question} {option1} {option2}", 1 if vote == "Evo" else 0)
        for question, option1, option2, vote in zip(
            df["question"], df["option1"], df["option2"], df["vote"]
        )
    ]

    new_config = mutate_genome(current_config)
    candidate = build_model_from_config(new_config).to(device)
    log_genome(new_config)

    # Tokenize once up front; the encoded inputs do not change across epochs.
    samples = []
    for text, label in data:
        enc = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        ).to(device)
        label_tensor = torch.tensor([label], dtype=torch.float32).to(device)
        samples.append((enc["input_ids"], label_tensor))

    # Fine-tune the candidate model.
    candidate.train()
    optimizer = torch.optim.Adam(candidate.parameters(), lr=1e-4)
    for _ in range(3):
        random.shuffle(samples)
        for input_ids, label_tensor in samples:
            logits = candidate(input_ids).squeeze(1)
            loss = F.binary_cross_entropy_with_logits(logits, label_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    candidate.eval()

    # Publish the trained model and its genome only now that training worked.
    model = candidate
    current_config = new_config
    save_best_genome({**new_config, "accuracy": "Live-Finetuned"})
    return f"✅ Evo retrained on {len(data)} feedback entries."
# Reload trigger
def load_model(force_reload=False):
    """Switch the module-level ``model`` to evaluation mode.

    ``force_reload`` is accepted but is not referenced in the statements
    visible here.
    """
    global model
    model.eval()