hypernet-n1-sdc / run_6lane.py
NameONEStudios's picture
Upload folder using huggingface_hub
1db22f0 verified
import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime

import requests
# Base URL of the Hypernet API. Override with the HYPERNET_URL environment
# variable; a trailing slash is removed so f"{HYPERNET_URL}/api/..." never
# produces a double slash.
HYPERNET_URL = os.environ.get("HYPERNET_URL", "http://localhost:5000").rstrip("/")

# Bearer token sent with every request. Override with HYPERNET_AUTH_TOKEN.
# NOTE(review): the fallback below is a credential committed to source control;
# prefer supplying it through the environment and rotating this value.
AUTH_TOKEN = os.environ.get("HYPERNET_AUTH_TOKEN", "cpn-steve-kawa-hypernet-alpha")

# Lane names queried for every benchmark problem, in reporting order.
LANES = ["lola", "claude", "grok", "deep", "gemini", "kimi"]
def call_lane(query, lane):
    """Send ``query`` to one lane of the Hypernet API and return its reply.

    Args:
        query: Prompt text sent in the ``"query"`` field of the JSON body.
        lane: Lane name sent in the ``"lane"`` field of the JSON body.

    Returns:
        The decoded JSON object when the server answers HTTP 200 with a JSON
        object. On any failure (network error, timeout, non-200 status,
        undecodable or non-object body) a dict of the form
        ``{"error": "failed", "detail": <reason>}`` is returned instead, so
        callers can always use ``.get(...)`` on the result.
    """
    try:
        r = requests.post(
            f"{HYPERNET_URL}/api/v1/run",
            headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
            json={"query": query, "lane": lane},
            timeout=120,
        )
    except requests.RequestException as exc:
        # Only transport-level failures are treated as a lane failure;
        # KeyboardInterrupt and programming errors propagate.
        return {"error": "failed", "detail": f"request error: {exc}"}

    if r.status_code != 200:
        return {"error": "failed", "detail": f"HTTP {r.status_code}"}

    try:
        payload = r.json()
    except ValueError as exc:
        # Every JSON decode error raised by requests is a ValueError subclass.
        return {"error": "failed", "detail": f"invalid JSON: {exc}"}

    if not isinstance(payload, dict):
        # Callers index the result like a dict; reject lists/scalars here.
        return {"error": "failed", "detail": "response is not a JSON object"}
    return payload
def extract_code(response):
    """Return the source code contained in a model response.

    If the response contains a Markdown code fence, the contents of the first
    fence opened with "```python" are returned; failing that, the contents of
    the first fence of any kind. A missing closing fence is tolerated (the
    rest of the text is used). Without any fence the whole response is used.

    A Python language tag on the opening-fence line ("python", "python3",
    "py", "py3", any letter case) is removed so it does not end up as the
    first line of the extracted code.

    Args:
        response: The raw response text.

    Returns:
        The extracted code with surrounding whitespace stripped, or ``""``
        when ``response`` is not a string.
    """
    if not isinstance(response, str):
        return ""

    fence = "```"
    start = response.find(fence + "python")
    if start == -1:
        start = response.find(fence)
    if start == -1:
        return response.strip()

    # Everything after the opening fence, up to the closing fence if present.
    body = response[start + len(fence):].split(fence, 1)[0]

    # Drop a language tag that occupies the rest of the opening-fence line.
    tag = re.match(r"(?:python3?|py3?)[ \t]*(?:\r?\n|\Z)", body, re.IGNORECASE)
    if tag:
        body = body[tag.end():]
    elif body.startswith("python"):
        # Inline form such as "```python x = 1```": remove just the tag.
        body = body[len("python"):]
    return body.strip()
def test_solution(problem, solution):
    """Run a candidate solution against a problem's tests in a subprocess.

    The program executed is the problem prompt, the code extracted from
    ``solution``, the problem's test text, a call ``check(<entry_point>)``
    and a final ``print("PASS")``.

    SECURITY NOTE: this executes model-generated code with the privileges of
    the current user. Run the benchmark inside a sandbox or container.

    Args:
        problem: Mapping with the keys ``"prompt"``, ``"test"`` and
            ``"entry_point"``. The test text is expected to define ``check``.
        solution: Raw model response; code is pulled out with ``extract_code``.

    Returns:
        ``True`` when the subprocess exits with status 0 and printed "PASS";
        ``False`` otherwise, including on the 10-second timeout or when the
        subprocess cannot be started.
    """
    code = extract_code(solution)
    test_code = f'''{problem["prompt"]}
{code}
{problem["test"]}
check({problem["entry_point"]})
print("PASS")
'''
    try:
        result = subprocess.run(
            [sys.executable, "-c", test_code],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (subprocess.TimeoutExpired, OSError, ValueError):
        # TimeoutExpired: candidate ran too long. OSError: process could not
        # be spawned. ValueError: e.g. NUL byte in the program text or
        # undecodable output. Anything else (Ctrl-C included) propagates.
        return False
    return result.returncode == 0 and "PASS" in result.stdout
def run_benchmark(problems, lanes=None):
    """Run every problem through every lane and tally pass/fail per lane.

    Progress and a final pass@1 summary are printed to stdout.

    Args:
        problems: Iterable of problem mappings. Each needs ``"task_id"`` and
            ``"prompt"`` plus the keys read by ``test_solution``.
        lanes: Optional iterable of lane names; defaults to ``LANES``.

    Returns:
        A dict with two keys:
        ``"lanes"`` maps lane name to ``{"pass": int, "fail": int}``;
        ``"problems"`` is a list of ``{"task_id": ..., "lanes": {lane: bool}}``
        in input order. A lane that returns no ``"response_text"`` counts as
        a failure.
    """
    problems = list(problems)
    lanes = list(LANES) if lanes is None else list(lanes)
    total_problems = len(problems)
    rule = "=" * 60

    results = {
        "lanes": {lane: {"pass": 0, "fail": 0} for lane in lanes},
        "problems": [],
    }

    print(f"\n{rule}")
    # Counts come from the actual inputs rather than being hard-coded.
    print(f"FULL {len(lanes)}-LANE BENCHMARK - {total_problems} PROBLEMS")
    print(f"{rule}\n")

    for idx, p in enumerate(problems, start=1):
        print(f"[{idx}/{total_problems}] {p['task_id']}")
        prob_result = {"task_id": p["task_id"], "lanes": {}}
        prompt = f"Solve this Python function. Return ONLY the implementation, no explanation.\n\n{p['prompt']}"
        for lane in lanes:
            resp = call_lane(prompt, lane)
            if resp.get("response_text"):
                passed = test_solution(p, resp["response_text"])
                prob_result["lanes"][lane] = passed
                results["lanes"][lane]["pass" if passed else "fail"] += 1
                print(f" {lane}: {'PASS' if passed else 'FAIL'}")
            else:
                # No usable answer from this lane: count it as a failure.
                prob_result["lanes"][lane] = False
                results["lanes"][lane]["fail"] += 1
                print(f" {lane}: ERROR")
        results["problems"].append(prob_result)
        print()

    print(f"{rule}")
    print(f"RESULTS (pass@1) - ALL {len(lanes)} LANES")
    print(f"{rule}")
    for lane, stats in results["lanes"].items():
        total = stats["pass"] + stats["fail"]
        pct = (stats["pass"] / total * 100) if total > 0 else 0
        print(f" {lane:10s}: {stats['pass']:3d}/{total:3d} ({pct:.1f}%)")
    return results
if __name__ == "__main__":
    # Script entry point: load the HumanEval test split, benchmark every lane
    # on it, and write the results to a timestamped JSON file in the current
    # directory.

    # Imported here so the module can be imported without `datasets` installed.
    from datasets import load_dataset

    print("Loading official HumanEval...")
    ds = load_dataset("openai/openai_humaneval")
    problems = [dict(item) for item in ds["test"]]
    print(f"Loaded {len(problems)} problems\n")

    results = run_benchmark(problems)

    # Include the date as well as the time so runs on different days cannot
    # overwrite each other's output.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    with open(f"humaneval_6lane_{stamp}.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print("\nResults saved!")