Spaces:
Running
Running
Commit
·
73cf928
1
Parent(s):
9d7fc91
Add Gemini 2.5 Flash (Medium)
Browse files
- config/model_metadata.py +5 -2
- data_processing.py +5 -1
- results/aggregated_scores_icarus.csv +1 -0
- results/aggregated_scores_verilator.csv +1 -0
- results/results_icarus.json +275 -0
- results/results_icarus_november_2025.csv +69 -0
- results/results_verilator.json +275 -0
- results/results_verilator_november_2025.csv +69 -0
config/model_metadata.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
from dataclasses import dataclass
|
| 2 |
-
from typing import Literal
|
| 3 |
|
| 4 |
|
| 5 |
@dataclass
|
| 6 |
class ModelMetadata:
|
| 7 |
url: str # HF model card
|
| 8 |
-
params: float # in B
|
| 9 |
model_type: Literal["General", "Coding", "RTL-Specific"]
|
| 10 |
release: Literal["V1", "V2", "V3"] # release of the leaderboard for which the model was included
|
| 11 |
model_arch: Literal["Dense", "Reasoning"] # to distinguish between reasoners and non-reasoners
|
|
@@ -121,4 +121,7 @@ MODELS = {
|
|
| 121 |
"Hermes-4-14B-Reasoning": ModelMetadata(
|
| 122 |
"https://huggingface.co/NousResearch/Hermes-4-14B", 14, "General", "V3", "Reasoning"
|
| 123 |
),
|
|
|
|
|
|
|
|
|
|
| 124 |
}
|
|
|
|
| 1 |
from dataclasses import dataclass
|
| 2 |
+
from typing import Literal, Optional
|
| 3 |
|
| 4 |
|
| 5 |
@dataclass
|
| 6 |
class ModelMetadata:
|
| 7 |
url: str # HF model card
|
| 8 |
+
params: Optional[float] # in B
|
| 9 |
model_type: Literal["General", "Coding", "RTL-Specific"]
|
| 10 |
release: Literal["V1", "V2", "V3"] # release of the leaderboard for which the model was included
|
| 11 |
model_arch: Literal["Dense", "Reasoning"] # to distinguish between reasoners and non-reasoners
|
|
|
|
| 121 |
"Hermes-4-14B-Reasoning": ModelMetadata(
|
| 122 |
"https://huggingface.co/NousResearch/Hermes-4-14B", 14, "General", "V3", "Reasoning"
|
| 123 |
),
|
| 124 |
+
"Gemini 2.5 Flash (Medium)": ModelMetadata(
|
| 125 |
+
"https://huggingface.co/google", None, "General", "V3", "Reasoning"
|
| 126 |
+
),
|
| 127 |
}
|
data_processing.py
CHANGED
|
@@ -68,7 +68,11 @@ def filter_leaderboard(task, benchmark, model_type, search_query, max_params, st
|
|
| 68 |
if search_query:
|
| 69 |
subset = subset[subset["Model"].str.contains(search_query, case=False, na=False)]
|
| 70 |
max_params = float(max_params)
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
if name == "Other Models":
|
| 74 |
subset = subset[subset["Model"].isin(DISCARDED_MODELS)]
|
|
|
|
| 68 |
if search_query:
|
| 69 |
subset = subset[subset["Model"].str.contains(search_query, case=False, na=False)]
|
| 70 |
max_params = float(max_params)
|
| 71 |
+
|
| 72 |
+
if max_params < 695: # when re-setting the max param slider we never reach 700 again xd
|
| 73 |
+
subset = subset[subset["Params"] <= max_params]
|
| 74 |
+
else:
|
| 75 |
+
subset["Params"] = subset["Params"].fillna("Unknown")
|
| 76 |
|
| 77 |
if name == "Other Models":
|
| 78 |
subset = subset[subset["Model"].isin(DISCARDED_MODELS)]
|
results/aggregated_scores_icarus.csv
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
|
|
|
|
| 2 |
DeepSeek R1-0528,76.79,78.86,78.84,79.65,70.04,71.64
|
| 3 |
DeepSeek R1,75.53,76.99,77.67,77.55,68.49,71.92
|
| 4 |
Qwen3 Coder 480B A35B,60.56,57.84,62.98,56.13,52.56,73.45
|
|
|
|
| 1 |
Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
|
| 2 |
+
Gemini 2.5 Flash (Medium),63.55,69.84,68.27,69.16,47.99,76.02
|
| 3 |
DeepSeek R1-0528,76.79,78.86,78.84,79.65,70.04,71.64
|
| 4 |
DeepSeek R1,75.53,76.99,77.67,77.55,68.49,71.92
|
| 5 |
Qwen3 Coder 480B A35B,60.56,57.84,62.98,56.13,52.56,73.45
|
results/aggregated_scores_verilator.csv
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
|
|
|
|
| 2 |
DeepSeek R1-0528,75.83,78.07,77.64,78.78,69.85,71.64
|
| 3 |
DeepSeek R1,75.78,75.99,78.04,76.42,68.31,72.05
|
| 4 |
Qwen3 Coder 480B A35B,61.46,58.18,64.01,56.5,53.04,73.45
|
|
|
|
| 1 |
Model,Agg S2R,Agg MC,Agg VerilogEval S2R,Agg VerilogEval MC,Agg RTLLM,Agg VeriGen
|
| 2 |
+
Gemini 2.5 Flash (Medium),63.27,70.19,67.75,69.55,48.49,76.02
|
| 3 |
DeepSeek R1-0528,75.83,78.07,77.64,78.78,69.85,71.64
|
| 4 |
DeepSeek R1,75.78,75.99,78.04,76.42,68.31,72.05
|
| 5 |
Qwen3 Coder 480B A35B,61.46,58.18,64.01,56.5,53.04,73.45
|
results/results_icarus.json
CHANGED
|
@@ -1,4 +1,279 @@
|
|
| 1 |
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
{
|
| 3 |
"Model": "DeepSeek R1-0528",
|
| 4 |
"Model Type": "General",
|
|
|
|
| 1 |
[
|
| 2 |
+
{
|
| 3 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 4 |
+
"Model Type": "General",
|
| 5 |
+
"Benchmark": "VerilogEval S2R",
|
| 6 |
+
"Task": "Syntax (STX)",
|
| 7 |
+
"Result": 92.05,
|
| 8 |
+
"Model URL": "https://huggingface.co/google",
|
| 9 |
+
"Params": null,
|
| 10 |
+
"Release": "V3",
|
| 11 |
+
"Thinking": "Reasoning"
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 15 |
+
"Model Type": "General",
|
| 16 |
+
"Benchmark": "RTLLM",
|
| 17 |
+
"Task": "Syntax (STX)",
|
| 18 |
+
"Result": 79.57,
|
| 19 |
+
"Model URL": "https://huggingface.co/google",
|
| 20 |
+
"Params": null,
|
| 21 |
+
"Release": "V3",
|
| 22 |
+
"Thinking": "Reasoning"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 26 |
+
"Model Type": "General",
|
| 27 |
+
"Benchmark": "VerilogEval S2R",
|
| 28 |
+
"Task": "Functionality (FNC)",
|
| 29 |
+
"Result": 70.38,
|
| 30 |
+
"Model URL": "https://huggingface.co/google",
|
| 31 |
+
"Params": null,
|
| 32 |
+
"Release": "V3",
|
| 33 |
+
"Thinking": "Reasoning"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 37 |
+
"Model Type": "General",
|
| 38 |
+
"Benchmark": "RTLLM",
|
| 39 |
+
"Task": "Functionality (FNC)",
|
| 40 |
+
"Result": 62.13,
|
| 41 |
+
"Model URL": "https://huggingface.co/google",
|
| 42 |
+
"Params": null,
|
| 43 |
+
"Release": "V3",
|
| 44 |
+
"Thinking": "Reasoning"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 48 |
+
"Model Type": "General",
|
| 49 |
+
"Benchmark": "VerilogEval S2R",
|
| 50 |
+
"Task": "Synthesis (SYN)",
|
| 51 |
+
"Result": 70.26,
|
| 52 |
+
"Model URL": "https://huggingface.co/google",
|
| 53 |
+
"Params": null,
|
| 54 |
+
"Release": "V3",
|
| 55 |
+
"Thinking": "Reasoning"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 59 |
+
"Model Type": "General",
|
| 60 |
+
"Benchmark": "RTLLM",
|
| 61 |
+
"Task": "Synthesis (SYN)",
|
| 62 |
+
"Result": 45.96,
|
| 63 |
+
"Model URL": "https://huggingface.co/google",
|
| 64 |
+
"Params": null,
|
| 65 |
+
"Release": "V3",
|
| 66 |
+
"Thinking": "Reasoning"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 70 |
+
"Model Type": "General",
|
| 71 |
+
"Benchmark": "VerilogEval S2R",
|
| 72 |
+
"Task": "Power",
|
| 73 |
+
"Result": 69.01,
|
| 74 |
+
"Model URL": "https://huggingface.co/google",
|
| 75 |
+
"Params": null,
|
| 76 |
+
"Release": "V3",
|
| 77 |
+
"Thinking": "Reasoning"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 81 |
+
"Model Type": "General",
|
| 82 |
+
"Benchmark": "RTLLM",
|
| 83 |
+
"Task": "Power",
|
| 84 |
+
"Result": 50.51,
|
| 85 |
+
"Model URL": "https://huggingface.co/google",
|
| 86 |
+
"Params": null,
|
| 87 |
+
"Release": "V3",
|
| 88 |
+
"Thinking": "Reasoning"
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 92 |
+
"Model Type": "General",
|
| 93 |
+
"Benchmark": "VerilogEval S2R",
|
| 94 |
+
"Task": "Performance",
|
| 95 |
+
"Result": 67.11,
|
| 96 |
+
"Model URL": "https://huggingface.co/google",
|
| 97 |
+
"Params": null,
|
| 98 |
+
"Release": "V3",
|
| 99 |
+
"Thinking": "Reasoning"
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 103 |
+
"Model Type": "General",
|
| 104 |
+
"Benchmark": "RTLLM",
|
| 105 |
+
"Task": "Performance",
|
| 106 |
+
"Result": 44.66,
|
| 107 |
+
"Model URL": "https://huggingface.co/google",
|
| 108 |
+
"Params": null,
|
| 109 |
+
"Release": "V3",
|
| 110 |
+
"Thinking": "Reasoning"
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 114 |
+
"Model Type": "General",
|
| 115 |
+
"Benchmark": "VerilogEval S2R",
|
| 116 |
+
"Task": "Area",
|
| 117 |
+
"Result": 68.68,
|
| 118 |
+
"Model URL": "https://huggingface.co/google",
|
| 119 |
+
"Params": null,
|
| 120 |
+
"Release": "V3",
|
| 121 |
+
"Thinking": "Reasoning"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 125 |
+
"Model Type": "General",
|
| 126 |
+
"Benchmark": "RTLLM",
|
| 127 |
+
"Task": "Area",
|
| 128 |
+
"Result": 48.79,
|
| 129 |
+
"Model URL": "https://huggingface.co/google",
|
| 130 |
+
"Params": null,
|
| 131 |
+
"Release": "V3",
|
| 132 |
+
"Thinking": "Reasoning"
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 136 |
+
"Model Type": "General",
|
| 137 |
+
"Benchmark": "RTL-Repo",
|
| 138 |
+
"Task": "Exact Matching (EM)",
|
| 139 |
+
"Result": -1.0,
|
| 140 |
+
"Model URL": "https://huggingface.co/google",
|
| 141 |
+
"Params": null,
|
| 142 |
+
"Release": "V3",
|
| 143 |
+
"Thinking": "Reasoning"
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 147 |
+
"Model Type": "General",
|
| 148 |
+
"Benchmark": "VerilogEval MC",
|
| 149 |
+
"Task": "Syntax (STX)",
|
| 150 |
+
"Result": 91.03,
|
| 151 |
+
"Model URL": "https://huggingface.co/google",
|
| 152 |
+
"Params": null,
|
| 153 |
+
"Release": "V3",
|
| 154 |
+
"Thinking": "Reasoning"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 158 |
+
"Model Type": "General",
|
| 159 |
+
"Benchmark": "VeriGen",
|
| 160 |
+
"Task": "Syntax (STX)",
|
| 161 |
+
"Result": 95.29,
|
| 162 |
+
"Model URL": "https://huggingface.co/google",
|
| 163 |
+
"Params": null,
|
| 164 |
+
"Release": "V3",
|
| 165 |
+
"Thinking": "Reasoning"
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 169 |
+
"Model Type": "General",
|
| 170 |
+
"Benchmark": "VerilogEval MC",
|
| 171 |
+
"Task": "Functionality (FNC)",
|
| 172 |
+
"Result": 71.79,
|
| 173 |
+
"Model URL": "https://huggingface.co/google",
|
| 174 |
+
"Params": null,
|
| 175 |
+
"Release": "V3",
|
| 176 |
+
"Thinking": "Reasoning"
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 180 |
+
"Model Type": "General",
|
| 181 |
+
"Benchmark": "VeriGen",
|
| 182 |
+
"Task": "Functionality (FNC)",
|
| 183 |
+
"Result": 77.65,
|
| 184 |
+
"Model URL": "https://huggingface.co/google",
|
| 185 |
+
"Params": null,
|
| 186 |
+
"Release": "V3",
|
| 187 |
+
"Thinking": "Reasoning"
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 191 |
+
"Model Type": "General",
|
| 192 |
+
"Benchmark": "VerilogEval MC",
|
| 193 |
+
"Task": "Synthesis (SYN)",
|
| 194 |
+
"Result": 71.41,
|
| 195 |
+
"Model URL": "https://huggingface.co/google",
|
| 196 |
+
"Params": null,
|
| 197 |
+
"Release": "V3",
|
| 198 |
+
"Thinking": "Reasoning"
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 202 |
+
"Model Type": "General",
|
| 203 |
+
"Benchmark": "VeriGen",
|
| 204 |
+
"Task": "Synthesis (SYN)",
|
| 205 |
+
"Result": 77.65,
|
| 206 |
+
"Model URL": "https://huggingface.co/google",
|
| 207 |
+
"Params": null,
|
| 208 |
+
"Release": "V3",
|
| 209 |
+
"Thinking": "Reasoning"
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 213 |
+
"Model Type": "General",
|
| 214 |
+
"Benchmark": "VerilogEval MC",
|
| 215 |
+
"Task": "Power",
|
| 216 |
+
"Result": 69.91,
|
| 217 |
+
"Model URL": "https://huggingface.co/google",
|
| 218 |
+
"Params": null,
|
| 219 |
+
"Release": "V3",
|
| 220 |
+
"Thinking": "Reasoning"
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 224 |
+
"Model Type": "General",
|
| 225 |
+
"Benchmark": "VeriGen",
|
| 226 |
+
"Task": "Power",
|
| 227 |
+
"Result": 71.27,
|
| 228 |
+
"Model URL": "https://huggingface.co/google",
|
| 229 |
+
"Params": null,
|
| 230 |
+
"Release": "V3",
|
| 231 |
+
"Thinking": "Reasoning"
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 235 |
+
"Model Type": "General",
|
| 236 |
+
"Benchmark": "VerilogEval MC",
|
| 237 |
+
"Task": "Performance",
|
| 238 |
+
"Result": 68.04,
|
| 239 |
+
"Model URL": "https://huggingface.co/google",
|
| 240 |
+
"Params": null,
|
| 241 |
+
"Release": "V3",
|
| 242 |
+
"Thinking": "Reasoning"
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 246 |
+
"Model Type": "General",
|
| 247 |
+
"Benchmark": "VeriGen",
|
| 248 |
+
"Task": "Performance",
|
| 249 |
+
"Result": 77.35,
|
| 250 |
+
"Model URL": "https://huggingface.co/google",
|
| 251 |
+
"Params": null,
|
| 252 |
+
"Release": "V3",
|
| 253 |
+
"Thinking": "Reasoning"
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 257 |
+
"Model Type": "General",
|
| 258 |
+
"Benchmark": "VerilogEval MC",
|
| 259 |
+
"Task": "Area",
|
| 260 |
+
"Result": 69.52,
|
| 261 |
+
"Model URL": "https://huggingface.co/google",
|
| 262 |
+
"Params": null,
|
| 263 |
+
"Release": "V3",
|
| 264 |
+
"Thinking": "Reasoning"
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 268 |
+
"Model Type": "General",
|
| 269 |
+
"Benchmark": "VeriGen",
|
| 270 |
+
"Task": "Area",
|
| 271 |
+
"Result": 79.45,
|
| 272 |
+
"Model URL": "https://huggingface.co/google",
|
| 273 |
+
"Params": null,
|
| 274 |
+
"Release": "V3",
|
| 275 |
+
"Thinking": "Reasoning"
|
| 276 |
+
},
|
| 277 |
{
|
| 278 |
"Model": "DeepSeek R1-0528",
|
| 279 |
"Model Type": "General",
|
results/results_icarus_november_2025.csv
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area,EM,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area
|
| 2 |
+
,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,RTL-Repo,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen
|
| 3 |
+
Gemini 2.5 Flash (Medium),92.049999999999997,79.569999999999993,70.379999999999995,62.130000000000003,70.260000000000005,45.960000000000001,69.010000000000005,50.509999999999998,67.109999999999999,44.659999999999997,68.680000000000007,48.789999999999999,-1,91.030000000000001,95.290000000000006,71.790000000000006,77.650000000000006,71.409999999999997,77.650000000000006,69.909999999999997,71.269999999999996,68.040000000000006,77.349999999999994,69.519999999999996,79.450000000000003
|
| 4 |
+
DeepSeek R1-0528,96.150000000000006,86.120000000000005,81.540000000000006,64.900000000000006,81.280000000000001,64.489999999999995,79.150000000000006,73.329999999999998,78.090000000000003,65.640000000000001,79.280000000000001,71.150000000000006,-1,95.379999999999995,91.760000000000005,81.540000000000006,74.120000000000005,81.409999999999997,74.120000000000005,80.269999999999996,63.859999999999999,78.689999999999998,74.049999999999997,79.989999999999995,77.010000000000005
|
| 5 |
+
DeepSeek R1,97.180000000000007,89.799999999999997,79.739999999999995,65.709999999999994,79.620000000000005,63.270000000000003,78.329999999999998,71.340000000000003,76.489999999999995,64.060000000000002,78.189999999999998,70.079999999999998,-1,97.439999999999998,96.469999999999999,79.489999999999995,74.120000000000005,79.489999999999995,74.120000000000005,78.269999999999996,64.340000000000003,76.430000000000007,74.290000000000006,77.959999999999994,77.120000000000005
|
| 6 |
+
Qwen3 Coder 480B A35B,96.409999999999997,86.379999999999995,65.379999999999995,59.57,64.739999999999995,53.619999999999997,64.090000000000003,52.850000000000001,61.909999999999997,53.030000000000001,62.93,51.810000000000002,38.880000000000003,84.099999999999994,100,58.850000000000001,75.290000000000006,58.460000000000001,75.290000000000006,57.049999999999997,69.629999999999995,55.539999999999999,73.950000000000003,55.789999999999999,76.760000000000005
|
| 7 |
+
Llama 3.1 405B,87.439999999999998,77.140000000000001,58.969999999999999,45.710000000000001,58.850000000000001,41.630000000000003,57.579999999999998,50.880000000000003,55.93,32.439999999999998,56.130000000000003,43.450000000000003,34.619999999999997,88.590000000000003,95.290000000000006,56.149999999999999,58.82,55.899999999999999,58.82,55.130000000000003,55.100000000000001,53.450000000000003,58.399999999999999,54.479999999999997,61.200000000000003
|
| 8 |
+
Qwen3 236B A22B,91.280000000000001,73.879999999999995,76.920000000000002,51.43,76.790000000000006,48.57,75.25,54.609999999999999,73.560000000000002,46.369999999999997,75.670000000000002,50.469999999999999,41.939999999999998,82.180000000000007,87.060000000000002,69.620000000000005,62.350000000000001,69.620000000000005,62.350000000000001,69.040000000000006,54.630000000000003,66.890000000000001,62.630000000000003,69.150000000000006,62.780000000000001
|
| 9 |
+
gpt-oss-120b,92.950000000000003,83.829999999999998,78.079999999999998,60,77.560000000000002,51.490000000000002,76.659999999999997,53.960000000000001,74.489999999999995,49.850000000000001,76.530000000000001,54.640000000000001,-1,95.640000000000001,88.239999999999995,81.150000000000006,68.239999999999995,80.510000000000005,68.239999999999995,79.719999999999999,70.099999999999994,77.540000000000006,68.269999999999996,78.969999999999999,69.849999999999994
|
| 10 |
+
Qwen2.5 72B,82.180000000000007,79.590000000000003,52.439999999999998,45.310000000000002,51.920000000000002,44.079999999999998,51.829999999999998,46.469999999999999,48.75,45.399999999999999,50.090000000000003,47.649999999999999,37.439999999999998,80.900000000000006,84.709999999999994,52.950000000000003,47.060000000000002,52.689999999999998,47.060000000000002,51.659999999999997,47.590000000000003,49.369999999999997,46.960000000000001,51.18,47.700000000000003
|
| 11 |
+
Llama 3.(1-3) 70B,66.150000000000006,73.879999999999995,40.640000000000001,42.450000000000003,40.640000000000001,39.18,40.460000000000001,40.810000000000002,38.079999999999998,38.140000000000001,39.859999999999999,39.649999999999999,28.719999999999999,84.739999999999995,89.409999999999997,41.670000000000002,65.879999999999995,41.670000000000002,64.709999999999994,41.380000000000003,63.469999999999999,39.75,64.689999999999998,41.359999999999999,64.709999999999994
|
| 12 |
+
Seed-OSS-36B,88.969999999999999,81.700000000000003,71.150000000000006,59.149999999999999,71.150000000000006,56.600000000000001,70.099999999999994,64.629999999999995,68.099999999999994,57.32,70.319999999999993,60.880000000000003,-1,91.030000000000001,91.760000000000005,76.540000000000006,70.590000000000003,76.150000000000006,70.590000000000003,74.950000000000003,66.109999999999999,73.329999999999998,70.189999999999998,74.689999999999998,72.450000000000003
|
| 13 |
+
QwQ 32B,87.950000000000003,82.450000000000003,66.409999999999997,56.729999999999997,66.409999999999997,52.240000000000002,66.150000000000006,55.829999999999998,63.799999999999997,51.909999999999997,65.120000000000005,56.07,-1,58.969999999999999,68.239999999999995,40,52.939999999999998,39.619999999999997,52.939999999999998,39.399999999999999,51.469999999999999,37.530000000000001,52.93,39.100000000000001,53.390000000000001
|
| 14 |
+
Qwen2.5 32B,88.590000000000003,84.079999999999998,52.560000000000002,50.200000000000003,52.18,46.119999999999997,52.32,49.729999999999997,49.43,46.43,50.82,50.43,28.93,93.209999999999994,85.879999999999995,41.539999999999999,45.880000000000003,41.539999999999999,45.880000000000003,41.310000000000002,43.560000000000002,40.479999999999997,46.079999999999998,41.229999999999997,45.369999999999997
|
| 15 |
+
Magistral Small 2506,63.850000000000001,71.909999999999997,42.950000000000003,43.399999999999999,42.439999999999998,37.450000000000003,41.390000000000001,43.359999999999999,40.090000000000003,37.439999999999998,41.390000000000001,40.32,-1,31.280000000000001,68.239999999999995,20.899999999999999,50.590000000000003,20.77,49.409999999999997,20.109999999999999,41.590000000000003,19.609999999999999,48.969999999999999,20.16,50.07
|
| 16 |
+
gpt-oss-20b,88.079999999999998,86.810000000000002,69.739999999999995,58.719999999999999,69.230000000000004,48.939999999999998,69.849999999999994,55.890000000000001,64.569999999999993,45.210000000000001,67.819999999999993,53.289999999999999,-1,91.920000000000002,76.469999999999999,70.260000000000005,50.590000000000003,69.739999999999995,50.590000000000003,69.5,45.409999999999997,67.060000000000002,50.479999999999997,68.760000000000005,49.850000000000001
|
| 17 |
+
StarChat2 15B v0.1,88.459999999999994,84.900000000000006,37.950000000000003,44.490000000000002,37.950000000000003,44.079999999999998,37.560000000000002,46.950000000000003,35.299999999999997,43.219999999999999,37.189999999999998,46.649999999999999,13.42,79.739999999999995,92.939999999999998,36.409999999999997,63.530000000000001,36.030000000000001,63.530000000000001,36.079999999999998,58.060000000000002,34.909999999999997,63.259999999999998,35.759999999999998,64.560000000000002
|
| 18 |
+
DeepSeek R1 Distill Qwen 14B,42.18,34.689999999999998,25.510000000000002,18.370000000000001,25.510000000000002,16.329999999999998,25.359999999999999,17.859999999999999,24.190000000000001,16.48,25.27,17.329999999999998,-1,45,44.710000000000001,25.640000000000001,28.239999999999998,25.260000000000002,28.239999999999998,24.789999999999999,24.710000000000001,23.48,28.140000000000001,24.629999999999999,28.350000000000001
|
| 19 |
+
Hermes-4-14B-Reasoning,79.739999999999995,76.599999999999994,55.899999999999999,39.57,55.640000000000001,34.469999999999999,55.340000000000003,38.350000000000001,53.729999999999997,32.729999999999997,55.170000000000002,36.130000000000003,-1,51.149999999999999,74.120000000000005,36.539999999999999,56.469999999999999,36.280000000000001,56.469999999999999,35.259999999999998,52.969999999999999,35.520000000000003,56.219999999999999,34.939999999999998,57.439999999999998
|
| 20 |
+
Hermes-4-14B,72.180000000000007,80.849999999999994,42.310000000000002,48.090000000000003,42.310000000000002,45.530000000000001,41.729999999999997,50.590000000000003,40.329999999999998,45.810000000000002,41.380000000000003,47.909999999999997,28.359999999999999,52.689999999999998,83.530000000000001,27.440000000000001,45.880000000000003,27.440000000000001,45.880000000000003,26.539999999999999,43.549999999999997,26.68,45.979999999999997,25.109999999999999,44.960000000000001
|
| 21 |
+
Qwen3-8B,70.769999999999996,62.130000000000003,50,34.469999999999999,50,32.770000000000003,49.329999999999998,35.100000000000001,47.920000000000002,32.520000000000003,48.909999999999997,31.879999999999999,-1,69.359999999999999,81.180000000000007,51.539999999999999,40,50.640000000000001,38.82,49.909999999999997,37.469999999999999,48.399999999999999,38.759999999999998,49.409999999999997,38.729999999999997
|
| 22 |
+
CodeLlama 70B,67.049999999999997,69.799999999999997,33.079999999999998,36.329999999999998,33.079999999999998,34.289999999999999,32.689999999999998,37.189999999999998,31.460000000000001,34.289999999999999,32.439999999999998,35.950000000000003,24.329999999999998,90.769999999999996,88.239999999999995,33.329999999999998,47.060000000000002,33.329999999999998,47.060000000000002,33.020000000000003,45.799999999999997,30.800000000000001,46.909999999999997,32.990000000000002,46.979999999999997
|
| 23 |
+
DeepSeek Coder 33B,62.82,83.670000000000002,23.329999999999998,42.450000000000003,23.079999999999998,42.039999999999999,22.859999999999999,42.289999999999999,22.809999999999999,39.420000000000002,22.289999999999999,42.710000000000001,24.579999999999998,75.260000000000005,88.239999999999995,39.619999999999997,45.880000000000003,39.359999999999999,45.880000000000003,38.229999999999997,46.259999999999998,36.789999999999999,45.609999999999999,37.899999999999999,46.170000000000002
|
| 24 |
+
QwenCoder 2.5 32B,87.180000000000007,77.959999999999994,45,43.270000000000003,44.869999999999997,43.270000000000003,44.25,46.82,43.030000000000001,43.200000000000003,43.759999999999998,45.420000000000002,31.07,83.719999999999999,87.060000000000002,45.640000000000001,54.119999999999997,45.130000000000003,54.119999999999997,44.590000000000003,54.549999999999997,43.009999999999998,54.009999999999998,44.549999999999997,55.009999999999998
|
| 25 |
+
DeepCoder 14B,43.850000000000001,39.590000000000003,28.079999999999998,23.670000000000002,28.079999999999998,22.039999999999999,27.940000000000001,25,26.260000000000002,22,27.77,23.149999999999999,-1,61.920000000000002,48.240000000000002,34.100000000000001,32.939999999999998,33.719999999999999,32.939999999999998,33.700000000000003,30.469999999999999,32.170000000000002,32.840000000000003,33.670000000000002,32.909999999999997
|
| 26 |
+
QwenCoder 2.5 14B,78.969999999999999,81.629999999999995,37.82,46.119999999999997,37.439999999999998,45.310000000000002,35.939999999999998,45.82,34.829999999999998,44.640000000000001,35.18,46.049999999999997,37.530000000000001,80,83.530000000000001,41.670000000000002,48.240000000000002,41.149999999999999,48.240000000000002,40.740000000000002,47.090000000000003,39.200000000000003,48.289999999999999,40.829999999999998,47.539999999999999
|
| 27 |
+
SeedCoder 8B,91.409999999999997,85.310000000000002,53.460000000000001,47.350000000000001,53.329999999999998,46.530000000000001,52.859999999999999,49.420000000000002,50.619999999999997,45.600000000000001,51.649999999999999,49.590000000000003,28.23,77.439999999999998,94.120000000000005,37.310000000000002,42.350000000000001,37.310000000000002,38.82,37.32,35.289999999999999,35.350000000000001,38.689999999999998,36.890000000000001,38.990000000000002
|
| 28 |
+
SeedCoder 8B Reasoning,67.819999999999993,53.469999999999999,49.229999999999997,30.199999999999999,49.229999999999997,29.390000000000001,48.920000000000002,32.039999999999999,46.759999999999998,28.640000000000001,47.869999999999997,29.989999999999998,-1,83.329999999999998,78.819999999999993,48.210000000000001,62.350000000000001,48.079999999999998,62.350000000000001,47.780000000000001,53.479999999999997,45.439999999999998,61.810000000000002,47.060000000000002,64.620000000000005
|
| 29 |
+
OpenCoder 8B,78.209999999999994,75.920000000000002,28.460000000000001,42.859999999999999,27.82,40.82,27.34,41.359999999999999,25.949999999999999,39.770000000000003,27.109999999999999,41.359999999999999,16.170000000000002,80,95.290000000000006,35.640000000000001,45.880000000000003,35.380000000000003,45.880000000000003,35.119999999999997,42.399999999999999,33.469999999999999,45.75,35.130000000000003,46.259999999999998
|
| 30 |
+
QwenCoder 2.5 7B,20.129999999999999,76.329999999999998,6.9199999999999999,38.780000000000001,6.6699999999999999,37.140000000000001,6.5099999999999998,40.649999999999999,6.6299999999999999,37.25,6.5599999999999996,39.579999999999998,28.329999999999998,74.099999999999994,90.590000000000003,33.719999999999999,40,33.719999999999999,40,33.590000000000003,37.729999999999997,31.780000000000001,40.07,33.619999999999997,40.57
|
| 31 |
+
DeepSeek Coder 6.7B,82.049999999999997,78.780000000000001,29.620000000000001,41.219999999999999,29.489999999999998,38.780000000000001,29.510000000000002,42.619999999999997,27.73,39.329999999999998,29.41,43.299999999999997,24.629999999999999,67.180000000000007,84.709999999999994,31.670000000000002,37.649999999999999,29.870000000000001,37.649999999999999,29.780000000000001,36.450000000000003,27.98,37.469999999999999,29.210000000000001,37.659999999999997
|
| 32 |
+
CodeV R1 Distill Qwen 7B,56.920000000000002,73.060000000000002,33.329999999999998,49.799999999999997,33.329999999999998,47.350000000000001,32.579999999999998,49.25,32.009999999999998,47.450000000000003,32.450000000000003,49.009999999999998,-1,92.689999999999998,89.409999999999997,21.280000000000001,65.879999999999995,21.280000000000001,65.879999999999995,21.039999999999999,60.100000000000001,19.59,65.590000000000003,21.050000000000001,66.239999999999995
|
| 33 |
+
HaVen-CodeQwen,93.329999999999998,80.409999999999997,47.310000000000002,42.859999999999999,46.149999999999999,41.219999999999999,45.079999999999998,40.590000000000003,44.259999999999998,38.829999999999998,44.68,40.530000000000001,25.140000000000001,93.590000000000003,100,50.130000000000003,62.350000000000001,49.490000000000002,62.350000000000001,47.549999999999997,61.82,47.049999999999997,62.530000000000001,47.090000000000003,61.759999999999998
|
| 34 |
+
CodeV-QW-7B,45.380000000000003,68.159999999999997,19.620000000000001,34.289999999999999,18.969999999999999,26.530000000000001,18.91,28.140000000000001,18.710000000000001,21.800000000000001,18.850000000000001,26.5,20.940000000000001,93.329999999999998,100,52.310000000000002,60,51.539999999999999,60,51.689999999999998,59.899999999999999,48.789999999999999,59.950000000000003,51.450000000000003,60.579999999999998
|
| 35 |
+
RTLCoder Mistral,54.869999999999997,32.240000000000002,24.620000000000001,16.329999999999998,24.620000000000001,15.92,24.280000000000001,16.030000000000001,22.780000000000001,14.710000000000001,24.059999999999999,16,14.77,60.509999999999998,85.879999999999995,27.050000000000001,51.759999999999998,27.050000000000001,51.759999999999998,26.940000000000001,49.850000000000001,25.219999999999999,51.939999999999998,26.870000000000001,52.719999999999999
|
| 36 |
+
RTLCoder DeepSeek,84.620000000000005,73.060000000000002,39.490000000000002,37.140000000000001,39.490000000000002,34.689999999999998,38.909999999999997,34.299999999999997,37.520000000000003,32.759999999999998,38.549999999999997,33.689999999999998,19.350000000000001,77.310000000000002,85.879999999999995,36.920000000000002,55.289999999999999,36.789999999999999,55.289999999999999,36.939999999999998,50.789999999999999,34.840000000000003,55.219999999999999,36.619999999999997,54.689999999999998
|
| 37 |
+
OriGen,96.150000000000006,81.629999999999995,54.229999999999997,50.609999999999999,54.229999999999997,50.609999999999999,54.289999999999999,53.100000000000001,51.57,50.859999999999999,53.149999999999999,53.439999999999998,17.07,92.439999999999998,98.819999999999993,50.770000000000003,76.469999999999999,50.770000000000003,76.469999999999999,50.950000000000003,71.659999999999997,48.530000000000001,76.609999999999999,50.509999999999998,78.700000000000003
|
| 38 |
+
CodeV-CL-7B,32.18,48.159999999999997,13.08,24.489999999999998,12.949999999999999,21.629999999999999,12.800000000000001,22.25,12.51,20.59,12.82,21.289999999999999,12.27,92.049999999999997,98.819999999999993,31.789999999999999,49.409999999999997,31.789999999999999,49.409999999999997,31.739999999999998,48.130000000000003,29.449999999999999,49.340000000000003,31.609999999999999,49.079999999999998
|
| 39 |
+
CodeV-DS-6.7B,33.590000000000003,67.349999999999994,15,38.780000000000001,15,37.140000000000001,15.1,35.560000000000002,14.460000000000001,35.130000000000003,14.85,35.880000000000003,21.260000000000002,95.510000000000005,100,47.049999999999997,61.18,47.049999999999997,60,47.369999999999997,59.880000000000003,44.350000000000001,59.960000000000001,46.520000000000003,59.770000000000003
|
| 40 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 41 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 42 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 43 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 44 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 45 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 46 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 47 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 48 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 49 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 50 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 51 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 52 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 53 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 54 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 55 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 56 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 57 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 58 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 59 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 60 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 61 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 62 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 63 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 64 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 65 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 66 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 67 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 68 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 69 |
+
|
results/results_verilator.json
CHANGED
|
@@ -1,4 +1,279 @@
|
|
| 1 |
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
{
|
| 3 |
"Model": "DeepSeek R1-0528",
|
| 4 |
"Model Type": "General",
|
|
|
|
| 1 |
[
|
| 2 |
+
{
|
| 3 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 4 |
+
"Model Type": "General",
|
| 5 |
+
"Benchmark": "VerilogEval S2R",
|
| 6 |
+
"Task": "Syntax (STX)",
|
| 7 |
+
"Result": 91.54,
|
| 8 |
+
"Model URL": "https://huggingface.co/google",
|
| 9 |
+
"Params": null,
|
| 10 |
+
"Release": "V3",
|
| 11 |
+
"Thinking": "Reasoning"
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 15 |
+
"Model Type": "General",
|
| 16 |
+
"Benchmark": "RTLLM",
|
| 17 |
+
"Task": "Syntax (STX)",
|
| 18 |
+
"Result": 81.7,
|
| 19 |
+
"Model URL": "https://huggingface.co/google",
|
| 20 |
+
"Params": null,
|
| 21 |
+
"Release": "V3",
|
| 22 |
+
"Thinking": "Reasoning"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 26 |
+
"Model Type": "General",
|
| 27 |
+
"Benchmark": "VerilogEval S2R",
|
| 28 |
+
"Task": "Functionality (FNC)",
|
| 29 |
+
"Result": 69.74,
|
| 30 |
+
"Model URL": "https://huggingface.co/google",
|
| 31 |
+
"Params": null,
|
| 32 |
+
"Release": "V3",
|
| 33 |
+
"Thinking": "Reasoning"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 37 |
+
"Model Type": "General",
|
| 38 |
+
"Benchmark": "RTLLM",
|
| 39 |
+
"Task": "Functionality (FNC)",
|
| 40 |
+
"Result": 64.68,
|
| 41 |
+
"Model URL": "https://huggingface.co/google",
|
| 42 |
+
"Params": null,
|
| 43 |
+
"Release": "V3",
|
| 44 |
+
"Thinking": "Reasoning"
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 48 |
+
"Model Type": "General",
|
| 49 |
+
"Benchmark": "VerilogEval S2R",
|
| 50 |
+
"Task": "Synthesis (SYN)",
|
| 51 |
+
"Result": 69.23,
|
| 52 |
+
"Model URL": "https://huggingface.co/google",
|
| 53 |
+
"Params": null,
|
| 54 |
+
"Release": "V3",
|
| 55 |
+
"Thinking": "Reasoning"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 59 |
+
"Model Type": "General",
|
| 60 |
+
"Benchmark": "RTLLM",
|
| 61 |
+
"Task": "Synthesis (SYN)",
|
| 62 |
+
"Result": 46.38,
|
| 63 |
+
"Model URL": "https://huggingface.co/google",
|
| 64 |
+
"Params": null,
|
| 65 |
+
"Release": "V3",
|
| 66 |
+
"Thinking": "Reasoning"
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 70 |
+
"Model Type": "General",
|
| 71 |
+
"Benchmark": "VerilogEval S2R",
|
| 72 |
+
"Task": "Power",
|
| 73 |
+
"Result": 68.4,
|
| 74 |
+
"Model URL": "https://huggingface.co/google",
|
| 75 |
+
"Params": null,
|
| 76 |
+
"Release": "V3",
|
| 77 |
+
"Thinking": "Reasoning"
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 81 |
+
"Model Type": "General",
|
| 82 |
+
"Benchmark": "RTLLM",
|
| 83 |
+
"Task": "Power",
|
| 84 |
+
"Result": 51.11,
|
| 85 |
+
"Model URL": "https://huggingface.co/google",
|
| 86 |
+
"Params": null,
|
| 87 |
+
"Release": "V3",
|
| 88 |
+
"Thinking": "Reasoning"
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 92 |
+
"Model Type": "General",
|
| 93 |
+
"Benchmark": "VerilogEval S2R",
|
| 94 |
+
"Task": "Performance",
|
| 95 |
+
"Result": 66.57,
|
| 96 |
+
"Model URL": "https://huggingface.co/google",
|
| 97 |
+
"Params": null,
|
| 98 |
+
"Release": "V3",
|
| 99 |
+
"Thinking": "Reasoning"
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 103 |
+
"Model Type": "General",
|
| 104 |
+
"Benchmark": "RTLLM",
|
| 105 |
+
"Task": "Performance",
|
| 106 |
+
"Result": 45.1,
|
| 107 |
+
"Model URL": "https://huggingface.co/google",
|
| 108 |
+
"Params": null,
|
| 109 |
+
"Release": "V3",
|
| 110 |
+
"Thinking": "Reasoning"
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 114 |
+
"Model Type": "General",
|
| 115 |
+
"Benchmark": "VerilogEval S2R",
|
| 116 |
+
"Task": "Area",
|
| 117 |
+
"Result": 68.27,
|
| 118 |
+
"Model URL": "https://huggingface.co/google",
|
| 119 |
+
"Params": null,
|
| 120 |
+
"Release": "V3",
|
| 121 |
+
"Thinking": "Reasoning"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 125 |
+
"Model Type": "General",
|
| 126 |
+
"Benchmark": "RTLLM",
|
| 127 |
+
"Task": "Area",
|
| 128 |
+
"Result": 49.25,
|
| 129 |
+
"Model URL": "https://huggingface.co/google",
|
| 130 |
+
"Params": null,
|
| 131 |
+
"Release": "V3",
|
| 132 |
+
"Thinking": "Reasoning"
|
| 133 |
+
},
|
| 134 |
+
{
|
| 135 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 136 |
+
"Model Type": "General",
|
| 137 |
+
"Benchmark": "RTL-Repo",
|
| 138 |
+
"Task": "Exact Matching (EM)",
|
| 139 |
+
"Result": -1.0,
|
| 140 |
+
"Model URL": "https://huggingface.co/google",
|
| 141 |
+
"Params": null,
|
| 142 |
+
"Release": "V3",
|
| 143 |
+
"Thinking": "Reasoning"
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 147 |
+
"Model Type": "General",
|
| 148 |
+
"Benchmark": "VerilogEval MC",
|
| 149 |
+
"Task": "Syntax (STX)",
|
| 150 |
+
"Result": 90.64,
|
| 151 |
+
"Model URL": "https://huggingface.co/google",
|
| 152 |
+
"Params": null,
|
| 153 |
+
"Release": "V3",
|
| 154 |
+
"Thinking": "Reasoning"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 158 |
+
"Model Type": "General",
|
| 159 |
+
"Benchmark": "VeriGen",
|
| 160 |
+
"Task": "Syntax (STX)",
|
| 161 |
+
"Result": 96.47,
|
| 162 |
+
"Model URL": "https://huggingface.co/google",
|
| 163 |
+
"Params": null,
|
| 164 |
+
"Release": "V3",
|
| 165 |
+
"Thinking": "Reasoning"
|
| 166 |
+
},
|
| 167 |
+
{
|
| 168 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 169 |
+
"Model Type": "General",
|
| 170 |
+
"Benchmark": "VerilogEval MC",
|
| 171 |
+
"Task": "Functionality (FNC)",
|
| 172 |
+
"Result": 71.67,
|
| 173 |
+
"Model URL": "https://huggingface.co/google",
|
| 174 |
+
"Params": null,
|
| 175 |
+
"Release": "V3",
|
| 176 |
+
"Thinking": "Reasoning"
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 180 |
+
"Model Type": "General",
|
| 181 |
+
"Benchmark": "VeriGen",
|
| 182 |
+
"Task": "Functionality (FNC)",
|
| 183 |
+
"Result": 77.65,
|
| 184 |
+
"Model URL": "https://huggingface.co/google",
|
| 185 |
+
"Params": null,
|
| 186 |
+
"Release": "V3",
|
| 187 |
+
"Thinking": "Reasoning"
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 191 |
+
"Model Type": "General",
|
| 192 |
+
"Benchmark": "VerilogEval MC",
|
| 193 |
+
"Task": "Synthesis (SYN)",
|
| 194 |
+
"Result": 71.28,
|
| 195 |
+
"Model URL": "https://huggingface.co/google",
|
| 196 |
+
"Params": null,
|
| 197 |
+
"Release": "V3",
|
| 198 |
+
"Thinking": "Reasoning"
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 202 |
+
"Model Type": "General",
|
| 203 |
+
"Benchmark": "VeriGen",
|
| 204 |
+
"Task": "Synthesis (SYN)",
|
| 205 |
+
"Result": 77.65,
|
| 206 |
+
"Model URL": "https://huggingface.co/google",
|
| 207 |
+
"Params": null,
|
| 208 |
+
"Release": "V3",
|
| 209 |
+
"Thinking": "Reasoning"
|
| 210 |
+
},
|
| 211 |
+
{
|
| 212 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 213 |
+
"Model Type": "General",
|
| 214 |
+
"Benchmark": "VerilogEval MC",
|
| 215 |
+
"Task": "Power",
|
| 216 |
+
"Result": 70.28,
|
| 217 |
+
"Model URL": "https://huggingface.co/google",
|
| 218 |
+
"Params": null,
|
| 219 |
+
"Release": "V3",
|
| 220 |
+
"Thinking": "Reasoning"
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 224 |
+
"Model Type": "General",
|
| 225 |
+
"Benchmark": "VeriGen",
|
| 226 |
+
"Task": "Power",
|
| 227 |
+
"Result": 71.27,
|
| 228 |
+
"Model URL": "https://huggingface.co/google",
|
| 229 |
+
"Params": null,
|
| 230 |
+
"Release": "V3",
|
| 231 |
+
"Thinking": "Reasoning"
|
| 232 |
+
},
|
| 233 |
+
{
|
| 234 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 235 |
+
"Model Type": "General",
|
| 236 |
+
"Benchmark": "VerilogEval MC",
|
| 237 |
+
"Task": "Performance",
|
| 238 |
+
"Result": 68.06,
|
| 239 |
+
"Model URL": "https://huggingface.co/google",
|
| 240 |
+
"Params": null,
|
| 241 |
+
"Release": "V3",
|
| 242 |
+
"Thinking": "Reasoning"
|
| 243 |
+
},
|
| 244 |
+
{
|
| 245 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 246 |
+
"Model Type": "General",
|
| 247 |
+
"Benchmark": "VeriGen",
|
| 248 |
+
"Task": "Performance",
|
| 249 |
+
"Result": 77.35,
|
| 250 |
+
"Model URL": "https://huggingface.co/google",
|
| 251 |
+
"Params": null,
|
| 252 |
+
"Release": "V3",
|
| 253 |
+
"Thinking": "Reasoning"
|
| 254 |
+
},
|
| 255 |
+
{
|
| 256 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 257 |
+
"Model Type": "General",
|
| 258 |
+
"Benchmark": "VerilogEval MC",
|
| 259 |
+
"Task": "Area",
|
| 260 |
+
"Result": 70.32,
|
| 261 |
+
"Model URL": "https://huggingface.co/google",
|
| 262 |
+
"Params": null,
|
| 263 |
+
"Release": "V3",
|
| 264 |
+
"Thinking": "Reasoning"
|
| 265 |
+
},
|
| 266 |
+
{
|
| 267 |
+
"Model": "Gemini 2.5 Flash (Medium)",
|
| 268 |
+
"Model Type": "General",
|
| 269 |
+
"Benchmark": "VeriGen",
|
| 270 |
+
"Task": "Area",
|
| 271 |
+
"Result": 79.45,
|
| 272 |
+
"Model URL": "https://huggingface.co/google",
|
| 273 |
+
"Params": null,
|
| 274 |
+
"Release": "V3",
|
| 275 |
+
"Thinking": "Reasoning"
|
| 276 |
+
},
|
| 277 |
{
|
| 278 |
"Model": "DeepSeek R1-0528",
|
| 279 |
"Model Type": "General",
|
results/results_verilator_november_2025.csv
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area,EM,Syntax (STX),Syntax (STX),Functionality (FNC),Functionality (FNC),Synthesis (SYN),Synthesis (SYN),Power,Power,Performance,Performance,Area,Area
|
| 2 |
+
,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,VerilogEval S2R,RTLLM,RTL-Repo,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen,VerilogEval MC,VeriGen
|
| 3 |
+
Gemini 2.5 Flash (Medium),91.540000000000006,81.700000000000003,69.739999999999995,64.680000000000007,69.230000000000004,46.380000000000003,68.400000000000006,51.109999999999999,66.569999999999993,45.100000000000001,68.269999999999996,49.25,-1,90.640000000000001,96.469999999999999,71.670000000000002,77.650000000000006,71.280000000000001,77.650000000000006,70.280000000000001,71.269999999999996,68.060000000000002,77.349999999999994,70.319999999999993,79.450000000000003
|
| 4 |
+
DeepSeek R1-0528,96.540000000000006,89.790000000000006,79.099999999999994,67.659999999999997,78.969999999999999,64.260000000000005,78.439999999999998,73.319999999999993,76.290000000000006,65.379999999999995,78.200000000000003,70.859999999999999,-1,94.739999999999995,98.819999999999993,80,80,79.870000000000005,74.120000000000005,79.659999999999997,63.859999999999999,77.280000000000001,74.049999999999997,79.409999999999997,77.010000000000005
|
| 5 |
+
DeepSeek R1,97.689999999999998,93.189999999999998,79.359999999999999,67.659999999999997,79.099999999999994,62.979999999999997,79.010000000000005,71.379999999999995,76.489999999999995,63.740000000000002,78.629999999999995,69.819999999999993,-1,97.439999999999998,95.290000000000006,77.819999999999993,74.120000000000005,77.689999999999998,74.120000000000005,77.299999999999997,64.579999999999998,75.030000000000001,74.420000000000002,76.939999999999998,77.150000000000006
|
| 6 |
+
Qwen3 Coder 480B A35B,95.900000000000006,89.359999999999999,66.030000000000001,61.280000000000001,65.260000000000005,54.039999999999999,65.329999999999998,53.43,62.450000000000003,53.43,64.239999999999995,52.259999999999998,38.880000000000003,83.969999999999999,100,58.719999999999999,75.290000000000006,58.210000000000001,75.290000000000006,57.579999999999998,69.629999999999995,55.420000000000002,73.950000000000003,56.5,76.760000000000005
|
| 7 |
+
Llama 3.1 405B,88.209999999999994,83.400000000000006,56.789999999999999,50.210000000000001,56.409999999999997,43.829999999999998,55.159999999999997,53.869999999999997,54.009999999999998,33.829999999999998,53.990000000000002,45.729999999999997,34.619999999999997,90.510000000000005,95.290000000000006,57.689999999999998,60,56.670000000000002,55.289999999999999,55.950000000000003,51.57,54.090000000000003,54.869999999999997,55.259999999999998,57.670000000000002
|
| 8 |
+
Qwen3 236B A22B,93.969999999999999,78.719999999999999,75.769999999999996,56.170000000000002,75.379999999999995,50.210000000000001,75,57.009999999999998,72.939999999999998,47.850000000000001,74.780000000000001,52.450000000000003,41.939999999999998,83.719999999999999,88.239999999999995,68.969999999999999,62.350000000000001,68.459999999999994,62.350000000000001,68.299999999999997,54.630000000000003,65.989999999999995,62.630000000000003,68.349999999999994,62.780000000000001
|
| 9 |
+
gpt-oss-120b,94.359999999999999,85.959999999999994,76.670000000000002,62.130000000000003,76.540000000000006,51.490000000000002,76.510000000000005,53.960000000000001,73.829999999999998,49.850000000000001,76.010000000000005,54.640000000000001,-1,95.379999999999995,89.409999999999997,76.920000000000002,69.409999999999997,76.670000000000002,69.409999999999997,76.480000000000004,71.049999999999997,73.909999999999997,69.439999999999998,75.810000000000002,70.879999999999995
|
| 10 |
+
Qwen2.5 72B,83.849999999999994,82.980000000000004,55,48.509999999999998,54.490000000000002,45.960000000000001,53.969999999999999,48.450000000000003,51.299999999999997,47.340000000000003,52.840000000000003,49.68,37.439999999999998,83.079999999999998,87.060000000000002,54.740000000000002,50.590000000000003,54.359999999999999,50.590000000000003,53.359999999999999,50.049999999999997,51.200000000000003,50.560000000000002,52.759999999999998,52.229999999999997
|
| 11 |
+
Llama 3.(1-3) 70B,68.329999999999998,79.150000000000006,40.899999999999999,45.960000000000001,40.899999999999999,40.43,40.770000000000003,42.119999999999997,38.329999999999998,39.340000000000003,40.420000000000002,40.920000000000002,28.719999999999999,86.030000000000001,87.060000000000002,43.719999999999999,63.530000000000001,43.590000000000003,63.530000000000001,43.549999999999997,62.289999999999999,41.57,63.520000000000003,42.939999999999998,63.539999999999999
|
| 12 |
+
Seed-OSS-36B,89.359999999999999,83.400000000000006,71.150000000000006,61.700000000000003,70.900000000000006,57.020000000000003,70.530000000000001,65.329999999999998,68.219999999999999,57.32,70.329999999999998,61.390000000000001,-1,91.030000000000001,91.760000000000005,73.849999999999994,70.590000000000003,73.459999999999994,70.590000000000003,73.040000000000006,66.109999999999999,70.890000000000001,70.189999999999998,72.599999999999994,72.450000000000003
|
| 13 |
+
QwQ 32B,89.230000000000004,85.959999999999994,67.560000000000002,58.299999999999997,67.180000000000007,53.619999999999997,67.040000000000006,57.289999999999999,64.780000000000001,53.340000000000003,66.439999999999998,57.560000000000002,-1,59.619999999999997,87.060000000000002,40.130000000000003,65.879999999999995,39.490000000000002,64.709999999999994,39.259999999999998,60.950000000000003,37.490000000000002,64.480000000000004,39.170000000000002,66.310000000000002
|
| 14 |
+
Qwen2.5 32B,90.900000000000006,87.659999999999997,55.899999999999999,50.210000000000001,55.259999999999998,46.810000000000002,55.560000000000002,50.659999999999997,53.100000000000001,47.409999999999997,54.020000000000003,51.369999999999997,28.93,95.510000000000005,85.879999999999995,45,49.409999999999997,43.719999999999999,49.409999999999997,43.719999999999999,46.409999999999997,42.490000000000002,49.890000000000001,42.950000000000003,49.609999999999999
|
| 15 |
+
Magistral Small 2506,65.129999999999995,74.040000000000006,42.950000000000003,44.259999999999998,42.310000000000002,37.450000000000003,41.810000000000002,43.100000000000001,40.189999999999998,37.369999999999997,41.75,40.350000000000001,-1,31.789999999999999,70.590000000000003,21.030000000000001,55.289999999999999,20.77,54.119999999999997,20.329999999999998,46.600000000000001,20.079999999999998,54.020000000000003,20.66,55.060000000000002
|
| 16 |
+
gpt-oss-20b,88.209999999999994,89.790000000000006,68.079999999999998,61.280000000000001,67.689999999999998,49.359999999999999,68.920000000000002,57.140000000000001,63.630000000000003,45.880000000000003,66.810000000000002,54.340000000000003,-1,91.670000000000002,76.469999999999999,68.849999999999994,51.759999999999998,68.590000000000003,51.759999999999998,68.689999999999998,46.57,66.390000000000001,51.560000000000002,68.040000000000006,50.869999999999997
|
| 17 |
+
StarChat2 15B v0.1,88.719999999999999,89.359999999999999,38.850000000000001,48.509999999999998,38.850000000000001,45.960000000000001,38.920000000000002,48.950000000000003,36.18,45.049999999999997,38.810000000000002,48.630000000000003,13.42,81.409999999999997,91.760000000000005,36.539999999999999,63.530000000000001,36.280000000000001,63.530000000000001,36.189999999999998,57.509999999999998,35.109999999999999,63.229999999999997,36.240000000000002,65
|
| 18 |
+
DeepSeek R1 Distill Qwen 14B,42.82,36.600000000000001,25.260000000000002,20.43,25,17.02,24.809999999999999,18.620000000000001,23.68,17.18,24.829999999999998,18.07,-1,45.640000000000001,44.710000000000001,25.379999999999999,29.41,25.260000000000002,29.41,24.98,25.920000000000002,23.609999999999999,29.43,24.690000000000001,29.649999999999999
|
| 19 |
+
Hermes-4-14B-Reasoning,80.640000000000001,79.569999999999993,56.149999999999999,41.280000000000001,55.770000000000003,34.469999999999999,55.850000000000001,38.350000000000001,53.710000000000001,32.729999999999997,55.649999999999999,36.130000000000003,-1,52.18,84.709999999999994,36.920000000000002,60,36.670000000000002,58.82,36.210000000000001,52.969999999999999,36.020000000000003,58.57,36.119999999999997,59.799999999999997
|
| 20 |
+
Hermes-4-14B,72.180000000000007,85.109999999999999,43.850000000000001,50.640000000000001,43.850000000000001,45.960000000000001,43.270000000000003,51.009999999999998,41.869999999999997,46.229999999999997,42.920000000000002,48.329999999999998,28.359999999999999,54.229999999999997,84.709999999999994,27.690000000000001,48.240000000000002,27.690000000000001,48.240000000000002,27.359999999999999,46.18,27.34,48.57,26.199999999999999,47.469999999999999
|
| 21 |
+
Qwen3-8B,70.900000000000006,65.959999999999994,51.030000000000001,37.869999999999997,51.030000000000001,33.619999999999997,50.57,36.520000000000003,48.899999999999999,33.600000000000001,49.93,33.200000000000003,-1,70.769999999999996,83.530000000000001,51.409999999999997,40,51.149999999999999,38.82,50.579999999999998,37.469999999999999,49.170000000000002,38.759999999999998,50.18,38.729999999999997
|
| 22 |
+
CodeLlama 70B,67.310000000000002,77.450000000000003,34.740000000000002,38.719999999999999,34.490000000000002,36.170000000000002,34.030000000000001,39.18,32.880000000000003,36.140000000000001,33.829999999999998,37.93,24.329999999999998,92.560000000000002,88.239999999999995,35.770000000000003,52.939999999999998,35.380000000000003,51.759999999999998,34.810000000000002,50.619999999999997,32.340000000000003,51.890000000000001,34.520000000000003,52.359999999999999
|
| 23 |
+
DeepSeek Coder 33B,64.489999999999995,88.939999999999998,23.59,46.380000000000003,23.59,44.259999999999998,23.370000000000001,44.539999999999999,23.32,41.539999999999999,22.739999999999998,45.039999999999999,24.579999999999998,76.540000000000006,88.239999999999995,40.130000000000003,50.590000000000003,39.869999999999997,50.590000000000003,38.700000000000003,51.5,37.329999999999998,50.219999999999999,38.399999999999999,51.630000000000003
|
| 24 |
+
QwenCoder 2.5 32B,90.260000000000005,81.280000000000001,46.539999999999999,47.659999999999997,46.409999999999997,45.109999999999999,45.950000000000003,48.810000000000002,44.420000000000002,45.039999999999999,45.57,47.359999999999999,31.07,86.150000000000006,85.879999999999995,48.210000000000001,54.119999999999997,47.560000000000002,54.119999999999997,47.350000000000001,54.590000000000003,45.350000000000001,54.119999999999997,46.780000000000001,55.140000000000001
|
| 25 |
+
DeepCoder 14B,45.130000000000003,42.549999999999997,28.719999999999999,25.530000000000001,28.719999999999999,22.98,28.579999999999998,26.059999999999999,26.859999999999999,22.940000000000001,28.16,24.140000000000001,-1,63.460000000000001,49.409999999999997,34.359999999999999,37.649999999999999,34.100000000000001,35.289999999999999,34.170000000000002,32.789999999999999,32.579999999999998,35.289999999999999,33.409999999999997,35.280000000000001
|
| 26 |
+
QwenCoder 2.5 14B,80.900000000000006,82.980000000000004,39.869999999999997,50.640000000000001,38.969999999999999,47.659999999999997,37.490000000000002,47.82,36.369999999999997,46.979999999999997,36.729999999999997,48.399999999999999,37.530000000000001,81.409999999999997,84.709999999999994,43.079999999999998,50.590000000000003,42.439999999999998,50.590000000000003,42.020000000000003,49.509999999999998,40.479999999999997,50.859999999999999,42.109999999999999,50.149999999999999
|
| 27 |
+
SeedCoder 8B,96.409999999999997,89.359999999999999,54.359999999999999,51.490000000000002,54.229999999999997,48.509999999999998,53.68,51.520000000000003,51.5,47.539999999999999,52.560000000000002,51.700000000000003,28.23,78.209999999999994,91.760000000000005,38.210000000000001,47.060000000000002,38.079999999999998,43.530000000000001,38.100000000000001,37.890000000000001,36.090000000000003,43.770000000000003,37.539999999999999,43.869999999999997
|
| 28 |
+
SeedCoder 8B Reasoning,68.719999999999999,56.170000000000002,48.079999999999998,34.039999999999999,48.079999999999998,30.640000000000001,48.270000000000003,33.409999999999997,45.780000000000001,29.859999999999999,47,31.27,-1,84.489999999999995,80,48.969999999999999,63.530000000000001,48.850000000000001,63.530000000000001,48.68,54.979999999999997,46.32,62.969999999999999,47.530000000000001,66.099999999999994
|
| 29 |
+
OpenCoder 8B,80,80,27.949999999999999,43.829999999999998,27.949999999999999,42.130000000000003,27.52,42.439999999999998,26.07,40.979999999999997,27.280000000000001,42.460000000000001,16.170000000000002,81.030000000000001,94.120000000000005,36.789999999999999,49.409999999999997,36.149999999999999,44.710000000000001,35.890000000000001,41.219999999999999,34.240000000000002,44.579999999999998,35.899999999999999,45.079999999999998
|
| 30 |
+
QwenCoder 2.5 7B,19.23,83.400000000000006,7.1799999999999997,40.850000000000001,7.1799999999999997,38.719999999999999,7.0199999999999996,42.310000000000002,7.1500000000000004,38.710000000000001,7.0800000000000001,41.200000000000003,28.329999999999998,76.150000000000006,90.590000000000003,35,48.240000000000002,34.740000000000002,48.240000000000002,34.619999999999997,44.93,32.799999999999997,48.399999999999999,34.649999999999999,49.759999999999998
|
| 31 |
+
DeepSeek Coder 6.7B,86.030000000000001,89.359999999999999,31.280000000000001,45.960000000000001,31.149999999999999,42.979999999999997,31.16,47.369999999999997,29.149999999999999,43.299999999999997,31.079999999999998,48.170000000000002,24.629999999999999,69.099999999999994,83.530000000000001,31.670000000000002,37.649999999999999,30.129999999999999,37.649999999999999,30.190000000000001,36.450000000000003,28.16,37.469999999999999,29.82,37.659999999999997
|
| 32 |
+
CodeV R1 Distill Qwen 7B,56.409999999999997,79.150000000000006,33.969999999999999,50.640000000000001,33.850000000000001,48.939999999999998,33.799999999999997,50.920000000000002,32.490000000000002,49.039999999999999,33.710000000000001,50.670000000000002,-1,94.230000000000004,87.060000000000002,22.18,63.530000000000001,22.18,63.530000000000001,22.210000000000001,57.75,20.73,63.240000000000002,22.079999999999998,63.890000000000001
|
| 33 |
+
HaVen-CodeQwen,93.969999999999999,84.260000000000005,46.789999999999999,47.229999999999997,46.280000000000001,44.259999999999998,45.469999999999999,42.270000000000003,44.289999999999999,42.07,45.5,44.270000000000003,25.140000000000001,95,95.290000000000006,50,67.060000000000002,50,67.060000000000002,48.509999999999998,65.700000000000003,47.560000000000002,67.609999999999999,48.140000000000001,66.799999999999997
|
| 34 |
+
CodeV-QW-7B,45.259999999999998,71.489999999999995,20.640000000000001,36.170000000000002,20,29.359999999999999,19.940000000000001,30.309999999999999,19.739999999999998,24.52,19.879999999999999,28.48,20.940000000000001,92.689999999999998,96.469999999999999,51.539999999999999,57.649999999999999,51.409999999999997,57.649999999999999,51.520000000000003,56.439999999999998,48.710000000000001,57.560000000000002,51.310000000000002,58.350000000000001
|
| 35 |
+
RTLCoder Mistral,56.030000000000001,35.32,25.379999999999999,17.02,25.379999999999999,16.170000000000002,25.52,15.859999999999999,23.18,15.34,25.539999999999999,15.84,14.77,62.310000000000002,87.060000000000002,27.309999999999999,56.469999999999999,27.18,56.469999999999999,26.93,52.350000000000001,25.43,56.549999999999997,26.949999999999999,59.170000000000002
|
| 36 |
+
RTLCoder DeepSeek,84.739999999999995,76.170000000000002,40.509999999999998,42.130000000000003,40.380000000000003,37.450000000000003,39.729999999999997,36.75,38.439999999999998,35.289999999999999,39.399999999999999,36.390000000000001,19.350000000000001,79.230000000000004,85.879999999999995,36.409999999999997,55.289999999999999,36.409999999999997,55.289999999999999,36.490000000000002,50.789999999999999,34.509999999999998,55.219999999999999,36.210000000000001,54.689999999999998
|
| 37 |
+
OriGen,96.790000000000006,85.109999999999999,54.359999999999999,53.189999999999998,53.850000000000001,51.060000000000002,54.079999999999998,54.170000000000002,51.07,51.310000000000002,53.009999999999998,54.390000000000001,17.07,92.560000000000002,96.469999999999999,50.509999999999998,74.120000000000005,50.509999999999998,74.120000000000005,50.829999999999998,69.299999999999997,48.280000000000001,74.260000000000005,50.409999999999997,76.349999999999994
|
| 38 |
+
CodeV-CL-7B,32.82,54.469999999999999,13.460000000000001,29.359999999999999,13.33,22.550000000000001,13.19,23.190000000000001,13.26,21.469999999999999,13.19,22.190000000000001,12.27,92.180000000000007,98.819999999999993,31.789999999999999,49.409999999999997,31.789999999999999,49.409999999999997,31.739999999999998,48.130000000000003,29.829999999999998,49.340000000000003,31.609999999999999,49.079999999999998
|
| 39 |
+
CodeV-DS-6.7B,34.359999999999999,69.790000000000006,14.74,41.280000000000001,14.359999999999999,37.869999999999997,14.380000000000001,35.890000000000001,14.130000000000001,35.740000000000002,14.19,36.18,21.260000000000002,95.769999999999996,100,46.670000000000002,61.18,46.670000000000002,61.18,47.07,61.060000000000002,44.189999999999998,61.130000000000003,46.469999999999999,60.950000000000003
|
| 40 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 41 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 42 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 43 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 44 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 45 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 46 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 47 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 48 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 49 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 50 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 51 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 52 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 53 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 54 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 55 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 56 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 57 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 58 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 59 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 60 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 61 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 62 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 63 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 64 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 65 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 66 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 67 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 68 |
+
,,,,,,,,,,,,,,,,,,,,,,,,,
|
| 69 |
+
|