Update app.py
app.py CHANGED
@@ -29,6 +29,7 @@ SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
 #CONTACT_DATASET = f"{OWNER}/contact_info"
 RESULTS_DATASET = f"{OWNER}/results_public"
 LEADERBOARD_PATH = f"HiTZ/Critical_Questions_Leaderboard"
+METRIC = 'similarity'
 api = HfApi()
 
 YEAR_VERSION = "2025"
@@ -162,28 +163,29 @@ def add_new_eval(
         if id_to_eval == intervention_id:
             references = gold_dataset['cqs']
             reference_set = [row['cq'] for row in references[indx]]
-            print(reference_set, flush=True)
+            #print(reference_set, flush=True)
             for cq in line['cqs']:
                 # TODO: compare to each reference and get a value
                 cq_text = cq['cq']
-                print(cq_text, flush=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
+                #print(cq_text, flush=True)
+
+
+                if METRIC == 'similarity':
+                    sentence_embedding = similarity_model.encode(cq_text)
+                    reference_embedding = similarity_model.encode(reference_set)
+                    sims = similarity_model.similarity(sentence_embedding, reference_embedding).tolist()[0]
+                    #print(sims, flush=True)
+
+                    winner = np.argmax(sims)
+                    # make sure the similarity of the winning reference sentence is at least 0.65
+                    if sims[winner] > 0.65:
+                        label = references[indx][winner]['label']
+                        if label == 'Useful':
+                            score += 1/3
                 #else:
                 # label = 'not_able_to_evaluate'
-
-            return format_error(score)
+            print(indx, score, flush=True)
+            #return format_error(score)
 
 
 
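For context, the scoring rule introduced in this commit can be read as a small standalone function. The sketch below is illustrative, not taken from app.py: it assumes similarity_model is a sentence-transformers SentenceTransformer whose similarity() method returns cosine similarities (available in recent sentence-transformers releases), and the checkpoint name and the score_candidate helper are placeholders.

import numpy as np
from sentence_transformers import SentenceTransformer

# Hypothetical checkpoint: app.py defines similarity_model elsewhere,
# and the actual model name is not visible in this diff.
similarity_model = SentenceTransformer("all-MiniLM-L6-v2")

def score_candidate(cq_text, references, threshold=0.65):
    # references is one intervention's reference list: dicts with 'cq' and
    # 'label' keys, mirroring gold_dataset['cqs'][indx] in the diff above.
    reference_set = [row['cq'] for row in references]
    sentence_embedding = similarity_model.encode(cq_text)
    reference_embedding = similarity_model.encode(reference_set)
    sims = similarity_model.similarity(sentence_embedding, reference_embedding).tolist()[0]
    winner = int(np.argmax(sims))
    # Credit 1/3 only when the closest reference clears the 0.65 threshold
    # and that reference is labelled 'Useful'.
    if sims[winner] > threshold and references[winner]['label'] == 'Useful':
        return 1 / 3
    return 0.0

Each candidate question that matches a 'Useful' reference adds 1/3 to the running score, mirroring the increment in the commit.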