hi-melnikov committed
Commit 781a150 · 1 Parent(s): 863f952

making the submits + removing internal / external

app.py CHANGED
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import subprocess
@@ -18,6 +19,7 @@ from src.display.utils import (
 )
 from src.envs import (
     API,
+    DATA_PATH,
     H4_TOKEN,
     HF_HOME,
     HF_TOKEN_PRIVATE,
@@ -26,8 +28,9 @@ from src.envs import (
     PERSISTENT_FILE_CHECK_PATH,
     REPO_ID,
     RESET_JUDGEMENT_ENV,
+    SUBMITS_META_FILE,
 )
-from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench
+from src.leaderboard.build_leaderboard import build_leadearboard_df, download_meta

 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

@@ -37,7 +40,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
 # Start ephemeral Spaces on PRs (see config in README.md)
 enable_space_ci()

-download_openbench()
+download_meta()


 def build_demo():
@@ -75,15 +78,45 @@ def build_demo():

         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
-            submitter_username = gr.Textbox(label="Username")
+            submitter_username = gr.Textbox(label="Username")  # can we get this info from hf??

             def upload_file(file):
-                file_path = file.name.split("/")[-1] if "/" in file.name else file.name
-                logging.info("New submition: file saved to %s", file_path)
+                file_name = file.name.split("/")[-1] if "/" in file.name else file.name
+
+                with open(f"{DATA_PATH}/{SUBMITS_META_FILE}", "r", encoding="utf-8") as submit_meta_file:
+                    current_info = json.loads(submit_meta_file)
+
+                # for now just do not save same name model
+                if model_name_textbox in current_info:
+                    return False
+
+                submit_info = {
+                    "username": submitter_username,
+                    "file_name": file_name,
+                }
+
+                current_info[model_name_textbox] = submit_info
+
+                with open(f"{DATA_PATH}/{SUBMITS_META_FILE}", "w", encoding="utf-8") as submit_meta_file:
+                    submit_meta_file.write(json.dumps(current_info))
+
+                logging.info(
+                    "New submition: file from %s saved to %s with model %s",
+                    submitter_username,
+                    file_name,
+                    model_name_textbox,
+                )
                 API.upload_file(
                     path_or_fileobj=file.name,
-                    path_in_repo="model_answers/external/" + file_path,
-                    repo_id="Vikhrmodels/openbench-eval",
+                    path_in_repo="arena-hard-v0.1/model_answers/" + file_name,
+                    repo_id=METAINFO_DATASET,
+                    repo_type="dataset",
+                    token=HF_TOKEN_PRIVATE,
+                )
+                API.upload_file(
+                    path_or_fileobj=SUBMITS_META_FILE,
+                    path_in_repo=SUBMITS_META_FILE,
+                    repo_id=METAINFO_DATASET,
                     repo_type="dataset",
                     token=HF_TOKEN_PRIVATE,
                 )
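For reference, the metadata round-trip in the new upload handler can be isolated into a small helper. The sketch below is not part of the repository: register_submission is a hypothetical name, the submits_info.json layout is the one introduced above, and it uses json.load on the open file handle (the committed json.loads call expects a string), plus a guard for a registry file that does not exist yet.

import json
import os


def register_submission(meta_path: str, model_name: str, username: str, file_name: str) -> bool:
    """Record one submission in the shared metadata JSON; reject duplicate model names."""
    # Load the existing registry, tolerating a missing or empty file.
    if os.path.exists(meta_path) and os.path.getsize(meta_path) > 0:
        with open(meta_path, "r", encoding="utf-8") as f:
            current_info = json.load(f)
    else:
        current_info = {}

    # For now, simply refuse to overwrite an already-submitted model name.
    if model_name in current_info:
        return False

    current_info[model_name] = {"username": username, "file_name": file_name}
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(current_info, f, ensure_ascii=False)
    return True

In the Gradio handler this would be called with the string values of the two textboxes (for example by passing the components as extra inputs to the event handler), since gr.Textbox components are not themselves strings.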
src/envs.py CHANGED
@@ -35,6 +35,8 @@ RESET_JUDGEMENT_ENV = "RESET_JUDGEMENT"

 API = HfApi(token=H4_TOKEN)

+# if any more info about service pls make this file just a json
+SUBMITS_META_FILE = "submits_info.json"
 PERSISTENT_FILE_CHECK = "persistent_file_check"
 PERSISTENT_FILE_CHECK_PATH = f"{DATA_PATH}/{PERSISTENT_FILE_CHECK}"

src/gen/gen_answer.py CHANGED
@@ -3,6 +3,7 @@
 Usage:
 python gen_api_answer --parallel 32
 """
+
 import argparse
 import concurrent.futures
 import json
@@ -138,7 +139,7 @@ if __name__ == "__main__":
     settings = make_config(args.setting_file)
     endpoint_list = make_config(args.endpoint_file)

-    existing_answer = load_model_answers(os.path.join("data", settings["bench_name"], "model_answers", "internal"))
+    existing_answer = load_model_answers(os.path.join("data", settings["bench_name"], "model_answers"))

     print(settings)

src/gen/gen_judgment.py CHANGED
@@ -116,12 +116,12 @@ def judgment(**args):
         result = {"user_prompt": conv[1]["content"], "judgment": judgment, "score": score}
         output["games"].append(result)

-    with open(output_file, "a") as f:
+    with open(output_file, "a", encoding="utf-8") as f:
         f.write(json.dumps(output, ensure_ascii=False) + "\n")
     huggingface_hub.HfApi().upload_file(
         output_file,
         path_in_repo=f'model_judgment/{configs["judge_model"]}/{output_file.split("/")[-1]}',
-        repo_id="Vikhrmodels/openbench-eval",
+        repo_id="Vikhrmodels/-eval",
         repo_type="dataset",
     )

@@ -145,21 +145,16 @@ if __name__ == "__main__":
     pattern = re.compile(configs["regex_pattern"])

     question_file = os.path.join(f"{HF_HOME}/data", configs["bench_name"], "question.jsonl")
-    internal_dir = os.path.join(f"{HF_HOME}/data", configs["bench_name"], "model_answers/internal")
-    external_dir = os.path.join(f"{HF_HOME}/data", configs["bench_name"], "model_answers/external")
+    answers_dir = os.path.join(f"{HF_HOME}/data", configs["bench_name"], "model_answers")
     ref_answer_dir = os.path.join(f"{HF_HOME}/data", configs["bench_name"], "reference_answer")

     questions = load_questions(question_file)
-    model_answers_external = load_model_answers(external_dir)
-    model_answers_internal = load_model_answers(internal_dir)
-
-    # internal has priority
-    model_answers = {**model_answers_external, **model_answers_internal}
+    model_answers = load_model_answers(answers_dir)

     # if user choose a set of models, only judge those models
     models = [
         model.split("/")[-1].split(".")[0]
-        for model in glob.glob(f"{HF_HOME}/data/arena-hard-v0.1/model_answers/external/*.jsonl")
+        for model in glob.glob(f"{HF_HOME}/data/arena-hard-v0.1/model_answers/*.jsonl")
     ]

     ref_answers = None
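With the internal/external split removed, each model's answers are expected as a single .jsonl file directly under model_answers. As a rough sketch of what loading that merged directory amounts to (the helper name and return shape are illustrative, not the repository's actual load_model_answers):

import glob
import json
import os


def load_answers(answers_dir: str) -> dict:
    """Map model name -> list of answer records, one model per .jsonl file."""
    answers = {}
    for path in sorted(glob.glob(os.path.join(answers_dir, "*.jsonl"))):
        model = os.path.splitext(os.path.basename(path))[0]
        with open(path, "r", encoding="utf-8") as f:
            answers[model] = [json.loads(line) for line in f if line.strip()]
    return answers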
src/gen/show_result.py CHANGED
@@ -198,7 +198,7 @@ if __name__ == "__main__":
         args.load_battles and args.load_bootstrap
     ), "If loading prexisting bootstrapping data, you must also load preexisting battles."

-    answer_dir = os.path.join("data", args.bench_name, "model_answers/external")
+    answer_dir = os.path.join("data", args.bench_name, "model_answers")
     model_answers = load_model_answers(answer_dir)

     if args.load_battles:
@@ -265,7 +265,7 @@ if __name__ == "__main__":
     huggingface_hub.HfApi().upload_file(
         path_or_fileobj=json_file_name,
         path_in_repo="leaderboard.json",
-        repo_id="Vikhrmodels/arena-leaderboard-metainfo",
+        repo_id=METAINFO_DATASET,
         repo_type="dataset",
         token=HF_TOKEN_PRIVATE,
     )
src/leaderboard/build_leaderboard.py CHANGED
@@ -6,7 +6,7 @@ import time
 import pandas as pd
 from huggingface_hub import snapshot_download

-from src.envs import DATA_ARENA_PATH, DATA_PATH, HF_TOKEN_PRIVATE, METAINFO_DATASET
+from src.envs import DATA_PATH, HF_TOKEN_PRIVATE, METAINFO_DATASET, SUBMITS_META_FILE

 # Configure logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -51,16 +51,17 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
     logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)


-def download_openbench():
-    # download prev autogenerated leaderboard files
+def download_meta():
+    # download all metainfo about submits
+    # also all the submits questions
+    # also all other stuff
     download_dataset(METAINFO_DATASET, DATA_PATH)

-    # download answers of different models that we trust
-    download_dataset("Vikhrmodels/openbench-eval", DATA_ARENA_PATH)
-

 def build_leadearboard_df():
     # Retrieve the leaderboard DataFrame
     with open(f"{DATA_PATH}/leaderboard.json", "r", encoding="utf-8") as eval_file:
-        leaderboard_df = pd.DataFrame.from_records(json.load(eval_file))
-    return leaderboard_df.copy()
+        battle_info = pd.DataFrame.from_records(json.load(eval_file))
+    with open(f"{DATA_PATH}/{SUBMITS_META_FILE}", "r", encoding="utf-8") as submit_meta_file:
+        submit_info = pd.DataFrame.from_records(json.load(submit_meta_file))
+    return battle_info.copy()
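Putting the pieces together, start-up in app.py after this commit presumably looks roughly like the sketch below (an assumption, not verbatim code from the Space): METAINFO_DATASET is expected to contain both leaderboard.json and submits_info.json, and note that the committed build_leadearboard_df reads the submit metadata into submit_info but still returns only the battle DataFrame.

# Hypothetical start-up flow after this commit.
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_meta

download_meta()                           # snapshot METAINFO_DATASET into DATA_PATH
leaderboard_df = build_leadearboard_df()  # leaderboard.json -> pandas DataFrame
print(leaderboard_df.head())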