Spaces:
Sleeping
Sleeping
Maciej
committed on
Commit
·
4f08cdd
1
Parent(s):
35b8093
Add filters to tabs
Browse files- app.py +45 -13
- results.jsonl +0 -0
app.py
CHANGED
|
@@ -8,7 +8,7 @@ abs_path = Path(__file__).parent
|
|
| 8 |
|
| 9 |
|
| 10 |
def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
|
| 11 |
-
df = df[df["language"] == "
|
| 12 |
df = df[["model", "temperature", "f1_macro", "weighted_f1", "accuracy"]]
|
| 13 |
df = df.sort_values(by=sort_column, ascending=False)
|
| 14 |
df.insert(0, "Rank", range(1, len(df) + 1))
|
|
@@ -40,17 +40,17 @@ def build_ds_dict(df: pd.DataFrame):
|
|
| 40 |
|
| 41 |
|
| 42 |
def build_emo_dict(df: pd.DataFrame):
|
| 43 |
-
df = df[df["language"] == "
|
| 44 |
emo_data = defaultdict(lambda: defaultdict(dict))
|
| 45 |
emotions = df.iloc[0].metrics_per_label.keys() - ["accuracy", "macro avg", "weighted avg"]
|
| 46 |
for row in df.itertuples():
|
| 47 |
for emotion in emotions:
|
| 48 |
emo_data[row.model][row.temperature][emotion] = row.metrics_per_label[emotion].get("f1-score")
|
| 49 |
-
emo_data[row.model][row.temperature]["
|
| 50 |
return emo_data
|
| 51 |
|
| 52 |
|
| 53 |
-
def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
|
| 54 |
df = []
|
| 55 |
for model, inner in lang_dict.items():
|
| 56 |
for temperature, metrics in inner.items():
|
|
@@ -69,8 +69,9 @@ def leaderboard_per_group(lang_dict, metric: str = "f1_macro"):
|
|
| 69 |
for col in df.columns.difference(["model", "temperature"]):
|
| 70 |
df[col] = df[col].round(4)
|
| 71 |
|
| 72 |
-
df = df[["model", "temperature"
|
| 73 |
-
|
|
|
|
| 74 |
df.insert(0, "Rank", range(1, len(df) + 1))
|
| 75 |
|
| 76 |
return df
|
|
@@ -80,38 +81,69 @@ def app():
|
|
| 80 |
with gr.Blocks() as demo:
|
| 81 |
gr.Markdown("# π Leaderboard Viewer")
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
with gr.Tabs():
|
| 84 |
with gr.Tab("Overall Results"):
|
| 85 |
overall_table = gr.Dataframe()
|
| 86 |
|
| 87 |
with gr.Tab("Results per Language"):
|
|
|
|
|
|
|
| 88 |
lang_table = gr.Dataframe()
|
| 89 |
|
| 90 |
with gr.Tab("Results per Dataset"):
|
|
|
|
|
|
|
| 91 |
dataset_table = gr.Dataframe()
|
| 92 |
|
| 93 |
with gr.Tab("Results per Emotion"):
|
|
|
|
| 94 |
emotion_table = gr.Dataframe()
|
| 95 |
|
| 96 |
df_state = gr.State()
|
| 97 |
|
| 98 |
-
def update_leaderboards(select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
|
| 99 |
df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
|
| 100 |
lang_dict = build_lang_dict(df)
|
| 101 |
ds_dict = build_ds_dict(df)
|
| 102 |
emo_dict = build_emo_dict(df)
|
| 103 |
overall = overall_leaderboard(df)
|
| 104 |
-
by_lang = leaderboard_per_group(lang_dict, metric=select_lang_metric)
|
| 105 |
-
by_dataset = leaderboard_per_group(ds_dict, metric=select_ds_metric)
|
| 106 |
-
by_emotion = leaderboard_per_group(emo_dict)
|
| 107 |
-
return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
|
| 108 |
|
| 109 |
demo.load(
|
| 110 |
update_leaderboards,
|
| 111 |
-
inputs=[],
|
| 112 |
outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
|
| 113 |
)
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
return demo
|
| 116 |
|
| 117 |
if __name__ == "__main__":
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
def overall_leaderboard(df: pd.DataFrame, sort_column: str = "f1_macro"):
|
| 11 |
+
df = df[df["language"] == "All"]
|
| 12 |
df = df[["model", "temperature", "f1_macro", "weighted_f1", "accuracy"]]
|
| 13 |
df = df.sort_values(by=sort_column, ascending=False)
|
| 14 |
df.insert(0, "Rank", range(1, len(df) + 1))
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def build_emo_dict(df: pd.DataFrame):
|
| 43 |
+
df = df[df["language"] == "All"]
|
| 44 |
emo_data = defaultdict(lambda: defaultdict(dict))
|
| 45 |
emotions = df.iloc[0].metrics_per_label.keys() - ["accuracy", "macro avg", "weighted avg"]
|
| 46 |
for row in df.itertuples():
|
| 47 |
for emotion in emotions:
|
| 48 |
emo_data[row.model][row.temperature][emotion] = row.metrics_per_label[emotion].get("f1-score")
|
| 49 |
+
emo_data[row.model][row.temperature]["All"] = row.f1_macro
|
| 50 |
return emo_data
|
| 51 |
|
| 52 |
|
| 53 |
+
def leaderboard_per_group(lang_dict, use_cols, metric: str = "f1_macro"):
|
| 54 |
df = []
|
| 55 |
for model, inner in lang_dict.items():
|
| 56 |
for temperature, metrics in inner.items():
|
|
|
|
| 69 |
for col in df.columns.difference(["model", "temperature"]):
|
| 70 |
df[col] = df[col].round(4)
|
| 71 |
|
| 72 |
+
df = df[["model", "temperature"] + sorted(use_cols)]
|
| 73 |
+
if "All" in use_cols:
|
| 74 |
+
df = df.sort_values(by="All", ascending=False)
|
| 75 |
df.insert(0, "Rank", range(1, len(df) + 1))
|
| 76 |
|
| 77 |
return df
|
|
|
|
| 81 |
with gr.Blocks() as demo:
|
| 82 |
gr.Markdown("# π Leaderboard Viewer")
|
| 83 |
|
| 84 |
+
languages = ['All', 'Bengali', 'English', 'French', 'German', 'Italian', 'Polish', 'Russian', 'Spanish']
|
| 85 |
+
datasets = ['All', 'CaFE', 'CREMA-D', 'EMNS', 'Emozionalmente', 'eNTERFACE', 'JL-Corpus', 'MESD', 'nEMO', 'Oreau', 'PAVOQUE', 'RAVDESS', 'RESD', 'SUBESCO']
|
| 86 |
+
emotions = ['All', 'anger', 'anxiety',
|
| 87 |
+
'apology', 'assertiveness', 'calm', 'concern', 'disgust',
|
| 88 |
+
'encouragement', 'enthusiasm', 'excitement', 'fear', 'happiness',
|
| 89 |
+
'neutral', 'poker', 'sadness', 'sarcasm', 'surprise']
|
| 90 |
+
metric=["f1_macro", "accuracy", "weighted_f1"]
|
| 91 |
+
|
| 92 |
with gr.Tabs():
|
| 93 |
with gr.Tab("Overall Results"):
|
| 94 |
overall_table = gr.Dataframe()
|
| 95 |
|
| 96 |
with gr.Tab("Results per Language"):
|
| 97 |
+
languages_filter = gr.CheckboxGroup(choices=languages, label="Filter by Language", value=languages)
|
| 98 |
+
select_lang_metric = gr.Radio(metric, value='f1_macro', label="Metric")
|
| 99 |
lang_table = gr.Dataframe()
|
| 100 |
|
| 101 |
with gr.Tab("Results per Dataset"):
|
| 102 |
+
dataset_filter = gr.CheckboxGroup(choices=datasets, label="Filter by Dataset", value=datasets)
|
| 103 |
+
select_ds_metric = gr.Radio(metric, value='f1_macro', label="Metric")
|
| 104 |
dataset_table = gr.Dataframe()
|
| 105 |
|
| 106 |
with gr.Tab("Results per Emotion"):
|
| 107 |
+
emo_filter = gr.CheckboxGroup(choices=emotions, label="Filter by Emotion", value=emotions)
|
| 108 |
emotion_table = gr.Dataframe()
|
| 109 |
|
| 110 |
df_state = gr.State()
|
| 111 |
|
| 112 |
+
def update_leaderboards(languages=[], datasets=[], emotions=[], select_lang_metric="f1_macro", select_ds_metric="f1_macro"):
|
| 113 |
df = pd.read_json(str(abs_path / "results.jsonl"), lines=True)
|
| 114 |
lang_dict = build_lang_dict(df)
|
| 115 |
ds_dict = build_ds_dict(df)
|
| 116 |
emo_dict = build_emo_dict(df)
|
| 117 |
overall = overall_leaderboard(df)
|
| 118 |
+
by_lang = leaderboard_per_group(lang_dict, languages, metric=select_lang_metric)
|
| 119 |
+
by_dataset = leaderboard_per_group(ds_dict, datasets, metric=select_ds_metric)
|
| 120 |
+
by_emotion = leaderboard_per_group(emo_dict, emotions)
|
| 121 |
+
return overall, by_lang, by_dataset, by_emotion, "Loaded successfully."
|
| 122 |
|
| 123 |
demo.load(
|
| 124 |
update_leaderboards,
|
| 125 |
+
inputs=[languages_filter, dataset_filter, emo_filter],
|
| 126 |
outputs=[overall_table, lang_table, dataset_table, emotion_table, df_state]
|
| 127 |
)
|
| 128 |
+
|
| 129 |
+
def on_change(selected_languages, selected_lang_metric, selected_datasets, selected_ds_metric, selected_emotions):
|
| 130 |
+
return update_leaderboards(languages=selected_languages, select_lang_metric=selected_lang_metric, datasets=selected_datasets, select_ds_metric=selected_ds_metric, emotions=selected_emotions)
|
| 131 |
+
|
| 132 |
+
languages_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
|
| 133 |
+
[overall_table, lang_table, dataset_table, emotion_table])
|
| 134 |
+
|
| 135 |
+
select_lang_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
|
| 136 |
+
[overall_table, lang_table, dataset_table, emotion_table])
|
| 137 |
+
|
| 138 |
+
dataset_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
|
| 139 |
+
[overall_table, lang_table, dataset_table, emotion_table])
|
| 140 |
+
|
| 141 |
+
select_ds_metric.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
|
| 142 |
+
[overall_table, lang_table, dataset_table, emotion_table])
|
| 143 |
+
|
| 144 |
+
emo_filter.change(on_change, [languages_filter, select_lang_metric, dataset_filter, select_ds_metric, emo_filter],
|
| 145 |
+
[overall_table, lang_table, dataset_table, emotion_table])
|
| 146 |
+
|
| 147 |
return demo
|
| 148 |
|
| 149 |
if __name__ == "__main__":
|
results.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|