Spaces:

Kamyar-zeinalipour
/

CS-mohadesse

Sleeping

App Files Community

Kamyar-zeinalipour committed on Jun 11, 2025

Commit

cab9a43

verified ·

1 Parent(s): 9aaf42f

Update app.py

Browse files

Files changed (1) hide show

app.py +178 -260

app.py CHANGED Viewed

@@ -1,337 +1,255 @@
-# paragraph_annotation_tool.py NEW ui
 """
-Paragraph-level annotation tool with per-model comments
-────────────────────────────────────────────────────────────────────────────
-• Upload a CSV containing at least:
-      Content_Paragraph,
-      <model>_prompt1, <model>_prompt2             … for each model
-• Enter your annotator name and click “Start / Resume”.
-• Rate each prompt A / B / C, optionally leave a comment for every model,
-  navigate Back / Next, download the annotated CSV.
-New in this version
-───────────────────
-• One **comment textbox per model** (shown next to the model’s two ratings).
-• Comments are stored in the CSV under columns named  ``comment_<model>``.
-• Blank comments are saved as the literal string ``"no comment"``.
 """
 from __future__ import annotations
-import gradio as gr, pandas as pd
-import random, time, os, shutil, uuid
 from typing import List
-# ─── CONFIG ───────────────────────────────────────────────────────────────
-MAX_MODELS      = 10                      # pre-allocate up to this many models
-CONTENT_COL     = "Content_Paragraph"
-PROMPT1_SUFFIX  = "_prompt1"
-PROMPT2_SUFFIX  = "_prompt2"
-COMMENT_PREFIX  = "comment_"            # <COMMENT_PREFIX><model>
-PERM_COL        = "perm_models"
-RATING_OPTS     = ["A", "B", "C"]
-# ─── GLOBALS (filled after CSV load) ───────────────────────────────────────
 df: pd.DataFrame | None = None
-csv_path: str | None    = None
-models: List[str]       = []
-TOTAL = 0
-annotator = ""
 current_start: float | None = None
-# ─── CSV HELPERS ───────────────────────────────────────────────────────────
-def load_csv(path: str):
-    """Read CSV, discover model columns, add helper columns if needed."""
-    global df, models, TOTAL, csv_path
-    csv_path = path
-    df = pd.read_csv(csv_path, keep_default_na=False)
-    TOTAL = len(df)
-    models.clear()
-    for col in df.columns:
-        if col.endswith(PROMPT1_SUFFIX) and not col.startswith("rating_"):
-            m = col[: -len(PROMPT1_SUFFIX)]
             if f"{m}{PROMPT2_SUFFIX}" not in df.columns:
-                raise ValueError(f"Found '{col}' but no '{m}{PROMPT2_SUFFIX}'")
             models.append(m)
     if not models:
-        raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found")
     if len(models) > MAX_MODELS:
-        raise ValueError(
-            f"CSV has {len(models)} models but MAX_MODELS is {MAX_MODELS}")
-    # helper columns
     if PERM_COL not in df.columns:
         df[PERM_COL] = ""
     for m in models:
-        # rating columns per prompt
         for p in ("prompt1", "prompt2"):
-            rc = f"rating_{m}__{p}"
-            if rc not in df.columns:
-                df[rc] = ""
-        # NEW → comment column per model
-        cc = f"{COMMENT_PREFIX}{m}"
-        if cc not in df.columns:
-            df[cc] = "no comment"  # default value
     for col in ("annotator", "annotation_time"):
         if col not in df.columns:
             df[col] = "" if col == "annotator" else 0.0
-# ─── BOOK-KEEPING ──────────────────────────────────────────────────────────
 def first_incomplete() -> int:
     for i, row in df.iterrows():
         for m in models:
-            if (
-                row[f"rating_{m}__prompt1"] == "" or
-                row[f"rating_{m}__prompt2"] == ""
-            ):
                 return i
     return 0
 def get_perm(idx: int) -> List[str]:
-    cell = str(df.at[idx, PERM_COL])
-    if not cell:
-        seq = models.copy()
-        random.shuffle(seq)
-        df.at[idx, PERM_COL] = "|".join(seq)
-        df.to_csv(csv_path, index=False)
-    return df.at[idx, PERM_COL].split("|")
 def build_row(idx: int):
-    """Return fixed-length tuple of widget values for example *idx*."""
-    global current_start
     row   = df.loc[idx]
     order = get_perm(idx)
-    outs, rates, comms = [], [], []
     for m in order:
-        outs.append(row[f"{m}{PROMPT1_SUFFIX}"])
-        outs.append(row[f"{m}{PROMPT2_SUFFIX}"])
-        rates.append(row[f"rating_{m}__prompt1"] or None)
-        rates.append(row[f"rating_{m}__prompt2"] or None)
-        val = row[f"{COMMENT_PREFIX}{m}"]
-        comms.append("" if val == "no comment" else val)
-    # pad up to MAX_MODELS
-    outs  += [""]  * (MAX_MODELS*2 - len(outs))
-    rates += [None]* (MAX_MODELS*2 - len(rates))
-    comms += ["" ] * (MAX_MODELS   - len(comms))
-    ready = all(r in RATING_OPTS for r in rates[: 2*len(models)])
     current_start = time.time()
-    header = f"Example {idx+1}/{TOTAL}"
-    return (
-        idx, header, row[CONTENT_COL],
-        *outs, *rates, *comms,
-        gr.update(visible=True),                   # back_btn update
-        gr.update(visible=True, interactive=ready) # next_btn update
-    )
 def save_row(idx: int, ratings: List[str], comments: List[str]):
-    """Persist ratings & comments for example *idx* → CSV."""
-    if not all(r in RATING_OPTS for r in ratings[: 2*len(models)]):
         return
     elapsed = time.time() - current_start if current_start else 0.0
-    order = get_perm(idx)
-    p = 0  # rating pointer
-    for m in order:
         df.at[idx, f"rating_{m}__prompt1"] = ratings[p]; p += 1
         df.at[idx, f"rating_{m}__prompt2"] = ratings[p]; p += 1
-    # comments
-    for m, c in zip(order, comments):
-        clean = (c or "").strip()
-        df.at[idx, f"{COMMENT_PREFIX}{m}"] = clean or "no comment"
     df.at[idx, "annotator"]       = annotator
     df.at[idx, "annotation_time"] = float(elapsed)
     df.to_csv(csv_path, index=False)
-def _writable_dir() -> str:
-    """Return /data on Spaces, /tmp elsewhere – whichever is writeable."""
-    for d in ("/data", "/tmp"):
-        try:
-            os.makedirs(d, exist_ok=True)
-            with open(os.path.join(d, ".touch"), "w"):
-                pass
-            return d
-        except PermissionError:
-            continue
-    raise PermissionError("No writable directory found.")
-# ─── GRADIO UI ────────────────────────────────────────────────────────────
 with gr.Blocks(title="Paragraph Annotation Tool") as demo:
-    # shared state
-    idx_state      = gr.State(0)  # current example index
-    nmodels_state  = gr.State(0)  # how many model slots are active
-    gr.Markdown("## Paragraph Annotation Tool")
-    with gr.Row():
-        upload_box = gr.File(label="Upload / Resume CSV", file_types=[".csv"])
-        annot_box  = gr.Textbox(label="Annotator name")
-        start_btn  = gr.Button("Start / Resume")
-    annotator_label = gr.Markdown(visible=False)
-    annotation_area = gr.Column(visible=False)
-    with annotation_area:
         idx_box  = gr.Number(label="Index", interactive=False)
         hdr_box  = gr.Markdown()
-        para_box = gr.Textbox(lines=6, interactive=False,
-                              label="Content Paragraph")
-        # Pre-allocate up to MAX_MODELS slots
-        out_boxes, radio_widgets, comment_boxes = [], [], []
         for _ in range(MAX_MODELS):
-            with gr.Row():
-                # prompts + ratings
-                with gr.Column(scale=2):
-                    out1 = gr.Textbox(lines=6, interactive=False)
-                    rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)", value=None)
-                with gr.Column(scale=2):
-                    out2 = gr.Textbox(lines=6, interactive=False)
-                    rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)", value=None)
-                # NEW → comment textbox
-                with gr.Column(scale=1):
-                    com = gr.Textbox(lines=2, label="Comment", placeholder="Optional…")
                 out_boxes.extend((out1, out2))
                 radio_widgets.extend((rad1, rad2))
-                comment_boxes.append(com)
-        back_btn     = gr.Button("⟵ Back", visible=False)
-        next_btn     = gr.Button("Save & Next ⟶", visible=False)
-        download_btn = gr.Button("💾 Download CSV", visible=False)
-    # Enable NEXT when visible radios are filled (comments are optional)
-    def toggle_next(model_cnt: int, *vals):
-        needed = vals[: model_cnt*2]  # only rating radios
-        return gr.update(interactive=all(v in RATING_OPTS for v in needed))
     for r in radio_widgets:
-        r.change(toggle_next,
-                 inputs=[nmodels_state]+radio_widgets,
-                 outputs=next_btn)
-    # ── navigation callbacks ──────────────────────────────────────────────
     def goto(step: int):
-        def _fn(idx: int, model_cnt: int, *vals):
-            """Handle Back / Next logic."""
-            # structure of *vals*: radios (model_cnt*2) + comments (model_cnt) + next_btn
-            RADIO_COUNT = MAX_MODELS * 2
-            ratings  = list(vals[: model_cnt * 2])
-            comments = list(vals[RADIO_COUNT : RADIO_COUNT + model_cnt])
-            # save current row unless we attempted to go back without finishing ratings
-            if step != -1 or all(r in RATING_OPTS for r in ratings):
                 save_row(idx, ratings, comments)
-            new_idx = max(0, min(idx+step, TOTAL-1))
             return build_row(new_idx)
         return _fn
-    back_btn.click(
-        goto(-1),
-        inputs=[idx_state, nmodels_state]+radio_widgets+comment_boxes+[next_btn],
-        outputs=[idx_state, hdr_box, para_box,
-                 *out_boxes, *radio_widgets, *comment_boxes,
-                 back_btn, next_btn],
-    )
-    next_btn.click(
-        goto(1),
-        inputs=[idx_state, nmodels_state]+radio_widgets+comment_boxes+[next_btn],
-        outputs=[idx_state, hdr_box, para_box,
-                 *out_boxes, *radio_widgets, *comment_boxes,
-                 back_btn, next_btn],
-    )
-    # CSV download
-    def make_download():
-        if df is None:
-            raise gr.Error("No CSV loaded yet.")
-        tmp = os.path.join(_writable_dir(),
-                           f"annotations_{uuid.uuid4().hex}.csv")
-        df.to_csv(tmp, index=False)
-        return tmp
-    download_btn.click(make_download, outputs=gr.File())
-    # ── Start / Resume ────────────────────────────────────────────────────
-    def start_app(csv_file, name):
-        global annotator
-        if csv_file is None or not name.strip():
-            raise gr.Error("Please upload a CSV and enter your name.")
-        new_path = os.path.join(_writable_dir(), f"{uuid.uuid4().hex}.csv")
-        shutil.copy(csv_file.name, new_path)
-        load_csv(new_path)
         annotator = name.strip()
-        # visibility flags – one boolean per model slot
-        vis_flags = [i < len(models) for i in range(MAX_MODELS)]
-        # build first row values
-        row_vals = build_row(first_incomplete())
-        idx_val, hdr_val, para_val = row_vals[:3]
-        outs   = row_vals[3              : 3 + MAX_MODELS*2]
-        rates  = row_vals[3 + MAX_MODELS*2             : 3 + MAX_MODELS*4]
-        comms  = row_vals[3 + MAX_MODELS*4             : 3 + MAX_MODELS*5]
-        back_update, next_update = row_vals[-2:]
-        # updates for textboxes, radios, comments
-        out_updates = [
-            gr.update(value=outs[i],  visible=vis_flags[i//2])
-            for i in range(MAX_MODELS*2)
-        ]
-        radio_updates = [
-            gr.update(value=rates[i], visible=vis_flags[i//2])
-            for i in range(MAX_MODELS*2)
-        ]
-        comment_updates = [
-            gr.update(value=comms[i], visible=vis_flags[i])
-            for i in range(MAX_MODELS)
-        ]
-        return (
-            first_incomplete(),           # idx_state
-            len(models),                  # nmodels_state
-            gr.update(value=idx_val),     # idx_box
-            gr.update(value=hdr_val),     # hdr_box
-            gr.update(value=para_val),    # para_box
-            *out_updates,
-            *radio_updates,
-            *comment_updates,
-            back_update, next_update,     # nav buttons
-            gr.update(visible=True,
-                      value=f"**Annotator:** {annotator}"),
-            gr.update(visible=True),      # download_btn
-            gr.update(visible=True)       # annotation_area
-        )
-    start_btn.click(
-        start_app,
-        inputs=[upload_box, annot_box],
-        outputs=[
-            idx_state, nmodels_state,
-            idx_box, hdr_box, para_box,
-            *out_boxes, *radio_widgets, *comment_boxes,
-            back_btn, next_btn,
-            annotator_label,
-            download_btn,
-            annotation_area
-        ],
-    )
-# ─── RUN ───────────────────────────────────────────────────────────────────
 if __name__ == "__main__":
     demo.queue()
-    demo.launch()          # keep share=False on HF Spaces

 """
+Paragraph‑level annotation tool for rating two prompts from multiple LLMs.
+Patch 3 – show hidden rows
+--------------------------
+* **Bug fix:** the model rows stayed invisible after you hit **Run**
+  because Gradio needs `gr.update(visible=…)` objects returned, not
+  on‑the‑fly attribute tweaks.  The init callback now returns a
+  visibility update for every row container, so you’ll see the prompt,
+  rating and comment widgets immediately.
+* Logic still hides surplus rows when your CSV contains fewer than
+  `MAX_MODELS` models.
+* No other behaviour changed.
 """
 from __future__ import annotations
+import gradio as gr
+import pandas as pd
+import time, random
 from typing import List
+# ---------- CONFIG ----------
+CONTENT_COL      = "Content_Paragraph"
+PROMPT1_SUFFIX   = "_prompt1"
+PROMPT2_SUFFIX   = "_prompt2"
+PERM_COL         = "perm_models"
+RATING_OPTS      = ["A", "B", "C"]
+NO_COMMENT       = "No comment"
+MAX_MODELS       = 8   # UI reserves slots for up to this many models
+# ---------- GLOBAL STATE ----------
 df: pd.DataFrame | None = None
+models: List[str]        = []
+csv_path: str            = ""
+annotator: str           = ""
+TOTAL: int               = 0
 current_start: float | None = None
+# ---------- HELPERS ----------
+def discover_models() -> None:
+    global models, df
+    models = []
+    for c in df.columns:
+        if c.endswith(PROMPT1_SUFFIX) and not (
+            c.startswith("rating_") or c.startswith("comment_") or
+            c in ["perm_models", "annotator", "annotation_time"]
+        ):
+            m = c[:-len(PROMPT1_SUFFIX)]
             if f"{m}{PROMPT2_SUFFIX}" not in df.columns:
+                raise ValueError(f"Found '{c}' but no matching '{m}{PROMPT2_SUFFIX}'")
             models.append(m)
     if not models:
+        raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found in CSV")
     if len(models) > MAX_MODELS:
+        raise ValueError(f"CSV has {len(models)} model columns but UI can display only {MAX_MODELS}. Increase MAX_MODELS and restart.")
+def ensure_helper_columns() -> None:
+    global df, models
     if PERM_COL not in df.columns:
         df[PERM_COL] = ""
     for m in models:
         for p in ("prompt1", "prompt2"):
+            rcol = f"rating_{m}__{p}"
+            ccol = f"comment_{m}__{p}"
+            if rcol not in df.columns:
+                df[rcol] = ""
+            if ccol not in df.columns:
+                df[ccol] = ""
     for col in ("annotator", "annotation_time"):
         if col not in df.columns:
             df[col] = "" if col == "annotator" else 0.0
 def first_incomplete() -> int:
+    global df, models
     for i, row in df.iterrows():
         for m in models:
+            if row[f"rating_{m}__prompt1"] == "" or row[f"rating_{m}__prompt2"] == "":
                 return i
     return 0
 def get_perm(idx: int) -> List[str]:
+    global df, models
+    cell = str(df.at[idx, PERM_COL]).strip()
+    if cell:
+        seq = cell.split("|")
+        if set(seq) == set(models):
+            return seq
+    seq = models.copy(); random.shuffle(seq)
+    df.at[idx, PERM_COL] = "|".join(seq)
+    return seq
+# ---------- ROW I/O ----------
 def build_row(idx: int):
+    """Return a list of widget values with length matching *common_outputs*."""
+    global df, models, current_start, TOTAL
     row   = df.loc[idx]
     order = get_perm(idx)
+    txt_outputs, ratings, comments = [], [], []
     for m in order:
+        txt_outputs.extend([
+            row[f"{m}{PROMPT1_SUFFIX}"],
+            row[f"{m}{PROMPT2_SUFFIX}"],
+        ])
+        ratings.extend([
+            row[f"rating_{m}__prompt1"] or None,
+            row[f"rating_{m}__prompt2"] or None,
+        ])
+        for p in ("prompt1", "prompt2"):
+            comments.append(row[f"comment_{m}__{p}"])
+    pad_slots = MAX_MODELS - len(order)
+    txt_outputs.extend(["", ""] * pad_slots)
+    ratings.extend(["A", "A"] * pad_slots)
+    comments.extend(["", ""] * pad_slots)
     current_start = time.time()
+    ready  = all(r in RATING_OPTS for r in ratings[:2*len(models)])
+    header = f"Example {idx + 1}/{TOTAL}"
+    return [idx, idx, header, row[CONTENT_COL]] + \
+           txt_outputs + ratings + comments + \
+           [gr.update(), gr.update(interactive=ready)]
 def save_row(idx: int, ratings: List[str], comments: List[str]):
+    global df, annotator, csv_path, current_start
+    needed = 2 * len(models)
+    if not all(r in RATING_OPTS for r in ratings[:needed]):
         return
     elapsed = time.time() - current_start if current_start else 0.0
+    p = q = 0
+    for m in get_perm(idx):
         df.at[idx, f"rating_{m}__prompt1"] = ratings[p]; p += 1
         df.at[idx, f"rating_{m}__prompt2"] = ratings[p]; p += 1
+        c1 = comments[q].strip() or NO_COMMENT; q += 1
+        c2 = comments[q].strip() or NO_COMMENT; q += 1
+        df.at[idx, f"comment_{m}__prompt1"] = c1
+        df.at[idx, f"comment_{m}__prompt2"] = c2
     df.at[idx, "annotator"]       = annotator
     df.at[idx, "annotation_time"] = float(elapsed)
     df.to_csv(csv_path, index=False)
+# ---------- GRADIO ----------
 with gr.Blocks(title="Paragraph Annotation Tool") as demo:
+    gr.Markdown("# Paragraph Annotation Tool")
+    # Setup panel
+    with gr.Column() as setup_panel:
+        csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
+        name_input = gr.Textbox(label="Your Name")
+        run_btn    = gr.Button("Run")
+    annotator_md = gr.Markdown(visible=False)
+    # Annotation panel (hidden until CSV is loaded)
+    with gr.Column(visible=False) as annotation_panel:
+        state = gr.State(0)
         idx_box  = gr.Number(label="Index", interactive=False)
         hdr_box  = gr.Markdown()
+        para_box = gr.Textbox(label="Content Paragraph", interactive=False, lines=6)
+        out_boxes, radio_widgets, comment_widgets = [], [], []
+        row_containers = []
         for _ in range(MAX_MODELS):
+            with gr.Row(visible=False) as r:
+                with gr.Column():
+                    out1 = gr.Textbox(interactive=False, lines=6)
+                    rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)")
+                    com1 = gr.Textbox(lines=2, label="Comment (P1)")
+                with gr.Column():
+                    out2 = gr.Textbox(interactive=False, lines=6)
+                    rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)")
+                    com2 = gr.Textbox(lines=2, label="Comment (P2)")
                 out_boxes.extend((out1, out2))
                 radio_widgets.extend((rad1, rad2))
+                comment_widgets.extend((com1, com2))
+                row_containers.append(r)
+        back_btn = gr.Button("⟵ Back")
+        next_btn = gr.Button("Save & Next ⟶", interactive=False)
+        download_btn = gr.Button("Download CSV")
+        csv_file_out = gr.File()
+    # ---------- CALLBACKS ----------
+    def toggle_next(*vals):
+        needed = 2 * len(models)
+        return gr.update(interactive=all(v in RATING_OPTS for v in vals[:needed]))
     for r in radio_widgets:
+        r.change(toggle_next, inputs=radio_widgets, outputs=next_btn)
     def goto(step: int):
+        def _fn(idx: int, *vals):
+            n_rad = len(radio_widgets)
+            ratings  = list(vals[:n_rad])
+            comments = list(vals[n_rad:-1])
+            if step != -1 or all(r in RATING_OPTS for r in ratings[:2*len(models)]):
                 save_row(idx, ratings, comments)
+            new_idx = max(0, min(idx + step, TOTAL - 1))
             return build_row(new_idx)
         return _fn
+    common_inputs  = [state] + radio_widgets + comment_widgets + [next_btn]
+    common_outputs = [state, idx_box, hdr_box, para_box] + \
+                     out_boxes + radio_widgets + comment_widgets + \
+                     [back_btn, next_btn]
+    back_btn.click(goto(-1), inputs=common_inputs, outputs=common_outputs)
+    next_btn.click(goto(1),  inputs=common_inputs, outputs=common_outputs)
+    download_btn.click(lambda: csv_path, outputs=csv_file_out)
+    # ---------- INIT ----------
+    def init_annotation(uploaded_file, name):
+        global df, csv_path, annotator, TOTAL
+        if uploaded_file is None or not name.strip():
+            raise gr.Error("Please upload a CSV and enter your name.")
         annotator = name.strip()
+        csv_path  = uploaded_file.name
+        local_df = pd.read_csv(csv_path, keep_default_na=False)
+        if CONTENT_COL not in local_df.columns:
+            raise gr.Error(f"Missing required column '{CONTENT_COL}' in CSV")
+        globals()['df'] = local_df
+        discover_models()
+        ensure_helper_columns()
+        TOTAL = len(df)
+        df.to_csv(csv_path, index=False)
+        first_idx = first_incomplete()
+        row_vals  = build_row(first_idx)
+        # visibility updates for rows
+        vis_updates = [gr.update(visible=i < len(models)) for i in range(MAX_MODELS)]
+        return [f"**Annotator:** {annotator}", gr.update(visible=True)] + vis_updates + row_vals
+    # run_outputs: annotator_md, annotation_panel, row_vis_updates..., common_outputs
+    run_outputs = [annotator_md, annotation_panel] + [gr.Row()]*MAX_MODELS + common_outputs
+    # substitute placeholder Rows with actual containers
+    run_outputs[2:2+MAX_MODELS] = row_containers
+    run_btn.click(init_annotation, inputs=[csv_upload, name_input], outputs=run_outputs)
 if __name__ == "__main__":
     demo.queue()
+    demo.launch(share=True)