voiceclear-zar

Running

App Files Files Community

Diggz10 committed on Aug 21

Commit

deaaabb

verified ·

1 Parent(s): 203bd74

Update app.py

Browse files

Files changed (1) hide show

app.py +148 -21

app.py CHANGED Viewed

@@ -1,13 +1,10 @@
-# app.py — Voice Clarity Booster with clear A/B comparison & loudness match
-# - Modes: MetricGAN+ (denoise), SepFormer (dereverb+denoise), Bypass
-# - Dry/Wet, Presence, Low-cut
-# - Loudness Match (optional)
-# - Outputs: Enhanced, A/B alternating (2s O/E flip), Delta (Original−Enhanced), Metrics
 import os
 import io
 import tempfile
-from typing import Tuple, Optional
 # --- Quiet noisy deprecation warnings (optional) ---
 import warnings
@@ -27,7 +24,7 @@ import soundfile as sf
 import torch
 import torchaudio
-# Optional: pyloudnorm for true LUFS matching; fallback to RMS if not available
 try:
     import pyloudnorm as pyln
     _HAVE_PYLN = True
@@ -96,6 +93,7 @@ def _to_mono(wav: np.ndarray) -> np.ndarray:
         if t <= 8:   # [C, T]
             return wav.mean(axis=0).astype(np.float32)
         return wav.mean(axis=1).astype(np.float32)
     return wav.reshape(-1).astype(np.float32)
@@ -184,7 +182,7 @@ def _make_ab_alternating(orig: np.ndarray, enh: np.ndarray, sr: int, seg_sec: fl
 # -----------------------------
-# Core pipeline
 # -----------------------------
 def _run_metricgan(path_16k: str) -> torch.Tensor:
     enh = _get_metricgan()
@@ -218,6 +216,31 @@ def _run_sepformer(path_16k: str) -> torch.Tensor:
     raise RuntimeError("Unexpected SepFormer output type")
 def _enhance_numpy_audio(
     audio: Tuple[int, np.ndarray],
     mode: str = "MetricGAN+ (denoise)",
@@ -256,6 +279,8 @@ def _enhance_numpy_audio(
             proc = _run_metricgan(path_16k)  # [1, T@16k]
         elif mode.startswith("SepFormer"):
             proc = _run_sepformer(path_16k)  # [1, T@16k]
         else:  # Bypass (EQ only)
             proc = wav_16k
     finally:
@@ -274,7 +299,7 @@ def _enhance_numpy_audio(
     proc_out = _resample_torch(proc, 16000, sr_out).squeeze(0).numpy().astype(np.float32)
     dry_out  = _resample_torch(dry_t, sr_in, sr_out).squeeze(0).numpy().astype(np.float32)
-    # Align and mix
     proc_out, dry_out = _align_lengths(proc_out, dry_out)
     dry_wet = float(np.clip(dry_wet, 0.0, 1.0))
     enhanced = proc_out * dry_wet + dry_out * (1.0 - dry_wet)
@@ -299,13 +324,93 @@ def _enhance_numpy_audio(
     metrics = (
         f"Mode: {mode} | Dry/Wet: {dry_wet*100:.0f}% | Presence: {presence_db:+.1f} dB | "
         f"Low-cut: {lowcut_hz:.0f} Hz | Loudness match: {loud_text}\n"
-        f"Dur: {len(enhanced)/sr_out:.2f}s | Δ (original−enhanced) RMS: {20*np.log10(rms_delta+eps):+.2f} dBFS | "
         f'Approx. "noise removed" ratio: {change_db:.2f} dB'
     )
     return sr_out, enhanced, delta, metrics
 # -----------------------------
 # Gradio UI
 # -----------------------------
@@ -342,28 +447,41 @@ def gradio_enhance(
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## Voice Clarity Booster — with A/B and Delta listening")
     with gr.Row():
-        with gr.Column():
             in_audio = gr.Audio(
                 sources=["upload", "microphone"],
                 type="numpy",
                 label="Input",
             )
             mode = gr.Radio(
-                choices=["MetricGAN+ (denoise)", "SepFormer (dereverb+denoise)", "Bypass (EQ only)"],
-                value="MetricGAN+ (denoise)",
                 label="Mode",
             )
             dry_wet = gr.Slider(
-                minimum=0, maximum=100, value=85, step=1,
                 label="Dry/Wet Mix (%) — lower to reduce artifacts"
             )
             presence = gr.Slider(
-                minimum=-12, maximum=12, value=0, step=0.5, label="Presence Boost (dB)"
             )
             lowcut = gr.Slider(
-                minimum=0, maximum=200, value=0, step=5, label="Low-Cut (Hz)"
             )
             loudmatch = gr.Checkbox(value=True, label="Loudness-match enhanced to original")
             out_sr = gr.Radio(
@@ -371,11 +489,20 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 value="Original",
                 label="Output Sample Rate",
             )
-            btn = gr.Button("Enhance")
-        with gr.Column():
             out_audio = gr.Audio(type="numpy", label="Enhanced (autoplay)", autoplay=True)
-            ab_audio = gr.Audio(type="numpy", label="A/B Alternating (2s O → 2s E)", autoplay=False)
-            delta_audio = gr.Audio(type="numpy", label="Delta: Original − Enhanced", autoplay=False)
             metrics = gr.Markdown("")
     btn.click(

+# app.py — Voice Clarity Booster with Presets, Dual-Stage "Ultimate Clean Voice",
+# A/B alternating, Delta (Original−Enhanced), and Loudness Match.
 import os
 import io
 import tempfile
+from typing import Tuple, Optional, Dict, Any
 # --- Quiet noisy deprecation warnings (optional) ---
 import warnings
 import torch
 import torchaudio
+# Optional: pyloudnorm for LUFS match; fallback to RMS if not available
 try:
     import pyloudnorm as pyln
     _HAVE_PYLN = True
         if t <= 8:   # [C, T]
             return wav.mean(axis=0).astype(np.float32)
         return wav.mean(axis=1).astype(np.float32)
+    # Higher dims: flatten
     return wav.reshape(-1).astype(np.float32)
 # -----------------------------
+# Model runners
 # -----------------------------
 def _run_metricgan(path_16k: str) -> torch.Tensor:
     enh = _get_metricgan()
     raise RuntimeError("Unexpected SepFormer output type")
+def _run_dual_stage(path_16k: str) -> torch.Tensor:
+    """
+    Ultimate Clean: SepFormer (dereverb/denoise) -> MetricGAN+ (denoise polish).
+    Both at 16 kHz mono.
+    """
+    # Stage 1: SepFormer
+    stage1 = _run_sepformer(path_16k)  # [1, T]
+    # Save Stage 1 to temp wav, then Stage 2 MetricGAN+
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_mid:
+        sf.write(tmp_mid.name, stage1.squeeze(0).numpy(), 16000, subtype="PCM_16")
+        tmp_mid.flush()
+        mid_path = tmp_mid.name
+    try:
+        stage2 = _run_metricgan(mid_path)  # [1, T]
+    finally:
+        try:
+            os.remove(mid_path)
+        except Exception:
+            pass
+    return stage2
+# -----------------------------
+# Core pipeline
+# -----------------------------
 def _enhance_numpy_audio(
     audio: Tuple[int, np.ndarray],
     mode: str = "MetricGAN+ (denoise)",
             proc = _run_metricgan(path_16k)  # [1, T@16k]
         elif mode.startswith("SepFormer"):
             proc = _run_sepformer(path_16k)  # [1, T@16k]
+        elif mode.startswith("Dual-Stage"):
+            proc = _run_dual_stage(path_16k)  # [1, T@16k]
         else:  # Bypass (EQ only)
             proc = wav_16k
     finally:
     proc_out = _resample_torch(proc, 16000, sr_out).squeeze(0).numpy().astype(np.float32)
     dry_out  = _resample_torch(dry_t, sr_in, sr_out).squeeze(0).numpy().astype(np.float32)
+    # Align and mix (dry/wet)
     proc_out, dry_out = _align_lengths(proc_out, dry_out)
     dry_wet = float(np.clip(dry_wet, 0.0, 1.0))
     enhanced = proc_out * dry_wet + dry_out * (1.0 - dry_wet)
     metrics = (
         f"Mode: {mode} | Dry/Wet: {dry_wet*100:.0f}% | Presence: {presence_db:+.1f} dB | "
         f"Low-cut: {lowcut_hz:.0f} Hz | Loudness match: {loud_text}\n"
+        f"Dur: {len(enhanced)/sr_out:.2f}s | Δ RMS: {20*np.log10(rms_delta+eps):+.2f} dBFS | "
         f'Approx. "noise removed" ratio: {change_db:.2f} dB'
     )
     return sr_out, enhanced, delta, metrics
+# -----------------------------
+# Presets
+# -----------------------------
+# Named starting points for the adjustable controls. The key order is the order
+# shown in the "Preset" dropdown (its choices are list(PRESETS.keys())).
+# Fields of each non-empty entry, as consumed by _apply_preset:
+#   mode           - text of the "Mode" radio option to select
+#   dry_wet        - wet fraction 0.0-1.0; shown on the slider as a percentage
+#   presence_db    - value for the "Presence Boost (dB)" slider
+#   lowcut_hz      - value for the "Low-Cut (Hz)" slider
+#   loudness_match - state of the loudness-match checkbox
+PRESETS: Dict[str, Dict[str, Any]] = {
+    # Maximum cleanup: dereverb + denoise chain, high dry/wet, subtle presence, mild HPF
+    "Ultimate Clean Voice": {
+        "mode": "Dual-Stage (SepFormer → MetricGAN+)",
+        "dry_wet": 0.92,
+        "presence_db": 1.5,
+        "lowcut_hz": 80.0,
+        "loudness_match": True,
+    },
+    # Natural cleanup for most cases
+    "Natural Speech": {
+        "mode": "MetricGAN+ (denoise)",
+        "dry_wet": 0.85,
+        "presence_db": 1.0,
+        "lowcut_hz": 50.0,
+        "loudness_match": True,
+    },
+    # Studio-ish clarity
+    "Podcast Studio": {
+        "mode": "MetricGAN+ (denoise)",
+        "dry_wet": 0.9,
+        "presence_db": 2.0,
+        "lowcut_hz": 75.0,
+        "loudness_match": True,
+    },
+    # Strong dereverb, blend to avoid artifacts
+    "Room Dereverb": {
+        "mode": "SepFormer (dereverb+denoise)",
+        "dry_wet": 0.7,
+        "presence_db": 0.5,
+        "lowcut_hz": 60.0,
+        "loudness_match": True,
+    },
+    # When music bed is under voice—be gentle
+    "Music + Voice Safe": {
+        "mode": "MetricGAN+ (denoise)",
+        "dry_wet": 0.6,
+        "presence_db": 0.0,
+        "lowcut_hz": 40.0,
+        "loudness_match": True,
+    },
+    # Harsh phone/zoom recordings
+    "Phone Call Rescue": {
+        "mode": "MetricGAN+ (denoise)",
+        "dry_wet": 0.88,
+        "presence_db": 2.0,
+        "lowcut_hz": 100.0,
+        "loudness_match": True,
+    },
+    # Light touch
+    "Gentle Denoise": {
+        "mode": "MetricGAN+ (denoise)",
+        "dry_wet": 0.65,
+        "presence_db": 0.0,
+        "lowcut_hz": 0.0,
+        "loudness_match": True,
+    },
+    "Custom": {}  # no-op, keeps current settings
+}
+def _apply_preset(preset_name: str):
+    cfg = PRESETS.get(preset_name, {})
+    # Return gr.update() for each adjustable control
+    def upd(val=None):
+        return gr.update(value=val) if val is not None else gr.update()
+    if not cfg or preset_name == "Custom":
+        return upd(), upd(), upd(), upd(), upd()
+    return (
+        upd(cfg["mode"]),
+        upd(int(round(cfg["dry_wet"] * 100))),
+        upd(float(cfg["presence_db"])),
+        upd(float(cfg["lowcut_hz"])),
+        upd(bool(cfg["loudness_match"])),
+    )
 # -----------------------------
 # Gradio UI
 # -----------------------------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## Voice Clarity Booster — Presets, A/B, Delta, Loudness Match")
     with gr.Row():
+        with gr.Column(scale=1):
             in_audio = gr.Audio(
                 sources=["upload", "microphone"],
                 type="numpy",
                 label="Input",
             )
+            preset = gr.Dropdown(
+                choices=list(PRESETS.keys()),
+                value="Ultimate Clean Voice",
+                label="Preset",
+            )
+            # Controls that presets will adjust
             mode = gr.Radio(
+                choices=[
+                    "MetricGAN+ (denoise)",
+                    "SepFormer (dereverb+denoise)",
+                    "Dual-Stage (SepFormer → MetricGAN+)",
+                    "Bypass (EQ only)"
+                ],
+                value="Dual-Stage (SepFormer → MetricGAN+)",
                 label="Mode",
             )
             dry_wet = gr.Slider(
+                minimum=0, maximum=100, value=92, step=1,
                 label="Dry/Wet Mix (%) — lower to reduce artifacts"
             )
             presence = gr.Slider(
+                minimum=-12, maximum=12, value=1.5, step=0.5, label="Presence Boost (dB)"
             )
             lowcut = gr.Slider(
+                minimum=0, maximum=200, value=80, step=5, label="Low-Cut (Hz)"
             )
             loudmatch = gr.Checkbox(value=True, label="Loudness-match enhanced to original")
             out_sr = gr.Radio(
                 value="Original",
                 label="Output Sample Rate",
             )
+            # Apply preset on change
+            preset.change(
+                _apply_preset,
+                inputs=[preset],
+                outputs=[mode, dry_wet, presence, lowcut, loudmatch],
+            )
+            btn = gr.Button("Enhance", variant="primary")
+        with gr.Column(scale=1):
             out_audio = gr.Audio(type="numpy", label="Enhanced (autoplay)", autoplay=True)
+            ab_audio = gr.Audio(type="numpy", label="A/B Alternating (2s O → 2s E)")
+            delta_audio = gr.Audio(type="numpy", label="Delta: Original − Enhanced")
             metrics = gr.Markdown("")
     btn.click(