VieNeu-TTS

Runtime error

App Files Community

Translsis committed 19 days ago

Commit

bcd292a

verified ·

1 Parent(s): 77b4ca5

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -25

app.py CHANGED Viewed

@@ -30,7 +30,6 @@ except Exception as e:
         def encode_reference(self, path): return None
         def infer(self, text, ref, ref_text):
             import numpy as np
-            # Giả lập độ trễ để test tính năng đo thời gian
             time.sleep(1.5)
             return np.random.uniform(-0.5, 0.5, 24000*3)
     tts = MockTTS()
@@ -65,23 +64,24 @@ def load_reference_info(voice_choice):
     return None, ""
 @spaces.GPU(duration=120)
-def synthesize_speech(text, voice_choice, custom_audio, custom_text, use_custom_voice):
     try:
         if not text or text.strip() == "":
             return None, "⚠️ Vui lòng nhập văn bản cần tổng hợp!"
-        # --- LOGIC CHECK LIMIT 250 ---
         if len(text) > 250:
             return None, f"❌ Văn bản quá dài ({len(text)}/250 ký tự)! Vui lòng cắt ngắn lại để đảm bảo chất lượng."
-        # Logic chọn Reference
-        if use_custom_voice:
             if custom_audio is None or not custom_text:
                 return None, "⚠️ Vui lòng tải lên Audio và nhập nội dung Audio đó."
             ref_audio_path = custom_audio
             ref_text_raw = custom_text
             print("🎨 Mode: Custom Voice")
-        else: # Preset
             if voice_choice not in VOICE_SAMPLES:
                  return None, "⚠️ Vui lòng chọn một giọng mẫu."
             ref_audio_path = VOICE_SAMPLES[voice_choice]["audio"]
@@ -94,7 +94,6 @@ def synthesize_speech(text, voice_choice, custom_audio, custom_text, use_custom_
                 ref_text_raw = f.read()
             print(f"🎤 Mode: Preset Voice ({voice_choice})")
-        # Inference & Đo thời gian
         print(f"📝 Text: {text[:50]}...")
         start_time = time.time()
@@ -105,7 +104,6 @@ def synthesize_speech(text, voice_choice, custom_audio, custom_text, use_custom_
         end_time = time.time()
         process_time = end_time - start_time
-        # Save
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             sf.write(tmp_file.name, wav, 24000)
             output_path = tmp_file.name
@@ -162,7 +160,6 @@ css = """
     transition: color 0.2s;
 }
 .link-group a:hover { color: #38bdf8; text-shadow: 0 0 5px rgba(56, 189, 248, 0.5); }
 .status-box { font-weight: bold; text-align: center; border: none; background: transparent; }
 """
@@ -180,7 +177,6 @@ EXAMPLES_LIST = [
 with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
     with gr.Column(elem_classes="container"):
-        # Header
         gr.HTML("""
             <div class="header-box">
                 <div class="header-title">🎙️ VieNeu-TTS Studio</div>
@@ -208,20 +204,17 @@ with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
                 show_label=False
             )
-            # Counter
             with gr.Row():
                 char_count = gr.HTML("<div style='text-align: right; color: #64748B; font-size: 0.8rem;'>0 / 250 ký tự</div>")
             gr.Markdown("### 🗣️ Chọn giọng đọc")
-            # SỬA LỖI: Dùng Radio thay vì Tabs để tránh lỗi internal Gradio
             voice_mode = gr.Radio(
                 choices=["👤 Giọng có sẵn (Preset)", "🎙️ Giọng tùy chỉnh (Custom)"],
                 value="👤 Giọng có sẵn (Preset)",
                 label="Chế độ"
             )
-            # Preset controls
             with gr.Group(visible=True) as preset_group:
                 voice_select = gr.Dropdown(
                     choices=list(VOICE_SAMPLES.keys()),
@@ -233,13 +226,11 @@ with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
                     ref_audio_preview = gr.Audio(label="Audio mẫu", interactive=False, type="filepath")
                     ref_text_preview = gr.Markdown("...")
-            # Custom controls
             with gr.Group(visible=False) as custom_group:
                 gr.Markdown("Tải lên giọng của bạn (Zero-shot Cloning)")
                 custom_audio = gr.Audio(label="File ghi âm (.wav)", type="filepath")
                 custom_text = gr.Textbox(label="Nội dung ghi âm", placeholder="Nhập chính xác lời thoại...")
-            use_custom_voice = gr.Checkbox(value=False, visible=False)
             btn_generate = gr.Button("🎵 Tổng hợp giọng nói", variant="primary", size="lg")
         # --- RIGHT: OUTPUT ---
@@ -278,30 +269,24 @@ with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
     voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
-    # Voice mode switching
     def toggle_voice_mode(mode):
         is_custom = (mode == "🎙️ Giọng tùy chỉnh (Custom)")
-        return (
-            gr.update(visible=not is_custom),  # preset_group
-            gr.update(visible=is_custom),      # custom_group
-            is_custom                          # use_custom_voice
-        )
     voice_mode.change(
         fn=toggle_voice_mode,
         inputs=[voice_mode],
-        outputs=[preset_group, custom_group, use_custom_voice]
     )
     btn_generate.click(
         fn=synthesize_speech,
-        inputs=[text_input, voice_select, custom_audio, custom_text, use_custom_voice],
         outputs=[audio_output, status_output]
     )
 if __name__ == "__main__":
     demo.queue().launch(
         server_name="0.0.0.0",
-        server_port=7860,
-        share=True  # QUAN TRỌNG: Thêm share=True cho HF Spaces
     )

         def encode_reference(self, path): return None
         def infer(self, text, ref, ref_text):
             import numpy as np
             time.sleep(1.5)
             return np.random.uniform(-0.5, 0.5, 24000*3)
     tts = MockTTS()
     return None, ""
 @spaces.GPU(duration=120)
+def synthesize_speech(text, voice_choice, custom_audio, custom_text, voice_mode):
     try:
         if not text or text.strip() == "":
             return None, "⚠️ Vui lòng nhập văn bản cần tổng hợp!"
         if len(text) > 250:
             return None, f"❌ Văn bản quá dài ({len(text)}/250 ký tự)! Vui lòng cắt ngắn lại để đảm bảo chất lượng."
+        # Xác định mode dựa vào voice_mode string
+        use_custom = (voice_mode == "🎙️ Giọng tùy chỉnh (Custom)")
+        if use_custom:
             if custom_audio is None or not custom_text:
                 return None, "⚠️ Vui lòng tải lên Audio và nhập nội dung Audio đó."
             ref_audio_path = custom_audio
             ref_text_raw = custom_text
             print("🎨 Mode: Custom Voice")
+        else:
             if voice_choice not in VOICE_SAMPLES:
                  return None, "⚠️ Vui lòng chọn một giọng mẫu."
             ref_audio_path = VOICE_SAMPLES[voice_choice]["audio"]
                 ref_text_raw = f.read()
             print(f"🎤 Mode: Preset Voice ({voice_choice})")
         print(f"📝 Text: {text[:50]}...")
         start_time = time.time()
         end_time = time.time()
         process_time = end_time - start_time
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
             sf.write(tmp_file.name, wav, 24000)
             output_path = tmp_file.name
     transition: color 0.2s;
 }
 .link-group a:hover { color: #38bdf8; text-shadow: 0 0 5px rgba(56, 189, 248, 0.5); }
 .status-box { font-weight: bold; text-align: center; border: none; background: transparent; }
 """
 with gr.Blocks(theme=theme, css=css, title="VieNeu-TTS Studio") as demo:
     with gr.Column(elem_classes="container"):
         gr.HTML("""
             <div class="header-box">
                 <div class="header-title">🎙️ VieNeu-TTS Studio</div>
                 show_label=False
             )
             with gr.Row():
                 char_count = gr.HTML("<div style='text-align: right; color: #64748B; font-size: 0.8rem;'>0 / 250 ký tự</div>")
             gr.Markdown("### 🗣️ Chọn giọng đọc")
             voice_mode = gr.Radio(
                 choices=["👤 Giọng có sẵn (Preset)", "🎙️ Giọng tùy chỉnh (Custom)"],
                 value="👤 Giọng có sẵn (Preset)",
                 label="Chế độ"
             )
             with gr.Group(visible=True) as preset_group:
                 voice_select = gr.Dropdown(
                     choices=list(VOICE_SAMPLES.keys()),
                     ref_audio_preview = gr.Audio(label="Audio mẫu", interactive=False, type="filepath")
                     ref_text_preview = gr.Markdown("...")
             with gr.Group(visible=False) as custom_group:
                 gr.Markdown("Tải lên giọng của bạn (Zero-shot Cloning)")
                 custom_audio = gr.Audio(label="File ghi âm (.wav)", type="filepath")
                 custom_text = gr.Textbox(label="Nội dung ghi âm", placeholder="Nhập chính xác lời thoại...")
             btn_generate = gr.Button("🎵 Tổng hợp giọng nói", variant="primary", size="lg")
         # --- RIGHT: OUTPUT ---
     voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
     def toggle_voice_mode(mode):
         is_custom = (mode == "🎙️ Giọng tùy chỉnh (Custom)")
+        return gr.update(visible=not is_custom), gr.update(visible=is_custom)
     voice_mode.change(
         fn=toggle_voice_mode,
         inputs=[voice_mode],
+        outputs=[preset_group, custom_group]
     )
     btn_generate.click(
         fn=synthesize_speech,
+        inputs=[text_input, voice_select, custom_audio, custom_text, voice_mode],
         outputs=[audio_output, status_output]
     )
 if __name__ == "__main__":
     demo.queue().launch(
         server_name="0.0.0.0",
+        server_port=7860
     )