hallo-talking-face

Running

App Files Files Community

fffiloni committed on Oct 30, 2024

Commit

f634467

verified ·

1 Parent(s): 6bc49fc

add maskGCT api option

Browse files

Files changed (1) hide show

app.py +46 -2

app.py CHANGED Viewed

@@ -190,6 +190,22 @@ def get_whisperspeech(prompt_audio_whisperspeech, audio_to_clone):
     print(result)
     return result, gr.update(value=result, visible=True)
 ########################
 # TALKING PORTRAIT GEN #
@@ -264,7 +280,7 @@ css = '''
 #video-block {
     flex: 9;
 }
-#audio-block, #audio-clone-elm {
     flex: 1;
 }
 div#audio-clone-elm > .audio-container > button {
@@ -273,6 +289,12 @@ div#audio-clone-elm > .audio-container > button {
 div#audio-clone-elm > .audio-container > button > .wrap {
     font-size: 0.9em;
 }
 #text-synth, #voice-desc{
     height: 130px;
 }
@@ -285,7 +307,7 @@ div#audio-clone-elm > .audio-container > button > .wrap {
 #gen-voice-btn {
     flex: 1;
 }
-#parler-tab, #whisperspeech-tab {
     padding: 0;
 }
 #main-submit{
@@ -405,6 +427,20 @@ with gr.Blocks(css=css) as demo:
                             elem_id = "audio-clone-elm"
                         )
                         gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
                 with gr.Column(elem_id="result-column"):
@@ -501,6 +537,14 @@ with gr.Blocks(css=css) as demo:
         show_api = False
     )
     submit_btn.click(
         fn = generate_talking_portrait,
         inputs = [portrait, voice],

     print(result)
     return result, gr.update(value=result, visible=True)
+def get_maskGCT_TTS(prompt_audio_maskGCT, audio_to_clone):
+    try:
+        client = Client("amphion/maskgct")
+    except:
+        raise gr.Error(f"amphion/maskgct space's api might not be ready, please wait, or upload an audio instead.")
+    result = client.predict(
+        prompt_wav = handle_file(audio_to_clone),
+        target_text = prompt_audio_maskGCT,
+        target_len=-1,
+		n_timesteps=25,
+		api_name="/predict"
+    )
+    print(result)
+    return result, gr.update(value=result, visible=True)
 ########################
 # TALKING PORTRAIT GEN #
 #video-block {
     flex: 9;
 }
+#audio-block, #audio-clone-elm, audio-clone-elm-maskGCT {
     flex: 1;
 }
 div#audio-clone-elm > .audio-container > button {
 div#audio-clone-elm > .audio-container > button > .wrap {
     font-size: 0.9em;
 }
+div#audio-clone-elm-maskGCT > .audio-container > button {
+    height: 180px!important;
+}
+div#audio-clone-elm-maskGCT > .audio-container > button > .wrap {
+    font-size: 0.9em;
+}
 #text-synth, #voice-desc{
     height: 130px;
 }
 #gen-voice-btn {
     flex: 1;
 }
+#parler-tab, #whisperspeech-tab, maskGCT-tab {
     padding: 0;
 }
 #main-submit{
                             elem_id = "audio-clone-elm"
                         )
                         gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
+                    with gr.Tab("MaskGCT TTS", elem_id="maskGCT-tab"):
+                        prompt_audio_maskGCT = gr.Textbox(
+                            label = "Text to synthetize",
+                            lines = 2,
+                            max_lines = 2,
+                            elem_id = "text-synth-maskGCT"
+                        )
+                        audio_to_clone_maskGCT = gr.Audio(
+                            label = "Voice to clone",
+                            type = "filepath",
+                            elem_id = "audio-clone-elm-maskGCT"
+                        )
+                        gen_maskGCT_voice_btn = gr.Button("Generate voice clone (optional)")
                 with gr.Column(elem_id="result-column"):
         show_api = False
     )
+    gen_maskGCT_voice_btn.click(
+        fn = get_maskGCT_TTS,
+        inputs = [prompt_audio_maskGCT, audio_to_clone_maskGCT],
+        outputs = [voice, preprocess_audio_file],
+        queue = False,
+        show_api = False
+    )
     submit_btn.click(
         fn = generate_talking_portrait,
         inputs = [portrait, voice],