Spaces:
Running
Running
add maskGCT api option
Browse files
app.py
CHANGED
|
@@ -190,6 +190,22 @@ def get_whisperspeech(prompt_audio_whisperspeech, audio_to_clone):
|
|
| 190 |
print(result)
|
| 191 |
return result, gr.update(value=result, visible=True)
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
########################
|
| 195 |
# TALKING PORTRAIT GEN #
|
|
@@ -264,7 +280,7 @@ css = '''
|
|
| 264 |
#video-block {
|
| 265 |
flex: 9;
|
| 266 |
}
|
| 267 |
-
#audio-block, #audio-clone-elm {
|
| 268 |
flex: 1;
|
| 269 |
}
|
| 270 |
div#audio-clone-elm > .audio-container > button {
|
|
@@ -273,6 +289,12 @@ div#audio-clone-elm > .audio-container > button {
|
|
| 273 |
div#audio-clone-elm > .audio-container > button > .wrap {
|
| 274 |
font-size: 0.9em;
|
| 275 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
#text-synth, #voice-desc{
|
| 277 |
height: 130px;
|
| 278 |
}
|
|
@@ -285,7 +307,7 @@ div#audio-clone-elm > .audio-container > button > .wrap {
|
|
| 285 |
#gen-voice-btn {
|
| 286 |
flex: 1;
|
| 287 |
}
|
| 288 |
-
#parler-tab, #whisperspeech-tab {
|
| 289 |
padding: 0;
|
| 290 |
}
|
| 291 |
#main-submit{
|
|
@@ -405,6 +427,20 @@ with gr.Blocks(css=css) as demo:
|
|
| 405 |
elem_id = "audio-clone-elm"
|
| 406 |
)
|
| 407 |
gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
|
| 409 |
with gr.Column(elem_id="result-column"):
|
| 410 |
|
|
@@ -501,6 +537,14 @@ with gr.Blocks(css=css) as demo:
|
|
| 501 |
show_api = False
|
| 502 |
)
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
submit_btn.click(
|
| 505 |
fn = generate_talking_portrait,
|
| 506 |
inputs = [portrait, voice],
|
|
|
|
| 190 |
print(result)
|
| 191 |
return result, gr.update(value=result, visible=True)
|
| 192 |
|
| 193 |
+
def get_maskGCT_TTS(prompt_audio_maskGCT, audio_to_clone):
    """Generate a cloned-voice audio clip through the amphion/maskgct Space.

    Args:
        prompt_audio_maskGCT: Text to synthesize (sent as ``target_text``).
        audio_to_clone: File path of the reference voice recording
            (sent as ``prompt_wav``).

    Returns:
        A tuple ``(result, update)`` where ``result`` is whatever the remote
        ``/predict`` endpoint returned and ``update`` is a ``gr.update`` that
        sets the same value on the target component and makes it visible.

    Raises:
        gr.Error: If no reference audio was provided, or if the remote Space
            client could not be created.
    """
    # Fail early with a readable message; handle_file() cannot wrap a
    # missing path and would otherwise surface an opaque error.
    if not audio_to_clone:
        raise gr.Error("Please provide a voice recording to clone first.")

    try:
        client = Client("amphion/maskgct")
    except Exception as err:
        # `except Exception` (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate; chain the cause for server-side logs.
        raise gr.Error(
            "amphion/maskgct space's api might not be ready, please wait, or upload an audio instead."
        ) from err

    result = client.predict(
        prompt_wav=handle_file(audio_to_clone),
        target_text=prompt_audio_maskGCT,
        target_len=-1,
        n_timesteps=25,
        api_name="/predict",
    )
    print(result)
    return result, gr.update(value=result, visible=True)
|
| 208 |
+
|
| 209 |
|
| 210 |
########################
|
| 211 |
# TALKING PORTRAIT GEN #
|
|
|
|
| 280 |
#video-block {
|
| 281 |
flex: 9;
|
| 282 |
}
|
| 283 |
+
/* The MaskGCT audio input is matched by id, like its two siblings. */
#audio-block, #audio-clone-elm, #audio-clone-elm-maskGCT {
    flex: 1;
}
|
| 286 |
div#audio-clone-elm > .audio-container > button {
|
|
|
|
| 289 |
div#audio-clone-elm > .audio-container > button > .wrap {
|
| 290 |
font-size: 0.9em;
|
| 291 |
}
|
| 292 |
+
div#audio-clone-elm-maskGCT > .audio-container > button {
|
| 293 |
+
height: 180px!important;
|
| 294 |
+
}
|
| 295 |
+
div#audio-clone-elm-maskGCT > .audio-container > button > .wrap {
|
| 296 |
+
font-size: 0.9em;
|
| 297 |
+
}
|
| 298 |
#text-synth, #voice-desc{
|
| 299 |
height: 130px;
|
| 300 |
}
|
|
|
|
| 307 |
#gen-voice-btn {
|
| 308 |
flex: 1;
|
| 309 |
}
|
| 310 |
+
/* All three TTS tabs are matched by id; the MaskGCT tab needs the # too. */
#parler-tab, #whisperspeech-tab, #maskGCT-tab {
    padding: 0;
}
|
| 313 |
#main-submit{
|
|
|
|
| 427 |
elem_id = "audio-clone-elm"
|
| 428 |
)
|
| 429 |
gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
|
| 430 |
+
|
| 431 |
+
with gr.Tab("MaskGCT TTS", elem_id="maskGCT-tab"):
|
| 432 |
+
prompt_audio_maskGCT = gr.Textbox(
|
| 433 |
+
label = "Text to synthetize",
|
| 434 |
+
lines = 2,
|
| 435 |
+
max_lines = 2,
|
| 436 |
+
elem_id = "text-synth-maskGCT"
|
| 437 |
+
)
|
| 438 |
+
audio_to_clone_maskGCT = gr.Audio(
|
| 439 |
+
label = "Voice to clone",
|
| 440 |
+
type = "filepath",
|
| 441 |
+
elem_id = "audio-clone-elm-maskGCT"
|
| 442 |
+
)
|
| 443 |
+
gen_maskGCT_voice_btn = gr.Button("Generate voice clone (optional)")
|
| 444 |
|
| 445 |
with gr.Column(elem_id="result-column"):
|
| 446 |
|
|
|
|
| 537 |
show_api = False
|
| 538 |
)
|
| 539 |
|
| 540 |
+
gen_maskGCT_voice_btn.click(
|
| 541 |
+
fn = get_maskGCT_TTS,
|
| 542 |
+
inputs = [prompt_audio_maskGCT, audio_to_clone_maskGCT],
|
| 543 |
+
outputs = [voice, preprocess_audio_file],
|
| 544 |
+
queue = False,
|
| 545 |
+
show_api = False
|
| 546 |
+
)
|
| 547 |
+
|
| 548 |
submit_btn.click(
|
| 549 |
fn = generate_talking_portrait,
|
| 550 |
inputs = [portrait, voice],
|