Spaces:
Runtime error
Runtime error
Commit
·
c6cb55a
1
Parent(s):
3217a02
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,12 +1,7 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import tempfile
|
| 3 |
-
import gradio as gr
|
| 4 |
-
from src.gradio_demo import SadTalker
|
| 5 |
-
# from src.utils.text2speech import TTSTalker
|
| 6 |
-
from huggingface_hub import snapshot_download
|
| 7 |
-
|
| 8 |
import torch
|
| 9 |
import librosa
|
|
|
|
| 10 |
from scipy.io.wavfile import write
|
| 11 |
from transformers import WavLMModel
|
| 12 |
|
|
@@ -27,9 +22,10 @@ import edge_tts
|
|
| 27 |
import tempfile
|
| 28 |
import anyio
|
| 29 |
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
def get_source_image(image):
|
| 32 |
-
return image
|
| 33 |
|
| 34 |
try:
|
| 35 |
import webui # in webui
|
|
@@ -49,23 +45,8 @@ def ref_video_fn(path_of_ref_video):
|
|
| 49 |
return gr.update(value=True)
|
| 50 |
else:
|
| 51 |
return gr.update(value=False)
|
| 52 |
-
|
| 53 |
-
def download_model():
|
| 54 |
-
REPO_ID = 'vinthony/SadTalker-V002rc'
|
| 55 |
-
snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
|
| 56 |
-
|
| 57 |
-
def sadtalker_demo():
|
| 58 |
-
|
| 59 |
-
download_model()
|
| 60 |
|
| 61 |
-
|
| 62 |
-
# tts_talker = TTSTalker()
|
| 63 |
-
|
| 64 |
-
download_model()
|
| 65 |
-
sad_talker = SadTalker(lazy_load=True)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
# ChatGLM2 & FreeVC
|
| 69 |
|
| 70 |
'''
|
| 71 |
def get_wavlm():
|
|
@@ -141,7 +122,7 @@ except Exception:
|
|
| 141 |
logger.warning("Windows, cant run time.tzset()")
|
| 142 |
|
| 143 |
# model_name = "THUDM/chatglm2-6b"
|
| 144 |
-
model_name = "THUDM/chatglm2-6b
|
| 145 |
|
| 146 |
RETRY_FLAG = False
|
| 147 |
|
|
@@ -332,15 +313,14 @@ async def text_to_speech_edge(text, language_code):
|
|
| 332 |
return tmp_path
|
| 333 |
|
| 334 |
|
| 335 |
-
with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")
|
| 336 |
gr.HTML("<center>"
|
| 337 |
-
"<h1
|
| 338 |
"</center>")
|
| 339 |
-
gr.Markdown("## <center
|
| 340 |
gr.Markdown("## <center>🌊 - 更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
|
| 341 |
-
gr.Markdown("### <center>⭐ - 如果您喜欢这个程序,欢迎给我的[
|
| 342 |
-
|
| 343 |
-
with gr.Tab("🍻 - ChatGLM2聊天区"):
|
| 344 |
with gr.Accordion("📒 相关信息", open=False):
|
| 345 |
_ = f""" ChatGLM2的可选参数信息:
|
| 346 |
* Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
|
|
@@ -364,7 +344,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 364 |
submitBtn = gr.Button("开始和GLM2交流吧", variant="primary")
|
| 365 |
deleteBtn = gr.Button("删除最新一轮对话", variant="secondary")
|
| 366 |
retryBtn = gr.Button("重新生成最新一轮对话", variant="secondary")
|
| 367 |
-
|
| 368 |
with gr.Accordion("🔧 更多设置", open=False):
|
| 369 |
with gr.Row():
|
| 370 |
emptyBtn = gr.Button("清空所有聊天记录")
|
|
@@ -382,8 +362,8 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 382 |
temperature = gr.Slider(
|
| 383 |
0.01, 1, value=0.95, step=0.01, label="Temperature", interactive=True
|
| 384 |
)
|
| 385 |
-
|
| 386 |
-
|
| 387 |
with gr.Row():
|
| 388 |
test1 = gr.Textbox(label="GLM2的最新回答 (可编辑)", lines = 3)
|
| 389 |
with gr.Column():
|
|
@@ -401,7 +381,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 401 |
audio_cloned = gr.Audio(label="为您生成的专属声音克隆音频", type='filepath')
|
| 402 |
|
| 403 |
clone_btn.click(convert, inputs=[model_choice, audio1, audio2], outputs=[audio_cloned])
|
| 404 |
-
|
| 405 |
history = gr.State([])
|
| 406 |
past_key_values = gr.State(None)
|
| 407 |
|
|
@@ -441,7 +421,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 441 |
emptyBtn.click(
|
| 442 |
reset_state, outputs=[chatbot, history, past_key_values, test1], show_progress="full"
|
| 443 |
)
|
| 444 |
-
|
| 445 |
retryBtn.click(
|
| 446 |
retry_last_answer,
|
| 447 |
inputs=[
|
|
@@ -457,7 +437,7 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 457 |
outputs=[chatbot, history, past_key_values, test1],
|
| 458 |
)
|
| 459 |
deleteBtn.click(delete_last_turn, [chatbot, history], [chatbot, history])
|
| 460 |
-
|
| 461 |
with gr.Accordion("📔 提示词示例", open=False):
|
| 462 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
| 463 |
examples = gr.Examples(
|
|
@@ -513,90 +493,55 @@ with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm"), a
|
|
| 513 |
api_name="tr1",
|
| 514 |
)
|
| 515 |
# """
|
| 516 |
-
|
|
|
|
| 517 |
with gr.Row().style(equal_height=False):
|
| 518 |
with gr.Column(variant='panel'):
|
| 519 |
with gr.Tabs(elem_id="sadtalker_source_image"):
|
| 520 |
-
with gr.TabItem('
|
| 521 |
with gr.Row():
|
| 522 |
-
source_image = gr.Image(label="
|
| 523 |
-
|
| 524 |
|
| 525 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
| 526 |
-
with gr.TabItem('
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
driven_audio = audio_cloned
|
| 530 |
-
driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
|
| 531 |
-
|
| 532 |
-
with gr.Column():
|
| 533 |
-
use_idle_mode = gr.Checkbox(label="Use Idle Animation", visible=False)
|
| 534 |
-
length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.", visible=False)
|
| 535 |
-
use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
|
| 536 |
-
|
| 537 |
-
with gr.Row():
|
| 538 |
-
ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref", visible=False).style(width=512)
|
| 539 |
-
|
| 540 |
-
with gr.Column():
|
| 541 |
-
use_ref_video = gr.Checkbox(label="Use Reference Video", visible=False)
|
| 542 |
-
ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))", visible=False)
|
| 543 |
-
|
| 544 |
-
ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
|
| 545 |
-
|
| 546 |
|
| 547 |
-
with gr.Column(variant='panel'):
|
| 548 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
| 549 |
-
with gr.TabItem('
|
|
|
|
| 550 |
with gr.Column(variant='panel'):
|
| 551 |
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
|
| 552 |
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
with gr.Row():
|
| 563 |
-
is_still_mode = gr.Checkbox(label="静态模式 (开启静态模式,角色的面部动作会减少;默认开启)", value=True)
|
| 564 |
-
facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?", visible=False)
|
| 565 |
-
|
| 566 |
-
with gr.Row():
|
| 567 |
-
batch_size = gr.Slider(label="Batch size (数值越大,生成速度越快;若显卡性能好,可增大数值)", step=1, maximum=32, value=2)
|
| 568 |
-
enhancer = gr.Checkbox(label="GFPGAN as Face enhancer", value=True, visible=False)
|
| 569 |
-
|
| 570 |
-
submit = gr.Button('开始视频聊天吧', elem_id="sadtalker_generate", variant='primary')
|
| 571 |
-
|
| 572 |
with gr.Tabs(elem_id="sadtalker_genearted"):
|
| 573 |
-
gen_video = gr.Video(label="
|
| 574 |
-
|
| 575 |
-
|
| 576 |
|
| 577 |
submit.click(
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
use_idle_mode,
|
| 593 |
-
length_of_audio,
|
| 594 |
-
blink_every
|
| 595 |
-
],
|
| 596 |
-
outputs=[gen_video]
|
| 597 |
-
)
|
| 598 |
gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
|
| 599 |
-
gr.Markdown("<center
|
| 600 |
gr.HTML('''
|
| 601 |
<div class="footer">
|
| 602 |
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
|
|
|
|
| 1 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import torch
|
| 3 |
import librosa
|
| 4 |
+
import gradio as gr
|
| 5 |
from scipy.io.wavfile import write
|
| 6 |
from transformers import WavLMModel
|
| 7 |
|
|
|
|
| 22 |
import tempfile
|
| 23 |
import anyio
|
| 24 |
|
| 25 |
+
import os, sys
|
| 26 |
+
import gradio as gr
|
| 27 |
+
from src.gradio_demo import SadTalker
|
| 28 |
|
|
|
|
|
|
|
| 29 |
|
| 30 |
try:
|
| 31 |
import webui # in webui
|
|
|
|
| 45 |
return gr.update(value=True)
|
| 46 |
else:
|
| 47 |
return gr.update(value=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
sad_talker = SadTalker("checkpoints", "src/config", lazy_load=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
'''
|
| 52 |
def get_wavlm():
|
|
|
|
| 122 |
logger.warning("Windows, cant run time.tzset()")
|
| 123 |
|
| 124 |
# model_name = "THUDM/chatglm2-6b"
|
| 125 |
+
model_name = "THUDM/chatglm2-6b"
|
| 126 |
|
| 127 |
RETRY_FLAG = False
|
| 128 |
|
|
|
|
| 313 |
return tmp_path
|
| 314 |
|
| 315 |
|
| 316 |
+
with gr.Blocks(title="ChatGLM2-6B-int4", theme=gr.themes.Soft(text_size="sm")) as demo:
|
| 317 |
gr.HTML("<center>"
|
| 318 |
+
"<h1>🥳💕🎶 - ChatGLM2 + 声音克隆:和你喜欢的角色畅所欲言吧!</h1>"
|
| 319 |
"</center>")
|
| 320 |
+
gr.Markdown("## <center>💡 - 第二代ChatGLM大语言模型 + FreeVC变声,为您打造独一无二的沉浸式对话体验,支持中英双语</center>")
|
| 321 |
gr.Markdown("## <center>🌊 - 更多精彩应用,尽在[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")
|
| 322 |
+
gr.Markdown("### <center>⭐ - 如果您喜欢这个程序,欢迎给我的[Github项目](https://github.com/KevinWang676/ChatGLM2-Voice-Cloning)点赞支持!</center>")
|
| 323 |
+
with gr.Tab("Chat"):
|
|
|
|
| 324 |
with gr.Accordion("📒 相关信息", open=False):
|
| 325 |
_ = f""" ChatGLM2的可选参数信息:
|
| 326 |
* Low temperature: responses will be more deterministic and focused; High temperature: responses more creative.
|
|
|
|
| 344 |
submitBtn = gr.Button("开始和GLM2交流吧", variant="primary")
|
| 345 |
deleteBtn = gr.Button("删除最新一轮对话", variant="secondary")
|
| 346 |
retryBtn = gr.Button("重新生成最新一轮对话", variant="secondary")
|
| 347 |
+
|
| 348 |
with gr.Accordion("🔧 更多设置", open=False):
|
| 349 |
with gr.Row():
|
| 350 |
emptyBtn = gr.Button("清空所有聊天记录")
|
|
|
|
| 362 |
temperature = gr.Slider(
|
| 363 |
0.01, 1, value=0.95, step=0.01, label="Temperature", interactive=True
|
| 364 |
)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
with gr.Row():
|
| 368 |
test1 = gr.Textbox(label="GLM2的最新回答 (可编辑)", lines = 3)
|
| 369 |
with gr.Column():
|
|
|
|
| 381 |
audio_cloned = gr.Audio(label="为您生成的专属声音克隆音频", type='filepath')
|
| 382 |
|
| 383 |
clone_btn.click(convert, inputs=[model_choice, audio1, audio2], outputs=[audio_cloned])
|
| 384 |
+
|
| 385 |
history = gr.State([])
|
| 386 |
past_key_values = gr.State(None)
|
| 387 |
|
|
|
|
| 421 |
emptyBtn.click(
|
| 422 |
reset_state, outputs=[chatbot, history, past_key_values, test1], show_progress="full"
|
| 423 |
)
|
| 424 |
+
|
| 425 |
retryBtn.click(
|
| 426 |
retry_last_answer,
|
| 427 |
inputs=[
|
|
|
|
| 437 |
outputs=[chatbot, history, past_key_values, test1],
|
| 438 |
)
|
| 439 |
deleteBtn.click(delete_last_turn, [chatbot, history], [chatbot, history])
|
| 440 |
+
|
| 441 |
with gr.Accordion("📔 提示词示例", open=False):
|
| 442 |
etext = """In America, where cars are an important part of the national psyche, a decade ago people had suddenly started to drive less, which had not happened since the oil shocks of the 1970s. """
|
| 443 |
examples = gr.Examples(
|
|
|
|
| 493 |
api_name="tr1",
|
| 494 |
)
|
| 495 |
# """
|
| 496 |
+
|
| 497 |
+
with gr.Tab("Video"):
|
| 498 |
with gr.Row().style(equal_height=False):
|
| 499 |
with gr.Column(variant='panel'):
|
| 500 |
with gr.Tabs(elem_id="sadtalker_source_image"):
|
| 501 |
+
with gr.TabItem('Upload image'):
|
| 502 |
with gr.Row():
|
| 503 |
+
source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image").style(width=512)
|
|
|
|
| 504 |
|
| 505 |
with gr.Tabs(elem_id="sadtalker_driven_audio"):
|
| 506 |
+
with gr.TabItem('Upload OR TTS'):
|
| 507 |
+
with gr.Column(variant='panel'):
|
| 508 |
+
driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 509 |
|
| 510 |
+
with gr.Column(variant='panel'):
|
| 511 |
with gr.Tabs(elem_id="sadtalker_checkbox"):
|
| 512 |
+
with gr.TabItem('Settings'):
|
| 513 |
+
gr.Markdown("need help? please visit our [best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md) for more detials")
|
| 514 |
with gr.Column(variant='panel'):
|
| 515 |
# width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
|
| 516 |
# height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
|
| 517 |
+
pose_style = gr.Slider(minimum=0, maximum=46, step=1, label="Pose style", value=0) #
|
| 518 |
+
size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
|
| 519 |
+
preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
|
| 520 |
+
is_still_mode = gr.Checkbox(label="Still Mode (fewer hand motion, works with preprocess `full`)")
|
| 521 |
+
batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=2)
|
| 522 |
+
enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
|
| 523 |
+
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
|
| 524 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
with gr.Tabs(elem_id="sadtalker_genearted"):
|
| 526 |
+
gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
|
|
|
|
|
|
|
| 527 |
|
| 528 |
submit.click(
|
| 529 |
+
fn=sad_talker.test,
|
| 530 |
+
inputs=[source_image,
|
| 531 |
+
driven_audio,
|
| 532 |
+
preprocess_type,
|
| 533 |
+
is_still_mode,
|
| 534 |
+
enhancer,
|
| 535 |
+
batch_size,
|
| 536 |
+
size_of_image,
|
| 537 |
+
pose_style
|
| 538 |
+
],
|
| 539 |
+
outputs=[gen_video]
|
| 540 |
+
)
|
| 541 |
+
|
| 542 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 543 |
gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
|
| 544 |
+
gr.Markdown("<center>💡 - 如何使用此程序:输入您对ChatGLM的提问后,依次点击“开始和GLM2交流吧”、“生成对应的音频吧”、“开始AI声音克隆吧”三个按键即可;使用声音克隆功能时,请先上传一段您喜欢的音频</center>")
|
| 545 |
gr.HTML('''
|
| 546 |
<div class="footer">
|
| 547 |
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
|