diff --git a/.gitattributes b/.gitattributes index 50ed1a3fa32f3d6c3860ecaf285d6fdc80ecf6f7..99b1ca1533c7ee5e072f1d169cb0b1f3e9748836 100644 --- a/.gitattributes +++ b/.gitattributes @@ -39,3 +39,28 @@ model/gambino/Hamza.png filter=lfs diff=lfs merge=lfs -text model/angele/Angele.png filter=lfs diff=lfs merge=lfs -text model/leto/Leto.png filter=lfs diff=lfs merge=lfs -text NotoSansSC-Regular.otf filter=lfs diff=lfs merge=lfs -text +SadTalker/checkpoints/mapping_00109-model.pth.tar filter=lfs diff=lfs merge=lfs -text +SadTalker/checkpoints/mapping_00229-model.pth.tar filter=lfs diff=lfs merge=lfs -text +docs/example_crop.gif filter=lfs diff=lfs merge=lfs -text +docs/example_crop_still.gif filter=lfs diff=lfs merge=lfs -text +docs/example_full.gif filter=lfs diff=lfs merge=lfs -text +docs/example_full_enhanced.gif filter=lfs diff=lfs merge=lfs -text +docs/free_view_result.gif filter=lfs diff=lfs merge=lfs -text +docs/resize_good.gif filter=lfs diff=lfs merge=lfs -text +docs/resize_no.gif filter=lfs diff=lfs merge=lfs -text +docs/using_ref_video.gif filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/chinese_news.wav filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/deyu.wav filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/eluosi.wav filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/fayu.wav filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/imagine.wav filter=lfs diff=lfs merge=lfs -text +examples/driven_audio/japanese.wav filter=lfs diff=lfs merge=lfs -text +examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 filter=lfs diff=lfs merge=lfs -text +examples/ref_video/WDA_KatieHill_000.mp4 filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_16.png filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_17.png filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_3.png filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_4.png filter=lfs diff=lfs merge=lfs -text 
+examples/source_image/art_5.png filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_8.png filter=lfs diff=lfs merge=lfs -text +examples/source_image/art_9.png filter=lfs diff=lfs merge=lfs -text diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245 --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/LICENSE b/LICENSE index 8bbcbdad1f797051187c5b916a79e9cdc253e0f8..b2a615ac931ce1e81df51deb56c3df2414b59e63 100644 --- a/LICENSE +++ b/LICENSE @@ -1,51 +1,21 @@ MIT License -Copyright (c) 2023 liujing04 -Copyright (c) 2023 源文雨 -Copyright (c) 2023 on9.moe Webslaves +Copyright (c) 2023 Tencent AI Lab - 本软件及其相关代码以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。 - 如不认可该条款,则不能使用或引用软件包内任何代码和文件。 +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -特此授予任何获得本软件和相关文档文件(以下简称“软件”)副本的人免费使用、复制、修改、合并、出版、分发、再授权和/或销售本软件的权利,以及授予本软件所提供的人使用本软件的权利,但须符合以下条件: -上述版权声明和本许可声明应包含在软件的所有副本或实质部分中。 -软件是“按原样”提供的,没有任何明示或暗示的保证,包括但不限于适销性、适用于特定目的和不侵权的保证。在任何情况下,作者或版权持有人均不承担因软件或软件的使用或其他交易而产生、产生或与之相关的任何索赔、损害赔偿或其他责任,无论是在合同诉讼、侵权诉讼还是其他诉讼中。 - -相关引用库协议如下: -################# -ContentVec -https://github.com/auspicious3000/contentvec/blob/main/LICENSE -MIT License -################# -VITS -https://github.com/jaywalnut310/vits/blob/main/LICENSE -MIT License -################# -HIFIGAN -https://github.com/jik876/hifi-gan/blob/master/LICENSE -MIT License -################# -gradio -https://github.com/gradio-app/gradio/blob/main/LICENSE -Apache License 2.0 -################# -ffmpeg -https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3 -https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2021-02-28-12-32/ffmpeg-n4.3.2-160-gfbb9368226-win64-lgpl-4.3.zip -LPGLv3 License -MIT License -################# -ultimatevocalremovergui -https://github.com/Anjok07/ultimatevocalremovergui/blob/master/LICENSE -https://github.com/yang123qwe/vocal_separation_by_uvr5 -MIT License -################# -audio-slicer -https://github.com/openvpi/audio-slicer/blob/main/LICENSE -MIT License +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE 
AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index e92d9ef2dff45520b78329d36bb9f686e7a69d8f..23dd00504c9f96dba288d397a679905e4fe33f81 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,268 @@ ---- -title: VoiceChange -emoji: 👀 -colorFrom: blue -colorTo: purple -sdk: gradio -sdk_version: 3.28.3 -app_file: app_multi.py -pinned: false -license: mit -duplicated_from: BartPoint/VoiceChange ---- +
+
+
+
+
+
+
+- 🔥 Several new modes, e.g., `still mode`, `reference mode`, and `resize mode`, are now online for better and more customizable applications.
+
+- 🔥 Happy to see more community demos at [bilibili](https://search.bilibili.com/all?keyword=sadtalker&from_source=webtop_search&spm_id_from=333.1007&search_source=3
+), [Youtube](https://www.youtube.com/results?search_query=sadtalker&sp=CAM%253D) and [twitter #sadtalker](https://twitter.com/search?q=%23sadtalker&src=typed_query).
+
+## 📋 Changelog (Previous changelog can be found [here](docs/changlelog.md))
+
+- __[2023.06.12]__: add more new features in WEBUI extension, see the discussion [here](https://github.com/OpenTalker/SadTalker/discussions/386).
+
+- __[2023.06.05]__: released a new 512 beta face model; fixed some bugs and improved performance.
+
+- __[2023.04.15]__: Adding automatic1111 colab by @camenduru, thanks for this awesome colab: [](https://colab.research.google.com/github/camenduru/stable-diffusion-webui-colab/blob/main/video/stable/stable_diffusion_1_5_video_webui_colab.ipynb).
+
+- __[2023.04.12]__: adding a more detailed sd-webui installation document, fixed reinstallation problem.
+
+- __[2023.04.12]__: Fixed the sd-webui safety issues caused by 3rd-party packages; optimized the output path in `sd-webui-extension`.
+
+- __[2023.04.08]__: ❗️❗️❗️ In v0.0.2, we add a logo watermark to the generated video to prevent abuse, since the results are very realistic.
+
+- __[2023.04.08]__: v0.0.2, full image animation, adding a Baidu drive link for downloading checkpoints. Optimized the enhancer logic.
+
+
+## 🚧 TODO: See the Discussion https://github.com/OpenTalker/SadTalker/issues/280
+
+## If you have any problem, please view our [FAQ](docs/FAQ.md) before opening an issue.
+
+
+
+## ⚙️ 1. Installation.
+
+Tutorials from communities: [中文windows教程](https://www.bilibili.com/video/BV1Dc411W7V6/) | [日本語コース](https://br-d.fanbox.cc/posts/5685086?utm_campaign=manage_post_page&utm_medium=share&utm_source=twitter)
+
+### Linux:
+
+1. Installing [anaconda](https://www.anaconda.com/), python and git.
+
+2. Creating the env and install the requirements.
+ ```bash
+ git clone https://github.com/Winfredy/SadTalker.git
+
+ cd SadTalker
+
+ conda create -n sadtalker python=3.8
+
+ conda activate sadtalker
+
+ pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
+
+ conda install ffmpeg
+
+ pip install -r requirements.txt
+
+ ### tts is optional for gradio demo.
+ ### pip install TTS
+
+ ```
+### Windows ([中文windows教程](https://www.bilibili.com/video/BV1Dc411W7V6/)):
+
+1. Install [Python 3.10.6](https://www.python.org/downloads/windows/), checking "Add Python to PATH".
+2. Install [git](https://git-scm.com/download/win) manually (OR `scoop install git` via [scoop](https://scoop.sh/)).
+3. Install `ffmpeg`, following [this instruction](https://www.wikihow.com/Install-FFmpeg-on-Windows) (OR using `scoop install ffmpeg` via [scoop](https://scoop.sh/)).
+4. Download our SadTalker repository, for example by running `git clone https://github.com/Winfredy/SadTalker.git`.
+5. Download the `checkpoint` and `gfpgan` [below↓](https://github.com/Winfredy/SadTalker#-2-download-trained-models).
+6. Run `start.bat` from Windows Explorer as a normal, non-administrator user; a gradio WebUI demo will be started.
+
+### Macbook:
+
+More tips about installation on Macbook, and the Docker file, can be found [here](docs/install.md)
+
+## 📥 2. Download Trained Models.
+
+You can run the following script to put all the models in the right place.
+
+```bash
+bash scripts/download_models.sh
+```
+
+Other alternatives:
+> we also provide an offline patch (`gfpgan/`), thus, no model will be downloaded when generating.
+
+**Google Drive**: download our pre-trained model from [this link (main checkpoints)](https://drive.google.com/file/d/1gwWh45pF7aelNP_P78uDJL8Sycep-K7j/view?usp=sharing) and [gfpgan (offline patch)](https://drive.google.com/file/d/19AIBsmfcHW6BRJmeqSFlG5fL445Xmsyi?usp=sharing)
+
+**Github Release Page**: download all the files from the [latest GitHub release page](https://github.com/Winfredy/SadTalker/releases), and then put them in `./checkpoints`.
+
+**百度云盘**: we provided the downloaded model in [checkpoints, 提取码: sadt.](https://pan.baidu.com/s/1P4fRgk9gaSutZnn8YW034Q?pwd=sadt) And [gfpgan, 提取码: sadt.](https://pan.baidu.com/s/1kb1BCPaLOWX1JJb9Czbn6w?pwd=sadt)
+
+
+
+
+
+
+
|  |  |
+
+
+ In `resize` mode, we resize the whole image to generate the full talking-head video. Thus, an image similar to an ID photo can be produced. ⚠️ It will produce bad results for full-person images.
+
+
+
+
+|
|
|
+|:--------------------: |:--------------------: |
+| ❌ not suitable for resize mode | ✅ good for resize mode |
+|
|
|
+
+In `full` mode, our model will automatically process the cropped region and paste it back into the original image. Remember to use `--still` to keep the original head pose.
+
+| input | `--still` | `--still` & `enhancer` |
+|:--------------------: |:--------------------: | :--:|
+|
|
|
+
+
+### About `--enhancer`
+
+For better facial quality, we integrate [gfpgan](https://github.com/TencentARC/GFPGAN) and [real-esrgan](https://github.com/xinntao/Real-ESRGAN) for different purposes. Just adding `--enhancer
| |
+
+> Kindly ensure to activate the audio as the default audio playing is incompatible with GitHub.
+
+
+
+#### Reference eye-blink mode.
+
+| Input, w/ reference video , reference video |
+|:-------------: |
+| |
+| If the reference video is shorter than the input audio, we will loop the reference video. |
+
+
+
+#### Generating 4D free-view talking examples from audio and a single image
+
+We use `input_yaw`, `input_pitch`, `input_roll` to control head pose. For example, `--input_yaw -20 30 10` means the input head yaw degree changes from -20 to 30 and then changes from 30 to 10.
+```bash
+python inference.py --driven_audio
+
+2. Download the checkpoints manually, for Linux and Mac:
+
+ ```bash
+
+ cd SOMEWHERE_YOU_LIKE
+
+ bash <(wget -qO- https://raw.githubusercontent.com/Winfredy/SadTalker/main/scripts/download_models.sh)
+ ```
+
+ For windows, you can download all the checkpoints from [google drive](https://drive.google.com/drive/folders/1Wd88VDoLhVzYsQ30_qDVluQr_Xm46yHT?usp=sharing) or [百度云盘](https://pan.baidu.com/s/1nXuVNd0exUl37ISwWqbFGA?pwd=sadt) 提取码: sadt.
+
+3.1. Option 1: put the checkpoints in `stable-diffusion-webui/models/SadTalker` or `stable-diffusion-webui/extensions/SadTalker/checkpoints/`; the checkpoints will be detected automatically.
+
+3.2. Option 2: Set the path of `SADTALKER_CHECKPOINTS` in `webui_user.sh` (linux) or `webui_user.bat` (windows) by:
+
+ > only works if you are directly starting webui from `webui_user.sh` or `webui_user.bat`.
+
+ ```bash
+ # windows (webui_user.bat)
+ set SADTALKER_CHECKPOINTS=D:\SadTalker\checkpoints
+
+ # linux (webui_user.sh)
+ export SADTALKER_CHECKPOINTS=/path/to/SadTalker/checkpoints
+ ```
+
+4. Then, start the webui via `webui.sh` or `webui_user.sh` (linux), `webui_user.bat` (windows), or any other method; SadTalker can then be used in stable-diffusion-webui directly.
+
+
+
+## Questions
+
+1. If you are running on CPU, you need to specify `--disable-safe-unpickle` in `webui_user.sh` or `webui_user.bat`.
+
+ ```bash
+ # windows (webui_user.bat)
+ set COMMANDLINE_ARGS="--disable-safe-unpickle"
+
+ # linux (webui_user.sh)
+ export COMMANDLINE_ARGS="--disable-safe-unpickle"
+ ```
+
+
+
+(Some [important discussion](https://github.com/Winfredy/SadTalker/issues/78) if you are unable to use `full` mode).
diff --git a/examples/driven_audio/RD_Radio31_000.wav b/examples/driven_audio/RD_Radio31_000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..3b04940a0bff7481179c29bfc47553d9c4224bcf
Binary files /dev/null and b/examples/driven_audio/RD_Radio31_000.wav differ
diff --git a/examples/driven_audio/RD_Radio34_002.wav b/examples/driven_audio/RD_Radio34_002.wav
new file mode 100644
index 0000000000000000000000000000000000000000..6813e812a8d1c57cb2f02eee3fece68a0864d96e
Binary files /dev/null and b/examples/driven_audio/RD_Radio34_002.wav differ
diff --git a/examples/driven_audio/RD_Radio36_000.wav b/examples/driven_audio/RD_Radio36_000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c73adfed5f142886940bc249904d77f9e54befda
Binary files /dev/null and b/examples/driven_audio/RD_Radio36_000.wav differ
diff --git a/examples/driven_audio/RD_Radio40_000.wav b/examples/driven_audio/RD_Radio40_000.wav
new file mode 100644
index 0000000000000000000000000000000000000000..88ce964e1734210451e3a364f87f8661db388b74
Binary files /dev/null and b/examples/driven_audio/RD_Radio40_000.wav differ
diff --git a/examples/driven_audio/bus_chinese.wav b/examples/driven_audio/bus_chinese.wav
new file mode 100644
index 0000000000000000000000000000000000000000..888647738d72dfaee99b8d40bb0ddf6f7a1872e7
Binary files /dev/null and b/examples/driven_audio/bus_chinese.wav differ
diff --git a/examples/driven_audio/chinese_news.wav b/examples/driven_audio/chinese_news.wav
new file mode 100644
index 0000000000000000000000000000000000000000..9232795586cbcb926cca70f90691a9e281d32ab9
--- /dev/null
+++ b/examples/driven_audio/chinese_news.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b0f4d313a1ca671bc4831d60bcf0c12225efbffe6c0e93e54fbfe9bcd4021cb
+size 1536078
diff --git a/examples/driven_audio/chinese_poem1.wav b/examples/driven_audio/chinese_poem1.wav
new file mode 100644
index 0000000000000000000000000000000000000000..17c0871100d454bcd95b4281ab6b153c04724fe5
Binary files /dev/null and b/examples/driven_audio/chinese_poem1.wav differ
diff --git a/examples/driven_audio/chinese_poem2.wav b/examples/driven_audio/chinese_poem2.wav
new file mode 100644
index 0000000000000000000000000000000000000000..e3b294eceff5c5ee43124b7cfa42e4a70196a45f
Binary files /dev/null and b/examples/driven_audio/chinese_poem2.wav differ
diff --git a/examples/driven_audio/deyu.wav b/examples/driven_audio/deyu.wav
new file mode 100644
index 0000000000000000000000000000000000000000..438cd45b36be0d7cec6732d1ffa1c396141a563e
--- /dev/null
+++ b/examples/driven_audio/deyu.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba1839c57770a2ab0b593ce814344bfd4d750da02acc9be9e8cf5b9113a0f88a
+size 2694784
diff --git a/examples/driven_audio/eluosi.wav b/examples/driven_audio/eluosi.wav
new file mode 100644
index 0000000000000000000000000000000000000000..336e85fe5cb8d7110fbade7684cce4a33fdffb98
--- /dev/null
+++ b/examples/driven_audio/eluosi.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a3593815dc7b68c256672baa61934c9479efa770af2065fb0886f02713606e
+size 1786672
diff --git a/examples/driven_audio/fayu.wav b/examples/driven_audio/fayu.wav
new file mode 100644
index 0000000000000000000000000000000000000000..bf5cb6e65b2f959174facc80e13ce145226991cc
--- /dev/null
+++ b/examples/driven_audio/fayu.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16ebd13626ae4171030b4ea05cceef06078483c352e4b68d469fc2a52bfffceb
+size 1940428
diff --git a/examples/driven_audio/imagine.wav b/examples/driven_audio/imagine.wav
new file mode 100644
index 0000000000000000000000000000000000000000..c02a95b80b8e2b5c4353a4047239c361e9e3d01a
--- /dev/null
+++ b/examples/driven_audio/imagine.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2db410217e074d91ae6011e1c5dc0b94f02d05d381c50af8e54253eeacad17d2
+size 1618510
diff --git a/examples/driven_audio/itosinger1.wav b/examples/driven_audio/itosinger1.wav
new file mode 100644
index 0000000000000000000000000000000000000000..4937dbb264e2fc24d4752baf8b802b0bac41be24
Binary files /dev/null and b/examples/driven_audio/itosinger1.wav differ
diff --git a/examples/driven_audio/japanese.wav b/examples/driven_audio/japanese.wav
new file mode 100644
index 0000000000000000000000000000000000000000..63db9ffc287a9186f144b635f87bf352ba30ff22
--- /dev/null
+++ b/examples/driven_audio/japanese.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3db5426d0b158799e2be4f609b11f75bfbd4affffe18e9a1c8e6f241fcdedcfc
+size 2622712
diff --git a/examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4 b/examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..a9d1219ed3f515765753a3c2e4bb97655781bcc4
--- /dev/null
+++ b/examples/ref_video/WDA_AlexandriaOcasioCortez_000.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a85242c3fc4d50e2202cea393b9e7ee59019759b68e78e26a254d528c22615a7
+size 2257667
diff --git a/examples/ref_video/WDA_KatieHill_000.mp4 b/examples/ref_video/WDA_KatieHill_000.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..61bf40411df95054efee238debd31c7d38ab6a3d
--- /dev/null
+++ b/examples/ref_video/WDA_KatieHill_000.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbb4cfd64eedc49b170c441714a9c4fd5e2c2f8a11592070ad89fbd257f2817
+size 3548230
diff --git a/examples/source_image/art_0.png b/examples/source_image/art_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8d97645a4ecd9018bf2ad6d9094cf581f816f58
Binary files /dev/null and b/examples/source_image/art_0.png differ
diff --git a/examples/source_image/art_1.png b/examples/source_image/art_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4388abe026a5ba1f6c2e9f3a782564bb611f5781
Binary files /dev/null and b/examples/source_image/art_1.png differ
diff --git a/examples/source_image/art_10.png b/examples/source_image/art_10.png
new file mode 100644
index 0000000000000000000000000000000000000000..5f6568b30f063b09cef08c54df629dae7ff54360
Binary files /dev/null and b/examples/source_image/art_10.png differ
diff --git a/examples/source_image/art_11.png b/examples/source_image/art_11.png
new file mode 100644
index 0000000000000000000000000000000000000000..4caf17ca866fe54cc5c3af33fb0e93114da1bfb9
Binary files /dev/null and b/examples/source_image/art_11.png differ
diff --git a/examples/source_image/art_12.png b/examples/source_image/art_12.png
new file mode 100644
index 0000000000000000000000000000000000000000..e15306c30f09807f7df80504032cc39b1c265b6a
Binary files /dev/null and b/examples/source_image/art_12.png differ
diff --git a/examples/source_image/art_13.png b/examples/source_image/art_13.png
new file mode 100644
index 0000000000000000000000000000000000000000..129374120f1f01580a9baa0f37d8bbbe904b2373
Binary files /dev/null and b/examples/source_image/art_13.png differ
diff --git a/examples/source_image/art_14.png b/examples/source_image/art_14.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f0489bf7cebb41346f029421fdf41dc2e52519b
Binary files /dev/null and b/examples/source_image/art_14.png differ
diff --git a/examples/source_image/art_15.png b/examples/source_image/art_15.png
new file mode 100644
index 0000000000000000000000000000000000000000..a0af242a4b3e962aef8ce5c10a5026646509bfc6
Binary files /dev/null and b/examples/source_image/art_15.png differ
diff --git a/examples/source_image/art_16.png b/examples/source_image/art_16.png
new file mode 100644
index 0000000000000000000000000000000000000000..afb659b641b564a3d850229c67d014483516af67
--- /dev/null
+++ b/examples/source_image/art_16.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f6d350055eea3abe35ee3fe9df80dcd99d8edae66ef4fc20bf06168bf189f25
+size 1480263
diff --git a/examples/source_image/art_17.png b/examples/source_image/art_17.png
new file mode 100644
index 0000000000000000000000000000000000000000..875a3e3c2e985efe7407b6c8fff99faa591b9811
--- /dev/null
+++ b/examples/source_image/art_17.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05747bb45dcf271d9bb24344bd1bce0e0746d24ce4e13545b27ad40b50c3bfe7
+size 2092096
diff --git a/examples/source_image/art_18.png b/examples/source_image/art_18.png
new file mode 100644
index 0000000000000000000000000000000000000000..96358e0e542f66d1f4fd92acd092124e738fc6fe
Binary files /dev/null and b/examples/source_image/art_18.png differ
diff --git a/examples/source_image/art_19.png b/examples/source_image/art_19.png
new file mode 100644
index 0000000000000000000000000000000000000000..4f477a1ab58994e3cb4140b1a8ca59dcc428f387
Binary files /dev/null and b/examples/source_image/art_19.png differ
diff --git a/examples/source_image/art_2.png b/examples/source_image/art_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..9560673430d461ad94980731ee0b404fcda32084
Binary files /dev/null and b/examples/source_image/art_2.png differ
diff --git a/examples/source_image/art_20.png b/examples/source_image/art_20.png
new file mode 100644
index 0000000000000000000000000000000000000000..de1ea5c975dbed93ce80c1aa70f6298703acf70f
Binary files /dev/null and b/examples/source_image/art_20.png differ
diff --git a/examples/source_image/art_3.png b/examples/source_image/art_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..f2d3c117ed2d7074ec5427ebd1e68147e4476031
--- /dev/null
+++ b/examples/source_image/art_3.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81be3a9cc605ab01cbf741330b406db5246e8bbbcb443ad43ffeca2ef161e005
+size 1353396
diff --git a/examples/source_image/art_4.png b/examples/source_image/art_4.png
new file mode 100644
index 0000000000000000000000000000000000000000..ce5fda1d95dd1d6d497648fbfb95dc53380d367e
--- /dev/null
+++ b/examples/source_image/art_4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab322220d8eab1bfefdaedea91ca5d08a34258c1ab1e585a9b1c85b32968f983
+size 3625669
diff --git a/examples/source_image/art_5.png b/examples/source_image/art_5.png
new file mode 100644
index 0000000000000000000000000000000000000000..2726da0cb91b4ab9d54eef21efa653d2f8cda959
--- /dev/null
+++ b/examples/source_image/art_5.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:199217b4c839ed849577aedcad32f2bce934628b9783ba4654a93756b25e7896
+size 1228028
diff --git a/examples/source_image/art_6.png b/examples/source_image/art_6.png
new file mode 100644
index 0000000000000000000000000000000000000000..e9f6d8f272dc9bf971285667ecbe765ede41c967
Binary files /dev/null and b/examples/source_image/art_6.png differ
diff --git a/examples/source_image/art_7.png b/examples/source_image/art_7.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8cc380aacb76a6ce9f5e41086bb1fb375a4e7db
Binary files /dev/null and b/examples/source_image/art_7.png differ
diff --git a/examples/source_image/art_8.png b/examples/source_image/art_8.png
new file mode 100644
index 0000000000000000000000000000000000000000..169035fba5a1ab690564e661e2e5ea95a5a71e87
--- /dev/null
+++ b/examples/source_image/art_8.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d704497947c07ac16534299451fc0526acddf286c2ab4ceb48161ff6facc2af
+size 3119298
diff --git a/examples/source_image/art_9.png b/examples/source_image/art_9.png
new file mode 100644
index 0000000000000000000000000000000000000000..61a02dd4a57d382f215a73d635959ae45c208635
--- /dev/null
+++ b/examples/source_image/art_9.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90f84739e2aa2388efaf0fac2b57a82df279b213a8dab9faa7af8ae7468b4e80
+size 1262963
diff --git a/examples/source_image/full3.png b/examples/source_image/full3.png
new file mode 100644
index 0000000000000000000000000000000000000000..40cd6d6d3c5b95c29d6648c2ba7d7e27c9781970
Binary files /dev/null and b/examples/source_image/full3.png differ
diff --git a/examples/source_image/full4.jpeg b/examples/source_image/full4.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..79f17f52123e8d173600e0df138a30e98ba2c6f3
Binary files /dev/null and b/examples/source_image/full4.jpeg differ
diff --git a/examples/source_image/full_body_1.png b/examples/source_image/full_body_1.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fca65c949b7c7e7f7ed9459c473314a38be791f
Binary files /dev/null and b/examples/source_image/full_body_1.png differ
diff --git a/examples/source_image/full_body_2.png b/examples/source_image/full_body_2.png
new file mode 100644
index 0000000000000000000000000000000000000000..b7bc6228cb2f4e8c01af8d2f52bbbf62540e2412
Binary files /dev/null and b/examples/source_image/full_body_2.png differ
diff --git a/examples/source_image/happy.png b/examples/source_image/happy.png
new file mode 100644
index 0000000000000000000000000000000000000000..9d194ba9a03dfda0867703d54ea6233819c46a73
Binary files /dev/null and b/examples/source_image/happy.png differ
diff --git a/examples/source_image/happy1.png b/examples/source_image/happy1.png
new file mode 100644
index 0000000000000000000000000000000000000000..b702974cca1a648ec70efee776e484284b527c90
Binary files /dev/null and b/examples/source_image/happy1.png differ
diff --git a/examples/source_image/people_0.png b/examples/source_image/people_0.png
new file mode 100644
index 0000000000000000000000000000000000000000..8895eeb07a3e300b9bcfa3bb53e7a6a552182bc3
Binary files /dev/null and b/examples/source_image/people_0.png differ
diff --git a/examples/source_image/sad.png b/examples/source_image/sad.png
new file mode 100644
index 0000000000000000000000000000000000000000..6584467fdac971207883cdcd84b31da1dbc4dfa6
Binary files /dev/null and b/examples/source_image/sad.png differ
diff --git a/examples/source_image/sad1.png b/examples/source_image/sad1.png
new file mode 100644
index 0000000000000000000000000000000000000000..341e0cb70886995ecf72eebb4b8a4474ab7d287b
Binary files /dev/null and b/examples/source_image/sad1.png differ
diff --git a/onstart.log b/onstart.log
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/onstart.sh b/onstart.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0d4f510ecbdd6c632f34cbc54165c2df08970bab
--- /dev/null
+++ b/onstart.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# This file is run on instance start. Output in ./onstart.log
+
diff --git a/ports.log b/ports.log
new file mode 100644
index 0000000000000000000000000000000000000000..8c10144e7ff5801895bc111fd55785481eba0b53
--- /dev/null
+++ b/ports.log
@@ -0,0 +1 @@
+10246
diff --git a/quick_demo.ipynb b/quick_demo.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..8b9767d073779093782815c8f9a9b3e4d0a5653d
--- /dev/null
+++ b/quick_demo.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "M74Gs_TjYl_B"
+ },
+ "source": [
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Winfredy/SadTalker/blob/main/quick_demo.ipynb)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github"
+ },
+ "source": [
+ "### SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation \n",
+ "\n",
+ "[arxiv](https://arxiv.org/abs/2211.12194) | [project](https://sadtalker.github.io) | [Github](https://github.com/Winfredy/SadTalker)\n",
+ "\n",
+ "Wenxuan Zhang, Xiaodong Cun, Xuan Wang, Yong Zhang, Xi Shen, Yu Guo, Ying Shan, Fei Wang.\n",
+ "\n",
+ "Xi'an Jiaotong University, Tencent AI Lab, Ant Group\n",
+ "\n",
+ "CVPR 2023\n",
+ "\n",
+ "TL;DR: A realistic and stylized talking head video generation method from a single image and audio\n"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "kA89DV-sKS4i"
+ },
+ "source": [
+ "Installation (around 5 mins)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "qJ4CplXsYl_E"
+ },
+ "outputs": [],
+ "source": [
+ "### make sure that CUDA is available in Edit -> Notebook settings -> GPU\n",
+ "!nvidia-smi --query-gpu=name,memory.total,memory.free --format=csv,noheader"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Mdq6j4E5KQAR"
+ },
+ "outputs": [],
+ "source": [
+ "!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.8 2 \n",
+ "!update-alternatives --install /usr/local/bin/python3 python3 /usr/bin/python3.9 1 \n",
+ "!python --version \n",
+ "!apt-get update\n",
+ "!apt install software-properties-common\n",
+ "!sudo dpkg --remove --force-remove-reinstreq python3-pip python3-setuptools python3-wheel\n",
+ "!apt-get install python3-pip\n",
+ "\n",
+ "print('Git clone project and install requirements...')\n",
+ "!git clone https://github.com/Winfredy/SadTalker &> /dev/null\n",
+ "%cd SadTalker \n",
+ "!export PYTHONPATH=/content/SadTalker:$PYTHONPATH \n",
+ "!python3.8 -m pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113\n",
+ "!apt update\n",
+ "!apt install ffmpeg &> /dev/null \n",
+ "!python3.8 -m pip install -r requirements.txt"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DddcKB_nKsnk"
+ },
+ "source": [
+ "Download models (around 1 min)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "eDw3_UN8K2xa"
+ },
+ "outputs": [],
+ "source": [
+ "print('Download pre-trained models...')\n",
+ "!rm -rf checkpoints\n",
+ "!bash scripts/download_models.sh"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "kK7DYeo7Yl_H"
+ },
+ "outputs": [],
+ "source": [
+ "# borrow from makeittalk\n",
+ "import ipywidgets as widgets\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "print(\"Choose the image name to animate: (saved in folder 'examples/')\")\n",
+ "img_list = glob.glob1('examples/source_image', '*.png')\n",
+ "img_list.sort()\n",
+ "img_list = [item.split('.')[0] for item in img_list]\n",
+ "default_head_name = widgets.Dropdown(options=img_list, value='full3')\n",
+ "def on_change(change):\n",
+ "    if change['type'] == 'change' and change['name'] == 'value':\n",
+ "        plt.imshow(plt.imread('examples/source_image/{}.png'.format(default_head_name.value)))\n",
+ "        plt.axis('off')\n",
+ "        plt.show()\n",
+ "default_head_name.observe(on_change)\n",
+ "display(default_head_name)\n",
+ "plt.imshow(plt.imread('examples/source_image/{}.png'.format(default_head_name.value)))\n",
+ "plt.axis('off')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-khNZcnGK4UK"
+ },
+ "source": [
+ "Animation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "ToBlDusjK5sS"
+ },
+ "outputs": [],
+ "source": [
+ "# selected audio from examples/driven_audio\n",
+ "img = 'examples/source_image/{}.png'.format(default_head_name.value)\n",
+ "print(img)\n",
+ "!python3.8 inference.py --driven_audio ./examples/driven_audio/RD_Radio31_000.wav \\\n",
+ " --source_image {img} \\\n",
+ " --result_dir ./results --still --preprocess full --enhancer gfpgan"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "fAjwGmKKYl_I"
+ },
+ "outputs": [],
+ "source": [
+ "# visualize code from makeittalk\n",
+ "from IPython.display import HTML\n",
+ "from base64 import b64encode\n",
+ "import os, sys\n",
+ "\n",
+ "# get the last from results\n",
+ "\n",
+ "results = sorted(os.listdir('./results/'))\n",
+ "\n",
+ "mp4_name = glob.glob('./results/*.mp4')[0]\n",
+ "\n",
+ "mp4 = open('{}'.format(mp4_name),'rb').read()\n",
+ "data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
+ "\n",
+ "print('Display animation: {}'.format(mp4_name), file=sys.stderr)\n",
+ "display(HTML(\"\"\"\n",
+ "  <video width=256 controls>\n",
+ "        <source src=\"%s\" type=\"video/mp4\">\n",
+ "  </video>\n",
+ "  \"\"\" % data_url))\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "provenance": []
+ },
+ "gpuClass": "standard",
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python",
+ "version": "3.9.7"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "db5031b3636a3f037ea48eb287fd3d023feb9033aefc2a9652a92e470fb0851b"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/requirements3d.txt b/requirements3d.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7ad8d9f412e1c7647d8d914b19bb40a658148416
--- /dev/null
+++ b/requirements3d.txt
@@ -0,0 +1,21 @@
+numpy==1.23.4
+face_alignment==1.3.5
+imageio==2.19.3
+imageio-ffmpeg==0.4.7
+librosa==0.9.2 #
+numba
+resampy==0.3.1
+pydub==0.25.1
+scipy==1.5.3
+kornia==0.6.8
+tqdm
+yacs==0.1.8
+pyyaml
+joblib==1.1.0
+scikit-image==0.19.3
+basicsr==1.4.2
+facexlib==0.3.0
+trimesh==3.9.20
+gradio
+gfpgan
+safetensors
\ No newline at end of file
diff --git a/webui.bat b/webui.bat
new file mode 100644
index 0000000000000000000000000000000000000000..6ff83231242ac2260c38a2a4a7ba030aa707b1a3
--- /dev/null
+++ b/webui.bat
@@ -0,0 +1,17 @@
+@echo off
+REM Create the venv on first run, activate it and start the launcher.
+IF NOT EXIST venv (
+python -m venv venv
+) ELSE (
+echo venv folder already exists, skipping creation...
+)
+call .\venv\Scripts\activate.bat
+set PYTHON="venv\Scripts\Python.exe"
+echo venv %PYTHON%
+
+%PYTHON% launcher.py
+REM Only a non-zero exit code from the launcher is reported as a failure.
+IF %ERRORLEVEL% EQU 0 exit /b 0
+echo.
+echo Launch unsuccessful. Exiting.
+pause
\ No newline at end of file
diff --git a/webui.sh b/webui.sh
new file mode 100644
index 0000000000000000000000000000000000000000..245750237954e140777c0bd20e6d26a1f9d1f74e
--- /dev/null
+++ b/webui.sh
@@ -0,0 +1,140 @@
+#!/usr/bin/env bash
+
+
+# If run from macOS, load defaults from webui-macos-env.sh
+if [[ "$OSTYPE" == "darwin"* ]]; then
+ export TORCH_COMMAND="pip install torch==1.12.1 torchvision==0.13.1"
+fi
+
+# python3 executable
+if [[ -z "${python_cmd}" ]]
+then
+ python_cmd="python3"
+fi
+
+# git executable
+if [[ -z "${GIT}" ]]
+then
+ export GIT="git"
+fi
+
+# python3 venv without trailing slash (defaults to ${install_dir}/${clone_dir}/venv)
+if [[ -z "${venv_dir}" ]]
+then
+ venv_dir="venv"
+fi
+
+if [[ -z "${LAUNCH_SCRIPT}" ]]
+then
+ LAUNCH_SCRIPT="launcher.py"
+fi
+
+# this script cannot be run as root by default
+can_run_as_root=1
+
+# read any command line flags to the webui.sh script
+while getopts "f" flag > /dev/null 2>&1
+do
+ case ${flag} in
+ f) can_run_as_root=1;;
+ *) break;;
+ esac
+done
+
+# Disable sentry logging
+export ERROR_REPORTING=FALSE
+
+# Do not reinstall existing pip packages on Debian/Ubuntu
+export PIP_IGNORE_INSTALLED=0
+
+# Pretty print
+delimiter="################################################################"
+
+printf "\n%s\n" "${delimiter}"
+printf "\e[1m\e[32mInstall script for SadTalker + Web UI\n"
+printf "\e[1m\e[34mTested on Debian 11 (Bullseye)\e[0m"
+printf "\n%s\n" "${delimiter}"
+
+# Report the invoking user, or abort when that user is root and running as
+# root has not been permitted.
+printf "\n%s\n" "${delimiter}"
+if [[ $(id -u) -ne 0 || can_run_as_root -ne 0 ]]
+then
+    printf "Running on \e[1m\e[32m%s\e[0m user" "$(whoami)"
+    printf "\n%s\n" "${delimiter}"
+else
+    printf "\e[1m\e[31mERROR: This script must not be launched as root, aborting...\e[0m"
+    printf "\n%s\n" "${delimiter}"
+    exit 1
+fi
+
+if [[ -d .git ]]
+then
+ printf "\n%s\n" "${delimiter}"
+ printf "Repo already cloned, using it as install directory"
+ printf "\n%s\n" "${delimiter}"
+ install_dir="${PWD}/../"
+ clone_dir="${PWD##*/}"
+fi
+
+# Check prerequisites
+gpu_info=$(lspci 2>/dev/null | grep VGA)
+case "$gpu_info" in
+ *"Navi 1"*|*"Navi 2"*) export HSA_OVERRIDE_GFX_VERSION=10.3.0
+ ;;
+ *"Renoir"*) export HSA_OVERRIDE_GFX_VERSION=9.0.0
+ printf "\n%s\n" "${delimiter}"
+ printf "Experimental support for Renoir: make sure to have at least 4GB of VRAM and 10GB of RAM or enable cpu mode: --use-cpu all --no-half"
+ printf "\n%s\n" "${delimiter}"
+ ;;
+ *)
+ ;;
+esac
+if echo "$gpu_info" | grep -q "AMD" && [[ -z "${TORCH_COMMAND}" ]]
+then
+ export TORCH_COMMAND="pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/rocm5.2"
+fi
+
+for preq in "${GIT}" "${python_cmd}"
+do
+ if ! hash "${preq}" &>/dev/null
+ then
+ printf "\n%s\n" "${delimiter}"
+ printf "\e[1m\e[31mERROR: %s is not installed, aborting...\e[0m" "${preq}"
+ printf "\n%s\n" "${delimiter}"
+ exit 1
+ fi
+done
+
+if ! "${python_cmd}" -c "import venv" &>/dev/null
+then
+ printf "\n%s\n" "${delimiter}"
+ printf "\e[1m\e[31mERROR: python3-venv is not installed, aborting...\e[0m"
+ printf "\n%s\n" "${delimiter}"
+ exit 1
+fi
+
+printf "\n%s\n" "${delimiter}"
+printf "Create and activate python venv"
+printf "\n%s\n" "${delimiter}"
+cd "${install_dir}"/"${clone_dir}"/ || { printf "\e[1m\e[31mERROR: Can't cd to %s/%s/, aborting...\e[0m" "${install_dir}" "${clone_dir}"; exit 1; }
+if [[ ! -d "${venv_dir}" ]]
+then
+ "${python_cmd}" -m venv "${venv_dir}"
+ first_launch=1
+fi
+# shellcheck source=/dev/null
+if [[ -f "${venv_dir}"/bin/activate ]]
+then
+ source "${venv_dir}"/bin/activate
+else
+ printf "\n%s\n" "${delimiter}"
+ printf "\e[1m\e[31mERROR: Cannot activate python venv, aborting...\e[0m"
+ printf "\n%s\n" "${delimiter}"
+ exit 1
+fi
+
+printf "\n%s\n" "${delimiter}"
+printf "Launching launcher.py..."
+printf "\n%s\n" "${delimiter}"
+exec "${python_cmd}" "${LAUNCH_SCRIPT}" "$@"
\ No newline at end of file