from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
from tuneavideo.models.unet import UNet3DConditionModel
from tuneavideo.util import save_videos_grid
import torch
import gradio as gr

# Base Stable Diffusion checkpoints selectable from the UI dropdown.
model_list = [
    "runwayml/stable-diffusion-v1-5",
    "CompVis/stable-diffusion-v1-4",
    "prompthero/openjourney",
    "dreamlike-art/dreamlike-photoreal-2.0",
    "dreamlike-art/dreamlike-diffusion-1.0",
]
def tune_video_predict(
    pipe_id: str,
    prompt: str,
    video_length: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
):
    # Load the fine-tuned temporal UNet from the Tune-A-Video checkpoint and plug it
    # into the selected Stable Diffusion base model.
    unet = UNet3DConditionModel.from_pretrained(
        "Tune-A-Video-library/a-man-is-surfing", subfolder="unet", torch_dtype=torch.float16
    ).to("cuda")
    pipe = TuneAVideoPipeline.from_pretrained(pipe_id, unet=unet, torch_dtype=torch.float16).to("cuda")
    # Generate the frames and save them as an animated GIF grid.
    video = pipe(
        prompt,
        video_length=video_length,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).videos
    output_path = save_videos_grid(video, save_path="output", path=f"{prompt}.gif")
    return output_path
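
# For reference, the predictor can also be called outside the Gradio UI; a minimal
# sketch (assuming a CUDA device is available), mirroring the first entry in the
# `examples` list defined below:
#
#   gif_path = tune_video_predict(
#       "CompVis/stable-diffusion-v1-4", "a panda is surfing",
#       video_length=5, height=416, width=416,
#       num_inference_steps=50, guidance_scale=7.5,
#   )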
demo_inputs = [
    gr.Dropdown(
        label="Model",
        choices=model_list,
        value="CompVis/stable-diffusion-v1-4",
    ),
    gr.Textbox(
        label="Prompt",
        value="a flower blooming",
    ),
    gr.Slider(
        label="Video Length",
        minimum=1,
        maximum=50,
        value=8,
        step=1,
    ),
    gr.Slider(
        label="Height",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Width",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Num Inference Steps",
        minimum=1,
        maximum=100,
        value=50,
        step=1,
    ),
    gr.Slider(
        label="Guidance Scale",
        minimum=0.0,
        maximum=100,
        value=7.5,
        step=0.5,
    ),
]
# gr.outputs.Video(type=...) is deprecated; gr.Video with format="gif" is the current equivalent.
demo_outputs = gr.Video(format="gif", label="Output")
examples = [
    ["CompVis/stable-diffusion-v1-4", "a panda is surfing", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/disco-diffusion-style", "ddfusion style on the church", 5, 416, 416, 50, 7.5],
    # ["sd-dreambooth-library/nasa-space-v2-768", "nasa style galaxy moving", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head, wearing a pink hat, is surfing.", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head is surfing in the forest.", 5, 416, 416, 50, 7.5],
]
description = "This application generates a short video from a text prompt. To get started, simply enter a prompt. The default model in the dropdown is a generic model with which you can generate anything. Alternatively, for more photorealistic generations, you can pick one of the other models in the dropdown. These are DreamBooth models trained on a specific object name, so make sure you know what the object is called. You can find example prompts for DreamBooth models in the Examples section right below the interface."
title = "Tune-A-Video: One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation"

demo_app = gr.Interface(
    fn=tune_video_predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    examples=examples,
    cache_examples=False,
    title=title,
    theme="huggingface",
    description=description,
)
demo_app.launch(debug=True, enable_queue=True)
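# Note: newer Gradio releases dropped the `enable_queue` argument from launch(); if the
# installed version rejects it, the equivalent (assuming a recent Gradio) is:
#
#   demo_app.queue().launch(debug=True)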