import torch
import imageio
import numpy as np
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image
import spaces


def compile_model():
    # Load the image-to-video model in half precision and move it to the GPU
    model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model_id, torch_dtype=torch.float16, variant="fp16"
    )
    pipe.to("cuda")

    @spaces.GPU(duration=1500)  # AoT compilation needs a long GPU slot
    def compile_unet():
        # Capture example inputs: run the pipeline once so the capture context
        # records the args/kwargs that reach pipe.unet (the output is discarded)
        with spaces.aoti_capture(pipe.unet) as call:
            image = load_image(
                "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png"
            )
            pipe(image)

        # Export the UNet with the captured inputs, then compile it ahead of time
        exported = torch.export.export(
            pipe.unet,
            args=call.args,
            kwargs=call.kwargs,
        )
        return spaces.aoti_compile(exported)

    # Swap the compiled artifact in place of the eager UNet
    compiled_unet = compile_unet()
    spaces.aoti_apply(compiled_unet, pipe.unet)
    return pipe


def generate_video(prompt: str, pipe):
    # For simplicity, use a placeholder conditioning image; a real app would
    # first generate an image from the text prompt
    image = load_image(
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png"
    )

    # Generate the video frames (a list of PIL images)
    frames = pipe(image, decode_chunk_size=8).frames[0]

    # Save the frames as an MP4 (placeholder path)
    video_path = f"/tmp/generated_video_{abs(hash(prompt))}.mp4"
    imageio.mimsave(video_path, [np.asarray(frame) for frame in frames], fps=7)
    return video_path
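

# --- Usage sketch (assumption, not part of the original snippet) ---
# A minimal illustration of wiring the two functions above into a Gradio app,
# assuming this file runs on a Hugging Face ZeroGPU Space with Gradio and the
# `spaces` SDK available. Names like `run` and `demo` are illustrative.
import gradio as gr

pipe = compile_model()  # AoT compilation runs once at startup

@spaces.GPU
def run(prompt: str):
    # Each request borrows a GPU slot and returns the path to the saved MP4
    return generate_video(prompt, pipe)

demo = gr.Interface(
    fn=run,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Video(label="Generated video"),
)

if __name__ == "__main__":
    demo.launch()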