Muhammadidrees committed on
Commit
64e6a93
·
verified ·
1 Parent(s): 33478b0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +32 -62
  2. infer.py +1 -1
app.py CHANGED
@@ -15,43 +15,18 @@ except Exception as e:
15
  os.environ["GRADIO_TEMP_DIR"] = "./tmp"
16
 
17
  try:
18
- # Define full dummy args with all attributes expected by load_models
19
- class DummyArgs:
20
- # Core model paths
21
- wan_model_dir = "./models/Wan2.1-I2V-14B-720P"
22
- fantasytalking_model_path = "./models/fantasytalking_model.ckpt"
23
- wav2vec_model_dir = "./models/wav2vec2-base-960h"
24
-
25
- # Required inference-related parameters
26
- image_path = "./assets/images/woman.png"
27
- audio_path = "./assets/audios/woman.wav"
28
- prompt = "A woman is talking."
29
- output_dir = "./output"
30
- image_size = 512
31
- audio_scale = 1.0
32
- prompt_cfg_scale = 5.0
33
- audio_cfg_scale = 5.0
34
- max_num_frames = 81
35
- inference_steps = 20
36
- fps = 23
37
- seed = 1111
38
-
39
- # ✅ The missing one that caused your error:
40
- num_persistent_param_in_dit = 7 * 10**9 # adjust if needed
41
-
42
- # Preload models
43
- print("🔄 Loading models into memory...")
44
- args = DummyArgs()
45
- pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args)
46
- print("✅ Models loaded successfully.")
47
-
48
- except Exception as e:
49
- print(f"❌ Error loading models: {e}")
50
- pipe = fantasytalking = wav2vec_processor = wav2vec = None
51
- raise e # fail fast if model load fails
52
-
53
 
54
- # pipe,fantasytalking,wav2vec_processor,wav2vec = None,None,None,None
55
  @spaces.GPU(duration=1200)
56
  def generate_video(
57
  image_path,
@@ -65,35 +40,30 @@ def generate_video(
65
  inference_steps,
66
  seed,
67
  ):
68
- try:
69
- output_dir = Path("./output")
70
- output_dir.mkdir(parents=True, exist_ok=True)
71
-
72
- image_path = Path(image_path).absolute().as_posix()
73
- audio_path = Path(audio_path).absolute().as_posix()
74
 
75
- args = create_args(
76
- image_path=image_path,
77
- audio_path=audio_path,
78
- prompt=prompt or "A person is talking.",
79
- output_dir=str(output_dir),
80
- audio_weight=audio_weight,
81
- prompt_cfg_scale=prompt_cfg_scale,
82
- audio_cfg_scale=audio_cfg_scale,
83
- image_size=image_size,
84
- max_num_frames=max_num_frames,
85
- inference_steps=inference_steps,
86
- seed=seed,
87
- )
88
 
89
- # Run inference using preloaded models
90
- save_path = main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
91
- print(f"✅ Video saved at {save_path}")
92
- return save_path
93
 
94
- except Exception as e:
95
- print(f"❌ Error generating video: {e}")
96
- return None
 
 
 
 
 
 
 
 
 
 
97
 
98
 
99
 
 
15
  os.environ["GRADIO_TEMP_DIR"] = "./tmp"
16
 
17
  try:
18
+ global pipe, fantasytalking, wav2vec_processor, wav2vec
19
+
20
+ pipe,fantasytalking,wav2vec_processor,wav2vec = load_models(args)
21
+ output_path=main(
22
+ args,pipe,fantasytalking,wav2vec_processor,wav2vec
23
+ )
24
+ return output_path # Ensure the output path is returned
25
+ except Exception as e:
26
+ print(f"Error during processing: {str(e)}")
27
+ raise gr.Error(f"Error during processing: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ pipe,fantasytalking,wav2vec_processor,wav2vec = None,None,None,None
30
  @spaces.GPU(duration=1200)
31
  def generate_video(
32
  image_path,
 
40
  inference_steps,
41
  seed,
42
  ):
43
+ # Create the temp directory if it doesn't exist
44
+ output_dir = Path("./output")
45
+ output_dir.mkdir(parents=True, exist_ok=True)
 
 
 
46
 
47
+ # Convert paths to absolute Path objects and normalize them
48
+ print(image_path)
49
+ image_path = Path(image_path).absolute().as_posix()
50
+ audio_path = Path(audio_path).absolute().as_posix()
 
 
 
 
 
 
 
 
 
51
 
52
+ # Parse the arguments
 
 
 
53
 
54
+ args = create_args(
55
+ image_path=image_path,
56
+ audio_path=audio_path,
57
+ prompt=prompt,
58
+ output_dir=str(output_dir),
59
+ audio_weight=audio_weight,
60
+ prompt_cfg_scale=prompt_cfg_scale,
61
+ audio_cfg_scale=audio_cfg_scale,
62
+ image_size=image_size,
63
+ max_num_frames=max_num_frames,
64
+ inference_steps=inference_steps,
65
+ seed=seed,
66
+ )
67
 
68
 
69
 
infer.py CHANGED
@@ -124,7 +124,7 @@ def parse_args():
124
 
125
  import torch
126
  from huggingface_hub import snapshot_download
127
- from models.wan_video_pipeline.pipeline_wan_video import WanVideoPipeline
128
  from transformers import Wav2Vec2Processor, Wav2Vec2Model
129
  from models import FantasyTalkingAudioConditionModel # adjust import if needed
130
  from model_manager import ModelManager # assuming this exists in your repo
 
124
 
125
  import torch
126
  from huggingface_hub import snapshot_download
127
+ from diffusers import WanVideoPipeline
128
  from transformers import Wav2Vec2Processor, Wav2Vec2Model
129
  from models import FantasyTalkingAudioConditionModel # adjust import if needed
130
  from model_manager import ModelManager # assuming this exists in your repo