Muhammadidrees committed on
Commit
64e6a93
·
verified ·
1 Parent(s): 33478b0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +32 -62
  2. infer.py +1 -1
app.py CHANGED
@@ -15,43 +15,18 @@ except Exception as e:
15
  os.environ["GRADIO_TEMP_DIR"] = "./tmp"
16
 
17
  try:
18
- # Define full dummy args with all attributes expected by load_models
19
- class DummyArgs:
20
- # Core model paths
21
- wan_model_dir = "./models/Wan2.1-I2V-14B-720P"
22
- fantasytalking_model_path = "./models/fantasytalking_model.ckpt"
23
- wav2vec_model_dir = "./models/wav2vec2-base-960h"
24
-
25
- # Required inference-related parameters
26
- image_path = "./assets/images/woman.png"
27
- audio_path = "./assets/audios/woman.wav"
28
- prompt = "A woman is talking."
29
- output_dir = "./output"
30
- image_size = 512
31
- audio_scale = 1.0
32
- prompt_cfg_scale = 5.0
33
- audio_cfg_scale = 5.0
34
- max_num_frames = 81
35
- inference_steps = 20
36
- fps = 23
37
- seed = 1111
38
-
39
- # ✅ The missing one that caused your error:
40
- num_persistent_param_in_dit = 7 * 10**9 # adjust if needed
41
-
42
- # Preload models
43
- print("🔄 Loading models into memory...")
44
- args = DummyArgs()
45
- pipe, fantasytalking, wav2vec_processor, wav2vec = load_models(args)
46
- print("✅ Models loaded successfully.")
47
-
48
- except Exception as e:
49
- print(f"❌ Error loading models: {e}")
50
- pipe = fantasytalking = wav2vec_processor = wav2vec = None
51
- raise e # fail fast if model load fails
52
-
53
 
54
- # pipe,fantasytalking,wav2vec_processor,wav2vec = None,None,None,None
55
  @spaces.GPU(duration=1200)
56
  def generate_video(
57
  image_path,
@@ -65,35 +40,30 @@ def generate_video(
65
  inference_steps,
66
  seed,
67
  ):
68
- try:
69
- output_dir = Path("./output")
70
- output_dir.mkdir(parents=True, exist_ok=True)
71
-
72
- image_path = Path(image_path).absolute().as_posix()
73
- audio_path = Path(audio_path).absolute().as_posix()
74
 
75
- args = create_args(
76
- image_path=image_path,
77
- audio_path=audio_path,
78
- prompt=prompt or "A person is talking.",
79
- output_dir=str(output_dir),
80
- audio_weight=audio_weight,
81
- prompt_cfg_scale=prompt_cfg_scale,
82
- audio_cfg_scale=audio_cfg_scale,
83
- image_size=image_size,
84
- max_num_frames=max_num_frames,
85
- inference_steps=inference_steps,
86
- seed=seed,
87
- )
88
 
89
- # Run inference using preloaded models
90
- save_path = main(args, pipe, fantasytalking, wav2vec_processor, wav2vec)
91
- print(f"✅ Video saved at {save_path}")
92
- return save_path
93
 
94
- except Exception as e:
95
- print(f"❌ Error generating video: {e}")
96
- return None
 
 
 
 
 
 
 
 
 
 
97
 
98
 
99
 
 
15
  os.environ["GRADIO_TEMP_DIR"] = "./tmp"
16
 
17
  try:
18
+ global pipe, fantasytalking, wav2vec_processor, wav2vec
19
+
20
+ pipe,fantasytalking,wav2vec_processor,wav2vec = load_models(args)
21
+ output_path=main(
22
+ args,pipe,fantasytalking,wav2vec_processor,wav2vec
23
+ )
24
+ return output_path # Ensure the output path is returned
25
+ except Exception as e:
26
+ print(f"Error during processing: {str(e)}")
27
+ raise gr.Error(f"Error during processing: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ pipe,fantasytalking,wav2vec_processor,wav2vec = None,None,None,None
30
  @spaces.GPU(duration=1200)
31
  def generate_video(
32
  image_path,
 
40
  inference_steps,
41
  seed,
42
  ):
43
+ # Create the temp directory if it doesn't exist
44
+ output_dir = Path("./output")
45
+ output_dir.mkdir(parents=True, exist_ok=True)
 
 
 
46
 
47
+ # Convert paths to absolute Path objects and normalize them
48
+ print(image_path)
49
+ image_path = Path(image_path).absolute().as_posix()
50
+ audio_path = Path(audio_path).absolute().as_posix()
 
 
 
 
 
 
 
 
 
51
 
52
+ # Parse the arguments
 
 
 
53
 
54
+ args = create_args(
55
+ image_path=image_path,
56
+ audio_path=audio_path,
57
+ prompt=prompt,
58
+ output_dir=str(output_dir),
59
+ audio_weight=audio_weight,
60
+ prompt_cfg_scale=prompt_cfg_scale,
61
+ audio_cfg_scale=audio_cfg_scale,
62
+ image_size=image_size,
63
+ max_num_frames=max_num_frames,
64
+ inference_steps=inference_steps,
65
+ seed=seed,
66
+ )
67
 
68
 
69
 
infer.py CHANGED
@@ -124,7 +124,7 @@ def parse_args():
124
 
125
  import torch
126
  from huggingface_hub import snapshot_download
127
- from models.wan_video_pipeline.pipeline_wan_video import WanVideoPipeline
128
  from transformers import Wav2Vec2Processor, Wav2Vec2Model
129
  from models import FantasyTalkingAudioConditionModel # adjust import if needed
130
  from model_manager import ModelManager # assuming this exists in your repo
 
124
 
125
  import torch
126
  from huggingface_hub import snapshot_download
127
+ from diffusers import WanVideoPipeline
128
  from transformers import Wav2Vec2Processor, Wav2Vec2Model
129
  from models import FantasyTalkingAudioConditionModel # adjust import if needed
130
  from model_manager import ModelManager # assuming this exists in your repo