kevinwang676 committed
Commit 45248cd · 1 Parent(s): 89f76e6

Update vc_infer_pipeline.py

Files changed (1)
  1. vc_infer_pipeline.py +6 -36
vc_infer_pipeline.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss, librosa, torchcrepe
+import pyworld, os, traceback, faiss,librosa
 from scipy import signal
 from functools import lru_cache
 
@@ -53,16 +53,7 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # query-free duration threshold
         self.device = config.device
 
-    def get_f0(
-        self,
-        input_audio_path,
-        x,
-        p_len,
-        f0_up_key,
-        f0_method,
-        filter_radius,
-        inp_f0=None,
-    ):
+    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -86,31 +77,10 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path] = x.astype(np.double)
-            f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
-            if filter_radius > 2:
+            input_audio_path2wav[input_audio_path]=x.astype(np.double)
+            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
+            if(filter_radius>2):
                 f0 = signal.medfilt(f0, 3)
-        elif f0_method == "crepe":
-            model = "full"
-            # Pick a batch size that doesn't cause memory errors on your gpu
-            batch_size = 512
-            # Compute pitch using first gpu
-            audio = torch.tensor(np.copy(x))[None].float()
-            f0, pd = torchcrepe.predict(
-                audio,
-                self.sr,
-                self.window,
-                f0_min,
-                f0_max,
-                model,
-                batch_size=batch_size,
-                device=self.device,
-                return_periodicity=True,
-            )
-            pd = torchcrepe.filter.median(pd, 3)
-            f0 = torchcrepe.filter.mean(f0, 3)
-            f0[pd < 0.1] = 0
-            f0 = f0[0].cpu().numpy()
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         tf0 = self.sr // self.window  # f0 points per second
@@ -133,7 +103,7 @@ class VC(object):
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(np.int16)
+        f0_coarse = np.rint(f0_mel).astype(int)
         return f0_coarse, f0bak  # 1-0
 
     def vc(
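Note on the removed branch: with torchcrepe no longer imported and the "crepe" branch deleted, get_f0 only assigns f0 for the "pm" and "harvest" methods; any other value would leave f0 unassigned and fail at `f0 *= pow(2, f0_up_key / 12)` with an UnboundLocalError. A minimal caller-side guard is sketched below; the helper name and the set of supported methods are assumptions for illustration, not part of this commit.

# Hypothetical caller-side check; not part of the committed file.
SUPPORTED_F0_METHODS = {"pm", "harvest"}  # "crepe" support was removed in this commit

def validate_f0_method(f0_method: str) -> str:
    """Fail fast instead of letting get_f0 crash on an unassigned f0."""
    if f0_method not in SUPPORTED_F0_METHODS:
        raise ValueError(
            f"Unsupported f0_method {f0_method!r}; expected one of {sorted(SUPPORTED_F0_METHODS)}"
        )
    return f0_method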
 
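The other behavioral tweak is the coarse-pitch dtype: f0_mel is clipped to [1, 255] before rounding, so switching from np.int16 to the platform default int changes the array dtype but not the stored values. A quick sketch of that equivalence (the sample values below are made up for illustration):

import numpy as np

# Illustrative values only; the real f0_mel comes from the mel-scaled pitch in get_f0.
f0_mel = np.array([0.3, 1.0, 57.9, 254.6, 400.0])
f0_mel[f0_mel <= 1] = 1      # lower clip, as in get_f0
f0_mel[f0_mel > 255] = 255   # upper clip, as in get_f0
assert (np.rint(f0_mel).astype(np.int16) == np.rint(f0_mel).astype(int)).all()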