kevinwang676 committed
Commit 78f6a16 · Parent(s): e5ccbbc

Update vc_infer_pipeline.py

Files changed (1)
  1. vc_infer_pipeline.py +36 -6
vc_infer_pipeline.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss,librosa
+import pyworld, os, traceback, faiss, librosa, torchcrepe
 from scipy import signal
 from functools import lru_cache
 
@@ -53,7 +53,16 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # duration threshold for query-free processing
         self.device = config.device
 
-    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
+    def get_f0(
+        self,
+        input_audio_path,
+        x,
+        p_len,
+        f0_up_key,
+        f0_method,
+        filter_radius,
+        inp_f0=None,
+    ):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -77,10 +86,31 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path]=x.astype(np.double)
-            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
-            if(filter_radius>2):
+            input_audio_path2wav[input_audio_path] = x.astype(np.double)
+            f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
+            if filter_radius > 2:
                 f0 = signal.medfilt(f0, 3)
+        elif f0_method == "crepe":
+            model = "full"
+            # Pick a batch size that doesn't cause memory errors on your gpu
+            batch_size = 512
+            # Compute pitch using first gpu
+            audio = torch.tensor(np.copy(x))[None].float()
+            f0, pd = torchcrepe.predict(
+                audio,
+                self.sr,
+                self.window,
+                f0_min,
+                f0_max,
+                model,
+                batch_size=batch_size,
+                device=self.device,
+                return_periodicity=True,
+            )
+            pd = torchcrepe.filter.median(pd, 3)
+            f0 = torchcrepe.filter.mean(f0, 3)
+            f0[pd < 0.1] = 0
+            f0 = f0[0].cpu().numpy()
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         tf0 = self.sr // self.window  # number of f0 points per second
@@ -103,7 +133,7 @@ class VC(object):
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(int)
+        f0_coarse = np.rint(f0_mel).astype(np.int)
         return f0_coarse, f0bak  # 1-0
 
     def vc(
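
For readers unfamiliar with torchcrepe, the new "crepe" branch follows its standard predict-then-filter pattern: run the CREPE model over the waveform, median-filter the periodicity, mean-filter the pitch, and zero out low-periodicity (unvoiced) frames. Below is a minimal self-contained sketch of that same pattern outside the VC class. It assumes a 16 kHz mono float waveform, a 160-sample hop, and an f0 range of 50-1100 Hz (plausible values for self.sr, self.window, f0_min, and f0_max in this pipeline); the helper name extract_f0_crepe, the default arguments, and the sine-wave check are illustrative, not part of the commit.

# Minimal sketch of the torchcrepe pitch-extraction pattern used in the new
# "crepe" branch. Assumptions (not from the commit): 16 kHz input, 160-sample
# hop, f0 range 50-1100 Hz, and the helper name extract_f0_crepe.
import numpy as np
import torch
import torchcrepe


def extract_f0_crepe(x, sr=16000, hop=160, f0_min=50, f0_max=1100,
                     f0_up_key=0, device="cpu"):
    """Return a frame-level f0 track (Hz) for a 1-D float waveform x."""
    audio = torch.tensor(np.copy(x))[None].float()  # shape (1, n_samples)
    f0, pd = torchcrepe.predict(
        audio,
        sr,
        hop,
        f0_min,
        f0_max,
        "full",            # the commit hard-codes the "full" CREPE model
        batch_size=512,    # lower this if the GPU runs out of memory
        device=device,
        return_periodicity=True,
    )
    pd = torchcrepe.filter.median(pd, 3)  # smooth the voicing confidence
    f0 = torchcrepe.filter.mean(f0, 3)    # smooth the pitch track
    f0[pd < 0.1] = 0                      # treat low-periodicity frames as unvoiced
    f0 = f0[0].cpu().numpy()
    return f0 * pow(2, f0_up_key / 12)    # optional semitone shift, as in get_f0


# Quick check: a 1-second 220 Hz sine should give f0 close to 220 on voiced frames.
if __name__ == "__main__":
    t = np.arange(16000) / 16000
    wav = 0.5 * np.sin(2 * np.pi * 220 * t).astype(np.float32)
    print(extract_f0_crepe(wav)[:10])

The periodicity threshold of 0.1 and the 3-frame median/mean filters are taken directly from the added branch; the filtering suppresses isolated outlier frames before the unvoiced mask is applied.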