Commit 45248cd · 1 parent: 89f76e6
Update vc_infer_pipeline.py

Files changed: vc_infer_pipeline.py (+6, -36)

vc_infer_pipeline.py  CHANGED
@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss,
+import pyworld, os, traceback, faiss,librosa
 from scipy import signal
 from functools import lru_cache
 
@@ -53,16 +53,7 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # 免查询时长阈值 (duration threshold for skipping the query)
         self.device = config.device
 
-    def get_f0(
-        self,
-        input_audio_path,
-        x,
-        p_len,
-        f0_up_key,
-        f0_method,
-        filter_radius,
-        inp_f0=None,
-    ):
+    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -86,31 +77,10 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path]
-            f0
-            if
+            input_audio_path2wav[input_audio_path]=x.astype(np.double)
+            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
+            if(filter_radius>2):
                 f0 = signal.medfilt(f0, 3)
-        elif f0_method == "crepe":
-            model = "full"
-            # Pick a batch size that doesn't cause memory errors on your gpu
-            batch_size = 512
-            # Compute pitch using first gpu
-            audio = torch.tensor(np.copy(x))[None].float()
-            f0, pd = torchcrepe.predict(
-                audio,
-                self.sr,
-                self.window,
-                f0_min,
-                f0_max,
-                model,
-                batch_size=batch_size,
-                device=self.device,
-                return_periodicity=True,
-            )
-            pd = torchcrepe.filter.median(pd, 3)
-            f0 = torchcrepe.filter.mean(f0, 3)
-            f0[pd < 0.1] = 0
-            f0 = f0[0].cpu().numpy()
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         tf0 = self.sr // self.window  # 每秒f0点数 (f0 points per second)
@@ -133,7 +103,7 @@ class VC(object):
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(
+        f0_coarse = np.rint(f0_mel).astype(int)
         return f0_coarse, f0bak  # 1-0
 
     def vc(
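Note on the new harvest branch: get_f0 now stores the float64 waveform in the module-level dict input_audio_path2wav and delegates the actual pitch tracking to cache_harvest_f0, whose definition is not part of this diff. A minimal sketch of how such a path-keyed, lru_cache-backed Harvest extractor can look, assuming it wraps pyworld.harvest plus a StoneMask refinement (the helper in the file may differ in detail):

```python
import numpy as np
import pyworld
from functools import lru_cache

# Filled by get_f0 before the cached call: path -> float64 waveform.
input_audio_path2wav = {}


@lru_cache(maxsize=None)
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
    # Key the cache on the audio path so repeated conversions of the same
    # file skip the comparatively slow Harvest analysis.
    audio = input_audio_path2wav[input_audio_path]
    f0, t = pyworld.harvest(
        audio,
        fs,
        f0_ceil=f0max,
        f0_floor=f0min,
        frame_period=frame_period,  # 10 ms hop, matching the call in the diff
    )
    # StoneMask refines the raw Harvest estimate.
    f0 = pyworld.stonemask(audio, f0, t, fs)
    return f0
```

Keying the cache on the path rather than the array sidesteps the fact that functools.lru_cache cannot hash NumPy arrays, which is presumably why the waveform travels through a global dict. When filter_radius > 2, the branch then smooths the track with scipy.signal.medfilt(f0, 3) as shown in the hunk.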
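For reference, the crepe branch deleted in the third hunk computed f0 with torchcrepe and gated it on periodicity. Rewritten as a standalone sketch it looks roughly like this; the function name and the sr, window (hop length in samples), and device parameters are mine and simply mirror the attributes the deleted lines used:

```python
import numpy as np
import torch
import torchcrepe


def crepe_f0(x, sr, window, f0_min, f0_max, device):
    # Reconstruction of the branch removed in this commit.
    audio = torch.tensor(np.copy(x))[None].float()
    f0, pd = torchcrepe.predict(
        audio,
        sr,
        window,            # hop length in samples
        f0_min,
        f0_max,
        "full",            # full CREPE model
        batch_size=512,    # pick a batch size that fits your GPU memory
        device=device,
        return_periodicity=True,
    )
    # Smooth both tracks, then treat low-periodicity frames as unvoiced.
    pd = torchcrepe.filter.median(pd, 3)
    f0 = torchcrepe.filter.mean(f0, 3)
    f0[pd < 0.1] = 0
    return f0[0].cpu().numpy()
```

Frames whose periodicity falls below 0.1 are zeroed, matching the deleted lines, so the downstream coarse quantization treats them as unvoiced.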
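The last hunk only touches the dtype cast, but for context: after the per-method estimate, get_f0 transposes the curve by f0_up_key semitones (f0 *= pow(2, f0_up_key / 12)) and quantizes it to coarse mel-scale bins in the range 1-255. A sketch of that tail, assuming the usual 1127 * ln(1 + f0 / 700) mel mapping and an f0_max around 1100 Hz; only f0_min = 50 and the clamping lines are actually visible in this diff:

```python
import numpy as np


def coarse_f0(f0, f0_up_key, f0_min=50.0, f0_max=1100.0):
    # Transpose by f0_up_key semitones, then bucket into 1..255 mel bins.
    f0 = f0 * pow(2, f0_up_key / 12)
    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
    f0_mel = 1127 * np.log(1 + f0 / 700)
    # Map voiced frames onto bins 1..255; unvoiced frames (f0 == 0, mel == 0)
    # are clamped to bin 1 below.
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
        f0_mel_max - f0_mel_min
    ) + 1
    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > 255] = 255
    f0_coarse = np.rint(f0_mel).astype(int)
    return f0_coarse, f0
```

np.rint(...).astype(int) (the new side of the hunk) yields the integer bin indices returned as f0_coarse, alongside the untouched Hz curve returned as f0bak.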