Commit 45248cd · 1 parent: 89f76e6
Update vc_infer_pipeline.py

Files changed: vc_infer_pipeline.py (+6, -36)

vc_infer_pipeline.py  CHANGED
@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss,
+import pyworld, os, traceback, faiss,librosa
 from scipy import signal
 from functools import lru_cache
 
@@ -53,16 +53,7 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # 免查询时长阈值 (duration threshold for skipping the query)
         self.device = config.device
 
-    def get_f0(
-        self,
-        input_audio_path,
-        x,
-        p_len,
-        f0_up_key,
-        f0_method,
-        filter_radius,
-        inp_f0=None,
-    ):
+    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -86,31 +77,10 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path]
-            f0
-            if
+            input_audio_path2wav[input_audio_path]=x.astype(np.double)
+            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
+            if(filter_radius>2):
                 f0 = signal.medfilt(f0, 3)
-        elif f0_method == "crepe":
-            model = "full"
-            # Pick a batch size that doesn't cause memory errors on your gpu
-            batch_size = 512
-            # Compute pitch using first gpu
-            audio = torch.tensor(np.copy(x))[None].float()
-            f0, pd = torchcrepe.predict(
-                audio,
-                self.sr,
-                self.window,
-                f0_min,
-                f0_max,
-                model,
-                batch_size=batch_size,
-                device=self.device,
-                return_periodicity=True,
-            )
-            pd = torchcrepe.filter.median(pd, 3)
-            f0 = torchcrepe.filter.mean(f0, 3)
-            f0[pd < 0.1] = 0
-            f0 = f0[0].cpu().numpy()
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         tf0 = self.sr // self.window  # 每秒f0点数 (f0 points per second)
@@ -133,7 +103,7 @@ class VC(object):
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(
+        f0_coarse = np.rint(f0_mel).astype(int)
         return f0_coarse, f0bak  # 1-0
 
     def vc(
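Note on the new harvest branch: get_f0 now stores the float64 waveform in the module-level dict input_audio_path2wav and delegates the actual pitch tracking to cache_harvest_f0, whose definition is not part of this diff. A minimal sketch of how such a path-keyed, lru_cache-backed Harvest extractor can look, assuming it wraps pyworld.harvest plus a StoneMask refinement (the helper in the file may differ in detail):

```python
import numpy as np
import pyworld
from functools import lru_cache

# Filled by get_f0 before the cached call: path -> float64 waveform.
input_audio_path2wav = {}


@lru_cache(maxsize=None)
def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
    # Key the cache on the audio path so repeated conversions of the same
    # file skip the comparatively slow Harvest analysis.
    audio = input_audio_path2wav[input_audio_path]
    f0, t = pyworld.harvest(
        audio,
        fs,
        f0_ceil=f0max,
        f0_floor=f0min,
        frame_period=frame_period,  # 10 ms hop, matching the call in the diff
    )
    # StoneMask refines the raw Harvest estimate.
    f0 = pyworld.stonemask(audio, f0, t, fs)
    return f0
```

Keying the cache on the path rather than the array sidesteps the fact that functools.lru_cache cannot hash NumPy arrays, which is presumably why the waveform travels through a global dict. When filter_radius > 2, the branch then smooths the track with scipy.signal.medfilt(f0, 3) as shown in the hunk.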
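For reference, the crepe branch deleted in the third hunk computed f0 with torchcrepe and gated it on periodicity. Rewritten as a standalone sketch it looks roughly like this; the function name and the sr, window (hop length in samples), and device parameters are mine and simply mirror the attributes the deleted lines used:

```python
import numpy as np
import torch
import torchcrepe


def crepe_f0(x, sr, window, f0_min, f0_max, device):
    # Reconstruction of the branch removed in this commit.
    audio = torch.tensor(np.copy(x))[None].float()
    f0, pd = torchcrepe.predict(
        audio,
        sr,
        window,            # hop length in samples
        f0_min,
        f0_max,
        "full",            # full CREPE model
        batch_size=512,    # pick a batch size that fits your GPU memory
        device=device,
        return_periodicity=True,
    )
    # Smooth both tracks, then treat low-periodicity frames as unvoiced.
    pd = torchcrepe.filter.median(pd, 3)
    f0 = torchcrepe.filter.mean(f0, 3)
    f0[pd < 0.1] = 0
    return f0[0].cpu().numpy()
```

Frames whose periodicity falls below 0.1 are zeroed, matching the deleted lines, so the downstream coarse quantization treats them as unvoiced.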
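The last hunk only touches the dtype cast, but for context: after the per-method estimate, get_f0 transposes the curve by f0_up_key semitones (f0 *= pow(2, f0_up_key / 12)) and quantizes it to coarse mel-scale bins in the range 1-255. A sketch of that tail, assuming the usual 1127 * ln(1 + f0 / 700) mel mapping and an f0_max around 1100 Hz; only f0_min = 50 and the clamping lines are actually visible in this diff:

```python
import numpy as np


def coarse_f0(f0, f0_up_key, f0_min=50.0, f0_max=1100.0):
    # Transpose by f0_up_key semitones, then bucket into 1..255 mel bins.
    f0 = f0 * pow(2, f0_up_key / 12)
    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
    f0_mel = 1127 * np.log(1 + f0 / 700)
    # Map voiced frames onto bins 1..255; unvoiced frames (f0 == 0, mel == 0)
    # are clamped to bin 1 below.
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
        f0_mel_max - f0_mel_min
    ) + 1
    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > 255] = 255
    f0_coarse = np.rint(f0_mel).astype(int)
    return f0_coarse, f0
```

np.rint(...).astype(int) (the new side of the hunk) yields the integer bin indices returned as f0_coarse, alongside the untouched Hz curve returned as f0bak.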