kevinwang676 committed
Commit 78f6a16 · Parent(s): e5ccbbc

Update vc_infer_pipeline.py

Files changed (1)
  1. vc_infer_pipeline.py +36 -6
vc_infer_pipeline.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np, parselmouth, torch, pdb
 from time import time as ttime
 import torch.nn.functional as F
 import scipy.signal as signal
-import pyworld, os, traceback, faiss,librosa
+import pyworld, os, traceback, faiss, librosa, torchcrepe
 from scipy import signal
 from functools import lru_cache
 
@@ -53,7 +53,16 @@ class VC(object):
         self.t_max = self.sr * self.x_max  # duration threshold for query-free processing
         self.device = config.device
 
-    def get_f0(self, input_audio_path,x, p_len, f0_up_key, f0_method,filter_radius, inp_f0=None):
+    def get_f0(
+        self,
+        input_audio_path,
+        x,
+        p_len,
+        f0_up_key,
+        f0_method,
+        filter_radius,
+        inp_f0=None,
+    ):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
         f0_min = 50
@@ -77,10 +86,31 @@ class VC(object):
                 f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
             )
         elif f0_method == "harvest":
-            input_audio_path2wav[input_audio_path]=x.astype(np.double)
-            f0=cache_harvest_f0(input_audio_path,self.sr,f0_max,f0_min,10)
-            if(filter_radius>2):
+            input_audio_path2wav[input_audio_path] = x.astype(np.double)
+            f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10)
+            if filter_radius > 2:
                 f0 = signal.medfilt(f0, 3)
+        elif f0_method == "crepe":
+            model = "full"
+            # Pick a batch size that doesn't cause memory errors on your gpu
+            batch_size = 512
+            # Compute pitch using first gpu
+            audio = torch.tensor(np.copy(x))[None].float()
+            f0, pd = torchcrepe.predict(
+                audio,
+                self.sr,
+                self.window,
+                f0_min,
+                f0_max,
+                model,
+                batch_size=batch_size,
+                device=self.device,
+                return_periodicity=True,
+            )
+            pd = torchcrepe.filter.median(pd, 3)
+            f0 = torchcrepe.filter.mean(f0, 3)
+            f0[pd < 0.1] = 0
+            f0 = f0[0].cpu().numpy()
         f0 *= pow(2, f0_up_key / 12)
         # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
         tf0 = self.sr // self.window  # number of f0 points per second
@@ -103,7 +133,7 @@ class VC(object):
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(int)
+        f0_coarse = np.rint(f0_mel).astype(np.int)
         return f0_coarse, f0bak  # 1-0
 
     def vc(
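
For readers unfamiliar with torchcrepe, the new "crepe" branch follows its standard predict-then-filter pattern: run the CREPE model over the waveform, median-filter the periodicity, mean-filter the pitch, and zero out low-periodicity (unvoiced) frames. Below is a minimal self-contained sketch of that same pattern outside the VC class. It assumes a 16 kHz mono float waveform, a 160-sample hop, and an f0 range of 50-1100 Hz (plausible values for self.sr, self.window, f0_min, and f0_max in this pipeline); the helper name extract_f0_crepe, the default arguments, and the sine-wave check are illustrative, not part of the commit.

# Minimal sketch of the torchcrepe pitch-extraction pattern used in the new
# "crepe" branch. Assumptions (not from the commit): 16 kHz input, 160-sample
# hop, f0 range 50-1100 Hz, and the helper name extract_f0_crepe.
import numpy as np
import torch
import torchcrepe


def extract_f0_crepe(x, sr=16000, hop=160, f0_min=50, f0_max=1100,
                     f0_up_key=0, device="cpu"):
    """Return a frame-level f0 track (Hz) for a 1-D float waveform x."""
    audio = torch.tensor(np.copy(x))[None].float()  # shape (1, n_samples)
    f0, pd = torchcrepe.predict(
        audio,
        sr,
        hop,
        f0_min,
        f0_max,
        "full",            # the commit hard-codes the "full" CREPE model
        batch_size=512,    # lower this if the GPU runs out of memory
        device=device,
        return_periodicity=True,
    )
    pd = torchcrepe.filter.median(pd, 3)  # smooth the voicing confidence
    f0 = torchcrepe.filter.mean(f0, 3)    # smooth the pitch track
    f0[pd < 0.1] = 0                      # treat low-periodicity frames as unvoiced
    f0 = f0[0].cpu().numpy()
    return f0 * pow(2, f0_up_key / 12)    # optional semitone shift, as in get_f0


# Quick check: a 1-second 220 Hz sine should give f0 close to 220 on voiced frames.
if __name__ == "__main__":
    t = np.arange(16000) / 16000
    wav = 0.5 * np.sin(2 * np.pi * 220 * t).astype(np.float32)
    print(extract_f0_crepe(wav)[:10])

The periodicity threshold of 0.1 and the 3-frame median/mean filters are taken directly from the added branch; the filtering suppresses isolated outlier frames before the unvoiced mask is applied.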