admin committed
Commit 7dddb7e · Parent: ee01edd
Files changed (4):
  1. app.py +52 -49
  2. model.py +8 -4
  3. requirements.txt +5 -3
  4. utils.py +49 -12
app.py CHANGED
@@ -9,18 +9,16 @@ import librosa.display
 import matplotlib.pyplot as plt
 from collections import Counter
 from model import EvalNet
-from utils import get_modelist, find_files, embed_img
-
-
-TRANSLATE = {
-    "m_chest": "Chest Voice, Male",
-    "f_chest": "Chest Voice, Female",
-    "m_falsetto": "Falsetto Voice, Male",
-    "f_falsetto": "Falsetto Voice, Female",
-}
-CLASSES = list(TRANSLATE.keys())
-TEMP_DIR = "./__pycache__/tmp"
-SAMPLE_RATE = 22050
+from utils import (
+    get_modelist,
+    find_files,
+    embed_img,
+    _L,
+    SAMPLE_RATE,
+    TEMP_DIR,
+    TRANSLATE,
+    CLASSES,
+)


 def wav2mel(audio_path: str, width=0.496145124716553):
@@ -97,35 +95,39 @@ def most_frequent_value(lst: list):


 def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
-    if os.path.exists(folder_path):
-        shutil.rmtree(folder_path)
-
-    if not wav_path:
-        return None, "Please input an audio!"
-
-    spec = log_name.split("_")[-3]
-    os.makedirs(folder_path, exist_ok=True)
-    try:
+    status = "Success"
+    filename = result = None
+    try:
+        if os.path.exists(folder_path):
+            shutil.rmtree(folder_path)
+
+        if not wav_path:
+            raise ValueError("请输入音频!")
+
+        spec = log_name.split("_")[-3]
+        os.makedirs(folder_path, exist_ok=True)
         model = EvalNet(log_name, len(TRANSLATE)).model
         eval("wav2%s" % spec)(wav_path)
-
-    except Exception as e:
-        return None, f"{e}"
-
-    jpgs = find_files(folder_path, ".jpg")
-    preds = []
-    for jpg in jpgs:
-        input = embed_img(jpg)
-        output: torch.Tensor = model(input)
-        preds.append(torch.max(output.data, 1)[1])
-
-    pred_id = most_frequent_value(preds)
-    return os.path.basename(wav_path), TRANSLATE[CLASSES[pred_id]]
+        jpgs = find_files(folder_path, ".jpg")
+        preds = []
+        for jpg in jpgs:
+            input = embed_img(jpg)
+            output: torch.Tensor = model(input)
+            preds.append(torch.max(output.data, 1)[1])
+
+        pred_id = most_frequent_value(preds)
+        filename = os.path.basename(wav_path)
+        result = TRANSLATE[CLASSES[pred_id]]
+
+    except Exception as e:
+        status = f"{e}"
+
+    return status, filename, result


 if __name__ == "__main__":
     warnings.filterwarnings("ignore")
-    models = get_modelist(assign_model="AlexNet_mel")
+    models = get_modelist(assign_model="alexnet_mel")
     examples = []
     example_wavs = find_files()
     for wav in example_wavs:
@@ -135,34 +137,35 @@ if __name__ == "__main__":
     gr.Interface(
         fn=infer,
         inputs=[
-            gr.Audio(label="Upload a recording", type="filepath"),
-            gr.Dropdown(choices=models, label="Select a model", value=models[0]),
+            gr.Audio(label=_L("上传录音"), type="filepath"),
+            gr.Dropdown(choices=models, label=_L("选择模型"), value=models[0]),
         ],
         outputs=[
-            gr.Textbox(label="Audio filename", show_copy_button=True),
-            gr.Textbox(label="Singing method recognition", show_copy_button=True),
+            gr.Textbox(label=_L("状态栏"), show_copy_button=True),
+            gr.Textbox(label=_L("音频文件名"), show_copy_button=True),
+            gr.Textbox(label=_L("唱法识别"), show_copy_button=True),
         ],
         examples=examples,
         cache_examples=False,
         allow_flagging="never",
-        title="It is recommended to keep the recording length around 5s, too long will affect the recognition efficiency.",
+        title=_L("建议录音时长保持在 5s 左右, 过长会影响识别效率"),
     )

     gr.Markdown(
-        """
-# Cite
-```bibtex
-@dataset{zhaorui_liu_2021_5676893,
-  author = {Zhaorui Liu and Zijin Li},
-  title = {Music Data Sharing Platform for Computational Musicology Research (CCMUSIC DATASET)},
-  month = nov,
-  year = 2021,
-  publisher = {Zenodo},
-  version = {1.1},
-  doi = {10.5281/zenodo.5676893},
-  url = {https://doi.org/10.5281/zenodo.5676893}
-}
-```"""
+        f"# {_L('引用')}"
+        + """
+```bibtex
+@dataset{zhaorui_liu_2021_5676893,
+  author = {Zhaorui Liu and Zijin Li},
+  title = {Music Data Sharing Platform for Computational Musicology Research (CCMUSIC DATASET)},
+  month = nov,
+  year = 2021,
+  publisher = {Zenodo},
+  version = {1.1},
+  doi = {10.5281/zenodo.5676893},
+  url = {https://doi.org/10.5281/zenodo.5676893}
+}
+```"""
     )

     demo.launch()
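
The refactored `infer` now routes every failure into a single `status` string and majority-votes over the per-spectrogram predictions. The body of `most_frequent_value` is unchanged by this commit and not shown above; a minimal sketch consistent with how it is called here (each element is a one-element tensor of class ids), built on the `Counter` that app.py already imports, might look like this. The exact original may differ.

```python
from collections import Counter


def most_frequent_value(lst: list):
    # Majority vote: each element is a one-element tensor holding the
    # argmax class id for one spectrogram; the most common id wins.
    ids = [int(t) for t in lst]
    return Counter(ids).most_common(1)[0][0]
```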
model.py CHANGED
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torchvision.models as models
-from datasets import load_dataset
+from modelscope.msdatasets import MsDataset
 from utils import MODEL_DIR


@@ -17,7 +17,7 @@ class EvalNet:
         self.m_type, self.input_size = self._model_info(m_ver)

         if not hasattr(models, m_ver):
-            raise Exception("Unsupported model.")
+            raise Exception("不支持的模型")

         self.model = eval("models.%s()" % m_ver)
         linear_output = self._set_outsize()
@@ -34,11 +34,15 @@ class EvalNet:
             if ver == bb["ver"]:
                 return bb

-        print("Backbone name not found, using default option - alexnet.")
+        print("未找到骨干网络名称,使用默认选项 - alexnet")
         return backbone_list[0]

     def _model_info(self, m_ver: str):
-        backbone_list = load_dataset("monetjoe/cv_backbones", split="train")
+        backbone_list = MsDataset.load(
+            "monetjoe/cv_backbones",
+            split="v1",
+            trust_remote_code=True,
+        )
         backbone = self._get_backbone(m_ver, backbone_list)
         m_type = str(backbone["type"])
         input_size = int(backbone["input_size"])
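
For context on the swapped dependency: `_model_info` now pulls its backbone metadata from ModelScope rather than the Hugging Face `datasets` hub. Below is a standalone sketch of that lookup; the `ver`, `type`, and `input_size` field names come from the diff above, while the rest of the `monetjoe/cv_backbones` schema is an assumption.

```python
from modelscope.msdatasets import MsDataset

# Each row describes one torchvision backbone: its constructor name
# ("ver"), its family ("type"), and the input size it expects.
backbone_list = MsDataset.load(
    "monetjoe/cv_backbones",
    split="v1",
    trust_remote_code=True,
)

# The same linear scan _get_backbone performs, here for "alexnet".
for bb in backbone_list:
    if bb["ver"] == "alexnet":
        print(bb["type"], bb["input_size"])
        break
```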
requirements.txt CHANGED
@@ -1,5 +1,7 @@
-torch
-pillow
+torch==2.6.0+cu118
+-f https://download.pytorch.org/whl/torch
+torchvision==0.21.0+cu118
+-f https://download.pytorch.org/whl/torchvision
 librosa
 matplotlib
-torchvision
+modelscope[framework]==1.21.0
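
The new pins move the app onto CUDA 11.8 wheels fetched via the extra `-f` find-links lines and add ModelScope for dataset and model downloads; `pillow` is no longer pinned directly but still arrives transitively as a torchvision dependency, which the `PIL` import in utils.py relies on. A quick post-install sanity check (the expected version strings are what these pins should resolve to):

```python
import torch
import torchvision

print(torch.__version__)          # expected: 2.6.0+cu118
print(torchvision.__version__)    # expected: 0.21.0+cu118
print(torch.cuda.is_available())  # True on a CUDA 11.8-capable host
```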
utils.py CHANGED
@@ -1,15 +1,55 @@
 import os
 import torch
 import torchvision.transforms as transforms
-from huggingface_hub import snapshot_download
+import huggingface_hub
+import modelscope
 from PIL import Image

-MODEL_DIR = snapshot_download(
-    "ccmusic-database/chest_falsetto",
-    cache_dir="./__pycache__",
+EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
+
+ZH2EN = {
+    "上传录音": "Upload a recording",
+    "选择模型": "Select a model",
+    "状态栏": "Status",
+    "音频文件名": "Audio filename",
+    "唱法识别": "Singing method recognition",
+    "建议录音时长保持在 5s 左右, 过长会影响识别效率": "It is recommended to keep the recording length around 5s, too long will affect the recognition efficiency.",
+    "引用": "Cite",
+    "男真声": "Chest Voice, Male",
+    "女真声": "Chest Voice, Female",
+    "男假声": "Falsetto Voice, Male",
+    "女假声": "Falsetto Voice, Female",
+}
+
+
+MODEL_DIR = (
+    huggingface_hub.snapshot_download(
+        "ccmusic-database/chest_falsetto",
+        cache_dir="./__pycache__",
+    )
+    if EN_US
+    else modelscope.snapshot_download(
+        "ccmusic-database/chest_falsetto",
+        cache_dir="./__pycache__",
+    )
 )


+def _L(zh_txt: str):
+    return ZH2EN[zh_txt] if EN_US else zh_txt
+
+
+TRANSLATE = {
+    "m_chest": _L("男真声"),
+    "f_chest": _L("女真声"),
+    "m_falsetto": _L("男假声"),
+    "f_falsetto": _L("女假声"),
+}
+CLASSES = list(TRANSLATE.keys())
+TEMP_DIR = "./__pycache__/tmp"
+SAMPLE_RATE = 22050
+
+
 def toCUDA(x):
     if hasattr(x, "cuda"):
         if torch.cuda.is_available():
@@ -30,19 +70,16 @@ def find_files(folder_path=f"{MODEL_DIR}/examples", ext=".wav"):


 def get_modelist(model_dir=MODEL_DIR, assign_model=""):
-    try:
-        entries = os.listdir(model_dir)
-    except OSError as e:
-        print(f"Cannot access {model_dir}: {e}")
-        return
-
     output = []
-    for entry in entries:
+    for entry in os.listdir(model_dir):
+        # 获取完整路径
         full_path = os.path.join(model_dir, entry)
+        # 跳过'.git'文件夹
         if entry == ".git" or entry == "examples":
-            print(f"Skip .git / examples dir: {full_path}")
+            print(f"跳过 .git examples 文件夹: {full_path}")
             continue

+        # 检查条目是文件还是目录
         if os.path.isdir(full_path):
             model = os.path.basename(full_path)
             if assign_model and assign_model.lower() in model:
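
The new `_L` helper keys every UI string by its Chinese text and resolves it against the `LANG` environment variable once, at import time; because `TRANSLATE` is built from `_L` when utils.py is first imported, switching locale requires a restart, and any string missing from `ZH2EN` raises a `KeyError` under the English locale. A minimal usage sketch, assuming a process where `LANG` is not `zh_CN.UTF-8` (so `EN_US` is `True`); note that importing utils also triggers the `ccmusic-database/chest_falsetto` snapshot download:

```python
from utils import _L, TRANSLATE, CLASSES

print(_L("上传录音"))        # -> "Upload a recording"
print(TRANSLATE["m_chest"])  # -> "Chest Voice, Male"
print(CLASSES)               # -> ['m_chest', 'f_chest', 'm_falsetto', 'f_falsetto']
```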