try:
    import cn2an
except ImportError:
    print("The 'cn2an' module is not installed. Please install it using 'pip install cn2an'.")
    exit(1)
try:
    import jieba
except ImportError:
    print("The 'jieba' module is not installed. Please install it using 'pip install jieba'.")
    exit(1)

import re
import numpy as np
import wave
import jieba.posseg as pseg

def save_audio(file_name, audio, rate=24000):
    """
    Save an audio array to a WAV file.
    :param file_name: output file name
    :param audio: float waveform in the range [-1, 1]
    :param rate: sample rate in Hz
    :return: full path of the saved file
    """
    import os
    from config import DEFAULT_DIR
    audio = (audio * 32767).astype(np.int16)
    # Create the default output directory if it does not exist
    if not os.path.exists(DEFAULT_DIR):
        os.makedirs(DEFAULT_DIR)

    full_path = os.path.join(DEFAULT_DIR, file_name)
    with wave.open(full_path, "w") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(rate)
        wf.writeframes(audio.tobytes())
    return full_path

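# Illustrative usage (not executed; assumes config.DEFAULT_DIR points to a writable directory):
#   tone = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000).astype(np.float32)
#   wav_path = save_audio("tone.wav", tone)  # writes a 1-second 440 Hz mono WAV at 24 kHz
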
def combine_audio(wavs):
    """
    Concatenate multiple audio segments.
    :param wavs: list of audio arrays, each with shape (1, n_samples)
    :return: concatenated, normalized audio array
    """
    wavs = [normalize_audio(w) for w in wavs]  # normalize each segment first
    combined_audio = np.concatenate(wavs, axis=1)  # concatenate along the time axis
    return normalize_audio(combined_audio)  # normalize again after concatenation

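# Illustrative usage (not executed; each segment is expected to have shape (1, n_samples)):
#   a = np.zeros((1, 24000), dtype=np.float32)
#   b = np.ones((1, 12000), dtype=np.float32) * 0.5
#   merged = combine_audio([a, b])  # shape (1, 36000), peak normalized to 1.0
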
def normalize_audio(audio):
    """
    Normalize audio array to be between -1 and 1
    :param audio: Input audio array
    :return: Normalized audio array
    """
    audio = np.clip(audio, -1, 1)
    max_val = np.max(np.abs(audio))
    if max_val > 0:
        audio = audio / max_val
    return audio

def combine_audio_with_crossfade(audio_arrays, crossfade_duration=0.1, rate=24000):
    """
    Combine audio arrays with crossfade to avoid clipping noise at the junctions.
    :param audio_arrays: List of audio arrays to combine
    :param crossfade_duration: Duration of the crossfade in seconds
    :param rate: Sample rate of the audio
    :return: Combined audio array
    """
    crossfade_samples = int(crossfade_duration * rate)
    combined_audio = np.array([], dtype=np.float32)

    for i in range(len(audio_arrays)):
        audio_arrays[i] = np.squeeze(audio_arrays[i])  # Ensure all arrays are 1D
        if i == 0:
            combined_audio = audio_arrays[i]  # Start with the first audio array
        else:
            # Apply crossfade between the end of the current combined audio and the start of the next array
            overlap = np.minimum(len(combined_audio), crossfade_samples)
            crossfade_end = combined_audio[-overlap:]
            crossfade_start = audio_arrays[i][:overlap]
            # Crossfade by linearly blending the audio samples
            t = np.linspace(0, 1, overlap)
            crossfaded = crossfade_end * (1 - t) + crossfade_start * t
            # Combine audio by replacing the end of the current combined audio with the crossfaded audio
            combined_audio[-overlap:] = crossfaded
            # Append the rest of the new array
            combined_audio = np.concatenate((combined_audio, audio_arrays[i][overlap:]))
    return combined_audio

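# Illustrative usage (not executed; segments may be (1, n) or (n,), they are squeezed to 1D):
#   seg1 = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000).astype(np.float32)
#   seg2 = np.sin(2 * np.pi * 660 * np.arange(24000) / 24000).astype(np.float32)
#   smooth = combine_audio_with_crossfade([seg1, seg2], crossfade_duration=0.1, rate=24000)
#   # Result length: 24000 + 24000 - 2400 samples, with a 0.1 s linear blend at the seam.
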
def remove_chinese_punctuation(text):
    """
    Replace the Chinese punctuation marks [:;!(),【】『』「」《》-‘“’”:,;!()[]><-] with ','
    :param text: input text
    :return: cleaned text
    """
    chinese_punctuation_pattern = r"[:;!(),【】『』「」《》\-‘“’”:,;!\(\)\[\]><·]"
    text = re.sub(chinese_punctuation_pattern, ',', text)
    # Collapse runs of consecutive '。'/',' into a single '。'
    text = re.sub(r'[。,]{2,}', '。', text)
    # Strip a leading or trailing ','
    text = re.sub(r'^,|,$', '', text)
    return text

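# Illustrative usage (not executed):
#   remove_chinese_punctuation("你好!世界(测试)")  # -> "你好,世界,测试"
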
def remove_english_punctuation(text):
    """
    Replace the punctuation marks [:;!(),【】『』「」《》-‘“’”:,;!()[]><-] in English text with ','
    :param text: input text
    :return: cleaned text
    """
    chinese_punctuation_pattern = r"[:;!(),【】『』「」《》\-‘“’”:,;!\(\)\[\]><·]"
    text = re.sub(chinese_punctuation_pattern, ',', text)
    # Collapse runs of consecutive ','/'.' into a single '.'
    text = re.sub(r'[,\.]{2,}', '.', text)
    # Strip a leading or trailing ','
    text = re.sub(r'^,|,$', '', text)
    return text

def text_normalize(text):
    """
    Normalize the text (PaddlePaddle zh_normalization version).
    :param text: input text
    :return: normalized text
    """
    from zh_normalization import TextNormalizer
    # ref: https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/t2s/frontend/zh_normalization
    tx = TextNormalizer()
    sentences = tx.normalize(text)
    _txt = ''.join(sentences)
    return _txt

def convert_numbers_to_chinese(text):
    """
    Convert Arabic numerals in the text to Chinese numerals, e.g. 123 -> 一百二十三
    :param text: input text
    :return: converted text
    """
    return cn2an.transform(text, "an2cn")

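# Illustrative usage (not executed; relies on cn2an's "an2cn" transform):
#   convert_numbers_to_chinese("编号27149")  # expected -> "编号二万七千一百四十九"
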
def detect_language(sentence):
    # ref: https://github.com/2noise/ChatTTS/blob/main/ChatTTS/utils/infer_utils.py#L55
    chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]')
    english_word_pattern = re.compile(r'\b[A-Za-z]+\b')

    chinese_chars = chinese_char_pattern.findall(sentence)
    english_words = english_word_pattern.findall(sentence)

    if len(chinese_chars) > len(english_words):
        return "zh"
    else:
        return "en"

def split_text(text, min_length=60):
    """
    Split the text into segments of at least min_length characters.
    :param text: input text
    :param min_length: minimum segment length
    :return: list of text segments
    """
    # Delimiters for splitting into short sentences
    sentence_delimiters = re.compile(r'([。?!\.]+)')
    # One or more consecutive newlines mark a paragraph break and force a split
    paragraph_delimiters = re.compile(r'(\s*\n\s*)+')

    paragraphs = re.split(paragraph_delimiters, text)
    result = []

    for paragraph in paragraphs:
        if not paragraph.strip():
            continue  # skip empty paragraphs
        # Paragraphs shorter than the threshold are kept as a single segment
        if len(paragraph.strip()) < min_length:
            result.append(paragraph.strip())
            continue
        # Longer paragraphs are split into sentences and re-assembled
        sentences = re.split(sentence_delimiters, paragraph)
        current_sentence = ''
        for sentence in sentences:
            if re.match(sentence_delimiters, sentence):
                current_sentence += sentence.strip()
                if len(current_sentence) >= min_length:
                    result.append(current_sentence.strip())
                    current_sentence = ''
            else:
                current_sentence += sentence.strip()
        if current_sentence:
            if len(current_sentence) < min_length and len(result) > 0:
                result[-1] += current_sentence
            else:
                result.append(current_sentence)

    if detect_language(text[:1024]) == "zh":
        result = [normalize_zh(_.strip()) for _ in result if _.strip()]
    else:
        result = [normalize_en(_.strip()) for _ in result if _.strip()]
    return result

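# Illustrative usage (not executed; the detected language of the whole text decides which
# normalizer is applied to every segment):
#   split_text("第一句话。第二句话。第三句比较长一些。", min_length=5)
#   # expected -> ['第一句话。', '第二句话。', '第三句比较长一些。']
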
def normalize_en(text):
    # Text is no longer normalized outside of ChatTTS
    # from tn.english.normalizer import Normalizer
    # normalizer = Normalizer()
    # text = normalizer.normalize(text)
    # text = remove_english_punctuation(text)
    return text

def normalize_zh(text):
    # Text is no longer normalized outside of ChatTTS
    # from tn.chinese.normalizer import Normalizer
    # normalizer = Normalizer()
    # text = normalizer.normalize(text)
    # text = remove_chinese_punctuation(text)
    text = process_ddd(text)
    return text

def batch_split(items, batch_size=5):
    """
    Split items into batches of size batch_size.
    :param items: list of items
    :param batch_size: maximum number of items per batch
    :return: list of batches
    """
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

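# Illustrative usage (not executed):
#   batch_split(list(range(7)), batch_size=3)  # -> [[0, 1, 2], [3, 4, 5], [6]]
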
# Read a txt file, auto-detecting the file encoding
def read_long_text(file_path):
    """
    Read a long text file, trying several encodings until one succeeds.
    :param file_path: path of the file
    :return: file content as a string
    """
    encodings = ['utf-8', 'gbk', 'iso-8859-1', 'utf-16']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                return file.read()
        except (UnicodeDecodeError, LookupError):
            continue
    raise ValueError("Unable to detect the file encoding")

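# Illustrative usage (not executed; 'novel.txt' is a hypothetical input file):
#   content = read_long_text("novel.txt")
#   segments = split_text(content, min_length=60)
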
def replace_tokens(text):
    # Protect ChatTTS control tokens (e.g. [uv_break]) so later text processing does not mangle them
    remove_tokens = ['UNK']
    for token in remove_tokens:
        text = re.sub(r'\[' + re.escape(token) + r'\]', '', text)

    tokens = ['uv_break', 'laugh', 'lbreak']
    for token in tokens:
        text = re.sub(r'\[' + re.escape(token) + r'\]', f'uu{token}uu', text)
    text = text.replace('_', '')
    return text

def restore_tokens(text):
    # Restore the control tokens protected by replace_tokens back to their bracketed form
    tokens = ['uvbreak', 'laugh', 'UNK', 'lbreak']
    for token in tokens:
        text = re.sub(r'uu' + re.escape(token) + r'uu', f'[{token}]', text)
    text = text.replace('[uvbreak]', '[uv_break]')
    return text

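# Illustrative usage (not executed):
#   protected = replace_tokens("你好[uv_break]世界[laugh]")  # -> "你好uuuvbreakuu世界uulaughuu"
#   restore_tokens(protected)                                # -> "你好[uv_break]世界[laugh]"
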
def process_ddd(text):
    """
    Normalize the particles "地" and "得" by replacing both with "的".
    Rationale: "地" and "得" mainly appear around verbs and adjectives; this method does not
    follow strict grammar rules, because they are frequently misused in input text.
    Also, limited by jieba's segmentation accuracy, some cases may be missed, e.g. 小红帽疑惑地问
    :param text: input text
    :return: processed text
    """
    word_list = [(word, flag) for word, flag in pseg.cut(text, use_paddle=False)]
    # print(word_list)
    processed_words = []
    for i, (word, flag) in enumerate(word_list):
        if word in ["地", "得"]:
            # Check previous and next word's flag
            # prev_flag = word_list[i - 1][1] if i > 0 else None
            # next_flag = word_list[i + 1][1] if i + 1 < len(word_list) else None
            # if prev_flag in ['v', 'a'] or next_flag in ['v', 'a']:
            if flag in ['uv', 'ud']:
                processed_words.append("的")
            else:
                processed_words.append(word)
        else:
            processed_words.append(word)
    return ''.join(processed_words)

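# Illustrative usage (not executed; the actual output depends on jieba's POS tagging):
#   process_ddd("他跑得很快")  # expected -> "他跑的很快" when jieba tags 得 as 'ud'
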
def replace_space_between_chinese(text):
    # Remove whitespace that appears between two Chinese characters
    return re.sub(r'(?<=[\u4e00-\u9fff])\s+(?=[\u4e00-\u9fff])', '', text)

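# Illustrative usage (not executed):
#   replace_space_between_chinese("你 好 world 世 界")  # -> "你好 world 世界"
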
if __name__ == '__main__':
    # txts = [
    #     "快速地跑过红色的大门",
    #     "笑得很开心,学得很好",
    #     "小红帽疑惑地问?",
    #     "大灰狼慌张地回答",
    #     "哦,这是为了更好地听你说话。",
    #     "大灰狼不耐烦地说:“为了更好地抱你。”",
    #     "他跑得很快,工作做得非常认真,这是他努力地结果。得到",
    # ]
    # for txt in txts:
    #     print(txt, '-->', process_ddd(txt))
    txts = [
        "电影中梁朝伟扮演的陈永仁的编号27149",
        "这块黄金重达324.75克 我们班的最高总分为583分",
        "12~23 -1.5~2",
        "居维埃·拉色别德①、杜梅里②、卡特法日③,"
    ]
    for txt in txts:
        print(txt, '-->', text_normalize(txt))
        # print(txt, '-->', convert_numbers_to_chinese(txt))