From 62c71075ac25223c9d01c34dda339d86a4785725 Mon Sep 17 00:00:00 2001 From: "lyuxiang.lx" Date: Mon, 8 Jul 2024 16:40:46 +0800 Subject: [PATCH] update dockerfile --- cosyvoice/dataset/processor.py | 4 ++-- runtime/python/Dockerfile | 13 ++++++------- webui.py | 8 ++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cosyvoice/dataset/processor.py b/cosyvoice/dataset/processor.py index eb99f3c..fa8b339 100644 --- a/cosyvoice/dataset/processor.py +++ b/cosyvoice/dataset/processor.py @@ -106,7 +106,7 @@ def filter(data, yield sample -def resample(data, resample_rate=22050, mode='train'): +def resample(data, resample_rate=22050, min_sample_rate=16000, mode='train'): """ Resample data. Inplace operation. @@ -123,7 +123,7 @@ def resample(data, resample_rate=22050, mode='train'): sample_rate = sample['sample_rate'] waveform = sample['speech'] if sample_rate != resample_rate: - if sample_rate < resample_rate: + if sample_rate < min_sample_rate: continue sample['sample_rate'] = resample_rate sample['speech'] = torchaudio.transforms.Resample( diff --git a/runtime/python/Dockerfile b/runtime/python/Dockerfile index 1f11a63..00d8c52 100644 --- a/runtime/python/Dockerfile +++ b/runtime/python/Dockerfile @@ -1,15 +1,14 @@ -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 +FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime ENV DEBIAN_FRONTEND=noninteractive WORKDIR /opt/CosyVoice RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list RUN apt-get update -y -RUN apt-get -y install python3-dev cmake python3-pip git unzip +RUN apt-get -y install git unzip git-lfs +RUN git lfs install RUN git clone --recursive https://github.com/FunAudioLLM/CosyVoice.git -RUN cd CosyVoice && pip3 install --default-timeout=3600 -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com -RUN apt install git-lfs && git lfs install -RUN cd CosyVoice && git clone https://www.modelscope.cn/iic/CosyVoice-ttsfrd.git 
pretrained_models/CosyVoice-ttsfrd -RUN cd CosyVoice/pretrained_models/CosyVoice-ttsfrd && unzip resource.zip -d . && pip3 install ttsfrd-0.3.6-cp38-cp38-linux_x86_64.whl +# here we use python==3.10 because we cannot find an image that has both python3.8 and torch2.0.1-cu118 installed +RUN cd CosyVoice && pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/ --trusted-host=mirrors.aliyun.com RUN cd CosyVoice/runtime/python && python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. cosyvoice.proto -CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && . ./path/sh && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"] \ No newline at end of file +CMD ["/bin/bash", "-c", "cd /opt/CosyVoice/CosyVoice/runtime/python && python3 server.py --port 50000 --max_conc 4 --model_dir iic/CosyVoice-300M && sleep infinity"] \ No newline at end of file diff --git a/webui.py b/webui.py index e13f967..7fe63ca 100644 --- a/webui.py +++ b/webui.py @@ -59,10 +59,10 @@ def postprocess(speech, top_db=60, hop_length=220, win_length=440): return speech inference_mode_list = ['预训练音色', '3s极速复刻', '跨语种复刻', '自然语言控制'] -instruct_dict = {'预训练音色': '1. 选择预训练音色\n2.点击生成音频按钮', - '3s极速复刻': '1. 选择prompt音频文件,或录入prompt音频,若同时提供,优先选择prompt音频文件\n2. 输入prompt文本\n3.点击生成音频按钮', - '跨语种复刻': '1. 选择prompt音频文件,或录入prompt音频,若同时提供,优先选择prompt音频文件\n2.点击生成音频按钮', - '自然语言控制': '1. 输入instruct文本\n2.点击生成音频按钮'} +instruct_dict = {'预训练音色': '1. 选择预训练音色\n2. 点击生成音频按钮', + '3s极速复刻': '1. 选择prompt音频文件,或录入prompt音频,注意不超过30s,若同时提供,优先选择prompt音频文件\n2. 输入prompt文本\n3. 点击生成音频按钮', + '跨语种复刻': '1. 选择prompt音频文件,或录入prompt音频,注意不超过30s,若同时提供,优先选择prompt音频文件\n2. 点击生成音频按钮', + '自然语言控制': '1. 选择预训练音色\n2. 输入instruct文本\n3. 点击生成音频按钮'} def change_instruction(mode_checkbox_group): return instruct_dict[mode_checkbox_group]