mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-04 17:39:25 +08:00
use wav file rather than tensor
This commit is contained in:
@@ -41,11 +41,11 @@ def read_json_lists(list_file):
|
||||
return results
|
||||
|
||||
|
||||
def load_wav(wav, target_sr, min_sr=16000):
    """Load an audio file, average it over dim 0, and resample to ``target_sr``.

    Args:
        wav: Audio source handed unchanged to ``torchaudio.load``.
        target_sr: Sample rate the returned waveform should have.
        min_sr: Lowest source sample rate accepted when resampling is
            required. Defaults to 16000.

    Returns:
        The loaded waveform tensor, reduced to size 1 along dim 0 and
        resampled to ``target_sr`` when the source rate differs.

    Raises:
        AssertionError: If the source sample rate differs from
            ``target_sr`` and is below ``min_sr``.
    """
    speech, sample_rate = torchaudio.load(wav, backend='soundfile')
    # Collapse dim 0 to a single row (presumably the channel axis, i.e. a
    # mono down-mix -- confirm against torchaudio.load's return layout).
    speech = speech.mean(dim=0, keepdim=True)
    if sample_rate != target_sr:
        # The bound being enforced is min_sr, so the message must report
        # min_sr (not target_sr) and describe the actual >= comparison.
        assert sample_rate >= min_sr, \
            'wav sample rate {} must be greater than or equal to {}'.format(sample_rate, min_sr)
        speech = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=target_sr)(speech)
    return speech
|
||||
|
||||
|
||||
Reference in New Issue
Block a user