From 0d990d60740bf174904a5185cce910b847bd3684 Mon Sep 17 00:00:00 2001
From: 禾息
Date: Fri, 30 Jan 2026 18:10:36 +0800
Subject: [PATCH] [BUG FIX] Use float64 to avoid precision errors; drop the CPU fallback so it no longer drags down performance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 cosyvoice/hifigan/generator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cosyvoice/hifigan/generator.py b/cosyvoice/hifigan/generator.py
index 045cb4e..bbc2a21 100644
--- a/cosyvoice/hifigan/generator.py
+++ b/cosyvoice/hifigan/generator.py
@@ -713,8 +713,8 @@ class CausalHiFTGenerator(HiFTGenerator):
     @torch.inference_mode()
     def inference(self, speech_feat: torch.Tensor, finalize: bool = True) -> torch.Tensor:
         # mel->f0 NOTE f0_predictor precision is crucial for causal inference, move self.f0_predictor to cpu if necessary
-        self.f0_predictor.to('cpu')
-        f0 = self.f0_predictor(speech_feat.cpu(), finalize=finalize).to(speech_feat)
+        self.f0_predictor.to(torch.float64)
+        f0 = self.f0_predictor(speech_feat.to(torch.float64), finalize=finalize).to(speech_feat)
         # f0->source
         s = self.f0_upsamp(f0[:, None]).transpose(1, 2)  # bs,n,t
         s, _, _ = self.m_source(s)
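
For reference, here is a minimal, self-contained sketch of the pattern this patch adopts: keep the precision-sensitive predictor on its original device, run it in float64, and cast the result back to the caller's dtype. ToyF0Predictor and predict_f0_float64 are hypothetical names for illustration only and are not CosyVoice's real f0_predictor; only the .to(torch.float64) / .to(speech_feat) casting pattern comes from the patch above.

# Hedged sketch: running a precision-sensitive sub-module in float64 on the
# original device instead of falling back to CPU, then casting its output
# back to the caller's dtype. The module below is a hypothetical stand-in,
# not CosyVoice's actual f0_predictor.
import torch
import torch.nn as nn


class ToyF0Predictor(nn.Module):
    """Hypothetical stand-in for the real f0_predictor."""

    def __init__(self, in_dim: int = 80):
        super().__init__()
        self.proj = nn.Conv1d(in_dim, 1, kernel_size=3, padding=1)

    def forward(self, feat: torch.Tensor) -> torch.Tensor:
        # feat: (batch, mel_bins, time) -> f0: (batch, time)
        return torch.relu(self.proj(feat)).squeeze(1)


@torch.inference_mode()
def predict_f0_float64(predictor: nn.Module, speech_feat: torch.Tensor) -> torch.Tensor:
    # Cast the parameters to float64 in place; they stay on speech_feat's
    # device, so no host<->device copies are introduced.
    predictor.to(torch.float64)
    # Run the forward pass in float64, then cast the result back to the
    # caller's dtype/device, mirroring `.to(speech_feat)` in the patch.
    return predictor(speech_feat.to(torch.float64)).to(speech_feat)


if __name__ == "__main__":
    device = "cuda" if torch.cuda.is_available() else "cpu"
    feat = torch.randn(1, 80, 120, device=device)  # (batch, mel_bins, frames)
    f0 = predict_f0_float64(ToyF0Predictor().to(device), feat)
    print(f0.shape, f0.dtype)  # torch.Size([1, 120]) torch.float32

The design intent stated in the commit message is that float64 accumulation keeps the f0 prediction numerically stable across causal/streaming chunks, while staying on the original device avoids the host/device round trip that the previous .cpu() fallback incurred.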