mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
Fix audio type conversion (#259)
* Fix conversion between audio dtypes
* Run Pytest in CI
* Add pytest tests path in pyproject.toml
* Fix usages
* Use other PR's test format (more or less)
* Support legacy arguments
* Fix pyproject.toml and test location
* Omit `test` arg in CI, given by pyproject.toml

---------

Co-authored-by: Freddy Boulton <alfonsoboulton@gmail.com>
This commit is contained in:
@@ -12,7 +12,14 @@ from fastapi import WebSocket
|
||||
from fastapi.websockets import WebSocketDisconnect, WebSocketState
|
||||
|
||||
from .tracks import AsyncStreamHandler, StreamHandlerImpl
|
||||
from .utils import AdditionalOutputs, CloseStream, DataChannel, split_output
|
||||
from .utils import (
|
||||
AdditionalOutputs,
|
||||
CloseStream,
|
||||
DataChannel,
|
||||
audio_to_float32,
|
||||
audio_to_int16,
|
||||
split_output,
|
||||
)
|
||||
|
||||
|
||||
class WebSocketDataChannel(DataChannel):
|
||||
@@ -31,14 +38,12 @@ def convert_to_mulaw(
|
||||
audio_data: np.ndarray, original_rate: int, target_rate: int
|
||||
) -> bytes:
|
||||
"""Convert audio data to 8kHz mu-law format"""
|
||||
|
||||
if audio_data.dtype != np.float32:
|
||||
audio_data = audio_data.astype(np.float32) / 32768.0
|
||||
audio_data = audio_to_float32(audio_data)
|
||||
|
||||
if original_rate != target_rate:
|
||||
audio_data = librosa.resample(audio_data, orig_sr=original_rate, target_sr=8000)
|
||||
|
||||
audio_data = (audio_data * 32768).astype(np.int16)
|
||||
audio_data = audio_to_int16(audio_data)
|
||||
|
||||
return audioop.lin2ulaw(audio_data, 2) # type: ignore
|
||||
|
||||
@@ -122,14 +127,13 @@ class WebSocketHandler:
|
||||
)
|
||||
|
||||
if self.stream_handler.input_sample_rate != 8000:
|
||||
audio_array = audio_array.astype(np.float32) / 32768.0
|
||||
audio_array = audio_to_float32(audio_array)
|
||||
audio_array = librosa.resample(
|
||||
audio_array,
|
||||
orig_sr=8000,
|
||||
target_sr=self.stream_handler.input_sample_rate,
|
||||
)
|
||||
audio_array = (audio_array * 32768).astype(np.int16)
|
||||
|
||||
audio_array = audio_to_int16(audio_array)
|
||||
try:
|
||||
if isinstance(self.stream_handler, AsyncStreamHandler):
|
||||
await self.stream_handler.receive(
|
||||
|
||||
Reference in New Issue
Block a user