Files
gradio-webrtc/frontend/shared/VideoChat/helpers/player.ts
neil.xh f476f9cf29 Integrate GS chat
This code review adds and refines the GS video chat feature, covering front-end and back-end interface definitions, state management, and UI components, and introduces new dependencies to support more interactive features.
Link: https://code.alibaba-inc.com/xr-paas/gradio_webrtc/codereview/21273476
* Update the Python part

* Merge the videochat frontend part

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Replace audiowave

* Fix import paths

* Merge the websocket mode logic

* feat: gaussian avatar chat

* Add input parameters for other renderers

* feat: ws connection and usage

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Shift left when the right edge exceeds the container width

* Pass configuration through

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Fix Gaussian package exception

* Sync webrtc_utils

* Update webrtc_utils

* Support on_chat_datachannel

* Fix the device name list not displaying correctly

* copy: pass webrtc_id

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Ensure webrtc completes before opening the websocket connection

* feat: integrate audio expression data

* Upload dist

* Hide canvas

* feat: expose Gaussian file download progress

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Fix failure to acquire permissions

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Request permissions before enumerating devices

* fix: do not process ws data before GS assets finish downloading

* fix: merge

* Adjust prompt wording

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Fix switching back to the default device when restarting a chat after a device switch

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Update localvideo size

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Must not default to "default"

* Fix audio permission issue

* Update build output

* fix: tie the chat button state to GS assets; remove dead code

* fix: merge

* feat: import the GS rendering module from an npm package

* fix

* Add chat history

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Style tweaks

* Update packages

* fix: GS avatar initial position and mute

* Scroll chat history to the bottom

* At least 100% height

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Nudge the text box up slightly

* Clear chat history when a connection starts

* fix: update gs render npm

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Logic safeguard

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* feat: audio init option for starting muted

* Reposition the actions bar when subtitles are shown

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Style polish

* feat: add readme

* fix: asset images

* fix: docs

* fix: update gs render sdk

* fix: frame position calculation in GS mode

* fix: update readme

* Device detection; handle overly narrow layouts

* Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* Separate permission checks from device checks

* feat: split GS download and load hook functions

* Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19

* fix: update gs render sdk

* Replace

* dist

* Upload files

* del
2025-04-16 19:09:04 +08:00

269 lines
7.9 KiB
TypeScript

import type EventEmitter from 'eventemitter3'
import { nanoid } from 'nanoid'
import { PlayerEventTypes } from '../interface/eventType'

interface IOption {
  // Number of audio channels
  channels: number
  // AnalyserNode fftSize
  fftSize: number
  // Bit encoding of the incoming data; defaults to 16-bit
  inputCodec: 'Int8' | 'Int16' | 'Int32' | 'Float32'
  // Called each time a scheduled buffer finishes playing
  onended: (extParams?: IExtInfo) => void
  // Sample rate in Hz
  sampleRate: number
  // Whether playback starts muted
  isMute: boolean
}

interface ITypedArrays {
  Float32: typeof Float32Array
  Int16: typeof Int16Array
  Int32: typeof Int32Array
  Int8: typeof Int8Array
}

type IExtInfo = Record<string, unknown>

interface ISamples {
  data: Float32Array
  end_of_batch: boolean
  startTime?: number
}
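/**
 * Streaming PCM player built on the Web Audio API. Incoming integer or
 * float PCM chunks are normalized to Float32, wrapped in AudioBuffers
 * with short fade-in/fade-out ramps, and scheduled back-to-back on a
 * shared AudioContext so consecutive chunks play without gaps. A
 * GainNode handles mute/volume and an AnalyserNode is attached for
 * waveform/spectrum inspection.
 */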
export class Player {
  static isTypedArray(
    data: Int8Array | Int16Array | Int32Array | Float32Array
  ) {
    // Accept either a TypedArray backed by an ArrayBuffer
    // or a raw ArrayBuffer
    return (
      (data.byteLength &&
        data.buffer &&
        data.buffer.constructor === ArrayBuffer) ||
      data.constructor === ArrayBuffer
    )
  }

  id = nanoid()
  analyserNode?: AnalyserNode
  audioCtx?: AudioContext
  // Whether to play automatically
  autoPlay = true
  bufferSource?: AudioBufferSourceNode
  convertValue = 32768
  ee: EventEmitter
  gainNode?: GainNode
  option: IOption = {
    inputCodec: 'Int16', // bit encoding of the incoming data; defaults to 16-bit
    channels: 1, // number of channels
    sampleRate: 8000, // sample rate in Hz
    fftSize: 2048, // AnalyserNode fftSize
    isMute: false, // start unmuted unless configured otherwise
    onended: () => {}
  }
  samplesList: ISamples[] = []
  startTime?: number
  typedArray?:
    | typeof Int8Array
    | typeof Int16Array
    | typeof Int32Array
    | typeof Float32Array
  _firstStartRelativeTime?: number
  _firstStartAbsoluteTime?: number

  constructor(option: IOption, ee: EventEmitter) {
    this.ee = ee
    this.init(option)
  }
  async continue() {
    await this.audioCtx!.resume()
  }

  destroy() {
    this.samplesList = []
    this.audioCtx?.close()
    this.audioCtx = undefined
  }

  feed(audioOptions: {
    audio: Int8Array | Int16Array | Int32Array | Float32Array
    end_of_batch: boolean
  }) {
    let { audio } = audioOptions
    const { end_of_batch } = audioOptions
    if (!audio) {
      return
    }
    this._isSupported(audio)
    // Normalize the raw buffer to Float32 PCM in [-1, 1]
    audio = this._getFormattedValue(audio)
    // Copy the converted samples into a fresh Float32Array
    const data = new Float32Array(audio.length)
    data.set(audio, 0)
    // Queue the batch and try to play it immediately
    const samples = {
      data,
      end_of_batch
    }
    this.samplesList.push(samples)
    this.flush(samples, this.samplesList.length - 1)
  }
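  /**
   * Schedule one sample batch for playback: create a fresh
   * AudioBufferSourceNode, apply the fade ramps, and start it at the
   * accumulated `startTime`, which is then advanced by the buffer's
   * duration so the next batch begins exactly where this one ends.
   */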
  flush(samples: ISamples, index: number) {
    if (!(samples && this.autoPlay && this.audioCtx)) return
    const { data, end_of_batch } = samples
    if (this.bufferSource) {
      this.bufferSource.onended = () => {}
    }
    this.bufferSource = this.audioCtx!.createBufferSource()
    if (typeof this.option.onended === 'function') {
      this.bufferSource.onended = () => {
        if (!end_of_batch && index === this.samplesList.length - 1) {
          this.ee.emit(PlayerEventTypes.Player_WaitNextAudioClip)
        }
        this.option.onended()
      }
    }
    const length = data.length / this.option.channels
    const audioBuffer = this.audioCtx!.createBuffer(
      this.option.channels,
      length,
      this.option.sampleRate
    )
    for (let channel = 0; channel < this.option.channels; channel++) {
      const audioData = audioBuffer.getChannelData(channel)
      let offset = channel
      let decrement = 50
      for (let i = 0; i < length; i++) {
        audioData[i] = data[offset]
        /* fade in over the first 50 samples to avoid clicks */
        if (i < 50) {
          audioData[i] = (audioData[i] * i) / 50
        }
        /* fade out over the last 51 samples */
        if (i >= length - 51) {
          audioData[i] = (audioData[i] * decrement--) / 50
        }
        offset += this.option.channels
      }
    }
    if (this.startTime! < this.audioCtx!.currentTime) {
      this.startTime = this.audioCtx!.currentTime
    }
    this.bufferSource.buffer = audioBuffer
    this.bufferSource.connect(this.gainNode!)
    this.bufferSource.connect(this.analyserNode!) // also feed the analyser
    this.bufferSource.start(this.startTime)
    samples.startTime = this.startTime
    if (this._firstStartAbsoluteTime === undefined) {
      this._firstStartAbsoluteTime = Date.now()
    }
    if (this._firstStartRelativeTime === undefined) {
      this._firstStartRelativeTime = this.startTime
      this.ee.emit(PlayerEventTypes.Player_StartSpeaking, this)
    }
    this.startTime! += audioBuffer.duration
  }
  init(option: IOption) {
    this.option = Object.assign(this.option, option) // final instance configuration
    this.convertValue = this._getConvertValue()
    this.typedArray = this._getTypedArray()
    this.initAudioContext()
  }

  initAudioContext() {
    // Set up the audio context and its node graph
    this.audioCtx = new (window.AudioContext || window.webkitAudioContext)()
    // GainNode controls the volume
    // https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createGain
    this.gainNode = this.audioCtx.createGain()
    this.gainNode.gain.value = this.option.isMute ? 0 : 1
    this.gainNode.connect(this.audioCtx.destination)
    this.startTime = this.audioCtx.currentTime
    this.analyserNode = this.audioCtx.createAnalyser()
    this.analyserNode.fftSize = this.option.fftSize
  }
  setMute(isMute: boolean) {
    this.gainNode!.gain.value = isMute ? 0 : 1
  }

  async pause() {
    await this.audioCtx!.suspend()
  }

  async updateAutoPlay(value: boolean) {
    if (this.autoPlay !== value && value) {
      this.autoPlay = value
      this.samplesList.forEach((sample, index) => {
        this.flush(sample, index)
      })
    } else {
      this.autoPlay = value
    }
  }

  volume(volume: number) {
    this.gainNode!.gain.value = volume
  }
  _getFormattedValue(data: Int8Array | Int16Array | Int32Array | Float32Array) {
    const TargetArray = this.typedArray!
    if (data.constructor === ArrayBuffer) {
      data = new TargetArray(data)
    } else {
      data = new TargetArray(data.buffer)
    }
    const float32 = new Float32Array(data.length)
    for (let i = 0; i < data.length; i++) {
      // AudioBuffer data must be 32-bit IEEE 754 linear PCM in [-1, +1],
      // so divide each sample by the range of its integer encoding,
      // e.g. float32[i] = data[i] / 0x8000 for Int16
      float32[i] = data[i] / this.convertValue
    }
    return float32
  }

  private _isSupported(
    data: Int8Array | Int16Array | Int32Array | Float32Array
  ) {
    // Only ArrayBuffer or TypedArray inputs are supported
    if (!Player.isTypedArray(data))
      throw new Error('Please pass an ArrayBuffer or any TypedArray')
    return true
  }
  private _getConvertValue() {
    // Pick the divisor used to normalize samples,
    // based on the configured input encoding
    const inputCodecs = {
      Int8: 128,
      Int16: 32768,
      Int32: 2147483648,
      Float32: 1
    }
    if (!inputCodecs[this.option.inputCodec])
      throw new Error(
        'Wrong codec. Please use one of: Int8, Int16, Int32, Float32'
      )
    return inputCodecs[this.option.inputCodec]
  }

  private _getTypedArray() {
    // Pick the TypedArray constructor matching the configured input
    // encoding; see
    // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
    const typedArrays = {
      Int8: Int8Array,
      Int16: Int16Array,
      Int32: Int32Array,
      Float32: Float32Array
    } as ITypedArrays
    if (!typedArrays[this.option.inputCodec])
      throw new Error(
        'Wrong codec. Please use one of: Int8, Int16, Int32, Float32'
      )
    return typedArrays[this.option.inputCodec]
  }
}
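For reference, a minimal usage sketch (not part of the file above), assuming 16-bit mono PCM streamed at 16 kHz; `pcmChunk` is a hypothetical placeholder for one received chunk, and `PlayerEventTypes` is the same enum the file imports from '../interface/eventType':

import EventEmitter from 'eventemitter3'

// Hypothetical placeholder for a streamed audio chunk.
declare const pcmChunk: Int16Array

const ee = new EventEmitter()
ee.on(PlayerEventTypes.Player_StartSpeaking, () => {
  // Fired once, when the first buffer actually starts playing.
})

const player = new Player(
  {
    inputCodec: 'Int16', // incoming chunks are 16-bit signed PCM
    channels: 1,
    sampleRate: 16000,
    fftSize: 2048,
    isMute: false,
    onended: () => {} // called after each scheduled buffer finishes
  },
  ee
)

// Feed chunks as they arrive; end_of_batch marks the final chunk
// of an utterance.
player.feed({ audio: pcmChunk, end_of_batch: false })

// Volume and mute go through the GainNode; pause()/continue()
// suspend and resume the underlying AudioContext.
player.setMute(true)
player.volume(0.5)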