mirror of
https://github.com/HumanAIGC-Engineering/gradio-webrtc.git
synced 2026-02-05 18:09:23 +08:00
gs对话接入
本次代码评审新增并完善了gs视频聊天功能,包括前后端接口定义、状态管理及UI组件实现,并引入了新的依赖库以支持更多互动特性。 Link: https://code.alibaba-inc.com/xr-paas/gradio_webrtc/codereview/21273476 * 更新python 部分 * 合并videochat前端部分 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 替换audiowave * 导入路径修改 * 合并websocket mode逻辑 * feat: gaussian avatar chat * 增加其他渲染的入参 * feat: ws连接和使用 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 右边距离超出容器宽度,则向左移动 * 配置传递 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 高斯包异常 * 同步webrtc_utils * 更新webrtc_utils * 兼容on_chat_datachannel * 修复设备名称列表没有正常显示的问题 * copy 传递 webrtc_id * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 保证webrtc 完成后再进行websocket连接 * feat: 音频表情数据接入 * dist 上传 * canvas 隐藏 * feat: 高斯文件下载进度透出 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 修改无法获取权限问题 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 先获取权限再获取设备 * fix: gs资源下载完成前不处理ws数据 * fix: merge * 话术调整 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 修复设备切换后重新对话,又切换回默认设备的问题 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 更新localvideo 尺寸 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 不能默认default * 修改音频权限问题 * 更新打包结果 * fix: 对话按钮状态跟gs资源挂钩,删除无用代码 * fix: merge * feat: gs渲染模块从npm包引入 * fix * 新增对话记录 * Merge branch 'feature/update-fastrtc-0.0.19' of 
http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 样式修改 * 更新包 * fix: gs数字人初始化位置和静音 * 对话记录滚到底部 * 至少100%高度 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 略微上移文本框 * 开始连接时清空对话记录 * fix: update gs render npm * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 逻辑保证 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * feat: 音频初始化配置是否静音 * actionsbar在有字幕时调整位置 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 样式优化 * feat: 增加readme * fix: 资源图片 * fix: docs * fix: update gs render sdk * fix: gs模式下画面位置计算 * fix: update readme * 设备判断,太窄处理 * Merge branch 'feature/update-fastrtc-0.0.19' of gitlab.alibaba-inc.com:xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * 是否有权限和是否有设备分开 * feat: gs 下载和加载钩子函数分离 * Merge branch 'feature/update-fastrtc-0.0.19' of http://gitlab.alibaba-inc.com/xr-paas/gradio_webrtc into feature/update-fastrtc-0.0.19 * fix: update gs render sdk * 替换 * dist * 上传文件 * del
This commit is contained in:
268
frontend/shared/VideoChat/helpers/player.ts
Normal file
268
frontend/shared/VideoChat/helpers/player.ts
Normal file
@@ -0,0 +1,268 @@
|
||||
import type EventEmitter from 'eventemitter3'
|
||||
import { nanoid } from 'nanoid'
|
||||
|
||||
import { PlayerEventTypes } from '../interface/eventType'
|
||||
// Options accepted by Player.
// NOTE(review): in the original, several comments were attached to the
// wrong fields (e.g. the codec comment sat above `channels`); corrected here.
interface IOption {
  // Number of audio channels (e.g. 1 = mono).
  channels: number
  // fftSize for the AnalyserNode created by the player.
  fftSize: number

  // Encoding of the incoming PCM data; defaults to 16-bit.
  inputCodec: 'Int8' | 'Int16' | 'Int32' | 'Float32'
  // Callback invoked each time a scheduled buffer source finishes playing.
  onended: (extParams?: IExtInfo) => void
  // Sample rate in Hz.
  sampleRate: number
  // Whether playback starts muted (gain 0).
  isMute: boolean
}
// Maps codec names to the TypedArray constructor used to view raw buffers.
interface ITypedArrays {
  Float32: typeof Float32Array
  Int16: typeof Int16Array
  Int32: typeof Int32Array
  Int8: typeof Int8Array
}
// Free-form extra info forwarded to the onended callback.
type IExtInfo = Record<string, unknown>
// One decoded audio chunk queued for playback.
interface ISamples {
  // PCM samples normalized to [-1, 1].
  data: Float32Array
  // True when this chunk is the final one of its batch.
  end_of_batch: boolean
  // AudioContext time (seconds) at which the chunk was scheduled; set in flush().
  startTime?: number
}
|
||||
// Streaming PCM audio player built on the Web Audio API.
//
// Raw PCM chunks fed via feed() are normalized to Float32 in [-1, 1],
// wrapped in AudioBuffers and scheduled back-to-back on the AudioContext
// timeline by flush(). A GainNode handles mute/volume; an AnalyserNode is
// attached so callers can sample playback for facial-animation sync.
export class Player {
  // Returns truthy when `data` is a TypedArray backed by an ArrayBuffer,
  // or a raw ArrayBuffer itself.
  // NOTE(review): the leading `data.byteLength &&` makes a ZERO-LENGTH
  // TypedArray fail this check — confirm empty chunks never reach here.
  static isTypedArray(
    data: Int8Array | Int16Array | Int32Array | Float32Array
  ) {
    // Detect whether the input is a TypedArray or an ArrayBuffer.
    return (
      (data.byteLength &&
        data.buffer &&
        data.buffer.constructor === ArrayBuffer) ||
      data.constructor === ArrayBuffer
    )
  }
  // Unique id for this player instance.
  id = nanoid()
  analyserNode?: AnalyserNode
  audioCtx?: AudioContext
  // Whether newly fed chunks are scheduled for playback immediately.
  autoPlay = true
  bufferSource?: AudioBufferSourceNode
  // Divisor normalizing integer PCM to [-1, 1]; derived from inputCodec.
  convertValue = 32768
  ee: EventEmitter
  gainNode?: GainNode
  // Defaults; merged with the constructor option in init().
  // NOTE(review): `isMute` is required by IOption but absent here — the
  // effective default relies on callers always passing it.
  option: IOption = {
    inputCodec: 'Int16', // encoding of incoming data, default 16-bit
    channels: 1, // channel count
    sampleRate: 8000, // sample rate in Hz
    fftSize: 2048, // analyserNode fftSize
    onended: () => {}
  }
  // Every chunk fed so far, in arrival order; kept so playback position
  // can be mapped back to a chunk (see Processor frame lookup).
  samplesList: ISamples[] = []

  // Next scheduling position on the AudioContext timeline (seconds).
  startTime?: number
  typedArray?:
    | typeof Int8Array
    | typeof Int16Array
    | typeof Int32Array
    | typeof Float32Array

  // AudioContext time (s) of the very first scheduled chunk.
  _firstStartRelativeTime?: number
  // Wall-clock epoch (ms) when the first chunk started playing.
  _firstStartAbsoluteTime?: number

  constructor(option: IOption, ee: EventEmitter) {
    this.ee = ee
    this.init(option)
  }

  // Resume a suspended AudioContext.
  async continue() {
    await this.audioCtx!.resume()
  }
  // Drop all queued samples and tear down the AudioContext.
  destroy() {
    this.samplesList = []
    this.audioCtx?.close()
    this.audioCtx = undefined
  }
  // Accept one raw PCM chunk, normalize it and queue it for playback.
  feed(audioOptions: {
    audio: Int8Array | Int16Array | Int32Array | Float32Array
    end_of_batch: boolean
  }) {
    let { audio } = audioOptions
    const { end_of_batch } = audioOptions
    if (!audio) {
      return
    }
    this._isSupported(audio)
    // Normalize the raw buffer to Float32 samples in [-1, 1].
    audio = this._getFormattedValue(audio)
    // Copy into a fresh Float32Array so the queued chunk owns its memory.
    const data = new Float32Array(audio.length)
    data.set(audio, 0)
    // Queue the chunk and attempt to schedule it right away.
    const samples = {
      data,
      end_of_batch
    }
    this.samplesList.push(samples)
    this.flush(samples, this.samplesList.length - 1)
  }
  // Schedule one chunk at the current timeline position. `index` is the
  // chunk's position in samplesList, used to detect whether it is the
  // most recently fed chunk (starvation detection).
  flush(samples: ISamples, index: number) {
    if (!(samples && this.autoPlay && this.audioCtx)) return
    const { data, end_of_batch } = samples
    // Detach the previous source's onended so only the newest scheduled
    // source drives the end-of-playback callbacks.
    if (this.bufferSource) {
      this.bufferSource.onended = () => {}
    }
    this.bufferSource = this.audioCtx!.createBufferSource()
    if (typeof this.option.onended === 'function') {
      this.bufferSource.onended = () => {
        // The last queued chunk finished but its batch is incomplete:
        // we are starved and waiting for the next clip.
        if (!end_of_batch && index === this.samplesList.length - 1) {
          this.ee.emit(PlayerEventTypes.Player_WaitNextAudioClip)
        }
        this.option.onended()
      }
    }
    const length = data.length / this.option.channels
    const audioBuffer = this.audioCtx!.createBuffer(
      this.option.channels,
      length,
      this.option.sampleRate
    )

    // De-interleave into per-channel buffers, applying a short linear
    // fade-in/fade-out (~50 samples) to avoid clicks at chunk edges.
    for (let channel = 0; channel < this.option.channels; channel++) {
      const audioData = audioBuffer.getChannelData(channel)
      let offset = channel
      let decrement = 50
      for (let i = 0; i < length; i++) {
        audioData[i] = data[offset]
        /* fadein */
        if (i < 50) {
          audioData[i] = (audioData[i] * i) / 50
        }
        /* fadeout */
        if (i >= length - 51) {
          audioData[i] = (audioData[i] * decrement--) / 50
        }
        offset += this.option.channels
      }
    }

    // Never schedule in the past: catch up to the context clock first.
    if (this.startTime! < this.audioCtx!.currentTime) {
      this.startTime = this.audioCtx!.currentTime
    }
    this.bufferSource.buffer = audioBuffer
    this.bufferSource.connect(this.gainNode!)
    this.bufferSource.connect(this.analyserNode!) // feed the analyser too
    this.bufferSource.start(this.startTime)
    samples.startTime = this.startTime
    if (this._firstStartAbsoluteTime === undefined) {
      this._firstStartAbsoluteTime = Date.now()
    }
    if (this._firstStartRelativeTime === undefined) {
      this._firstStartRelativeTime = this.startTime
      this.ee.emit(PlayerEventTypes.Player_StartSpeaking, this)
    }
    // Advance the timeline so the next chunk plays seamlessly after this one.
    this.startTime! += audioBuffer.duration
  }
  // Merge caller options over the defaults and build the audio graph.
  init(option: IOption) {
    this.option = Object.assign(this.option, option) // final effective config
    this.convertValue = this._getConvertValue()
    this.typedArray = this._getTypedArray()
    this.initAudioContext()
  }
  // Build the AudioContext graph: gain -> destination, plus an analyser.
  initAudioContext() {
    this.audioCtx = new (window.AudioContext || window.webkitAudioContext)()
    // GainNode controlling output volume / mute.
    // https://developer.mozilla.org/en-US/docs/Web/API/BaseAudioContext/createGain
    this.gainNode = this.audioCtx.createGain()
    this.gainNode.gain.value = this.option.isMute ? 0 : 1
    this.gainNode.connect(this.audioCtx.destination)
    this.startTime = this.audioCtx.currentTime
    this.analyserNode = this.audioCtx.createAnalyser()
    this.analyserNode.fftSize = this.option.fftSize
  }
  // Mute/unmute by toggling gain between 0 and 1.
  setMute(isMute: boolean) {
    this.gainNode!.gain.value = isMute ? 0 : 1;
  }
  // Suspend the AudioContext (pauses playback without discarding state).
  async pause() {
    await this.audioCtx!.suspend()
  }
  // Toggle auto-play. When switching from off to on, replay every queued
  // chunk through flush() so playback catches up.
  async updateAutoPlay(value: boolean) {
    if (this.autoPlay !== value && value) {
      this.autoPlay = value
      this.samplesList.forEach((sample, index) => {
        this.flush(sample, index)
      })
    } else {
      this.autoPlay = value
    }
  }

  // Set output volume directly (0..1 typical; overrides mute state).
  volume(volume: number) {
    this.gainNode!.gain.value = volume
  }
  // View the raw bytes through the configured TypedArray, then normalize
  // each sample to a Float32 in [-1, 1] by dividing by convertValue.
  _getFormattedValue(data: Int8Array | Int16Array | Int32Array | Float32Array) {
    const TargetArray = this.typedArray!
    if (data.constructor === ArrayBuffer) {
      data = new TargetArray(data)
    } else {
      data = new TargetArray(data.buffer)
    }

    const float32 = new Float32Array(data.length)

    for (let i = 0; i < data.length; i++) {
      // AudioBuffer expects IEEE754 32-bit linear PCM in [-1, +1], so
      // divide by the full-scale value of the input codec.
      float32[i] = data[i] / this.convertValue
    }
    return float32
  }

  // Throws unless the input is an ArrayBuffer or a TypedArray.
  private _isSupported(
    data: Int8Array | Int16Array | Int32Array | Float32Array
  ) {
    if (!Player.isTypedArray(data))
      throw new Error('请传入ArrayBuffer或者任意TypedArray')
    return true
  }

  // Full-scale divisor for the configured input codec.
  private _getConvertValue() {
    const inputCodecs = {
      Int8: 128,
      Int16: 32768,
      Int32: 2147483648,
      Float32: 1
    }
    if (!inputCodecs[this.option.inputCodec])
      throw new Error(
        'wrong codec.please input one of these codecs:Int8,Int16,Int32,Float32'
      )
    return inputCodecs[this.option.inputCodec]
  }

  // TypedArray constructor matching the configured input codec.
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray
  private _getTypedArray() {
    const typedArrays = {
      Int8: Int8Array,
      Int16: Int16Array,
      Int32: Int32Array,
      Float32: Float32Array
    } as ITypedArrays
    if (!typedArrays[this.option.inputCodec])
      throw new Error(
        'wrong codec.please input one of these codecs:Int8,Int16,Int32,Float32'
      )
    return typedArrays[this.option.inputCodec]
  }
}
|
||||
610
frontend/shared/VideoChat/helpers/processor.ts
Normal file
610
frontend/shared/VideoChat/helpers/processor.ts
Normal file
@@ -0,0 +1,610 @@
|
||||
import EventEmitter from "eventemitter3";
|
||||
import PQueue from "p-queue";
|
||||
|
||||
import { mergeBlob, unpack } from "../binary_utils";
|
||||
import {
|
||||
EventTypes,
|
||||
PlayerEventTypes,
|
||||
ProcessorEventTypes,
|
||||
} from "../interface/eventType";
|
||||
import { Player } from "./player";
|
||||
|
||||
// Payload shape handed to Processor.add() by the websocket layer.
export type IPayload = Record<string, string | number | object | Blob>;

// Metadata describing one data track (audio / face channels) inside the
// JSON header of a parsed motion packet.
interface IDataRecords {
  channel_names?: string[];
  data_id: number;
  // Byte offset of this track's data within the binary section.
  data_offset: number;
  data_type: string;
  sample_rate: number;
  shape: number[];
}
// Server-side event carried inside a parsed packet.
interface IEvent {
  avatar_status?: string;
  event_type: string;
  speech_id: string;
}
// JSON header of an unpacked motion packet.
interface IParsedData {
  batch_id?: number;
  batch_name?: string;
  data_records: Record<string, IDataRecords>;
  end_of_batch: boolean;
  events: IEvent[];
}
interface IAvatarMotionData {
  // Total binary size; present on the first package.
  binary_size: number;
  // Whether this is the first package of a group.
  first_package: boolean;
  // Data slice; present on non-first packages.
  motion_data_slice?: Blob;
  // Number of slices; present on the first package.
  segment_num?: number;
  // Slice index; present on non-first packages.
  slice_index?: number;
  // Whether binary frames are used; present on the first package.
  use_binary_frame?: boolean;
  // Whether the initial audio starts muted.
  is_audio_mute?: boolean;
}

// Accumulated state for one group of packages being reassembled.
interface IAvatarMotionGroupBase {
  arkitFaceArrayBufferArray?: ArrayBuffer[];
  batch_id?: number;
  batch_name?: string;
  binSize?: number;
  jsonSize?: number;
  merged_motion_data: Uint8Array;
  motion_data_slices: Blob[];
  player?: Player;
  tts2faceArrayBufferArray?: ArrayBuffer[];
}
// A group: first-package metadata plus the accumulated slices above.
interface IAvatarMotionGroup extends IAvatarMotionGroupBase {
  binary_size: number;
  first_package: boolean;
  segment_num?: number;
  use_binary_frame?: boolean;
}
// Maps wire `data_type` names to Player input codecs.
const InputCodecs: Record<string, "Int8" | "Int16" | "Int32" | "Float32"> = {
  int16: "Int16",
  int32: "Int32",
  float32: "Float32",
};
// Maps wire `data_type` names to TypedArray constructors.
const TypedArrays: Record<
  string,
  typeof Int16Array | typeof Int32Array | typeof Float32Array
> = {
  int16: Int16Array,
  int32: Int32Array,
  float32: Float32Array,
};
|
||||
|
||||
// Reassembles avatar motion packets arriving over the websocket, feeds
// audio tracks into Player instances and exposes per-frame face data
// (ARKit blendshapes / tts2face) aligned to audio playback time.
export class Processor {
  private ee: EventEmitter;
  // Serializes packet handling so slices are processed in arrival order.
  private _motionDataGroupHandlerQueue = new PQueue({
    concurrency: 1,
  });
  // Groups currently being assembled/played, oldest first.
  private _motionDataGroups: IAvatarMotionGroup[] = [];
  private _arkit_face_sample_rate?: number;
  private _arkit_face_channel_names?: string[];
  private _tts2face_sample_rate?: number;
  private _tts2face_channel_names?: string[];
  // Highest batch_id seen at interrupt time; later data for batches at or
  // below this id is discarded (see _connectBatch).
  private _maxBatchId?: number;
  // Per-frame channel counts (shape[1]) for each face track.
  private _arkitFaceShape?: number;
  private _tts2FaceShape?: number;
  constructor(ee: EventEmitter) {
    this.ee = ee;
  }
  // Enqueue one incoming payload for sequential processing.
  add(payload: IPayload) {
    const { avatar_motion_data } = payload;
    this._motionDataGroupHandlerQueue.add(
      async () =>
        await this._motionDataGroupHandler(
          avatar_motion_data as IAvatarMotionData,
        ),
    );
  }
  // Destroy all players and drop every pending group.
  clear() {
    this._motionDataGroups.forEach((group) => {
      group.player?.destroy();
    });
    this._motionDataGroups = [];
  }
  // Mute/unmute every active player.
  setMute(isMute: boolean) {
    this._motionDataGroups.forEach((group) => {
      group.player?.setMute(isMute);
    });
  }
  // Current ARKit blendshape frame for the playing audio, or null.
  getArkitFaceFrame() {
    return {
      arkitFace: this._getArkitFaceFrame(),
    };
  }
  // batch_id of the newest group that has one, or undefined.
  getLastBatchId() {
    let batch_id = undefined;
    this._motionDataGroups.forEach((group) => {
      if (group.batch_id) {
        batch_id = group.batch_id;
      }
    });
    return batch_id;
  }
  // Current tts2face frame for the playing audio, or null.
  // NOTE(review): name looks like a typo for getTts2FaceFrame; kept
  // unchanged because external callers depend on it.
  getTtt2FaceFrame() {
    return {
      tts2Face: this._getTts2FaceFrame(),
    };
  }

  // Interrupt current speech: remember the highest batch_id so stragglers
  // from that batch are discarded, then destroy everything.
  interrupt() {
    this._motionDataGroups.forEach((group) => {
      if (group.batch_id) {
        this._maxBatchId = group.batch_id;
      }
      group.player?.destroy();
    });
    this._motionDataGroups = [];
  }

  // Look up the ARKit blendshape row matching the audio sample currently
  // being played, keyed by wall-clock offset from playback start.
  private _getArkitFaceFrame() {
    if (!this._motionDataGroups.length) {
      return null;
    }
    const targetMotion = this._motionDataGroups.find(
      (_motion) => _motion.player,
    );

    if (!targetMotion) {
      return null;
    }
    const { arkitFaceArrayBufferArray, player } = targetMotion!;
    if (
      player &&
      player._firstStartAbsoluteTime &&
      arkitFaceArrayBufferArray &&
      arkitFaceArrayBufferArray.length > 0 &&
      this._arkitFaceShape &&
      this._arkit_face_sample_rate
    ) {
      // Wall-clock ms elapsed since the first audio chunk started.
      const offsetTime = Date.now() - player._firstStartAbsoluteTime;
      let lastIndex = 0;
      let firstSampleStartTime: number;
      // Find the latest chunk whose scheduled start has been reached.
      player.samplesList.forEach((item, index) => {
        if (
          firstSampleStartTime === undefined &&
          item.startTime !== undefined
        ) {
          firstSampleStartTime = item.startTime;
        }
        if (
          item.startTime !== undefined &&
          item.startTime - firstSampleStartTime <= offsetTime / 1000
        ) {
          lastIndex = index;
        }
      });
      const samples = player.samplesList[lastIndex];
      // NOTE(review): offsetTime is relative to the first chunk, but
      // samples.startTime is absolute AudioContext time — presumably this
      // should subtract (samples.startTime - firstSampleStartTime); verify.
      const subOffsetTime = offsetTime - samples.startTime! * 1000;
      const offset = Math.floor(
        (subOffsetTime / 1000) * this._arkit_face_sample_rate,
      );
      const arkitFaceFloat32ArrayArray = new Float32Array(
        arkitFaceArrayBufferArray[lastIndex],
      );
      // Slice out one frame's worth of channels at the computed offset.
      const subData = arkitFaceFloat32ArrayArray?.slice(
        offset * this._arkitFaceShape,
        offset * this._arkitFaceShape + this._arkitFaceShape,
      );
      if (subData?.length) {
        // Re-key the row by channel name, e.g. { jawOpen: 0.2, ... }.
        const result = {};
        const channelNames = this._arkit_face_channel_names || [];
        channelNames.forEach((channelName, index) => {
          Object.assign(result, {
            [channelName]: subData[index],
          });
        });
        return result;
      }
      return null;
    }
    return null;
  }
  // tts2face variant of _getArkitFaceFrame; returns the raw Float32 row
  // instead of a channel-name keyed object.
  private _getTts2FaceFrame() {
    if (!this._motionDataGroups.length) {
      return null;
    }
    const targetMotion = this._motionDataGroups.find(
      (_motion) => _motion.player,
    );
    if (!targetMotion) {
      return null;
    }
    const { tts2faceArrayBufferArray, player } = targetMotion!;
    if (
      player &&
      player._firstStartAbsoluteTime &&
      tts2faceArrayBufferArray &&
      tts2faceArrayBufferArray.length > 0 &&
      this._tts2FaceShape &&
      this._tts2face_sample_rate
    ) {
      const offsetTime = Date.now() - player._firstStartAbsoluteTime;
      let lastIndex = 0;
      let firstSampleStartTime: number;
      player.samplesList.forEach((item, index) => {
        if (
          firstSampleStartTime === undefined &&
          item.startTime !== undefined
        ) {
          firstSampleStartTime = item.startTime;
        }
        if (
          item.startTime !== undefined &&
          item.startTime - firstSampleStartTime <= offsetTime / 1000
        ) {
          lastIndex = index;
        }
      });
      const samples = player.samplesList[lastIndex];
      // NOTE(review): same absolute-vs-relative startTime concern as in
      // _getArkitFaceFrame — verify.
      const subOffsetTime = offsetTime - samples.startTime! * 1000;
      const offset = Math.floor(
        (subOffsetTime / 1000) * this._tts2face_sample_rate,
      );
      const arkitFaceFloat32ArrayArray = new Float32Array(
        tts2faceArrayBufferArray[lastIndex],
      );
      const subData = arkitFaceFloat32ArrayArray?.slice(
        offset * this._tts2FaceShape,
        offset * this._tts2FaceShape + this._tts2FaceShape,
      );
      if (subData?.length) {
        return subData;
      }
      return null;
    }
    return null;
  }

  // Core packet handler (serialized by the PQueue). First packages open a
  // new group; subsequent slices accumulate until segment_num is reached,
  // then the group is unpacked and dispatched to the track handlers.
  private async _motionDataGroupHandler(avatar_motion_data: IAvatarMotionData) {
    try {
      const {
        first_package,
        motion_data_slice,
        segment_num,
        binary_size,
        use_binary_frame,
        is_audio_mute
      } = avatar_motion_data;
      if (first_package) {
        const lastMotionGroup =
          this._motionDataGroups[this._motionDataGroups.length - 1];
        if (lastMotionGroup) {
          // Check whether the previous group lost any slices.
          if (
            lastMotionGroup.segment_num !==
            lastMotionGroup.motion_data_slices.length
          ) {
            // Packet loss: surface an error.
            this.ee.emit(EventTypes.ErrorReceived, 'lost data packets');
          }
        }
        this._motionDataGroups.push({
          first_package,
          binary_size,
          segment_num,
          use_binary_frame,
          motion_data_slices: [],
          merged_motion_data: new Uint8Array(binary_size),
        });
      } else {
        // A slice without a preceding first package cannot be placed.
        if (this._motionDataGroups.length === 0) {
          return;
        }
        if (!motion_data_slice) {
          return;
        }
        const lastMotionGroup =
          this._motionDataGroups[this._motionDataGroups.length - 1];
        const prevMotionGroup =
          this._motionDataGroups[this._motionDataGroups.length - 2];
        lastMotionGroup.motion_data_slices.push(motion_data_slice);
        if (
          lastMotionGroup.motion_data_slices.length ===
          lastMotionGroup.segment_num
        ) {
          // Single-segment case: no mergeBlob needed, assign the lone
          // slice directly to keep the downstream logic unchanged.
          // NOTE(review): only slices[0] is used — if segment_num > 1 the
          // later slices are silently dropped; confirm the server only
          // ever sends one slice per group on this path.
          const blob = lastMotionGroup.motion_data_slices[0]
          // const blob = mergeBlob(
          //   lastMotionGroup.motion_data_slices,
          //   lastMotionGroup.merged_motion_data,
          // );
          const { parsedData, jsonSize, binSize } = await unpack(blob);
          lastMotionGroup.jsonSize = jsonSize;
          lastMotionGroup.binSize = binSize;
          // Binary section follows a 12-byte header plus the JSON header.
          const bin = blob.slice(12 + lastMotionGroup.jsonSize!);
          if (bin.size !== lastMotionGroup.binSize) {
            this.ee.emit(ProcessorEventTypes.Chat_BinsizeError);
          }
          const batchCheckResult = this._connectBatch(
            parsedData,
            lastMotionGroup,
            prevMotionGroup,
          );
          if (!batchCheckResult) {
            return;
          }
          await this._handleArkitFaceConfig(
            parsedData,
            lastMotionGroup,
            prevMotionGroup,
            bin,
          );
          // await this._handletts2faceConfig(
          //   parsedData,
          //   lastMotionGroup,
          //   prevMotionGroup,
          //   bin,
          // );
          await this._handleAudioConfig(
            parsedData,
            lastMotionGroup,
            prevMotionGroup,
            bin,
            is_audio_mute || false
          );
          this._handleEvents(parsedData);
        }
      }
    } catch (err: unknown) {
      console.error('err', err)
      this.ee.emit(EventTypes.ErrorReceived, (err as Error).message);
    }
  }
  // Extract the audio track, lazily create (or reuse) the group's Player,
  // wire up end-of-batch callbacks and feed the PCM data.
  private async _handleAudioConfig(
    parsedData: IParsedData,
    lastMotionGroup: IAvatarMotionGroup,
    prevMotionGroup: IAvatarMotionGroup,
    bin: Blob,
    isPlayerMute: boolean
  ) {
    const { data_records = {}, end_of_batch } = parsedData;
    const { audio } = data_records;
    if (audio) {
      const { sample_rate, shape, data_offset, data_type } = audio;
      const inputCodec = InputCodecs[data_type];
      const targetTypedArray = TypedArrays[data_type];
      if (lastMotionGroup.player === undefined) {
        if (
          prevMotionGroup &&
          prevMotionGroup.player &&
          prevMotionGroup.batch_id === lastMotionGroup.batch_id
        ) {
          // Same batch: continue on the previous group's player.
          lastMotionGroup.player = prevMotionGroup.player;
        } else if (sample_rate) {
          lastMotionGroup.player = new Player(
            {
              inputCodec,
              channels: 1,
              sampleRate: sample_rate,
              fftSize: 1024,
              isMute: isPlayerMute,
              onended: (option) => {
                if (!option) {
                  return;
                }
                const {
                  end_of_batch: innerEndOfBatch,
                  lastMotionGroup: innerLastMotion,
                } = option;
                if (innerEndOfBatch) {
                  // Batch finished: announce, drop consumed groups, and
                  // auto-play the next pending group if there is one.
                  const { batch_id, player } =
                    innerLastMotion as IAvatarMotionGroup;
                  this.ee.emit(PlayerEventTypes.Player_EndSpeaking, player);
                  this._motionDataGroups = this._motionDataGroups.filter(
                    (item) => item.batch_id! > batch_id!,
                  );
                  if (
                    this._motionDataGroups.length &&
                    this._motionDataGroups[0].player
                  ) {
                    this._motionDataGroups[0].player.updateAutoPlay(true);
                  } else {
                    this.ee.emit(PlayerEventTypes.Player_NoLegacy);
                  }
                }
              },
            },
            this.ee,
          );
        }
        if (end_of_batch) {
          // Wrap onended so the final chunk reports its batch context.
          const originEnded = lastMotionGroup.player!.option.onended;
          lastMotionGroup.player!.option.onended = () => {
            originEnded({
              end_of_batch,
              lastMotionGroup,
            });
          };
        }
      }
      // Byte length of the track: product of shape dims times bytes/sample.
      const shapeLength = shape.reduce(
        (acc: number, cur: number) => acc * cur,
        inputCodec === "Int16" ? 2 : 4,
      );
      const audioBlobSliceStart = data_offset;
      const audioBlobSliceEnd = data_offset + shapeLength;
      const audioBlob = bin.slice(audioBlobSliceStart, audioBlobSliceEnd);
      const audioArrayBuffer = await audioBlob.arrayBuffer();
      // If an earlier segment is still playing when this one arrives, the
      // new one must not auto-play (it is chained via onended instead).
      const prevHasPlayerMotionDataGroup = this._motionDataGroups.find(
        (item) => item.player,
      );
      if (
        this._motionDataGroups.length &&
        lastMotionGroup.player &&
        prevHasPlayerMotionDataGroup &&
        prevHasPlayerMotionDataGroup.player !== lastMotionGroup.player
      ) {
        lastMotionGroup.player.autoPlay = false;
      }
      if (lastMotionGroup.player) {
        lastMotionGroup.player.feed({
          audio: new targetTypedArray(audioArrayBuffer),
          end_of_batch,
        });
      }
    } else {
      // Event-only motion without audio: attach it to the previous
      // group's player when they belong to the same batch.
      if (
        prevMotionGroup &&
        prevMotionGroup.player &&
        lastMotionGroup.batch_id === prevMotionGroup.batch_id
      ) {
        lastMotionGroup.player = prevMotionGroup.player;
      }
    }
  }
  // Extract the ARKit blendshape track into the group's buffer list,
  // sharing the list across groups of the same batch.
  private async _handleArkitFaceConfig(
    parsedData: IParsedData,
    lastMotionGroup: IAvatarMotionGroup,
    prevMotionGroup: IAvatarMotionGroup,
    bin: Blob,
  ) {
    const { data_records = {} } = parsedData;
    const { arkit_face } = data_records;
    if (arkit_face) {
      const { channel_names, shape, data_offset, sample_rate } =
        arkit_face as IDataRecords;
      // Channel names / sample rate arrive once; remember them.
      if (channel_names && !this._arkit_face_channel_names) {
        this._arkit_face_channel_names = channel_names;
        this._arkit_face_sample_rate = sample_rate;
      }
      if (lastMotionGroup.arkitFaceArrayBufferArray === undefined) {
        if (
          prevMotionGroup &&
          prevMotionGroup.arkitFaceArrayBufferArray &&
          prevMotionGroup.batch_id === lastMotionGroup.batch_id
        ) {
          lastMotionGroup.arkitFaceArrayBufferArray =
            prevMotionGroup.arkitFaceArrayBufferArray;
        } else {
          lastMotionGroup.arkitFaceArrayBufferArray = [];
        }
        // Byte length: product of shape dims times 4 bytes (Float32).
        const shapeLength = shape.reduce(
          (acc: number, cur: number) => acc * cur,
          4,
        );
        this._arkitFaceShape = shape[1];
        const arkitFaceBlob = bin.slice(data_offset, data_offset + shapeLength);
        const arkitFaceArrayBuffer = await arkitFaceBlob.arrayBuffer();
        lastMotionGroup.arkitFaceArrayBufferArray.push(arkitFaceArrayBuffer);
      }
    } else {
      // No track in this packet: inherit the previous group's buffers
      // when they belong to the same batch.
      if (
        prevMotionGroup &&
        prevMotionGroup.arkitFaceArrayBufferArray &&
        lastMotionGroup.batch_id === prevMotionGroup.batch_id
      ) {
        lastMotionGroup.arkitFaceArrayBufferArray =
          prevMotionGroup.arkitFaceArrayBufferArray;
      }
    }
  }
  // tts2face analogue of _handleArkitFaceConfig. Currently not invoked
  // (the call site in _motionDataGroupHandler is commented out).
  private async _handletts2faceConfig(
    parsedData: IParsedData,
    lastMotionGroup: IAvatarMotionGroup,
    prevMotionGroup: IAvatarMotionGroup,
    bin: Blob,
  ) {
    const { data_records = {} } = parsedData;
    const { tts2face } = data_records;
    if (tts2face) {
      const { channel_names, shape, data_offset, sample_rate } =
        tts2face as IDataRecords;
      if (channel_names && !this._tts2face_channel_names) {
        this._tts2face_channel_names = channel_names;
        this._tts2face_sample_rate = sample_rate;
      }
      if (lastMotionGroup.tts2faceArrayBufferArray === undefined) {
        if (
          prevMotionGroup &&
          prevMotionGroup.tts2faceArrayBufferArray &&
          prevMotionGroup.batch_id === lastMotionGroup.batch_id
        ) {
          lastMotionGroup.tts2faceArrayBufferArray =
            prevMotionGroup.tts2faceArrayBufferArray;
        } else {
          lastMotionGroup.tts2faceArrayBufferArray = [];
        }
        const shapeLength = shape.reduce(
          (acc: number, cur: number) => acc * cur,
          4,
        );
        this._tts2FaceShape = shape[1];
        const tts2faceBlob = bin.slice(data_offset, data_offset + shapeLength);
        const tts2faceArrayBuffer = await tts2faceBlob.arrayBuffer();
        lastMotionGroup.tts2faceArrayBufferArray.push(tts2faceArrayBuffer);
      }
    } else {
      if (
        prevMotionGroup &&
        prevMotionGroup.tts2faceArrayBufferArray &&
        lastMotionGroup.batch_id === prevMotionGroup.batch_id
      ) {
        lastMotionGroup.tts2faceArrayBufferArray =
          prevMotionGroup.tts2faceArrayBufferArray;
      }
    }
  }

  // Forward recognized server events to the event bus.
  private _handleEvents(parsedData: IParsedData) {
    const { events } = parsedData;
    if (events && events.length) {
      events.forEach((e) => {
        switch (e.event_type) {
          case "interrupt_speech":
            // Currently a no-op; interruption is driven by interrupt().
            break;
          case "change_status":
            this.ee.emit(ProcessorEventTypes.Change_Status, e);
            break;
          default:
            break;
        }
      });
    }
  }
  // Attach batch metadata to the group and reject data belonging to a
  // batch that was already interrupted. Returns false when rejected.
  private _connectBatch(
    parsedData: IParsedData,
    lastMotionGroup: IAvatarMotionGroup,
    prevMotionGroup: IAvatarMotionGroup,
  ) {
    let batchCheckResult = true;
    // Adopt the packet's batch_id on first sight.
    if (parsedData.batch_id && lastMotionGroup.batch_id === undefined) {
      lastMotionGroup.batch_id = parsedData.batch_id;
    }
    // Event-only motions without a batch_id inherit the previous group's.
    if (
      !lastMotionGroup.batch_id &&
      prevMotionGroup &&
      prevMotionGroup.batch_id
    ) {
      lastMotionGroup.batch_id = prevMotionGroup.batch_id;
    }
    // Same inheritance for batch_name.
    if (parsedData.batch_name && lastMotionGroup.batch_name === undefined) {
      lastMotionGroup.batch_name = parsedData.batch_name;
    }
    // After an interrupt, stragglers from an interrupted batch are
    // destroyed again and the packet is rejected.
    if (
      this._maxBatchId &&
      lastMotionGroup.batch_id &&
      lastMotionGroup.batch_id <= this._maxBatchId
    ) {
      this.clear();
      batchCheckResult = false;
    }
    return batchCheckResult;
  }
}
|
||||
40
frontend/shared/VideoChat/helpers/ws.ts
Normal file
40
frontend/shared/VideoChat/helpers/ws.ts
Normal file
@@ -0,0 +1,40 @@
|
||||
import EventEmitter from "eventemitter3";
|
||||
import { WsEventTypes } from "../interface/eventType";
|
||||
|
||||
export class WS extends EventEmitter {
|
||||
engine: WebSocket;
|
||||
|
||||
private _inited = false;
|
||||
|
||||
constructor(url: string) {
|
||||
super();
|
||||
this._init(url);
|
||||
}
|
||||
private _init(url: string) {
|
||||
if (this._inited) {
|
||||
return;
|
||||
}
|
||||
this._inited = true;
|
||||
this.engine = new WebSocket(url);
|
||||
this.engine.addEventListener("error", (event) => {
|
||||
this.emit(WsEventTypes.WS_ERROR, event);
|
||||
});
|
||||
this.engine.addEventListener("open", () => {
|
||||
this.emit(WsEventTypes.WS_OPEN);
|
||||
});
|
||||
this.engine.addEventListener("message", (event) => {
|
||||
this.emit(WsEventTypes.WS_MESSAGE, event.data);
|
||||
});
|
||||
this.engine.addEventListener("close", () => {
|
||||
this.emit(WsEventTypes.WS_CLOSE);
|
||||
});
|
||||
}
|
||||
public send(data: string | Int8Array | Uint8Array) {
|
||||
this.engine?.send(data);
|
||||
}
|
||||
public stop() {
|
||||
this.emit(WsEventTypes.WS_CLOSE);
|
||||
this._inited = false;
|
||||
this.engine?.close();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user