Update to MiniCPM-o 2.6

This commit is contained in:
yiranyyu
2025-01-14 15:33:44 +08:00
parent b75a362dd6
commit 53c0174797
123 changed files with 16848 additions and 2952 deletions

View File

@@ -0,0 +1,3 @@
<template>
<!-- Placeholder stub: renders a static "Chatbot" label; the real chatbot UI is not implemented yet. -->
<div>Chatbot</div>
</template>

View File

@@ -0,0 +1,971 @@
<template>
<!-- Realtime audio/video call page (SSE transport variant). -->
<!-- <ExtraInfo webVersion="非websocket_0111" :modelVersion="modelVersion" /> -->
<div class="video-page">
<div class="video-page-header">
<!-- Static placeholder bars while idle; live mic waveform while a call is active. -->
<div class="voice-container" v-if="!isCalling">
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
</div>
<div class="voice-container" v-else>
<Voice
:dataArray="dataArray"
:isCalling="isCalling"
:isPlaying="playing"
:configList="videoConfigList"
:boxStyle="{ height: '45px' }"
:itemStyle="{ width: '3px', margin: '0 1px' }"
/>
</div>
<!-- <SelectTimbre v-model:timbre="timbre" v-model:audioData="audioData" v-model:disabled="isCalling" /> -->
</div>
<div class="video-page-content">
<!-- Left pane: local camera preview plus a hidden canvas used to grab frames. -->
<div class="video-page-content-video" v-loading="loading" element-loading-background="#f3f3f3">
<video ref="videoRef" autoplay playsinline muted />
<canvas ref="canvasRef" canvas-id="canvasId" style="display: none" />
<div class="switch-camera" v-if="isMobile()" @click="switchCamera">
<SvgIcon name="switch-camera" class="icon" />
</div>
</div>
<!-- Right pane: conversation transcript, like/dislike feedback and skip control. -->
<div class="video-page-content-right">
<div class="output-content">
<ModelOutput
v-if="outputData.length > 0"
:outputData="outputData"
containerClass="output-content"
/>
</div>
<div class="skip-box">
<!-- <DelayTips
v-if="delayTimestamp > 200 || delayCount > 2"
:delayTimestamp="delayTimestamp"
:delayCount="delayCount"
/> -->
<LikeAndDislike v-model:feedbackStatus="feedbackStatus" v-model:curResponseId="curResponseId" />
<SkipBtn :disabled="skipDisabled" @click="skipVoice" />
</div>
</div>
</div>
<!-- Call / hang-up controls; CountDown auto-hangs-up when time runs out. -->
<div class="video-page-btn">
<el-button v-show="!isCalling" type="success" :disabled="callDisabled" @click="initRecording">
{{ callDisabled ? t('notReadyBtn') : t('videoCallBtn') }}
</el-button>
<el-button v-show="isCalling" @click="stopRecording" type="danger">
<SvgIcon name="phone-icon" className="phone-icon" />
<span class="btn-text">{{ t('hangUpBtn') }}</span>
<CountDown v-model="isCalling" @timeUp="stopRecording" />
</el-button>
</div>
<IdeasList v-if="showIdeasList" :ideasList="videoIdeasList" />
</div>
</template>
<script setup>
import { sendMessage, stopMessage, uploadConfig } from '@/apis';
import { encodeWAV } from '@/hooks/useVoice';
import { getNewUserId, setNewUserId } from '@/hooks/useRandomId';
import { fetchEventSource } from '@microsoft/fetch-event-source';
import { MicVAD } from '@ricky0123/vad-web';
import { videoIdeasList, videoConfigList, showIdeasList } from '@/enums';
import { isMobile, maxCount, getChunkLength } from '@/utils';
import { mergeBase64ToBlob } from './merge';
import { useI18n } from 'vue-i18n';
const { t } = useI18n();
import WebSocketService from '@/utils/websocket';
// Abort controller for the active SSE stream; replaced on every (re)connect.
let ctrl = new AbortController();
// Legacy websocket transport handle (unused in this SSE variant).
let socket = null;
const audioData = ref({
base64Str: '',
type: 'mp3'
}); // custom timbre audio as a base64 string
// Two-way bound call state (v-model from the parent component).
const isCalling = defineModel();
const videoRef = ref();
const videoStream = ref(null);
const interval = ref();
const canvasRef = ref();
// Captured camera frames; only the newest one is ever sent (see saveAudioChunk).
const videoImage = ref([]);
const videoLoaded = ref(false);
// Pending upload tasks as { func, time } entries, pumped by processQueue().
const taskQueue = ref([]);
const running = ref(false);
// Conversation transcript rendered by <ModelOutput>.
const outputData = ref([]);
const isFirstReturn = ref(true);
// Thunks that play received audio chunks in order.
const audioPlayQueue = ref([]);
const base64List = ref([]);
const playing = ref(false);
const timbre = ref([1]);
const isReturnError = ref(false);
// Characters still to be typed out by the drawText animation.
const textQueue = ref('');
const textAnimationInterval = ref();
const analyser = ref();
const dataArray = ref();
const animationFrameId = ref();
const skipDisabled = ref(true);
const stop = ref(false);
const isFrontCamera = ref(true);
const loading = ref(false);
const isEnd = ref(false); // SSE connection closed => model finished this response
const isFirstPiece = ref(true);
// Every audio chunk of the current bot reply (for transcript playback).
const allVoice = ref([]);
const callDisabled = ref(true);
const feedbackStatus = ref('');
const curResponseId = ref('');
const delayTimestamp = ref(0); // latency (ms) of the most recently sent chunk
const delayCount = ref(0); // number of chunks still queued, not yet sent to the API
const modelVersion = ref('');
let mediaStream;
let audioRecorder;
let audioStream;
let intervalId;
let audioContext;
let audioChunks = [];
let count = 0;
let audioDOM;
onBeforeUnmount(() => {
stopRecording();
});
const vadStartTime = ref();
let myvad = null;
// VAD debounce timer: if speech stops within the window it is treated as a false trigger and ignored; if it persists, the current reply is skipped automatically.
let vadTimer = null;
// Initialize voice-activity detection. A short debounce timer filters out
// spurious triggers: speech that persists for 500ms is treated as a real
// barge-in and skips the current bot reply.
const vadStart = async () => {
  const handleSpeechStart = () => {
    console.log('Speech start', +new Date());
    if (vadTimer) clearTimeout(vadTimer);
    vadTimer = setTimeout(() => {
      console.log('打断时间: ', +new Date());
      skipVoice();
    }, 500);
  };
  const handleSpeechEnd = audio => {
    // Speech stopped before the debounce fired: treat as a false trigger.
    if (vadTimer) clearTimeout(vadTimer);
    console.log('Speech end', +new Date());
  };
  myvad = await MicVAD.new({
    onSpeechStart: handleSpeechStart,
    onSpeechEnd: handleSpeechEnd,
    baseAssetPath: '/'
  });
  myvad.start();
};
// On mount: ask the backend to clear any stale session; only enable the
// call button once that cleanup succeeds.
onMounted(async () => {
const { code, message } = await stopMessage();
if (code !== 0) {
ElMessage({
type: 'error',
message: message,
duration: 3000,
customClass: 'system-error'
});
return;
}
callDisabled.value = false;
});
// Promise-based sleep: resolves after `ms` milliseconds.
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
// Start a call: upload the user config, lazily create the shared <audio>
// element, rotate the uid, open the SSE channel, then start media capture
// and (optionally) voice-activity detection.
const initRecording = async () => {
uploadUserConfig()
.then(async () => {
if (!audioDOM) {
audioDOM = new Audio();
audioDOM.playsinline = true;
audioDOM.preload = 'auto';
}
// Each call needs a freshly generated uid.
setNewUserId();
buildConnect();
await delay(100);
// if (socket) {
// socket.close();
// }
// socket = new WebSocketService(
// `/ws/stream${window.location.search}&uid=${getNewUserId()}&service=minicpmo-server`
// );
// socket.connect();
initVideoStream('environment');
if (localStorage.getItem('canStopByVoice') === 'true') {
console.log('vad start');
vadStart();
}
})
.catch(() => {});
};
// Toggle between front and rear cameras (mobile only); no-op outside a call.
const switchCamera = () => {
if (!isCalling.value) {
return;
}
isFrontCamera.value = !isFrontCamera.value;
// NOTE(review): the mapping looks inverted relative to the flag name --
// when isFrontCamera is true this selects 'environment' (rear). The toggle
// still alternates correctly; confirm intent before renaming.
const facingMode = isFrontCamera.value ? 'environment' : 'user'; // 'user' = front camera, 'environment' = rear camera
initVideoStream(facingMode);
};
// Acquire camera + microphone, wire the audio graph (an AnalyserNode for
// the waveform display and a ScriptProcessorNode that captures 16kHz PCM
// chunks for upload), and start the 50ms frame-grab interval.
// Fix: errors from getUserMedia / AudioContext (e.g. permission denied)
// were swallowed by a bare `catch {}`, leaving the loading spinner up
// forever; they are now logged and the loading state is cleared.
const initVideoStream = async facingMode => {
  if (mediaStream) {
    mediaStream.getTracks().forEach(track => track.stop());
    videoStream.value = null;
  }
  outputData.value = [];
  isCalling.value = true;
  loading.value = true;
  if (!videoStream.value) {
    try {
      mediaStream = await window.navigator.mediaDevices.getUserMedia({
        video: { facingMode },
        audio: true
      });
      videoStream.value = mediaStream;
      videoRef.value.srcObject = mediaStream;
      loading.value = false;
      console.log('打开后: ', +new Date());
      audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
      console.log('samplate: ', audioContext);
      const audioSource = audioContext.createMediaStreamSource(mediaStream);
      interval.value = setInterval(() => dealImage(), 50);
      // ScriptProcessorNode captures raw PCM frames from the microphone.
      const processor = audioContext.createScriptProcessor(256, 1, 1);
      processor.onaudioprocess = event => {
        if (!isCalling.value) return;
        if (isReturnError.value) {
          stopRecording();
          return;
        }
        const data = event.inputBuffer.getChannelData(0);
        audioChunks.push(new Float32Array(data));
        // Have we buffered enough samples for one send chunk?
        const totalBufferLength = audioChunks.reduce((total, curr) => total + curr.length, 0);
        const chunkLength = getChunkLength(audioContext.sampleRate);
        if (totalBufferLength >= chunkLength) {
          // Merge the pieces and cut exactly one second of samples.
          const mergedBuffer = mergeBuffers(audioChunks, totalBufferLength);
          const oneSecondBuffer = mergedBuffer.slice(0, audioContext.sampleRate);
          // Encode to WAV and queue for upload.
          addQueue(+new Date(), () => saveAudioChunk(oneSecondBuffer, +new Date()));
          // Keep the remainder for the next chunk.
          audioChunks = [mergedBuffer.slice(audioContext.sampleRate)];
        }
      };
      analyser.value = audioContext.createAnalyser();
      // Feed the mic into the analyser driving the waveform display.
      audioSource.connect(analyser.value);
      analyser.value.fftSize = 256;
      const bufferLength = analyser.value.frequencyBinCount;
      dataArray.value = new Uint8Array(bufferLength);
      // Start the waveform animation loop.
      drawBars();
      audioSource.connect(processor);
      processor.connect(audioContext.destination);
    } catch (err) {
      console.error('initVideoStream failed:', err);
      loading.value = false;
    }
  }
};
// Typewriter effect: move one character per animation frame from textQueue
// onto the last transcript entry.
const drawText = async () => {
if (textQueue.value.length > 0) {
outputData.value[outputData.value.length - 1].text += textQueue.value[0];
textQueue.value = textQueue.value.slice(1);
} else {
cancelAnimationFrame(textAnimationInterval.value);
}
// NOTE(review): a new frame is scheduled even right after the cancel above,
// so this loop keeps polling for new text until teardown -- confirm intended.
textAnimationInterval.value = requestAnimationFrame(drawText);
};
// Thin accessors so async callbacks always read the latest reactive state
// instead of a stale closure value.
const getStopValue = () => stop.value;
const getPlayingValue = () => playing.value;
// Whether the user enabled interrupt-by-voice in local settings.
const getStopStatus = () => localStorage.getItem('canStopByVoice') === 'true';
// Encode one second of PCM into base64 WAV, attach the latest camera frame
// on every maxCount-th chunk, and POST the payload to the backend.
// Resolves once the chunk is handed off (or deliberately dropped).
// Fix: send failures were silently swallowed; they are now logged (still
// best-effort -- one lost chunk must not break the pipeline).
const saveAudioChunk = (buffer, timestamp) => {
  return new Promise(resolve => {
    // While the bot is speaking and interrupt-by-voice is off, drop mic input.
    if (!getStopStatus() && getPlayingValue()) {
      resolve();
      return;
    }
    const wavBlob = encodeWAV(buffer, audioContext.sampleRate);
    const reader = new FileReader();
    reader.readAsDataURL(wavBlob);
    reader.onloadend = async function () {
      const base64data = reader.result.split(',')[1];
      const imgBase64 = videoImage.value[videoImage.value.length - 1]?.src;
      if (!(base64data && imgBase64)) {
        resolve();
        return;
      }
      const strBase64 = imgBase64.split(',')[1];
      count++;
      const obj = {
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'image_data',
                image_data: {
                  // Only every maxCount-th chunk carries a real frame.
                  data: count === maxCount ? strBase64 : '',
                  type: 2
                }
              },
              {
                type: 'input_audio',
                input_audio: {
                  data: base64data,
                  format: 'wav',
                  timestamp: String(timestamp)
                }
              }
            ]
          }
        ]
      };
      if (count === maxCount) {
        count = 0;
      }
      // Ship the base64 audio chunk to the backend.
      try {
        await sendMessage(obj);
        delayTimestamp.value = +new Date() - timestamp;
        delayCount.value = taskQueue.value.length;
      } catch (err) {
        console.error('sendMessage failed:', err);
      }
      resolve();
    };
  });
};
// Concatenate a list of Float32Array chunks into one contiguous buffer of
// the given total length.
const mergeBuffers = (buffers, length) => {
  const result = new Float32Array(length);
  buffers.reduce((offset, chunk) => {
    result.set(chunk, offset);
    return offset + chunk.length;
  }, 0);
  return result;
};
// Hang up: stop capture, tear down the audio graph and SSE stream, flush
// all queues, and flush any buffered bot audio into the transcript.
// Teardown order matters (capture first, network last).
const stopRecording = () => {
isCalling.value = false;
clearInterval(interval.value);
interval.value = null;
if (audioRecorder && audioRecorder.state !== 'inactive') {
audioRecorder.stop();
}
if (animationFrameId.value) {
cancelAnimationFrame(animationFrameId.value);
}
if (audioContext && audioContext.state !== 'closed') {
audioContext.close();
}
destroyVideoStream();
taskQueue.value = [];
audioPlayQueue.value = [];
base64List.value = [];
// Abort the SSE stream and prepare a fresh controller for the next call.
ctrl.abort();
ctrl = new AbortController();
isReturnError.value = false;
skipDisabled.value = true;
playing.value = false;
audioDOM?.pause();
stopMessage();
if (socket) {
socket.close();
}
// If the last BOT entry never got its merged audio, attach it now so the
// transcript stays playable.
if (
outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
outputData.value[outputData.value.length - 1].audio === '' &&
allVoice.value.length > 0
) {
outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
}
myvad && myvad.destroy();
};
// Open (or reopen) the streaming completion channel via SSE. The server
// pushes interleaved text/audio chunks for the bot reply; on failure modes
// (bad status, empty echo, malformed chunk) we tear down and reconnect.
// Fix: `data.choices[0].audio` / `.text.includes(...)` were dereferenced
// without optional chaining, so a short or text-less chunk threw inside
// onmessage and killed the stream instead of being handled.
const buildConnect = () => {
  const obj = {
    messages: [
      {
        role: 'user',
        content: [{ type: 'none' }]
      }
    ],
    stream: true
  };
  isEnd.value = false;
  ctrl.abort();
  ctrl = new AbortController();
  const url = `/api/v1/completions${window.location.search}`;
  fetchEventSource(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      service: 'minicpmo-server',
      uid: getNewUserId()
    },
    body: JSON.stringify(obj),
    signal: ctrl.signal,
    openWhenHidden: true,
    async onopen(response) {
      // Reset per-response state for the new stream.
      isFirstPiece.value = true;
      isFirstReturn.value = true;
      allVoice.value = [];
      base64List.value = [];
      console.log('onopen', response);
      if (response.status !== 200) {
        ElMessage({
          type: 'error',
          message: 'At limit. Please try again soon.',
          duration: 3000,
          customClass: 'system-error'
        });
        isReturnError.value = true;
      } else {
        isReturnError.value = false;
        drawText();
      }
    },
    onmessage(msg) {
      const data = JSON.parse(msg.data);
      if (data.response_id) {
        curResponseId.value = data.response_id;
      }
      if (data.choices[0]?.text) {
        textQueue.value += data.choices[0].text.replace('<end>', '');
        console.warn('text return time -------------------------------', +new Date());
      }
      // The first message echoes the audio the frontend sent; handle it separately.
      if (isFirstReturn.value) {
        console.log('第一次');
        isFirstReturn.value = false;
        // An empty echo means the backend failed to start; reconnect.
        if (!data.choices[0]?.audio) {
          buildConnect();
          return;
        }
        outputData.value.push({
          type: 'USER',
          audio: `data:audio/wav;base64,${data.choices[0].audio}`
        });
        outputData.value.push({
          type: 'BOT',
          text: '',
          audio: ''
        });
        return;
      }
      if (data.choices[0]?.audio) {
        console.log('audio return time -------------------------------', +new Date());
        if (!getStopValue() && isCalling.value) {
          skipDisabled.value = false;
          base64List.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
          addAudioQueue(() => truePlay(data.choices[0].audio));
        }
        allVoice.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
      } else {
        // Unexpected payload: reconnect (still fall through to the <end>
        // check below, which finalizes the transcript entry if applicable).
        buildConnect();
      }
      if (data.choices[0]?.text?.includes('<end>')) {
        console.log('收到结束标记了:', +new Date());
        // Attach the merged reply audio to the last BOT transcript entry.
        if (
          outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
          outputData.value[outputData.value.length - 1].audio === '' &&
          allVoice.value.length > 0
        ) {
          outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
        }
      }
    },
    onclose() {
      console.log('onclose', +new Date());
      isEnd.value = true;
      if (
        outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
        outputData.value[outputData.value.length - 1].audio === '' &&
        allVoice.value.length > 0
      ) {
        outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
      }
      // If nothing is queued for playback the model returned no audio this
      // round; keep only the last ~1s of pending send tasks and reconnect.
      vadStartTime.value = +new Date();
      if (audioPlayQueue.value.length === 0) {
        let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
        if (startIndex !== -1) {
          taskQueue.value = taskQueue.value.slice(startIndex);
        }
        buildConnect();
      }
    },
    onerror(err) {
      console.log('onerror', err);
      ctrl.abort();
      ctrl = new AbortController();
      // Rethrow so fetch-event-source stops its internal retry loop.
      throw err;
    }
  });
};
// Enqueue a playback thunk for a received audio chunk. The very first
// piece of a reply is delayed 1.5s so a few chunks can accumulate, then
// the player is kicked off if it is idle.
const addAudioQueue = async item => {
  audioPlayQueue.value.push(item);
  if (isFirstPiece.value) {
    await delay(1500);
    isFirstPiece.value = false;
  }
  if (!playing.value && audioPlayQueue.value.length > 0) {
    playing.value = true;
    playAudio();
  }
};
// Playback pump. Strategy:
//  - while the stream is still open and >=2 chunks are buffered, merge and
//    play them as one blob (fewer audible gaps between pieces);
//  - once the stream has closed, merge whatever remains, play it, then
//    reset state and open the next round;
//  - otherwise fall back to playing single chunks via the queued thunks.
const playAudio = () => {
console.log('剩余播放列表:', audioPlayQueue.value, +new Date());
if (!isEnd.value && base64List.value.length >= 2) {
const remainLen = base64List.value.length;
const blob = mergeBase64ToBlob(base64List.value);
audioDOM.src = blob;
audioDOM.play();
console.error('前期合并后播放开始时间: ', +new Date());
audioDOM.onended = () => {
console.error('前期合并后播放结束时间: ', +new Date());
// Drop exactly the chunks we just played; more may have arrived meanwhile.
base64List.value = base64List.value.slice(remainLen);
audioPlayQueue.value = audioPlayQueue.value.slice(remainLen);
playAudio();
};
return;
}
if (isEnd.value && base64List.value.length >= 2) {
const blob = mergeBase64ToBlob(base64List.value);
audioDOM.src = blob;
audioDOM.play();
console.error('合并后播放开始时间: ', +new Date());
audioDOM.onended = () => {
console.error('合并后播放结束时间: ', +new Date());
// URL.revokeObjectURL(url);
base64List.value = [];
audioPlayQueue.value = [];
playing.value = false;
skipDisabled.value = true;
// Reply fully played, still in a call and no error: start the next round.
if (isCalling.value && !isReturnError.value) {
// skipDisabled.value = true;
taskQueue.value = [];
// vadStartTime.value = +new Date();
// let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
// if (startIndex !== -1) {
// taskQueue.value = taskQueue.value.slice(startIndex);
// }
buildConnect();
}
};
return;
}
// Fallback: play the next single chunk, if any.
base64List.value.shift();
const _truePlay = audioPlayQueue.value.shift();
if (_truePlay) {
_truePlay().finally(() => {
playAudio();
});
} else {
playing.value = false;
if (isEnd.value) {
console.warn('play done................');
skipDisabled.value = true;
}
// Playback done, still in a call and no API error: open the next round.
if (isEnd.value && isCalling.value && !isReturnError.value) {
// skipDisabled.value = true;
taskQueue.value = [];
// vadStartTime.value = +new Date();
// let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
// if (startIndex !== -1) {
// taskQueue.value = taskQueue.value.slice(startIndex);
// }
buildConnect();
}
}
};
// Play a single base64 WAV chunk on the shared <audio> element; resolves
// when playback ends or errors.
// NOTE(review): if play() rejects (e.g. NotAllowedError) and neither the
// element's onerror nor onended fires, this promise stays pending and the
// playback queue stalls; the commented-out finally() suggests resolving on
// rejection was deliberately disabled -- confirm before changing.
const truePlay = voice => {
console.log('promise: ', +new Date());
return new Promise(resolve => {
audioDOM.src = 'data:audio/wav;base64,' + voice;
console.error('播放开始时间:', +new Date());
audioDOM
.play()
.then(() => {
console.log('Audio played successfully');
})
.catch(error => {
if (error.name === 'NotAllowedError' || error.name === 'SecurityError') {
console.error('User interaction required or permission issue:', error);
// ElMessage.warning('音频播放失败');
console.error('播放失败时间');
// alert('Please interact with the page (like clicking a button) to enable audio playback.');
} else {
console.error('Error playing audio:', error);
}
});
// .finally(() => {
// resolve();
// });
audioDOM.onerror = () => {
console.error('播放失败时间', +new Date());
resolve();
};
audioDOM.onended = () => {
console.error('播放结束时间: ', +new Date());
// URL.revokeObjectURL(url);
resolve();
};
});
};
// Enqueue a timestamped send task and start the pump if it is idle.
const addQueue = (time, item) => {
  taskQueue.value.push({ func: item, time });
  if (!running.value && taskQueue.value.length > 0) {
    running.value = true;
    processQueue();
  }
};
// Pump: pop tasks one at a time (each task is an async thunk) until the
// queue drains, then mark the pump idle.
const processQueue = () => {
  const next = taskQueue.value.shift();
  if (!next?.func) {
    running.value = false;
    return;
  }
  next
    .func()
    .then(res => {
      console.log('已处理事件: ', res);
    })
    .finally(() => processQueue());
};
// Stop all camera/microphone tracks and reset video-capture state.
const destroyVideoStream = () => {
  const stream = videoStream.value;
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
  }
  videoStream.value = null;
  // Detach the MediaStream from the element so the browser can release it.
  videoRef.value.srcObject = null;
  videoImage.value = [];
  videoLoaded.value = false;
  clearInterval(intervalId);
  clearInterval(interval.value);
  interval.value = null;
};
// Grab the current video frame as a webp data-URL and store it as the
// latest snapshot. Runs on a 50ms interval while a call is active.
// Fixes: (1) frames were captured even before the video had dimensions,
// producing useless 0x0 images; (2) videoImage grew without bound for the
// whole call although only the newest frame is ever read (saveAudioChunk),
// so the buffer is now capped.
const dealImage = () => {
  if (!videoRef.value) {
    return;
  }
  const { videoWidth, videoHeight } = videoRef.value;
  // Skip until the video element has real dimensions.
  if (!videoWidth || !videoHeight) {
    return;
  }
  const canvas = canvasRef.value;
  canvas.width = videoWidth;
  canvas.height = videoHeight;
  const context = canvas.getContext('2d');
  context.drawImage(videoRef.value, 0, 0, videoWidth, videoHeight);
  const imageDataUrl = canvas.toDataURL('image/webp', 0.8);
  videoImage.value.push({ src: imageDataUrl });
  // Cap the rolling buffer; only the most recent frame is consumed.
  if (videoImage.value.length > 10) {
    videoImage.value.splice(0, videoImage.value.length - 10);
  }
};
// Per-animation-frame loop feeding the waveform display: copy the current
// frequency spectrum into dataArray, which the Voice component renders.
const drawBars = () => {
// AnalyserNode.getByteFrequencyData copies the current frequency data into the passed Uint8Array.
analyser.value.getByteFrequencyData(dataArray.value);
animationFrameId.value = requestAnimationFrame(drawBars);
};
// Barge-in: abort the current bot reply, flush playback queues, keep only
// the last ~1s of pending send tasks, then reopen the stream.
// Fix: a failure of stopMessage() was silently swallowed; it is now logged
// (the next user action or hang-up still recovers the session).
const skipVoice = async () => {
  // Record the interrupt (or VAD trigger) time before tearing down.
  vadStartTime.value = +new Date();
  if (skipDisabled.value) {
    return;
  }
  // Persist whatever audio the bot already produced onto the last BOT entry.
  if (
    outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
    outputData.value[outputData.value.length - 1].audio === ''
  ) {
    outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
  }
  base64List.value = [];
  audioPlayQueue.value = [];
  // After skipping, keep only audio chunks recorded within the last second.
  console.log(
    '截取前长度:',
    taskQueue.value.map(item => item.time)
  );
  let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
  if (startIndex !== -1) {
    taskQueue.value = taskQueue.value.slice(startIndex);
    console.log(
      '截取后长度:',
      taskQueue.value.map(item => item.time),
      vadStartTime.value
    );
  }
  stop.value = true;
  audioDOM?.pause();
  // Re-disable the skip button shortly after the interrupt.
  setTimeout(() => {
    skipDisabled.value = true;
  }, 300);
  try {
    playing.value = false;
    await stopMessage();
    stop.value = false;
    buildConnect();
  } catch (err) {
    console.error('skipVoice: stopMessage failed', err);
  }
};
// Upload the locally stored user configuration (custom voice audio plus
// option flags) before each call.
// Resolves when there is no stored config or the upload succeeds; shows an
// error toast and rejects when the backend returns a non-zero code.
// Fixes: replaced the `new Promise(resolve => resolve())` / wrap-an-async-
// result-in-a-Promise anti-patterns with plain async control flow, and the
// bare reject() now carries a real Error.
const uploadUserConfig = async () => {
  const raw = localStorage.getItem('configData');
  if (!raw) {
    return;
  }
  const {
    videoQuality,
    useAudioPrompt,
    voiceClonePrompt,
    assistantPrompt,
    vadThreshold,
    audioFormat,
    base64Str
  } = JSON.parse(raw);
  const obj = {
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'input_audio',
            input_audio: {
              data: base64Str,
              format: audioFormat
            }
          },
          {
            type: 'options',
            options: {
              hd_video: videoQuality,
              use_audio_prompt: useAudioPrompt,
              vad_threshold: vadThreshold,
              voice_clone_prompt: voiceClonePrompt,
              assistant_prompt: assistantPrompt
            }
          }
        ]
      }
    ]
  };
  const { code, message, data } = await uploadConfig(obj);
  modelVersion.value = data?.choices?.content || '';
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    throw new Error(message);
  }
};
</script>
<style lang="less" scoped>
// Page layout: header (waveform) / content (video + transcript) / call button.
.video-page {
flex: 1;
height: 100%;
display: flex;
flex-direction: column;
&-header {
width: 100%;
display: flex;
align-items: center;
justify-content: center;
padding: 0 16px 16px;
box-shadow: 0 0.5px 0 0 #e0e0e0;
margin-bottom: 16px;
.header-icon {
display: flex;
align-items: center;
img {
width: 24px;
height: 24px;
margin-right: 8px;
}
span {
color: rgba(23, 23, 23, 0.9);
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
margin-right: 40px;
flex-shrink: 0;
}
}
.voice-container {
display: flex;
.voice-icon {
width: 191px;
height: 45px;
}
}
}
&-content {
flex: 1;
margin-bottom: 16px;
display: flex;
height: 0;
// Left pane: live camera preview with the camera-switch overlay button.
&-video {
width: 50%;
height: 100%;
background: #f3f3f3;
flex-shrink: 0;
position: relative;
video {
width: 100%;
height: 100%;
object-fit: contain;
}
.switch-camera {
position: absolute;
top: 10px;
right: 10px;
width: 36px;
height: 36px;
background: #ffffff;
border-radius: 6px;
display: flex;
justify-content: center;
align-items: center;
font-size: 24px;
z-index: 999;
.icon {
width: 20px;
height: 20px;
}
}
}
// Right pane: transcript plus the skip/feedback toolbar.
&-right {
margin-left: 16px;
flex: 1;
padding: 0 16px;
display: flex;
flex-direction: column;
.output-content {
flex: 1;
overflow: auto;
}
.skip-box {
display: flex;
align-items: center;
justify-content: flex-end;
margin-top: 16px;
}
}
}
// Bottom controls: success = start call, danger = hang up.
&-btn {
text-align: center;
padding: 8px 0;
.el-button {
width: 284px;
height: 46px;
border-radius: 8px;
}
.el-button.el-button--success {
background: #647fff;
border-color: #647fff;
&:hover {
opacity: 0.8;
}
span {
color: #fff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
}
}
.el-button.el-button--success.is-disabled {
background: #f3f3f3;
border-color: #f3f3f3;
span {
color: #d1d1d1;
}
}
.el-button.el-button--danger {
border-color: #dc3545;
background-color: #dc3545;
color: #ffffff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
.phone-icon {
margin-right: 10px;
}
.btn-text {
margin-right: 10px;
}
.btn-desc {
margin-right: 16px;
}
}
}
}
// Overlay showing the active capture resolution (template usage is commented out).
.video-size {
position: absolute;
bottom: 10px;
right: 10px;
background: rgba(0, 0, 0, 0.5);
color: #fff;
padding: 4px 8px;
border-radius: 4px;
font-size: 12px;
}
</style>

View File

@@ -0,0 +1,955 @@
<template>
<!-- Realtime call page: websocket-transport variant (SSE still used for replies). -->
<ExtraInfo webVersion="websocket_0107" :modelVersion="modelVersion" />
<div class="video-page">
<div class="video-page-header">
<div style="display: flex; align-items: center" class="header-icon">
<img src="@/assets/images/voice-icon.png" />
<span>Audio Choice</span>
</div>
<!-- Static placeholder bars while idle; live mic waveform while calling. -->
<div class="voice-container" v-if="!isCalling">
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
</div>
<div class="voice-container" v-else>
<Voice
:dataArray="dataArray"
:isCalling="isCalling"
:isPlaying="playing"
:configList="videoConfigList"
:boxStyle="{ height: '45px' }"
:itemStyle="{ width: '3px', margin: '0 1px' }"
/>
</div>
<!-- <SelectTimbre v-model:timbre="timbre" v-model:audioData="audioData" v-model:disabled="isCalling" /> -->
</div>
<div class="video-page-content">
<!-- Left pane: camera preview + hidden frame-grab canvas. -->
<div class="video-page-content-video" v-loading="loading" element-loading-background="#f3f3f3">
<video ref="videoRef" autoplay playsinline muted />
<canvas ref="canvasRef" canvas-id="canvasId" style="display: none" />
<div class="switch-camera" v-if="isMobile()" @click="switchCamera">
<SvgIcon name="switch-camera" class="icon" />
</div>
<!-- <div class="video-size" v-if="width || height">{{ width }} x {{ height }}</div> -->
</div>
<!-- Right pane: transcript, latency warning, feedback and skip control. -->
<div class="video-page-content-right">
<div class="output-content">
<ModelOutput
v-if="outputData.length > 0"
:outputData="outputData"
containerClass="output-content"
/>
</div>
<div class="skip-box">
<DelayTips
v-if="delayTimestamp > 200 || delayCount > 2"
:delayTimestamp="delayTimestamp"
:delayCount="delayCount"
/>
<LikeAndDislike v-model:feedbackStatus="feedbackStatus" v-model:curResponseId="curResponseId" />
<SkipBtn :disabled="skipDisabled" @click="skipVoice" />
</div>
</div>
</div>
<div class="video-page-btn">
<el-button v-show="!isCalling" type="success" :disabled="callDisabled" @click="initRecording">
{{ callDisabled ? 'Not ready yet, please wait' : 'Call MiniCPM' }}
</el-button>
<el-button v-show="isCalling" @click="stopRecording" type="danger">
<SvgIcon name="phone-icon" className="phone-icon" />
<span class="btn-text">Hang Up</span>
<CountDown v-model="isCalling" @timeUp="stopRecording" />
</el-button>
</div>
<IdeasList v-if="showIdeasList" :ideasList="videoIdeasList" />
</div>
</template>
<script setup>
import { sendMessage, stopMessage, uploadConfig } from '@/apis';
import { encodeWAV } from '@/hooks/useVoice';
import { getNewUserId, setNewUserId } from '@/hooks/useRandomId';
import { fetchEventSource } from '@microsoft/fetch-event-source';
import { MicVAD } from '@ricky0123/vad-web';
import { videoIdeasList, videoConfigList, showIdeasList } from '@/enums';
import { isMobile, maxCount, getChunkLength } from '@/utils';
import { mergeBase64ToBlob } from './merge';
import WebSocketService from '@/utils/websocket';
// Abort controller for the active SSE stream; replaced on every (re)connect.
let ctrl = new AbortController();
// Websocket used to ship audio/image chunks upstream (this variant).
let socket = null;
const audioData = ref({
base64Str: '',
type: 'mp3'
}); // custom timbre audio as a base64 string
// Two-way bound call state (v-model from the parent component).
const isCalling = defineModel();
const videoRef = ref();
const videoStream = ref(null);
const interval = ref();
const canvasRef = ref();
// Captured camera frames; only the newest one is ever sent (see saveAudioChunk).
const videoImage = ref([]);
const videoLoaded = ref(false);
// Pending upload tasks as { func, time } entries, pumped by processQueue().
const taskQueue = ref([]);
const running = ref(false);
// Conversation transcript rendered by <ModelOutput>.
const outputData = ref([]);
const isFirstReturn = ref(true);
// Thunks that play received audio chunks in order.
const audioPlayQueue = ref([]);
const base64List = ref([]);
const playing = ref(false);
const timbre = ref([1]);
const isReturnError = ref(false);
// Characters still to be typed out by the drawText animation.
const textQueue = ref('');
const textAnimationInterval = ref();
const analyser = ref();
const dataArray = ref();
const animationFrameId = ref();
const skipDisabled = ref(true);
const stop = ref(false);
const isFrontCamera = ref(true);
const loading = ref(false);
const isEnd = ref(false); // SSE connection closed => model finished this response
const isFirstPiece = ref(true);
// Every audio chunk of the current bot reply (for transcript playback).
const allVoice = ref([]);
const callDisabled = ref(true);
const feedbackStatus = ref('');
const curResponseId = ref('');
const delayTimestamp = ref(0); // latency (ms) of the most recently sent chunk
const delayCount = ref(0); // number of chunks still queued, not yet sent to the API
const modelVersion = ref('');
let mediaStream;
let audioRecorder;
let audioStream;
let audioContext;
let audioChunks = [];
let count = 0;
let audioDOM;
onBeforeUnmount(() => {
stopRecording();
});
const vadStartTime = ref();
let myvad = null;
// VAD debounce timer: if speech stops within the window it is treated as a false trigger and ignored; if it persists, the current reply is skipped automatically.
let vadTimer = null;
// Set up voice-activity detection. Speech that persists for a full second
// while the skip button is enabled counts as a barge-in and skips the
// current bot reply; shorter blips are discarded as false triggers.
const vadStart = async () => {
  const onSpeechStart = () => {
    console.log('Speech start', +new Date());
    if (skipDisabled.value) return;
    if (vadTimer) clearTimeout(vadTimer);
    vadTimer = setTimeout(() => {
      console.log('打断时间: ', +new Date());
      skipVoice();
    }, 1000);
  };
  const onSpeechEnd = audio => {
    if (vadTimer) clearTimeout(vadTimer);
    console.log('Speech end', +new Date());
  };
  myvad = await MicVAD.new({ onSpeechStart, onSpeechEnd });
  myvad.start();
};
// On mount: ask the backend to clear any stale session; only enable the
// call button once that cleanup succeeds.
onMounted(async () => {
const { code, message } = await stopMessage();
if (code !== 0) {
ElMessage({
type: 'error',
message: message,
duration: 3000,
customClass: 'system-error'
});
return;
}
callDisabled.value = false;
});
// Promise-based sleep helper used to pace async startup steps.
const delay = ms =>
  new Promise(done => {
    setTimeout(done, ms);
  });
// Start a call: upload the user config, lazily create the shared <audio>
// element, rotate the uid, open the SSE reply channel and the websocket
// upload channel, then start media capture and (optionally) VAD.
// Fix: initVideoStream('environment') was invoked twice (once before and
// once after the socket setup), opening the camera/audio pipeline twice;
// only the later call is kept.
const initRecording = async () => {
  uploadUserConfig()
    .then(async () => {
      if (!audioDOM) {
        audioDOM = new Audio();
        audioDOM.playsinline = true;
        audioDOM.preload = 'auto';
      }
      // Each call needs a freshly generated uid.
      setNewUserId();
      buildConnect();
      await delay(100);
      if (socket) {
        socket.close();
      }
      socket = new WebSocketService(
        `/ws/stream${window.location.search}&uid=${getNewUserId()}&service=minicpmo-server`
      );
      socket.connect();
      initVideoStream('environment');
      if (localStorage.getItem('canStopByVoice') === 'true') {
        vadStart();
      }
    })
    .catch(() => {});
};
// Toggle between front and rear cameras (mobile only); no-op outside a call.
const switchCamera = () => {
if (!isCalling.value) {
return;
}
isFrontCamera.value = !isFrontCamera.value;
// NOTE(review): the mapping looks inverted relative to the flag name --
// when isFrontCamera is true this selects 'environment' (rear). The toggle
// still alternates correctly; confirm intent before renaming.
const facingMode = isFrontCamera.value ? 'environment' : 'user'; // 'user' = front camera, 'environment' = rear camera
initVideoStream(facingMode);
};
// Acquire camera + microphone, wire the audio graph (an AnalyserNode for
// the waveform display and a ScriptProcessorNode that captures 16kHz PCM
// chunks for upload), and start the 50ms frame-grab interval.
// Fix: errors from getUserMedia / AudioContext (e.g. permission denied)
// were swallowed by a bare `catch {}`, leaving the loading spinner up
// forever; they are now logged and the loading state is cleared.
const initVideoStream = async facingMode => {
  if (mediaStream) {
    mediaStream.getTracks().forEach(track => track.stop());
    videoStream.value = null;
  }
  outputData.value = [];
  isCalling.value = true;
  loading.value = true;
  if (!videoStream.value) {
    try {
      mediaStream = await window.navigator.mediaDevices.getUserMedia({
        video: { facingMode },
        audio: true
      });
      console.log('mediaStream', mediaStream);
      videoStream.value = mediaStream;
      videoRef.value.srcObject = mediaStream;
      loading.value = false;
      console.log('打开后: ', +new Date());
      audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
      console.log('samplate: ', audioContext);
      const audioSource = audioContext.createMediaStreamSource(mediaStream);
      interval.value = setInterval(() => dealImage(), 50);
      // ScriptProcessorNode captures raw PCM frames from the microphone.
      const processor = audioContext.createScriptProcessor(256, 1, 1);
      processor.onaudioprocess = event => {
        if (!isCalling.value) return;
        if (isReturnError.value) {
          stopRecording();
          return;
        }
        const data = event.inputBuffer.getChannelData(0);
        audioChunks.push(new Float32Array(data));
        // Have we buffered enough samples for one send chunk?
        const totalBufferLength = audioChunks.reduce((total, curr) => total + curr.length, 0);
        const chunkLength = getChunkLength(audioContext.sampleRate);
        if (totalBufferLength >= chunkLength) {
          // Merge the pieces and cut exactly one second of samples.
          const mergedBuffer = mergeBuffers(audioChunks, totalBufferLength);
          const oneSecondBuffer = mergedBuffer.slice(0, audioContext.sampleRate);
          // Encode to WAV and queue for upload.
          addQueue(+new Date(), () => saveAudioChunk(oneSecondBuffer, +new Date()));
          // Keep the remainder for the next chunk.
          audioChunks = [mergedBuffer.slice(audioContext.sampleRate)];
        }
      };
      analyser.value = audioContext.createAnalyser();
      // Feed the mic into the analyser driving the waveform display.
      audioSource.connect(analyser.value);
      analyser.value.fftSize = 256;
      const bufferLength = analyser.value.frequencyBinCount;
      dataArray.value = new Uint8Array(bufferLength);
      // Start the waveform animation loop.
      drawBars();
      audioSource.connect(processor);
      processor.connect(audioContext.destination);
    } catch (err) {
      console.error('initVideoStream failed:', err);
      loading.value = false;
    }
  }
};
// Typewriter effect: move one character per animation frame from textQueue
// onto the last transcript entry. A new frame is scheduled even right after
// the cancel, so the loop keeps polling until teardown.
const drawText = async () => {
if (textQueue.value.length > 0) {
outputData.value[outputData.value.length - 1].text += textQueue.value[0];
textQueue.value = textQueue.value.slice(1);
} else {
cancelAnimationFrame(textAnimationInterval.value);
}
textAnimationInterval.value = requestAnimationFrame(drawText);
};
// Thin accessors so async callbacks always read the latest reactive state.
const getStopValue = () => {
return stop.value;
};
const getPlayingValue = () => {
return playing.value;
};
// Whether the user enabled interrupt-by-voice in local settings.
const getStopStatus = () => {
return localStorage.getItem('canStopByVoice') === 'true';
};
// Encode one second of PCM into base64 WAV, attach the latest camera frame
// on every maxCount-th chunk, and push the payload over the websocket.
// Resolves once a server 'message' acknowledgment arrives (or the chunk is
// deliberately dropped).
const saveAudioChunk = (buffer, timestamp) => {
return new Promise(resolve => {
// While the bot is speaking and interrupt-by-voice is off, drop mic input.
if (!getStopStatus() && getPlayingValue()) {
resolve();
return;
}
const wavBlob = encodeWAV(buffer, audioContext.sampleRate);
let reader = new FileReader();
reader.readAsDataURL(wavBlob);
reader.onloadend = async function () {
let base64data = reader.result.split(',')[1];
const imgBase64 = videoImage.value[videoImage.value.length - 1]?.src;
if (!(base64data && imgBase64)) {
resolve();
return;
}
const strBase64 = imgBase64.split(',')[1];
count++;
let obj = {
messages: [
{
role: 'user',
content: [
{
type: 'input_audio',
input_audio: {
data: base64data,
format: 'wav',
timestamp: String(timestamp)
}
}
]
}
]
};
// Prepend the image part; only every maxCount-th chunk carries a real frame.
obj.messages[0].content.unshift({
type: 'image_data',
image_data: {
data: count === maxCount ? strBase64 : '',
type: 2
}
});
if (count === maxCount) {
count = 0;
}
socket.send(JSON.stringify(obj));
// NOTE(review): socket.on('message', ...) is registered on EVERY chunk; if
// WebSocketService.on appends rather than replaces handlers this leaks
// listeners and resolves stale promises -- confirm its semantics.
socket.on('message', data => {
console.log('message: ', data);
delayTimestamp.value = +new Date() - timestamp;
delayCount.value = taskQueue.value.length;
resolve();
});
// Former HTTP transport for sending the base64 audio, kept for reference:
// try {
// await sendMessage(obj);
// delayTimestamp.value = +new Date() - timestamp;
// delayCount.value = taskQueue.value.length;
// } catch (err) {}
// resolve();
};
});
};
/**
 * Concatenate several Float32Array chunks into one contiguous buffer.
 * @param {Float32Array[]} buffers - chunks in capture order
 * @param {number} length - total sample count (sum of chunk lengths)
 * @returns {Float32Array} a new buffer of `length` samples
 */
const mergeBuffers = (buffers, length) => {
  const merged = new Float32Array(length);
  buffers.reduce((offset, chunk) => {
    merged.set(chunk, offset);
    return offset + chunk.length;
  }, 0);
  return merged;
};
// Hang up: tears down capture, playback, network and VAD state. The order
// matters — capture is stopped before queues are cleared so no new chunks
// are enqueued mid-teardown.
const stopRecording = () => {
  isCalling.value = false;
  // Stop the periodic frame-grab timer.
  clearInterval(interval.value);
  interval.value = null;
  // `audioRecorder` is declared outside this excerpt — presumably a
  // MediaRecorder; stop it if still active. TODO confirm.
  if (audioRecorder && audioRecorder.state !== 'inactive') {
    audioRecorder.stop();
  }
  // Stop the waveform drawing loop.
  if (animationFrameId.value) {
    cancelAnimationFrame(animationFrameId.value);
  }
  if (audioContext && audioContext.state !== 'closed') {
    audioContext.close();
  }
  destroyVideoStream();
  // Drop all pending upload/playback work.
  taskQueue.value = [];
  audioPlayQueue.value = [];
  base64List.value = [];
  // Abort the in-flight SSE stream and prepare a fresh controller.
  ctrl.abort();
  ctrl = new AbortController();
  isReturnError.value = false;
  skipDisabled.value = true;
  playing.value = false;
  audioDOM?.pause();
  // Best-effort: tell the backend to stop generating (promise intentionally
  // not awaited during teardown).
  stopMessage();
  if (socket) {
    socket.close();
  }
  // Finalize the last BOT bubble's audio if the stream ended mid-reply.
  if (
    outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
    outputData.value[outputData.value.length - 1].audio === '' &&
    allVoice.value.length > 0
  ) {
    outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
  }
  myvad && myvad.destroy();
};
// Open the SSE completion stream and handle the model's incremental
// text/audio replies. Reconnects itself whenever the backend sends an
// empty/abnormal reply or the stream closes with nothing left to play.
const buildConnect = () => {
  // No real content — the body just opens the stream for this uid.
  const obj = {
    messages: [
      {
        role: 'user',
        content: [{ type: 'none' }]
      }
    ],
    stream: true
  };
  isEnd.value = false;
  ctrl.abort();
  ctrl = new AbortController();
  const url = `/api/v1/completions${window.location.search}`;
  fetchEventSource(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      service: 'minicpmo-server',
      uid: getNewUserId()
    },
    body: JSON.stringify(obj),
    signal: ctrl.signal,
    openWhenHidden: true,
    async onopen(response) {
      // Reset per-turn state before any message arrives.
      isFirstPiece.value = true;
      isFirstReturn.value = true;
      allVoice.value = [];
      base64List.value = [];
      console.log('onopen', response);
      if (response.status !== 200) {
        ElMessage({
          type: 'error',
          message: 'At limit. Please try again soon.',
          duration: 3000,
          customClass: 'system-error'
        });
        isReturnError.value = true;
      } else {
        isReturnError.value = false;
        // Kick off the typewriter loop for this turn.
        drawText();
      }
    },
    onmessage(msg) {
      const data = JSON.parse(msg.data);
      if (data.response_id) {
        curResponseId.value = data.response_id;
      }
      if (data.choices[0]?.text) {
        textQueue.value += data.choices[0].text.replace('<end>', '');
        console.warn('text return time -------------------------------', +new Date());
      }
      // The first message echoes back the audio the frontend sent; it is
      // shown as the USER bubble rather than played.
      if (isFirstReturn.value) {
        console.log('第一次');
        isFirstReturn.value = false;
        // Empty echo means the backend has nothing for us — reconnect.
        // FIX: optional chaining so an empty `choices` array cannot throw.
        if (!data.choices[0]?.audio) {
          buildConnect();
          return;
        }
        outputData.value.push({
          type: 'USER',
          audio: `data:audio/wav;base64,${data.choices[0].audio}`
        });
        outputData.value.push({
          type: 'BOT',
          text: '',
          audio: ''
        });
        return;
      }
      if (data.choices[0]?.audio) {
        console.log('audio return time -------------------------------', +new Date());
        if (!getStopValue() && isCalling.value) {
          skipDisabled.value = false;
          base64List.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
          addAudioQueue(() => truePlay(data.choices[0].audio));
        }
        allVoice.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
      } else {
        // Abnormal message (no audio payload) — reconnect immediately.
        buildConnect();
      }
      // FIX: `text` is not present on every message; the unguarded
      // `.text.includes(...)` used to throw a TypeError on audio-only frames.
      if (data.choices[0]?.text?.includes('<end>')) {
        console.log('收到结束标记了:', +new Date());
        if (
          outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
          outputData.value[outputData.value.length - 1].audio === '' &&
          allVoice.value.length > 0
        ) {
          outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
        }
      }
    },
    onclose() {
      console.log('onclose', +new Date());
      isEnd.value = true;
      // FIX: finalize the BOT bubble's audio only when one exists and is
      // still empty (same guard the voice page uses) — the previous
      // unconditional assignment crashed when outputData was empty.
      if (
        outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
        outputData.value[outputData.value.length - 1].audio === '' &&
        allVoice.value.length > 0
      ) {
        outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
      }
      // If nothing is queued for playback, the model returned no audio this
      // round: trim stale mic chunks (keep the last ~1s) and reconnect.
      vadStartTime.value = +new Date();
      if (audioPlayQueue.value.length === 0) {
        let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
        console.log('taskQueue111111111: ', taskQueue.value, startIndex);
        if (startIndex !== -1) {
          taskQueue.value = taskQueue.value.slice(startIndex);
          console.log('截取后长度:', taskQueue.value, vadStartTime.value);
        }
        buildConnect();
      }
    },
    onerror(err) {
      console.log('onerror', err);
      ctrl.abort();
      ctrl = new AbortController();
      // Rethrow so fetchEventSource does not retry on its own.
      throw err;
    }
  });
};
// Enqueue one playback task and kick off the player if it is idle.
// The very first piece of a turn is buffered for 1.5s so several chunks can
// accumulate before playback starts (reduces choppiness at turn start).
const addAudioQueue = async playTask => {
  audioPlayQueue.value.push(playTask);
  if (isFirstPiece.value) {
    await delay(1500);
    isFirstPiece.value = false;
  }
  const playerIdle = !playing.value && audioPlayQueue.value.length > 0;
  if (playerIdle) {
    playing.value = true;
    playAudio();
  }
};
// Drives the playback queue. Three paths:
//  1) stream still open and >=2 chunks buffered: merge and play them as one
//     blob, then slice the played chunks off both queues;
//  2) stream closed and >=2 chunks buffered: merge-play the rest, then reset
//     state and reconnect for the next turn;
//  3) otherwise play chunks one at a time via the queued truePlay closures.
const playAudio = () => {
  console.log('剩余播放列表:', audioPlayQueue.value, +new Date());
  if (!isEnd.value && base64List.value.length >= 2) {
    const remainLen = base64List.value.length;
    // mergeBase64ToBlob returns an object URL playable by <audio>.
    const blob = mergeBase64ToBlob(base64List.value);
    audioDOM.src = blob;
    audioDOM.play();
    console.error('前期合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('前期合并后播放结束时间: ', +new Date());
      // Drop exactly the chunks that were merged; new ones may have arrived.
      base64List.value = base64List.value.slice(remainLen);
      audioPlayQueue.value = audioPlayQueue.value.slice(remainLen);
      playAudio();
    };
    return;
  }
  if (isEnd.value && base64List.value.length >= 2) {
    const blob = mergeBase64ToBlob(base64List.value);
    audioDOM.src = blob;
    audioDOM.play();
    console.error('合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('合并后播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      // Turn finished: reset playback state and start the next connection.
      base64List.value = [];
      audioPlayQueue.value = [];
      playing.value = false;
      skipDisabled.value = true;
      if (isCalling.value && !isReturnError.value) {
        // skipDisabled.value = true;
        taskQueue.value = [];
        // 打断前记录一下打断时间或vad触发事件
        // vadStartTime.value = +new Date();
        // // 每次完成后只保留当前时刻往前推1s的语音
        // console.log(
        //   '截取前长度:',
        //   taskQueue.value.map(item => item.time)
        // );
        // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
        // if (startIndex !== -1) {
        //   taskQueue.value = taskQueue.value.slice(startIndex);
        //   console.log(
        //     '截取后长度:',
        //     taskQueue.value.map(item => item.time),
        //     vadStartTime.value
        //   );
        // }
        buildConnect();
      }
    };
    return;
  }
  // Fallback path: play a single chunk via its queued closure.
  base64List.value.shift();
  const _truePlay = audioPlayQueue.value.shift();
  if (_truePlay) {
    _truePlay().finally(() => {
      playAudio();
    });
  } else {
    playing.value = false;
    if (isEnd.value) {
      console.warn('play done................');
      skipDisabled.value = true;
    }
    // When the turn is done, we are still in a call and no error occurred,
    // open the next connection.
    if (isEnd.value && isCalling.value && !isReturnError.value) {
      // skipDisabled.value = true;
      taskQueue.value = [];
      // // 跳过之后,只保留当前时间点两秒内到之后的音频片段
      // vadStartTime.value = +new Date();
      // console.log(
      //   '截取前长度:',
      //   taskQueue.value.map(item => item.time)
      // );
      // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
      // if (startIndex !== -1) {
      //   taskQueue.value = taskQueue.value.slice(startIndex);
      //   console.log(
      //     '截取后长度:',
      //     taskQueue.value.map(item => item.time),
      //     vadStartTime.value
      //   );
      // }
      buildConnect();
    }
  }
};
// Play one base64 WAV chunk through the shared audio element. Resolves when
// playback ends OR fails — the playback queue awaits this promise, so it
// must always settle.
const truePlay = voice => {
  console.log('promise: ', +new Date());
  return new Promise(resolve => {
    audioDOM.src = 'data:audio/wav;base64,' + voice;
    console.error('播放开始时间:', +new Date());
    audioDOM
      .play()
      .then(() => {
        console.log('Audio played successfully');
      })
      .catch(error => {
        // FIX: resolve on rejection too. Previously a failed play() (e.g.
        // NotAllowedError before a user gesture) left this promise pending
        // forever and stalled the whole playback queue. The voice page's
        // twin of this function already resolves here.
        resolve();
        if (error.name === 'NotAllowedError' || error.name === 'SecurityError') {
          console.error('User interaction required or permission issue:', error);
          console.error('播放失败时间');
        } else {
          console.error('Error playing audio:', error);
        }
      });
    audioDOM.onerror = () => {
      console.error('播放失败时间', +new Date());
      resolve();
    };
    audioDOM.onended = () => {
      console.error('播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      resolve();
    };
  });
};
// Push an upload task (tagged with its capture time) and start draining the
// queue if no drain loop is currently running.
const addQueue = (time, task) => {
  taskQueue.value.push({ time, func: task });
  const shouldStart = !running.value && taskQueue.value.length > 0;
  if (shouldStart) {
    running.value = true;
    processQueue();
  }
};
// Drain taskQueue one task at a time; `finally` keeps the loop alive even if
// a task rejects. Clears the `running` flag when the queue is empty.
const processQueue = () => {
  const next = taskQueue.value.shift();
  if (!next?.func) {
    running.value = false;
    return;
  }
  next
    .func()
    .then(res => {
      console.log('已处理事件: ', res);
    })
    .finally(() => processQueue());
};
// Release the camera: stop all tracks, detach the stream from the <video>
// element, and clear captured frames and the frame-grab timer.
const destroyVideoStream = () => {
  videoStream.value?.getTracks().forEach(track => track.stop());
  videoStream.value = null;
  // Setting srcObject to null severs the link to the MediaStream so the
  // browser can release it.
  videoRef.value.srcObject = null;
  videoImage.value = [];
  videoLoaded.value = false;
  clearInterval(interval.value);
  interval.value = null;
};
// Grab the current video frame onto the hidden canvas and store it as a
// webp data URL (quality 0.8) for the next audio chunk's payload.
const dealImage = () => {
  const video = videoRef.value;
  if (!video) {
    return;
  }
  const canvas = canvasRef.value;
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;
  const ctx = canvas.getContext('2d');
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
  videoImage.value.push({ src: canvas.toDataURL('image/webp', 0.8) });
};
// Waveform loop: copies the analyser's current frequency data into
// dataArray (consumed by the <Voice> component) once per animation frame.
const drawBars = () => {
  // AnalyserNode.getByteFrequencyData() copies the current frequency data
  // into the given Uint8Array.
  analyser.value.getByteFrequencyData(dataArray.value);
  animationFrameId.value = requestAnimationFrame(drawBars);
};
// Interrupt the bot's current reply (manually or via VAD): finalize the BOT
// bubble, flush playback queues, trim stale mic chunks, stop backend
// generation, then reconnect.
const skipVoice = async () => {
  // Record the interrupt / VAD-trigger time before anything else.
  vadStartTime.value = +new Date();
  if (!skipDisabled.value) {
    if (
      outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
      outputData.value[outputData.value.length - 1].audio === ''
    ) {
      outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
    }
    base64List.value = [];
    audioPlayQueue.value = [];
    // After skipping, keep only chunks captured within ~1s before the
    // interrupt and later (so the user's interrupting speech is preserved).
    console.log(
      '截取前长度:',
      taskQueue.value.map(item => item.time)
    );
    let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
    if (startIndex !== -1) {
      taskQueue.value = taskQueue.value.slice(startIndex);
      console.log(
        '截取后长度:',
        taskQueue.value.map(item => item.time),
        vadStartTime.value
      );
    }
    stop.value = true;
    audioDOM?.pause();
    // Debounce: re-disable the skip button shortly after the interrupt.
    setTimeout(() => {
      skipDisabled.value = true;
    }, 300);
    try {
      playing.value = false;
      await stopMessage();
      stop.value = false;
      // playing.value = false;
      buildConnect();
      // cancelAnimationFrame(animationFrameId.value);
    } catch (err) {}
  }
};
// Upload the user's saved configuration (voice-clone prompt, VAD threshold,
// video quality, …) before each call. Resolves when accepted; shows a toast
// and rejects when the backend returns a non-zero code. Resolves immediately
// when nothing is saved locally.
const uploadUserConfig = async () => {
  // Read localStorage once instead of twice.
  const raw = localStorage.getItem('configData');
  if (!raw) {
    return;
  }
  const {
    videoQuality,
    useAudioPrompt,
    voiceClonePrompt,
    assistantPrompt,
    vadThreshold,
    audioFormat,
    base64Str
  } = JSON.parse(raw);
  const obj = {
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'input_audio',
            input_audio: {
              data: base64Str,
              format: audioFormat
            }
          },
          {
            type: 'options',
            options: {
              hd_video: videoQuality,
              use_audio_prompt: useAudioPrompt,
              vad_threshold: vadThreshold,
              voice_clone_prompt: voiceClonePrompt,
              assistant_prompt: assistantPrompt
            }
          }
        ]
      }
    ]
  };
  const { code, message, data } = await uploadConfig(obj);
  modelVersion.value = data?.choices?.content || '';
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    // Async throw replaces the `new Promise((resolve, reject) => …)`
    // anti-pattern; callers only `.catch(() => {})`, so the rejection
    // value is not depended upon.
    throw new Error(message || 'uploadConfig failed');
  }
};
</script>
<style lang="less">
.video-page {
height: 100%;
display: flex;
flex-direction: column;
&-header {
display: flex;
align-items: center;
padding: 0 16px 16px;
box-shadow: 0 0.5px 0 0 #e0e0e0;
margin-bottom: 16px;
justify-content: space-between;
.header-icon {
display: flex;
align-items: center;
img {
width: 24px;
height: 24px;
margin-right: 8px;
}
span {
color: rgba(23, 23, 23, 0.9);
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
margin-right: 40px;
flex-shrink: 0;
}
}
.voice-container {
display: flex;
.voice-icon {
width: 191px;
height: 45px;
}
}
}
&-content {
flex: 1;
margin-bottom: 16px;
display: flex;
height: 0;
&-video {
width: 50%;
height: 100%;
background: #f3f3f3;
flex-shrink: 0;
position: relative;
video {
width: 100%;
height: 100%;
object-fit: contain;
}
.switch-camera {
position: absolute;
top: 10px;
right: 10px;
width: 36px;
height: 36px;
background: #ffffff;
border-radius: 6px;
display: flex;
justify-content: center;
align-items: center;
font-size: 24px;
z-index: 999;
.icon {
width: 20px;
height: 20px;
}
}
}
&-right {
margin-left: 16px;
flex: 1;
padding: 0 16px;
display: flex;
flex-direction: column;
.output-content {
flex: 1;
overflow: auto;
}
.skip-box {
display: flex;
align-items: center;
justify-content: flex-end;
margin-top: 16px;
}
}
}
&-btn {
text-align: center;
padding: 8px 0;
.el-button {
width: 284px;
height: 46px;
border-radius: 8px;
}
.el-button.el-button--success {
background: #647fff;
border-color: #647fff;
&:hover {
opacity: 0.8;
}
span {
color: #fff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
}
}
.el-button.el-button--success.is-disabled {
background: #f3f3f3;
border-color: #f3f3f3;
span {
color: #d1d1d1;
}
}
.el-button.el-button--danger {
border-color: #dc3545;
background-color: #dc3545;
color: #ffffff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
.phone-icon {
margin-right: 10px;
}
.btn-text {
margin-right: 10px;
}
.btn-desc {
margin-right: 16px;
}
}
}
}
.video-size {
position: absolute;
bottom: 10px;
right: 10px;
background: rgba(0, 0, 0, 0.5);
color: #fff;
padding: 4px 8px;
border-radius: 4px;
font-size: 12px;
}
</style>

View File

@@ -0,0 +1,833 @@
<template>
<!-- <ExtraInfo webVersion="非websocket_0112" :modelVersion="modelVersion" /> -->
<div class="voice-page">
<div class="voice-page-header">
<div class="voice-container" v-if="!isCalling">
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
</div>
<div class="voice-container" v-else>
<Voice
:dataArray="dataArray"
:isCalling="isCalling"
:isPlaying="playing"
:configList="videoConfigList"
:boxStyle="{ height: '45px' }"
:itemStyle="{ width: '3px', margin: '0 1px' }"
/>
</div>
<!-- <SelectTimbre v-model:timbre="timbre" v-model:audioData="audioData" v-model:disabled="isCalling" /> -->
</div>
<div class="voice-page-output">
<div class="output-content">
<ModelOutput v-if="outputData.length > 0" :outputData="outputData" containerClass="output-content" />
</div>
<div class="skip-box">
<!-- <DelayTips
v-if="delayTimestamp > 200 || delayCount > 2"
:delayTimestamp="delayTimestamp"
:delayCount="delayCount"
/> -->
<LikeAndDislike v-model:feedbackStatus="feedbackStatus" v-model:curResponseId="curResponseId" />
<SkipBtn :disabled="skipDisabled" @click="skipVoice" />
</div>
</div>
<div class="voice-page-btn">
<el-button v-show="!isCalling" type="success" :disabled="callDisabled" @click="initRecording">
{{ callDisabled ? t('notReadyBtn') : t('audioCallBtn') }}
</el-button>
<el-button v-show="isCalling" @click="stopRecording" type="danger">
<SvgIcon name="phone-icon" className="phone-icon" />
<span class="btn-text">{{ t('hangUpBtn') }}</span>
<CountDown v-model="isCalling" @timeUp="stopRecording" />
</el-button>
</div>
<IdeasList v-if="showIdeasList" :ideasList="voiceIdeasList" />
</div>
</template>
<script setup>
import { sendMessage, stopMessage, uploadConfig } from '@/apis';
import { encodeWAV } from '@/hooks/useVoice';
import { getNewUserId, setNewUserId } from '@/hooks/useRandomId';
import { fetchEventSource } from '@microsoft/fetch-event-source';
import { MicVAD } from '@ricky0123/vad-web';
import { videoConfigList, voiceConfigList, voiceIdeasList, showIdeasList } from '@/enums';
import { getChunkLength } from '@/utils';
import { mergeBase64ToBlob } from './merge';
import WebSocketService from '@/utils/websocket';
import { useI18n } from 'vue-i18n';
const { t } = useI18n();
// --- Module-level state for the voice-only call page ---
let ctrl = new AbortController(); // aborts the in-flight SSE stream
let socket = null; // optional websocket transport (currently unused)
const audioData = ref({
  base64Str: '',
  type: 'mp3'
}); // base64 of the user's custom-timbre audio sample
const isCalling = defineModel();
const taskQueue = ref([]); // pending mic-chunk upload tasks ({ func, time })
const running = ref(false); // whether the upload queue drain loop is active
const outputData = ref([]); // transcript bubbles ({ type: 'USER'|'BOT', text?, audio })
const textQueue = ref(''); // characters awaiting the typewriter effect
const textAnimationInterval = ref(); // rAF id of the typewriter loop
const isFirstReturn = ref(true); // the first SSE message echoes the audio the frontend sent; it needs special handling
const audioPlayQueue = ref([]); // queued truePlay closures awaiting playback
const base64List = ref([]); // base64 audio chunks awaiting merge/playback
const playing = ref(false); // bot audio currently playing
const skipDisabled = ref(true); // whether the "skip" interrupt button is disabled
const stop = ref(false); // set while an interrupt is in progress
const timbre = ref([1]); // selected preset timbre
const isReturnError = ref(false); // backend rejected the last stream open
const allVoice = ref([]); // every audio chunk of the current turn (for the bubble)
const callDisabled = ref(true); // call button disabled until backend is reachable
const feedbackStatus = ref('');
const curResponseId = ref(''); // response id used by like/dislike feedback
const delayTimestamp = ref(0); // latency of the most recently sent chunk (ms)
const delayCount = ref(0); // queued chunks not yet sent (the original comment said "ms", but the code stores taskQueue length)
const modelVersion = ref('');
let audioDOM = new Audio(); // shared playback element
const isEnd = ref(false); // SSE closed => the model finished this turn
// Stop recording when the component unmounts.
onBeforeUnmount(() => {
  stopRecording();
});
const vadStartTime = ref(); // time of the last interrupt / VAD trigger
let myvad = null; // MicVAD instance
let vadTimer = null; // VAD debounce timer: speech that stops within the window is treated as a false trigger and ignored; otherwise the current reply is auto-skipped (original note said 1s; vadStart actually uses 500ms)
// Start browser-side voice activity detection. When speech persists past the
// 500ms debounce, the current bot reply is skipped (voice interruption).
const vadStart = async () => {
  myvad = await MicVAD.new({
    onSpeechStart: () => {
      console.log('Speech start detected');
      // if (!skipDisabled.value) {
      // Debounce: only interrupt if speech lasts longer than 500ms.
      vadTimer && clearTimeout(vadTimer);
      vadTimer = setTimeout(() => {
        console.log('打断时间: ', +new Date());
        skipVoice();
      }, 500);
      // }
    },
    onSpeechEnd: audio => {
      // Speech ended within the debounce window — treat as a false trigger.
      vadTimer && clearTimeout(vadTimer);
      // debugger;
      // do something with `audio` (Float32Array of audio samples at sample rate 16000)...
    },
    baseAssetPath: '/'
  });
  console.log('vad: ', myvad);
  myvad.start();
};
// On mount: ping the backend via stopMessage() (also clears any stale
// generation). Only enable the call button when the backend responds OK.
onMounted(async () => {
  const { code, message } = await stopMessage();
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    return;
  }
  callDisabled.value = false;
});
// Promise-based sleep for `ms` milliseconds.
const delay = ms =>
  new Promise(resolve => {
    setTimeout(resolve, ms);
  });
// Start a call: upload the user's config, then open the SSE stream, start
// mic capture, and (optionally) voice-activity interruption. Aborts
// silently if the config upload is rejected.
const initRecording = async () => {
  uploadUserConfig()
    .then(async () => {
      // A fresh uid is generated for every call.
      setNewUserId();
      outputData.value = [];
      buildConnect();
      isCalling.value = true;
      await delay(100);
      // if (socket) {
      //   socket.close();
      // }
      // socket = new WebSocketService(
      //   `/ws/stream${window.location.search}&uid=${getNewUserId()}&service=minicpmo-server`
      // );
      // socket.connect();
      // Wait briefly after connecting before streaming data.
      startRecording();
      if (localStorage.getItem('canStopByVoice') === 'true') {
        vadStart();
      }
    })
    .catch(() => {});
};
// --- Audio capture state ---
let audioContext; // 16kHz AudioContext for capture + analysis
const analyser = ref(); // AnalyserNode feeding the waveform display
const dataArray = ref(); // Uint8Array the analyser writes into
let mediaRecorder; // NOTE(review): never assigned in this file — likely dead; confirm before removing
let audioChunks = []; // raw Float32Array mic chunks awaiting merge
const animationFrameId = ref(); // rAF id of the waveform loop
const isFirstPiece = ref(true); // first audio piece of a turn gets an extra buffer delay
// Capture the microphone at 16kHz, feed an analyser for the waveform UI,
// and slice the stream into fixed-size chunks that are queued for upload.
const startRecording = async () => {
  // Get the user's microphone stream.
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // Create the AudioContext and MediaStreamAudioSourceNode.
  audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);
  analyser.value = audioContext.createAnalyser();
  // Connect the source to the analyser.
  source.connect(analyser.value);
  // Analyser setup.
  analyser.value.fftSize = 256;
  const bufferLength = analyser.value.frequencyBinCount;
  dataArray.value = new Uint8Array(bufferLength);
  // Start drawing the waveform.
  drawBars();
  // ScriptProcessorNode captures raw audio data.
  // NOTE(review): ScriptProcessorNode is deprecated in favor of
  // AudioWorklet; consider migrating.
  const processor = audioContext.createScriptProcessor(256, 1, 1);
  processor.onaudioprocess = event => {
    if (!isCalling.value) return;
    if (isReturnError.value) {
      stopRecording();
      return;
    }
    const data = event.inputBuffer.getChannelData(0);
    audioChunks.push(new Float32Array(data));
    // Check whether a full chunk's worth of samples has accumulated.
    const totalBufferLength = audioChunks.reduce((total, curr) => total + curr.length, 0);
    const chunkLength = getChunkLength(audioContext.sampleRate);
    if (totalBufferLength >= chunkLength) {
      // Merge into one contiguous array and trim it to one chunk.
      const mergedBuffer = mergeBuffers(audioChunks, totalBufferLength);
      const oneSecondBuffer = mergedBuffer.slice(0, chunkLength);
      // Queue it for WAV encoding + upload.
      addQueue(+new Date(), () => saveAudioChunk(oneSecondBuffer, +new Date()));
      // Keep the leftover samples for the next chunk.
      audioChunks = [mergedBuffer.slice(chunkLength)];
    }
  };
  source.connect(processor);
  processor.connect(audioContext.destination);
};
// Hang up: stop the waveform loop and audio context, abort the SSE stream,
// flush all queues, stop backend generation, and tear down VAD.
const stopRecording = () => {
  isCalling.value = false;
  if (animationFrameId.value) {
    cancelAnimationFrame(animationFrameId.value);
  }
  if (audioContext && audioContext.state !== 'closed') {
    audioContext.close();
  }
  // Abort the in-flight SSE stream and prepare a fresh controller.
  ctrl.abort();
  ctrl = new AbortController();
  taskQueue.value = [];
  audioPlayQueue.value = [];
  base64List.value = [];
  isReturnError.value = false;
  skipDisabled.value = true;
  playing.value = false;
  audioDOM.pause();
  // Best-effort: tell the backend to stop generating (not awaited).
  stopMessage();
  if (socket) {
    socket.close();
  }
  // Finalize the last BOT bubble's audio if the call ended mid-reply.
  if (
    outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
    outputData.value[outputData.value.length - 1].audio === '' &&
    allVoice.value.length > 0
  ) {
    outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
  }
  myvad && myvad.destroy();
};
// Thin getters used inside long-lived audio callbacks; reading through a
// function avoids capturing a stale value in those closures.
const getStopValue = () => stop.value;
// Whether bot audio is currently being played back.
const getPlayingValue = () => playing.value;
// Whether the user enabled voice interruption (persisted in localStorage).
const getStopStatus = () => localStorage.getItem('canStopByVoice') === 'true';
// Encode one captured audio buffer as base64 WAV and POST it to the backend.
// buffer: Float32Array of PCM samples; timestamp: capture time in ms.
// Resolves after the send completes (or immediately when the chunk is dropped);
// never rejects — send errors are swallowed deliberately (best-effort stream).
const saveAudioChunk = (buffer, timestamp) => {
  return new Promise(resolve => {
    // While the bot is speaking and voice-interrupt is disabled, drop the
    // chunk so the user's mic does not feed back into the model.
    if (!getStopStatus() && getPlayingValue()) {
      resolve();
      return;
    }
    const wavBlob = encodeWAV(buffer, audioContext.sampleRate);
    let reader = new FileReader();
    reader.readAsDataURL(wavBlob);
    reader.onloadend = async function () {
      let base64data = reader.result.split(',')[1];
      if (!base64data) {
        resolve();
        return;
      }
      const obj = {
        uid: getNewUserId(),
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'input_audio',
                input_audio: {
                  data: base64data,
                  format: 'wav',
                  timestamp: String(timestamp)
                }
              }
            ]
          }
        ]
      };
      // Websocket transport, kept for reference:
      // socket.send(JSON.stringify(obj));
      // socket.on('message', data => {
      //   console.log('message: ', data);
      //   delayTimestamp.value = +new Date() - timestamp;
      //   delayCount.value = taskQueue.value.length;
      //   resolve();
      // });
      // Send the base64 audio to the backend over HTTP.
      try {
        await sendMessage(obj);
        delayTimestamp.value = +new Date() - timestamp;
        delayCount.value = taskQueue.value.length;
      } catch (err) {}
      resolve();
    };
  });
};
/**
 * Concatenate several Float32Array chunks into one contiguous buffer.
 * @param {Float32Array[]} buffers - chunks in capture order
 * @param {number} length - total sample count (sum of chunk lengths)
 * @returns {Float32Array} a new buffer of `length` samples
 */
const mergeBuffers = (buffers, length) => {
  const out = new Float32Array(length);
  let cursor = 0;
  for (let i = 0; i < buffers.length; i += 1) {
    out.set(buffers[i], cursor);
    cursor += buffers[i].length;
  }
  return out;
};
// Open the SSE completion stream and handle the model's incremental
// text/audio replies. Reconnects itself whenever the backend sends an
// empty/abnormal reply or the stream closes with nothing left to play.
const buildConnect = async () => {
  // No real content — the body just opens the stream for this uid.
  const obj = {
    messages: [
      {
        role: 'user',
        content: [{ type: 'none' }]
      }
    ],
    stream: true
  };
  isEnd.value = false;
  ctrl.abort();
  ctrl = new AbortController();
  const url = `/api/v1/completions${window.location.search}`;
  fetchEventSource(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      service: 'minicpmo-server',
      uid: getNewUserId()
    },
    body: JSON.stringify(obj),
    signal: ctrl.signal,
    openWhenHidden: true,
    async onopen(response) {
      console.log('onopen', response);
      // Reset per-turn state before any message arrives.
      isFirstPiece.value = true;
      isFirstReturn.value = true;
      allVoice.value = [];
      base64List.value = [];
      if (response.status !== 200) {
        ElMessage({
          type: 'error',
          message: 'At limit. Please try again soon.',
          duration: 3000,
          customClass: 'system-error'
        });
        isReturnError.value = true;
      } else {
        isReturnError.value = false;
        // skipDisabled.value = false;
        // Kick off the typewriter loop for this turn.
        drawText();
      }
    },
    onmessage(msg) {
      const data = JSON.parse(msg.data);
      if (data.response_id) {
        curResponseId.value = data.response_id;
      }
      if (data.choices[0]?.text) {
        textQueue.value += data.choices[0].text.replace('<end>', '');
        console.warn('text return time -------------------------------', +new Date());
      }
      // The first message echoes back the audio the frontend sent; it is
      // shown as the USER bubble rather than played.
      if (isFirstReturn.value) {
        console.log('第一次');
        isFirstReturn.value = false;
        // Empty echo means the backend has nothing for us — reconnect.
        // FIX: optional chaining so an empty `choices` array cannot throw.
        if (!data.choices[0]?.audio) {
          buildConnect();
          return;
        }
        outputData.value.push({
          type: 'USER',
          audio: `data:audio/wav;base64,${data.choices[0].audio}`
        });
        outputData.value.push({
          type: 'BOT',
          text: '',
          audio: ''
        });
        return;
      }
      if (data.choices[0]?.audio) {
        console.warn('audio return time -------------------------------', +new Date());
        if (!getStopValue() && isCalling.value) {
          skipDisabled.value = false;
          base64List.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
          addAudioQueue(() => truePlay(data.choices[0].audio));
        }
        allVoice.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
      } else {
        // Abnormal message (no audio payload) — reconnect immediately.
        buildConnect();
      }
      // FIX: `text` is not present on every message; the unguarded
      // `.text.includes(...)` used to throw a TypeError on audio-only frames.
      if (data.choices[0]?.text?.includes('<end>')) {
        // isEnd.value = true;
        console.log('收到结束标记了:', +new Date());
        if (
          outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
          outputData.value[outputData.value.length - 1].audio === '' &&
          allVoice.value.length > 0
        ) {
          outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
        }
      }
    },
    onclose() {
      console.log('onclose', +new Date());
      isEnd.value = true;
      // Finalize the BOT bubble's audio if the stream ended mid-reply.
      if (
        outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
        outputData.value[outputData.value.length - 1].audio === '' &&
        allVoice.value.length > 0
      ) {
        outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
      }
      // If nothing is queued for playback, the model returned no audio this
      // round: trim stale mic chunks (keep the last ~2s) and reconnect.
      vadStartTime.value = +new Date();
      if (audioPlayQueue.value.length === 0) {
        let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 2000);
        console.log('taskQueue111111111: ', taskQueue.value, startIndex);
        if (startIndex !== -1) {
          taskQueue.value = taskQueue.value.slice(startIndex);
          console.log('截取后长度:', taskQueue.value, vadStartTime.value);
        }
        buildConnect();
      }
    },
    onerror(err) {
      console.log('onerror', err);
      ctrl.abort();
      ctrl = new AbortController();
      // Rethrow so fetchEventSource does not retry on its own.
      throw err;
    }
  });
};
// Typewriter effect: moves one character per animation frame from textQueue
// into the newest transcript bubble. Reschedules itself every frame so text
// pushed later is still picked up.
const drawText = async () => {
  if (textQueue.value.length > 0) {
    const last = outputData.value[outputData.value.length - 1];
    // FIX: only append once a bubble with a text field exists — previously an
    // empty outputData threw here, and a USER bubble (audio only) would have
    // been rendered as "undefined<char>". Characters are held until a BOT
    // bubble is pushed.
    if (last && typeof last.text === 'string') {
      last.text += textQueue.value[0];
      textQueue.value = textQueue.value.slice(1);
    }
  } else {
    // Queue drained; cancel the previous frame id (mostly a no-op since we
    // immediately reschedule below).
    cancelAnimationFrame(textAnimationInterval.value);
  }
  textAnimationInterval.value = requestAnimationFrame(drawText);
};
// Enqueue one playback task and kick off the player if it is idle.
// The very first piece of a turn is buffered for 500ms so several chunks can
// accumulate before playback starts (reduces choppiness at turn start).
const addAudioQueue = async playTask => {
  audioPlayQueue.value.push(playTask);
  if (isFirstPiece.value) {
    await delay(500);
    isFirstPiece.value = false;
  }
  const playerIdle = !playing.value && audioPlayQueue.value.length > 0;
  if (playerIdle) {
    playing.value = true;
    playAudio();
  }
};
// Drives the playback queue. Three paths:
//  1) stream still open and >=2 chunks buffered: merge and play them as one
//     blob, then slice the played chunks off both queues;
//  2) stream closed and >=2 chunks buffered: merge-play the rest, then reset
//     state and reconnect for the next turn;
//  3) otherwise play chunks one at a time via the queued truePlay closures.
const playAudio = () => {
  console.log('剩余播放列表:', audioPlayQueue.value, +new Date());
  if (!isEnd.value && base64List.value.length >= 2) {
    const remainLen = base64List.value.length;
    // mergeBase64ToBlob returns an object URL playable by <audio>.
    const blob = mergeBase64ToBlob(base64List.value);
    audioDOM.src = blob;
    audioDOM.play();
    console.error('前期合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('前期合并后播放结束时间: ', +new Date());
      // Drop exactly the chunks that were merged; new ones may have arrived.
      base64List.value = base64List.value.slice(remainLen);
      audioPlayQueue.value = audioPlayQueue.value.slice(remainLen);
      playAudio();
    };
    return;
  }
  if (isEnd.value && base64List.value.length >= 2) {
    const blob = mergeBase64ToBlob(base64List.value);
    // let audio = new Audio();
    audioDOM.src = blob;
    audioDOM.play();
    console.error('最后合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('合并后播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      // Turn finished: reset playback state and start the next connection.
      base64List.value = [];
      audioPlayQueue.value = [];
      playing.value = false;
      skipDisabled.value = true;
      if (isCalling.value && !isReturnError.value) {
        // skipDisabled.value = true;
        taskQueue.value = [];
        // 打断前记录一下打断时间或vad触发事件
        // vadStartTime.value = +new Date();
        // // 每次完成后只保留当前时刻往前推1s的语音
        // console.log('截取前长度:', JSON.parse(JSON.stringify(taskQueue.value.map(item => item.time))));
        // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 2000);
        // if (startIndex !== -1) {
        //   taskQueue.value = taskQueue.value.slice(startIndex);
        //   console.log(
        //     '截取后长度:',
        //     taskQueue.value.map(item => item.time),
        //     vadStartTime.value
        //   );
        // }
        buildConnect();
      }
    };
    return;
  }
  // Fallback path: play a single chunk via its queued closure.
  base64List.value.shift();
  const item = audioPlayQueue.value.shift();
  if (item) {
    item().finally(() => playAudio());
  } else {
    playing.value = false;
    if (isEnd.value) {
      console.warn('play done................');
      skipDisabled.value = true;
    }
    // When the turn is done, we are still in a call and no error occurred,
    // open the next connection.
    if (isEnd.value && isCalling.value && !isReturnError.value) {
      // skipDisabled.value = true;
      taskQueue.value = [];
      // 打断前记录一下打断时间或vad触发事件
      // vadStartTime.value = +new Date();
      // // 每次完成后只保留当前时刻往前推1s的语音
      // console.log(
      //   '截取前长度:',
      //   taskQueue.value.map(item => item.time)
      // );
      // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 2000);
      // if (startIndex !== -1) {
      //   taskQueue.value = taskQueue.value.slice(startIndex);
      //   console.log(
      //     '截取后长度:',
      //     taskQueue.value.map(item => item.time),
      //     vadStartTime.value
      //   );
      // }
      buildConnect();
    }
  }
};
// Play one base64 WAV chunk through the shared audio element. Resolves when
// playback ends OR fails — the playback queue awaits this promise, so it
// must always settle.
const truePlay = async voice => {
  return new Promise(resolve => {
    audioDOM.src = 'data:audio/wav;base64,' + voice;
    console.error('播放开始时间:', +new Date());
    audioDOM
      .play()
      .then(() => {
        // console.error('播放结束时间: ', +new Date());
      })
      .catch(error => {
        resolve();
        if (error.name === 'NotAllowedError' || error.name === 'SecurityError') {
          console.error('User interaction required or permission issue:', error);
          ElMessage.warning('音频播放失败');
        } else {
          console.error('Error playing audio:', error);
        }
      });
    // FIX: also settle on a media/decode error — without this handler a bad
    // chunk never fires `onended` and the playback queue stalls forever.
    // (The video page's twin of this function already has it.)
    audioDOM.onerror = () => {
      console.error('播放失败时间', +new Date());
      resolve();
    };
    audioDOM.onended = () => {
      console.error('播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      resolve();
    };
  });
};
// Push an upload task (tagged with its capture time) and start draining the
// queue if no drain loop is currently running.
const addQueue = (time, task) => {
  taskQueue.value.push({ time, func: task });
  const shouldStart = !running.value && taskQueue.value.length > 0;
  if (shouldStart) {
    running.value = true;
    processQueue();
  }
};
// Drain taskQueue one task at a time; clears `running` once empty.
// FIX: recurse in `finally` instead of `then` — previously a rejected task
// never rescheduled the drain and left `running` stuck at true, stalling all
// further uploads (the video page's processQueue already does this).
const processQueue = () => {
  const item = taskQueue.value.shift();
  if (item?.func) {
    item
      .func()
      .then(() => {
        console.warn('shift!!!!!!!!!');
      })
      .finally(() => processQueue());
  } else {
    running.value = false;
  }
};
// Waveform loop: copies the analyser's current frequency data into
// dataArray (consumed by the <Voice> component) once per animation frame.
const drawBars = () => {
  // AnalyserNode.getByteFrequencyData() copies the current frequency data
  // into the given Uint8Array.
  analyser.value.getByteFrequencyData(dataArray.value);
  animationFrameId.value = requestAnimationFrame(drawBars);
};
// Interrupt the bot's current reply (manually or via VAD): finalize the BOT
// bubble, flush playback queues, trim stale mic chunks, stop backend
// generation, then reconnect.
const skipVoice = async () => {
  // Record the interrupt / VAD-trigger time before anything else.
  vadStartTime.value = +new Date();
  if (!skipDisabled.value) {
    if (
      outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
      outputData.value[outputData.value.length - 1].audio === ''
    ) {
      outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
    }
    base64List.value = [];
    audioPlayQueue.value = [];
    // After skipping, keep only chunks captured within ~1s before the
    // interrupt and later (so the user's interrupting speech is preserved).
    console.log(
      '截取前长度:',
      taskQueue.value.map(item => item.time)
    );
    let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
    if (startIndex !== -1) {
      taskQueue.value = taskQueue.value.slice(startIndex);
      console.log(
        '截取后长度:',
        taskQueue.value.map(item => item.time),
        vadStartTime.value
      );
    }
    stop.value = true;
    audioDOM.pause();
    // Debounce: re-disable the skip button shortly after the interrupt.
    setTimeout(() => {
      skipDisabled.value = true;
    }, 300);
    try {
      playing.value = false;
      await stopMessage();
      stop.value = false;
      // playing.value = false;
      buildConnect();
      // cancelAnimationFrame(animationFrameId.value);
    } catch (err) {}
  }
};
// Upload the user's saved configuration (voice-clone prompt, VAD threshold,
// video quality, …) before each call. Resolves when accepted; shows a toast
// and rejects when the backend returns a non-zero code. Resolves immediately
// when nothing is saved locally.
const uploadUserConfig = async () => {
  // Read localStorage once instead of twice.
  const raw = localStorage.getItem('configData');
  if (!raw) {
    return;
  }
  const {
    videoQuality,
    useAudioPrompt,
    voiceClonePrompt,
    assistantPrompt,
    vadThreshold,
    audioFormat,
    base64Str
  } = JSON.parse(raw);
  const obj = {
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'input_audio',
            input_audio: {
              data: base64Str,
              format: audioFormat
            }
          },
          {
            type: 'options',
            options: {
              hd_video: videoQuality,
              use_audio_prompt: useAudioPrompt,
              vad_threshold: vadThreshold,
              voice_clone_prompt: voiceClonePrompt,
              assistant_prompt: assistantPrompt
            }
          }
        ]
      }
    ]
  };
  const { code, message, data } = await uploadConfig(obj);
  modelVersion.value = data?.choices?.content || '';
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    // Async throw replaces the `new Promise((resolve, reject) => …)`
    // anti-pattern; callers only `.catch(() => {})`, so the rejection
    // value is not depended upon.
    throw new Error(message || 'uploadConfig failed');
  }
};
</script>
<style lang="less" scoped>
.voice-page {
flex: 1;
height: 100%;
display: flex;
flex-direction: column;
&-header {
display: flex;
align-items: center;
justify-content: center;
padding: 0 16px 16px;
box-shadow: 0 0.5px 0 0 #e0e0e0;
margin-bottom: 16px;
.header-icon {
display: flex;
align-items: center;
img {
width: 24px;
height: 24px;
margin-right: 8px;
}
span {
color: rgba(23, 23, 23, 0.9);
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
margin-right: 40px;
flex-shrink: 0;
}
}
.voice-container {
display: flex;
.voice-icon {
width: 191px;
height: 45px;
}
}
}
&-output {
flex: 1;
height: 0;
padding: 0 16px;
margin-bottom: 16px;
display: flex;
flex-direction: column;
.output-content {
flex: 1;
overflow: auto;
}
.skip-box {
display: flex;
align-items: center;
justify-content: flex-end;
margin-top: 16px;
}
}
&-btn {
text-align: center;
padding: 8px 0;
.el-button {
width: 284px;
height: 46px;
border-radius: 8px;
}
.el-button.el-button--success {
background: #647fff;
border-color: #647fff;
&:hover {
opacity: 0.8;
}
span {
color: #fff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
}
}
.el-button.el-button--success.is-disabled {
background: #f3f3f3;
border-color: #f3f3f3;
span {
color: #d1d1d1;
}
}
.el-button.el-button--danger {
border-color: #dc3545;
background-color: #dc3545;
color: #ffffff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
.phone-icon {
margin-right: 10px;
}
.btn-text {
margin-right: 10px;
}
.btn-desc {
margin-right: 16px;
}
.time {
display: flex;
align-items: center;
.time-minute,
.time-second {
width: 26px;
height: 26px;
display: flex;
justify-content: center;
align-items: center;
border-radius: 3.848px;
background: rgba(47, 47, 47, 0.5);
}
.time-colon {
margin: 0 3px;
}
}
}
}
}
</style>

View File

@@ -0,0 +1,829 @@
<template>
<ExtraInfo webVersion="websocket_0107" :modelVersion="modelVersion" />
<div class="voice-page">
<div class="voice-page-header">
<div class="header-icon">
<img src="@/assets/images/voice-icon.png" />
<span>Audio Choice</span>
</div>
<div class="voice-container" v-if="!isCalling">
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
<SvgIcon name="voice" class="voice-icon" />
</div>
<div class="voice-container" v-else>
<Voice
:dataArray="dataArray"
:isCalling="isCalling"
:isPlaying="playing"
:configList="videoConfigList"
:boxStyle="{ height: '45px' }"
:itemStyle="{ width: '3px', margin: '0 1px' }"
/>
</div>
<!-- <SelectTimbre v-model:timbre="timbre" v-model:audioData="audioData" v-model:disabled="isCalling" /> -->
</div>
<div class="voice-page-output">
<div class="output-content">
<ModelOutput v-if="outputData.length > 0" :outputData="outputData" containerClass="output-content" />
</div>
<div class="skip-box">
<DelayTips
v-if="delayTimestamp > 200 || delayCount > 2"
:delayTimestamp="delayTimestamp"
:delayCount="delayCount"
/>
<LikeAndDislike v-model:feedbackStatus="feedbackStatus" v-model:curResponseId="curResponseId" />
<SkipBtn :disabled="skipDisabled" @click="skipVoice" />
</div>
</div>
<div class="voice-page-btn">
<el-button v-show="!isCalling" type="success" :disabled="callDisabled" @click="initRecording">
{{ callDisabled ? 'Not ready yet, please wait' : 'Call MiniCPM' }}
</el-button>
<el-button v-show="isCalling" @click="stopRecording" type="danger">
<SvgIcon name="phone-icon" className="phone-icon" />
<span class="btn-text">Hang Up</span>
<CountDown v-model="isCalling" @timeUp="stopRecording" />
</el-button>
</div>
<IdeasList v-if="showIdeasList" :ideasList="voiceIdeasList" />
</div>
</template>
<script setup>
import { sendMessage, stopMessage, uploadConfig } from '@/apis';
import { encodeWAV } from '@/hooks/useVoice';
import { getNewUserId, setNewUserId } from '@/hooks/useRandomId';
import { fetchEventSource } from '@microsoft/fetch-event-source';
import { MicVAD } from '@ricky0123/vad-web';
import { videoConfigList, voiceConfigList, voiceIdeasList, showIdeasList } from '@/enums';
import { getChunkLength } from '@/utils';
import { mergeBase64ToBlob } from './merge';
import WebSocketService from '@/utils/websocket';
let ctrl = new AbortController();
let socket = null;
const audioData = ref({
base64Str: '',
type: 'mp3'
}); // 自定义音色base64
const isCalling = defineModel();
const taskQueue = ref([]);
const running = ref(false);
const outputData = ref([]);
const textQueue = ref('');
const textAnimationInterval = ref();
const isFirstReturn = ref(true); // 首次返回的音频是前端发给后端的音频片段,需要单独处理
const audioPlayQueue = ref([]);
const base64List = ref([]);
const playing = ref(false);
const skipDisabled = ref(true);
const stop = ref(false);
const timbre = ref([1]);
const isReturnError = ref(false);
const allVoice = ref([]);
const callDisabled = ref(true);
const feedbackStatus = ref('');
const curResponseId = ref('');
const delayTimestamp = ref(0); // 当前发送片延时
const delayCount = ref(0); // 当前剩余多少ms未发送到接口
const modelVersion = ref('');
let audioDOM = new Audio();
const isEnd = ref(false); // sse接口关闭认为模型已完成本次返回
// 页面卸载时关闭录音
onBeforeUnmount(() => {
stopRecording();
});
const vadStartTime = ref();
let myvad = null;
let vadTimer = null; // vad定时器用于检测1s内人声是否停止1s内停止可认为是vad误触直接忽略1s内未停止则认为是人声已自动跳过当前对话
// Start in-browser voice activity detection. A speech onset that persists
// for 500ms while bot audio is playing triggers skipVoice(), letting the
// user barge in by talking over the model.
const vadStart = async () => {
  myvad = await MicVAD.new({
    onSpeechStart: () => {
      console.log('Speech start detected');
      if (!skipDisabled.value) {
        // Debounce: only treat it as a real interruption if speech has not
        // stopped again within 500ms (shorter bursts count as VAD noise).
        vadTimer && clearTimeout(vadTimer);
        vadTimer = setTimeout(() => {
          console.log('打断时间: ', +new Date());
          skipVoice();
        }, 500);
      }
    },
    onSpeechEnd: audio => {
      vadTimer && clearTimeout(vadTimer);
      // debugger;
      // do something with `audio` (Float32Array of audio samples at sample rate 16000)...
    }
  });
  console.log('vad: ', myvad);
  myvad.start();
};
// On mount: ask the backend to stop any stale session; only enable the
// call button once that cleanup succeeds.
onMounted(async () => {
  const { code, message } = await stopMessage();
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    return;
  }
  callDisabled.value = false;
});
// Promise-based sleep helper: resolves (with undefined) after `ms` milliseconds.
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
// Start a call: upload the user's config, reset transcript state, open the
// SSE response stream and the WebSocket upload channel, then begin
// recording. Aborts silently when the config upload fails (the error toast
// is already shown by uploadUserConfig).
const initRecording = async () => {
  uploadUserConfig()
    .then(async () => {
      // Every call gets a fresh uid so backend sessions don't collide.
      setNewUserId();
      outputData.value = [];
      buildConnect();
      isCalling.value = true;
      await delay(100);
      if (socket) {
        socket.close();
      }
      socket = new WebSocketService(
        `/ws/stream${window.location.search}&uid=${getNewUserId()}&service=minicpmo-server`
      );
      socket.connect();
      // Give the connection a moment to establish before streaming data.
      startRecording();
      if (localStorage.getItem('canStopByVoice') === 'true') {
        vadStart();
      }
    })
    .catch(() => {});
};
let audioContext;
const analyser = ref();
const dataArray = ref();
let mediaRecorder;
let audioChunks = [];
const animationFrameId = ref();
const isFirstPiece = ref(true);
// Capture the microphone at 16kHz, feed an AnalyserNode for the waveform
// visualisation, and slice the PCM stream into fixed-size chunks that are
// queued for upload as WAV.
// NOTE(review): createScriptProcessor is deprecated in favour of
// AudioWorkletNode — consider migrating.
const startRecording = async () => {
  // Request the user's microphone stream.
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  // Create the AudioContext and a source node wrapping the mic stream.
  audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
  const source = audioContext.createMediaStreamSource(stream);
  analyser.value = audioContext.createAnalyser();
  // Connect the mic to the analyser that drives the voice bars.
  source.connect(analyser.value);
  // Analyser configuration.
  analyser.value.fftSize = 256;
  const bufferLength = analyser.value.frequencyBinCount;
  dataArray.value = new Uint8Array(bufferLength);
  // Start the waveform animation loop.
  drawBars();
  // ScriptProcessorNode captures raw PCM frames for upload.
  const processor = audioContext.createScriptProcessor(256, 1, 1);
  processor.onaudioprocess = event => {
    if (!isCalling.value) return;
    if (isReturnError.value) {
      stopRecording();
      return;
    }
    const data = event.inputBuffer.getChannelData(0);
    audioChunks.push(new Float32Array(data));
    // Check whether one upload chunk's worth of samples has accumulated.
    const totalBufferLength = audioChunks.reduce((total, curr) => total + curr.length, 0);
    const chunkLength = getChunkLength(audioContext.sampleRate);
    if (totalBufferLength >= chunkLength) {
      // Merge the buffered pieces and cut off exactly one chunk.
      const mergedBuffer = mergeBuffers(audioChunks, totalBufferLength);
      const oneSecondBuffer = mergedBuffer.slice(0, chunkLength);
      // Encode to WAV and enqueue the upload, stamped with the capture time.
      addQueue(+new Date(), () => saveAudioChunk(oneSecondBuffer, +new Date()));
      // Keep the leftover samples for the next chunk.
      audioChunks = [mergedBuffer.slice(chunkLength)];
    }
  };
  source.connect(processor);
  processor.connect(audioContext.destination);
};
// Hang up: stop the animation, release audio and network resources, reset
// all per-call state, and flush any pending bot audio into the transcript.
const stopRecording = () => {
  isCalling.value = false;
  if (animationFrameId.value) {
    cancelAnimationFrame(animationFrameId.value);
  }
  if (audioContext && audioContext.state !== 'closed') {
    audioContext.close();
  }
  // Abort the in-flight SSE request and prepare a fresh controller.
  ctrl.abort();
  ctrl = new AbortController();
  taskQueue.value = [];
  audioPlayQueue.value = [];
  base64List.value = [];
  isReturnError.value = false;
  skipDisabled.value = true;
  playing.value = false;
  audioDOM.pause();
  stopMessage();
  if (socket) {
    socket.close();
  }
  if (
    outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
    outputData.value[outputData.value.length - 1].audio === '' &&
    allVoice.value.length > 0
  ) {
    // Attach the bot audio collected so far to the last transcript entry.
    outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
  }
  myvad && myvad.destroy();
};
// Accessors used inside long-lived callbacks so they always read the
// current ref values rather than a stale closure snapshot.
const getStopValue = () => {
  return stop.value;
};
const getPlayingValue = () => {
  return playing.value;
};
// Whether the user enabled voice-based interruption (barge-in).
const getStopStatus = () => {
  return localStorage.getItem('canStopByVoice') === 'true';
};
// Encode one PCM chunk as WAV, Base64 it, and send it to the backend over
// the WebSocket; resolves once the server acknowledges a message.
// NOTE(review): socket.on('message', ...) registers a NEW listener on every
// chunk — listeners accumulate for the socket's lifetime and resolve() may
// fire on any later message; confirm WebSocketService dedupes listeners or
// switch to a one-shot acknowledgement.
const saveAudioChunk = (buffer, timestamp) => {
  return new Promise(resolve => {
    // When barge-in is disabled, drop chunks captured while the bot is
    // speaking instead of uploading them.
    if (!getStopStatus() && getPlayingValue()) {
      resolve();
      return;
    }
    const wavBlob = encodeWAV(buffer, audioContext.sampleRate);
    let reader = new FileReader();
    reader.readAsDataURL(wavBlob);
    reader.onloadend = async function () {
      // Strip the "data:audio/wav;base64," prefix added by readAsDataURL.
      let base64data = reader.result.split(',')[1];
      if (!base64data) {
        resolve();
        return;
      }
      const obj = {
        uid: getNewUserId(),
        messages: [
          {
            role: 'user',
            content: [
              {
                type: 'input_audio',
                input_audio: {
                  data: base64data,
                  format: 'wav',
                  timestamp: String(timestamp)
                }
              }
            ]
          }
        ]
      };
      socket.send(JSON.stringify(obj));
      socket.on('message', data => {
        console.log('message: ', data);
        // Track upload latency and backlog for the delay indicator.
        delayTimestamp.value = +new Date() - timestamp;
        delayCount.value = taskQueue.value.length;
        resolve();
      });
      // (Previous HTTP upload path, kept for reference.)
      // try {
      //   await sendMessage(obj);
      //   delayTimestamp.value = +new Date() - timestamp;
      //   delayCount.value = taskQueue.value.length;
      // } catch (err) {}
      // resolve();
    };
  });
};
// Concatenate a list of Float32Array chunks into a single Float32Array of
// `length` samples; the caller supplies the combined length.
const mergeBuffers = (buffers, length) => {
  const merged = new Float32Array(length);
  buffers.reduce((writePos, chunk) => {
    merged.set(chunk, writePos);
    return writePos + chunk.length;
  }, 0);
  return merged;
};
// Open (or reopen) the SSE completions stream that carries the model's
// text and audio back to the client. Reconnects itself on several error
// paths; on close, trims the upload queue and reconnects once playback has
// drained.
const buildConnect = async () => {
  // Handshake payload: an empty content item just opens the stream.
  const obj = {
    messages: [
      {
        role: 'user',
        content: [{ type: 'none' }]
      }
    ],
    stream: true
  };
  isEnd.value = false;
  ctrl.abort();
  ctrl = new AbortController();
  const url = `/api/v1/completions${window.location.search}`;
  fetchEventSource(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      service: 'minicpmo-server',
      uid: getNewUserId()
    },
    body: JSON.stringify(obj),
    signal: ctrl.signal,
    openWhenHidden: true,
    async onopen(response) {
      console.log('onopen', response);
      // Reset per-turn buffers before data starts flowing.
      isFirstPiece.value = true;
      isFirstReturn.value = true;
      allVoice.value = [];
      base64List.value = [];
      if (response.status !== 200) {
        ElMessage({
          type: 'error',
          message: 'At limit. Please try again soon.',
          duration: 3000,
          customClass: 'system-error'
        });
        isReturnError.value = true;
      } else {
        isReturnError.value = false;
        // skipDisabled.value = false;
        drawText();
      }
    },
    onmessage(msg) {
      const data = JSON.parse(msg.data);
      if (data.response_id) {
        curResponseId.value = data.response_id;
      }
      if (data.choices[0]?.text) {
        // Strip the end marker before queueing text for the typewriter.
        textQueue.value += data.choices[0].text.replace('<end>', '');
        console.warn('text return time -------------------------------', +new Date());
      }
      // The first message echoes the user's own audio back from the server
      // and needs special handling: it becomes the USER transcript entry.
      if (isFirstReturn.value) {
        console.log('第一次');
        isFirstReturn.value = false;
        // An empty echo means the stream is unusable — reconnect.
        if (!data.choices[0].audio) {
          buildConnect();
          return;
        }
        outputData.value.push({
          type: 'USER',
          audio: `data:audio/wav;base64,${data.choices[0].audio}`
        });
        outputData.value.push({
          type: 'BOT',
          text: '',
          audio: ''
        });
        return;
      }
      if (data.choices[0]?.audio) {
        console.warn('audio return time -------------------------------', +new Date());
        if (!getStopValue() && isCalling.value) {
          skipDisabled.value = false;
          base64List.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
          addAudioQueue(() => truePlay(data.choices[0].audio));
        }
        allVoice.value.push(`data:audio/wav;base64,${data.choices[0].audio}`);
      } else {
        // Missing audio mid-stream is treated as a fault — reconnect.
        buildConnect();
      }
      if (data.choices[0].text.includes('<end>')) {
        console.log('收到结束标记了:', +new Date());
        if (
          outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
          outputData.value[outputData.value.length - 1].audio === '' &&
          allVoice.value.length > 0
        ) {
          outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
        }
      }
    },
    onclose() {
      console.log('onclose', +new Date());
      isEnd.value = true;
      outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
      vadStartTime.value = +new Date();
      if (audioPlayQueue.value.length === 0) {
        // Nothing left to play: keep only the last ~1s of queued uploads
        // and immediately open the next stream.
        let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
        console.log('taskQueue111111111: ', taskQueue.value, startIndex);
        if (startIndex !== -1) {
          taskQueue.value = taskQueue.value.slice(startIndex);
          console.log('截取后长度:', taskQueue.value, vadStartTime.value);
        }
        buildConnect();
      }
    },
    onerror(err) {
      console.log('onerror', err);
      ctrl.abort();
      ctrl = new AbortController();
      // Rethrow so fetchEventSource stops retrying on its own.
      throw err;
    }
  });
};
// Typewriter effect: move one character per animation frame from the
// streaming text queue onto the latest BOT transcript entry.
// NOTE(review): the function reschedules itself unconditionally on the last
// line, so the cancelAnimationFrame call only cancels the previous frame —
// the loop keeps polling even while the queue is empty; confirm intended.
const drawText = async () => {
  if (textQueue.value.length > 0) {
    outputData.value[outputData.value.length - 1].text += textQueue.value[0];
    textQueue.value = textQueue.value.slice(1);
  } else {
    cancelAnimationFrame(textAnimationInterval.value);
  }
  textAnimationInterval.value = requestAnimationFrame(drawText);
};
// Queue a returned audio segment for sequential playback and kick off the
// player when it is idle. The very first piece is delayed 500ms so a few
// segments can buffer up before playback starts.
const addAudioQueue = async item => {
  audioPlayQueue.value.push(item);
  if (isFirstPiece.value) {
    await delay(500);
    isFirstPiece.value = false;
  }
  if (audioPlayQueue.value.length > 0 && !playing.value) {
    playing.value = true;
    playAudio();
  }
};
// Drive the playback queue. While the stream is still open, batches of >=2
// buffered segments are merged and played as one clip to avoid gaps; once
// the stream has ended the remainder is merged and played, after which
// state is reset and the next SSE connection is opened. Falls through to
// one-by-one playback when fewer than two segments are buffered.
const playAudio = () => {
  console.log('剩余播放列表:', audioPlayQueue.value, +new Date());
  if (!isEnd.value && base64List.value.length >= 2) {
    // Stream open with a backlog: merge the buffered segments and play them.
    const remainLen = base64List.value.length;
    const blob = mergeBase64ToBlob(base64List.value);
    audioDOM.src = blob;
    audioDOM.play();
    console.error('前期合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('前期合并后播放结束时间: ', +new Date());
      // Drop the segments just played; keep whatever arrived meanwhile.
      base64List.value = base64List.value.slice(remainLen);
      audioPlayQueue.value = audioPlayQueue.value.slice(remainLen);
      playAudio();
    };
    return;
  }
  if (isEnd.value && base64List.value.length >= 2) {
    // Stream finished: merge and play the final batch, then reset state.
    const blob = mergeBase64ToBlob(base64List.value);
    // let audio = new Audio();
    audioDOM.src = blob;
    audioDOM.play();
    console.error('最后合并后播放开始时间: ', +new Date());
    audioDOM.onended = () => {
      console.error('合并后播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      base64List.value = [];
      audioPlayQueue.value = [];
      playing.value = false;
      skipDisabled.value = true;
      if (isCalling.value && !isReturnError.value) {
        // skipDisabled.value = true;
        taskQueue.value = [];
        // (Previous queue-trimming logic, kept for reference.)
        // vadStartTime.value = +new Date();
        // // 每次完成后只保留当前时刻往前推1s的语音
        // console.log(
        //   '截取前长度:',
        //   taskQueue.value.map(item => item.time)
        // );
        // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
        // if (startIndex !== -1) {
        //   taskQueue.value = taskQueue.value.slice(startIndex);
        //   console.log(
        //     '截取后长度:',
        //     taskQueue.value.map(item => item.time),
        //     vadStartTime.value
        //   );
        // }
        buildConnect();
      }
    };
    return;
  }
  // Fewer than two segments buffered: play the next one directly.
  base64List.value.shift();
  const item = audioPlayQueue.value.shift();
  if (item) {
    item().finally(() => playAudio());
  } else {
    playing.value = false;
    if (isEnd.value) {
      console.warn('play done................');
      skipDisabled.value = true;
    }
    // Playback fully drained: while still on a call and with no stream
    // error, open the next connection.
    if (isEnd.value && isCalling.value && !isReturnError.value) {
      // skipDisabled.value = true;
      taskQueue.value = [];
      // (Previous queue-trimming logic, kept for reference.)
      // vadStartTime.value = +new Date();
      // // 每次完成后只保留当前时刻往前推1s的语音
      // console.log(
      //   '截取前长度:',
      //   taskQueue.value.map(item => item.time)
      // );
      // let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
      // if (startIndex !== -1) {
      //   taskQueue.value = taskQueue.value.slice(startIndex);
      //   console.log(
      //     '截取后长度:',
      //     taskQueue.value.map(item => item.time),
      //     vadStartTime.value
      //   );
      // }
      buildConnect();
    }
  }
};
// Play a single Base64 WAV segment through the shared audio element.
// Resolves when playback ends, or immediately on a playback error so the
// queue keeps draining.
const truePlay = async voice => {
  return new Promise(resolve => {
    audioDOM.src = 'data:audio/wav;base64,' + voice;
    console.error('播放开始时间:', +new Date());
    audioDOM
      .play()
      .then(() => {
        // console.error('播放结束时间: ', +new Date());
      })
      .catch(error => {
        // Resolve first so the playback queue never stalls on an error.
        resolve();
        if (error.name === 'NotAllowedError' || error.name === 'SecurityError') {
          console.error('User interaction required or permission issue:', error);
          ElMessage.warning('音频播放失败');
        } else {
          console.error('Error playing audio:', error);
        }
      });
    audioDOM.onended = () => {
      console.error('播放结束时间: ', +new Date());
      // URL.revokeObjectURL(url);
      resolve();
    };
  });
};
// Enqueue an upload task (with its capture timestamp) and start draining
// the queue unless a drain is already in progress.
const addQueue = (time, item) => {
  taskQueue.value.push({ func: item, time });
  if (taskQueue.value.length > 0 && !running.value) {
    running.value = true;
    processQueue();
  }
};
// Drain the pending upload queue one task at a time: each queued `func`
// returns a Promise, and the next task only starts after the previous one
// settles. `running` (set by addQueue) guards against concurrent drains.
const processQueue = () => {
  const item = taskQueue.value.shift();
  if (item?.func) {
    // Continue even when an upload fails: without the catch, a rejected
    // task would leave `running` stuck at true and stall the queue forever.
    item
      .func()
      .catch(() => {})
      .then(() => {
        processQueue();
      });
  } else {
    running.value = false;
  }
};
// Sample the analyser's current frequency spectrum into dataArray and
// reschedule itself every animation frame, driving the voice-bar visual.
const drawBars = () => {
  // AnalyserNode.getByteFrequencyData copies the current frequency data
  // into the supplied Uint8Array (one byte per frequency bin).
  analyser.value.getByteFrequencyData(dataArray.value);
  animationFrameId.value = requestAnimationFrame(drawBars);
};
// Interrupt the bot's current reply: flush queued/playing audio, trim the
// pending upload queue to roughly the last second of captured speech, ask
// the backend to stop, then reopen the streaming connection.
const skipVoice = async () => {
  // Record the interruption time (skip button or VAD trigger) before trimming.
  vadStartTime.value = +new Date();
  if (!skipDisabled.value) {
    if (
      outputData.value[outputData.value.length - 1]?.type === 'BOT' &&
      outputData.value[outputData.value.length - 1].audio === ''
    ) {
      // Freeze the audio produced so far into the transcript entry.
      outputData.value[outputData.value.length - 1].audio = mergeBase64ToBlob(allVoice.value);
    }
    base64List.value = [];
    audioPlayQueue.value = [];
    // After skipping, keep only upload tasks recorded from ~1s before the
    // interruption onwards.
    console.log(
      '截取前长度:',
      taskQueue.value.map(item => item.time)
    );
    let startIndex = taskQueue.value.findIndex(item => item.time >= vadStartTime.value - 1000);
    if (startIndex !== -1) {
      taskQueue.value = taskQueue.value.slice(startIndex);
      console.log(
        '截取后长度:',
        taskQueue.value.map(item => item.time),
        vadStartTime.value
      );
    }
    stop.value = true;
    audioDOM.pause();
    // Short grace period before the skip button is disabled again.
    setTimeout(() => {
      skipDisabled.value = true;
    }, 300);
    try {
      playing.value = false;
      await stopMessage();
      stop.value = false;
      // playing.value = false;
      buildConnect();
      // cancelAnimationFrame(animationFrameId.value);
    } catch (err) {}
  }
};
// Upload the user's saved model configuration (cloned-voice sample, prompts,
// VAD threshold, video quality) before each call starts.
// Resolves when the backend accepts the config (or when no config exists);
// shows an error toast and rejects on a non-zero backend code so the
// caller's .catch() can abort call setup.
const uploadUserConfig = async () => {
  // Read localStorage once instead of once per destructured field.
  const raw = localStorage.getItem('configData');
  if (!raw) {
    // Nothing configured yet — treat as success so the call can proceed.
    return;
  }
  const {
    videoQuality,
    useAudioPrompt,
    voiceClonePrompt,
    assistantPrompt,
    vadThreshold,
    audioFormat,
    base64Str
  } = JSON.parse(raw);
  const obj = {
    messages: [
      {
        role: 'user',
        content: [
          {
            type: 'input_audio',
            input_audio: {
              data: base64Str,
              format: audioFormat
            }
          },
          {
            type: 'options',
            options: {
              hd_video: videoQuality,
              use_audio_prompt: useAudioPrompt,
              vad_threshold: vadThreshold,
              voice_clone_prompt: voiceClonePrompt,
              assistant_prompt: assistantPrompt
            }
          }
        ]
      }
    ]
  };
  const { code, message, data } = await uploadConfig(obj);
  modelVersion.value = data?.choices?.content || '';
  if (code !== 0) {
    ElMessage({
      type: 'error',
      message: message,
      duration: 3000,
      customClass: 'system-error'
    });
    // Reject with a real Error (instead of undefined) so callers can log it.
    throw new Error(message || 'uploadConfig failed');
  }
};
</script>
<style lang="less">
.voice-page {
flex: 1;
height: 100%;
display: flex;
flex-direction: column;
&-header {
display: flex;
align-items: center;
padding: 0 16px 16px;
box-shadow: 0 0.5px 0 0 #e0e0e0;
margin-bottom: 16px;
justify-content: space-between;
.header-icon {
display: flex;
align-items: center;
img {
width: 24px;
height: 24px;
margin-right: 8px;
}
span {
color: rgba(23, 23, 23, 0.9);
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
margin-right: 40px;
flex-shrink: 0;
}
}
.voice-container {
display: flex;
.voice-icon {
width: 191px;
height: 45px;
}
}
}
&-output {
flex: 1;
height: 0;
padding: 0 16px;
margin-bottom: 16px;
display: flex;
flex-direction: column;
.output-content {
flex: 1;
overflow: auto;
}
.skip-box {
display: flex;
align-items: center;
justify-content: flex-end;
margin-top: 16px;
}
}
&-btn {
text-align: center;
padding: 8px 0;
.el-button {
width: 284px;
height: 46px;
border-radius: 8px;
}
.el-button.el-button--success {
background: #647fff;
border-color: #647fff;
&:hover {
opacity: 0.8;
}
span {
color: #fff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
}
}
.el-button.el-button--success.is-disabled {
background: #f3f3f3;
border-color: #f3f3f3;
span {
color: #d1d1d1;
}
}
.el-button.el-button--danger {
border-color: #dc3545;
background-color: #dc3545;
color: #ffffff;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 500;
line-height: normal;
.phone-icon {
margin-right: 10px;
}
.btn-text {
margin-right: 10px;
}
.btn-desc {
margin-right: 16px;
}
.time {
display: flex;
align-items: center;
.time-minute,
.time-second {
width: 26px;
height: 26px;
display: flex;
justify-content: center;
align-items: center;
border-radius: 3.848px;
background: rgba(47, 47, 47, 0.5);
}
.time-colon {
margin: 0 3px;
}
}
}
}
}
</style>

View File

@@ -0,0 +1,36 @@
import lame from '@breezystack/lamejs';
// Encode a mono 16kHz sample buffer to MP3 (128kbps) with lamejs and
// resolve with the result as a Base64 string (no data URI prefix).
// NOTE(review): the function also appends a playable <audio> element to
// #voice-box — this looks like leftover debug output (and the object URL it
// creates is never revoked); confirm before relying on it.
export const audioBufferToMp3Base64 = audioBuffer => {
  const mp3Encoder = new lame.Mp3Encoder(1, 16000, 128);
  // lamejs consumes samples in blocks of 1152 frames.
  const sampleBlockSize = 1152;
  const mp3Data = [];
  for (let i = 0; i < audioBuffer.length; i += sampleBlockSize) {
    const sampleChunk = audioBuffer.subarray(i, i + sampleBlockSize);
    const mp3buf = mp3Encoder.encodeBuffer(sampleChunk);
    if (mp3buf.length > 0) {
      mp3Data.push(new Int8Array(mp3buf));
    }
  }
  // Flush the encoder's internal buffer to get the final frames.
  const mp3buf = mp3Encoder.flush();
  if (mp3buf.length > 0) {
    mp3Data.push(new Int8Array(mp3buf));
  }
  const mp3Blob = new Blob(mp3Data, { type: 'audio/mp3' });
  const url = URL.createObjectURL(mp3Blob);
  let dom = document.querySelector('#voice-box');
  let audio = document.createElement('audio');
  audio.controls = true;
  audio.src = url;
  dom.appendChild(audio);
  return new Promise(resolve => {
    const reader = new FileReader();
    reader.onloadend = () => {
      // Strip the "data:audio/mp3;base64," prefix added by readAsDataURL.
      const base64String = reader.result.split(',')[1];
      resolve(base64String);
    };
    reader.readAsDataURL(mp3Blob);
  });
};

View File

@@ -0,0 +1,132 @@
// Convert a Base64 string — with or without a "data:...;base64," URI
// prefix — into its raw bytes as an ArrayBuffer.
const base64ToArrayBuffer = base64 => {
  // The original split(',')[1] assumed a data URI prefix and crashed
  // (atob(undefined)) on plain Base64 input; accept both forms.
  const commaIndex = base64.indexOf(',');
  const payload = commaIndex === -1 ? base64 : base64.slice(commaIndex + 1);
  const binaryString = atob(payload);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes.buffer;
};
// Parse a WAV file's header and return its format fields plus a view over
// the PCM payload.
// NOTE(review): assumes the canonical 44-byte header layout (fmt chunk
// directly at offset 12, data chunk at offset 36) as produced by this
// app's own encodeWAV/createWavHeader; WAV files carrying extra chunks
// (LIST, fact, ...) would be misread — confirm inputs only come from
// those encoders.
const parseWav = buffer => {
  const view = new DataView(buffer);
  const format = view.getUint16(20, true); // AudioFormat (1 = PCM)
  const channels = view.getUint16(22, true); // NumChannels
  const sampleRate = view.getUint32(24, true); // SampleRate
  const bitsPerSample = view.getUint16(34, true); // BitsPerSample
  const dataOffset = 44; // first byte after the canonical header
  const dataSize = view.getUint32(40, true); // Subchunk2Size
  const audioData = new Uint8Array(buffer, dataOffset, dataSize);
  return {
    format,
    channels,
    sampleRate,
    bitsPerSample,
    audioData
  };
};
// Build the canonical 44-byte PCM WAV (RIFF) header for `audioDataSize`
// bytes of sample data with the given sample rate, channel count, and
// bit depth. Returns the header as an ArrayBuffer.
const createWavHeader = (audioDataSize, sampleRate, channels, bitsPerSample) => {
  const header = new ArrayBuffer(44);
  const view = new DataView(header);
  // Write an ASCII tag byte-by-byte at the given offset.
  const putTag = (offset, tag) => {
    [...tag].forEach((ch, i) => view.setUint8(offset + i, ch.charCodeAt(0)));
  };
  const bytesPerFrame = (channels * bitsPerSample) / 8;
  putTag(0, 'RIFF'); // ChunkID
  view.setUint32(4, 36 + audioDataSize, true); // ChunkSize
  putTag(8, 'WAVE'); // Format
  putTag(12, 'fmt '); // Subchunk1ID
  view.setUint32(16, 16, true); // Subchunk1Size (PCM)
  view.setUint16(20, 1, true); // AudioFormat (PCM)
  view.setUint16(22, channels, true); // NumChannels
  view.setUint32(24, sampleRate, true); // SampleRate
  view.setUint32(28, sampleRate * bytesPerFrame, true); // ByteRate
  view.setUint16(32, bytesPerFrame, true); // BlockAlign
  view.setUint16(34, bitsPerSample, true); // BitsPerSample
  putTag(36, 'data'); // Subchunk2ID
  view.setUint32(40, audioDataSize, true); // Subchunk2Size
  return header;
};
// Merge multiple Base64-encoded WAV files (all with identical format) into
// a single WAV Blob by concatenating their PCM payloads under one freshly
// built header.
// Throws when the inputs disagree on sample rate, channel count, or bit
// depth.
const mergeAudioFiles = base64AudioArray => {
  let sampleRate, channels, bitsPerSample;
  // Collect payload views and copy once at the end — the original rebuilt
  // the combined buffer on every iteration, which is O(n^2) in total bytes.
  const audioParts = [];
  let totalAudioBytes = 0;
  for (let i = 0; i < base64AudioArray.length; i++) {
    const arrayBuffer = base64ToArrayBuffer(base64AudioArray[i]);
    const wav = parseWav(arrayBuffer);
    // The first file fixes the expected format for the whole batch.
    if (i === 0) {
      sampleRate = wav.sampleRate;
      channels = wav.channels;
      bitsPerSample = wav.bitsPerSample;
    }
    // Ensure all files share that format.
    if (wav.sampleRate !== sampleRate || wav.channels !== channels || wav.bitsPerSample !== bitsPerSample) {
      throw new Error('All audio files must have the same format.');
    }
    audioParts.push(wav.audioData);
    totalAudioBytes += wav.audioData.byteLength;
  }
  const wavHeader = createWavHeader(totalAudioBytes, sampleRate, channels, bitsPerSample);
  const combinedWavBuffer = new Uint8Array(wavHeader.byteLength + totalAudioBytes);
  combinedWavBuffer.set(new Uint8Array(wavHeader), 0);
  let offset = wavHeader.byteLength;
  for (const part of audioParts) {
    combinedWavBuffer.set(part, offset);
    offset += part.byteLength;
  }
  // Wrap the combined header + payload as a playable WAV Blob.
  return new Blob([combinedWavBuffer], { type: 'audio/wav' });
};
// Merge Base64 WAV segments into one WAV Blob and return an object URL for
// it (suitable as an <audio> src).
// NOTE(review): callers are never observed revoking these object URLs, so
// each merge keeps its blob alive for the page's lifetime — consider
// URL.revokeObjectURL once the audio element is done with it.
export const mergeBase64ToBlob = base64List => {
  const combinedBlob = mergeAudioFiles(base64List);
  const audioUrl = URL.createObjectURL(combinedBlob);
  return audioUrl;
};
// Decode a list of plain Base64 strings (no data URI prefix), concatenate
// their raw bytes in order, and re-encode the result as a single Base64
// string.
// NOTE(review): the inputs are concatenated verbatim — for WAV files this
// keeps every file's own header inside the output; confirm callers expect
// raw byte concatenation rather than a proper WAV merge.
/**
 * @param {Array<string>} base64Strings - plain Base64 payloads
 * @returns {string} Base64 encoding of the concatenated bytes
 */
export const mergeBase64WavFiles = base64Strings => {
  const binaryDataArray = base64Strings.map(base64 => {
    return Uint8Array.from(atob(base64), c => c.charCodeAt(0));
  });
  const totalLength = binaryDataArray.reduce((sum, arr) => sum + arr.length, 0);
  const mergedArray = new Uint8Array(totalLength);
  let offset = 0;
  binaryDataArray.forEach(arr => {
    mergedArray.set(arr, offset);
    offset += arr.length;
  });
  // Convert in bounded chunks: String.fromCharCode(...mergedArray) spreads
  // every byte onto the call stack and throws RangeError once the merged
  // audio exceeds the engine's argument limit.
  let binaryString = '';
  const CHUNK_SIZE = 0x8000;
  for (let i = 0; i < mergedArray.length; i += CHUNK_SIZE) {
    binaryString += String.fromCharCode(...mergedArray.subarray(i, i + CHUNK_SIZE));
  }
  return btoa(binaryString);
};

View File

@@ -0,0 +1,29 @@
// Decode a plain Base64 string into its raw bytes as an ArrayBuffer.
const base64ToArrayBuffer = base64 => {
  const decoded = atob(base64);
  const bytes = Uint8Array.from(decoded, char => char.charCodeAt(0));
  return bytes.buffer;
};
// Join several ArrayBuffers into one contiguous ArrayBuffer, preserving
// their order.
const concatenateArrayBuffers = buffers => {
  const totalBytes = buffers.reduce((sum, buffer) => sum + buffer.byteLength, 0);
  const combined = new Uint8Array(totalBytes);
  let writePos = 0;
  for (const buffer of buffers) {
    combined.set(new Uint8Array(buffer), writePos);
    writePos += buffer.byteLength;
  }
  return combined.buffer;
};
// Decode and concatenate Base64 MP3 segments into a single Blob and return
// an object URL pointing at it. MP3 frames can be concatenated directly,
// so no re-encoding is needed.
// NOTE(review): the returned object URL is never revoked by this module;
// callers should URL.revokeObjectURL it when done to avoid leaking memory.
export const mergeMp3Base64ToBlob = base64Strings => {
  const arrayBuffers = base64Strings.map(base64ToArrayBuffer);
  const combinedArrayBuffer = concatenateArrayBuffers(arrayBuffers);
  const blob = new Blob([combinedArrayBuffer], { type: 'audio/mp3' });
  // Debug console.log of the URL removed — it leaked into production logs.
  return URL.createObjectURL(blob);
};

View File

@@ -0,0 +1,261 @@
<template>
<div class="home-page">
<div class="home-page-header">
<div class="home-page-header-logo">
<!-- <img src="@/assets/images/logo.png" /> -->
<SvgIcon name="miniCPM2.6" class="logo-icon" />
</div>
<div class="home-page-header-menu">
<div
class="home-page-header-menu-item"
v-for="(item, index) in tabList"
:key="item.type"
:class="`home-page-header-menu-item ${activeTab === item.type ? 'active-tab' : ''} ${item.disabled ? 'disabled-tab' : ''}`"
@click="handleClickTab(item.type, index)"
>
{{ getMenuTab(item.type) }}
</div>
</div>
<div class="home-page-header-switch">
<div class="change-language">
<div
:class="`change-language-item ${language === 'en' ? 'active-language' : ''}`"
@click="handleChangeLanguage('en')"
>
English
</div>
<div
:class="`change-language-item ${language === 'zh' ? 'active-language' : ''}`"
@click="handleChangeLanguage('zh')"
>
中文
</div>
</div>
</div>
</div>
<div :class="`home-page-content ${activeTab === 'chatbot' && 'no-padding'}`">
<VoiceCallWs v-if="isWebSocket && activeTab === 'voice'" v-model="isCalling" />
<VoiceCall v-else-if="!isWebSocket && activeTab === 'voice'" v-model="isCalling" />
<VideoCallWs v-else-if="isWebSocket && activeTab === 'video'" v-model="isCalling" />
<VideoCall v-else-if="!isWebSocket && activeTab === 'video'" v-model="isCalling" />
<iframe
src="https://minicpm-omni-webdemo-iframe.modelbest.cn"
frameborder="0"
width="100%"
height="100%"
v-else
/>
<div class="config-box" v-if="activeTab !== 'chatbot'">
<ModelConfig v-model:isCalling="isCalling" v-model:type="activeTab" />
</div>
</div>
</div>
</template>
<script setup>
import VoiceCall from './components/VoiceCall.vue';
import VoiceCallWs from './components/VoiceCall_0105.vue';
import VideoCall from './components/VideoCall.vue';
import VideoCallWs from './components/VideoCall_0105.vue';
import { useI18n } from 'vue-i18n';
import { useRoute, useRouter } from 'vue-router';
const route = useRoute();
const router = useRouter();
const typeObj = {
0: 'video',
1: 'voice',
2: 'chatbot'
};
const defaultType = typeObj[route.query.type] || 'voice';
const { t, locale } = useI18n();
const activeTab = ref(defaultType);
const language = ref(localStorage.getItem('language') || 'zh');
const isWebSocket = false;
const tabList = ref([
{
type: 'video',
text: 'Realtime Video Call'
},
{
type: 'voice',
text: 'Realtime Voice Call'
},
{
type: 'chatbot',
text: 'Chatbot'
// disabled: true
}
]);
const isCalling = ref(false);
// Switch the UI language: update the local ref, vue-i18n's active locale,
// and persist the choice for future visits.
const handleChangeLanguage = val => {
  console.log('val: ', val);
  language.value = val;
  locale.value = val;
  localStorage.setItem('language', val);
};
// Map a tab type ('video' | 'voice' | 'chatbot') to its localized label.
// Returns an empty string for unknown types.
const getMenuTab = val => {
  let text = '';
  switch (val) {
    case 'video':
      text = t('menuTabVideo');
      break;
    case 'voice':
      text = t('menuTabAudio');
      break;
    case 'chatbot':
      text = t('menuTabChatbot');
      break;
    default:
      break;
  }
  return text;
};
// Activate the clicked tab and mirror the selection into the URL query
// (`type` is the tab's index) so the choice survives reloads and sharing.
const handleClickTab = (val, index) => {
  activeTab.value = val;
  const port = route.query.port;
  const type = index;
  router.push({
    path: '/',
    query: {
      port,
      type
    }
  });
};
</script>
<style lang="less" scoped>
.home-page {
width: 100%;
height: 100%;
display: flex;
flex-direction: column;
&-header {
display: flex;
align-items: center;
&-logo {
width: 174px;
height: 46px;
display: flex;
align-items: center;
justify-content: center;
border-radius: 12px;
background: #ffffff;
flex-shrink: 0;
padding: 0 24px;
.logo-icon {
width: 100%;
height: 100%;
}
}
&-menu {
display: flex;
align-items: center;
margin-left: 16px;
&-item {
width: 260px;
height: 46px;
display: flex;
align-items: center;
justify-content: center;
background: #ffffff;
color: #252525;
font-family: PingFang SC;
font-size: 16px;
font-style: normal;
font-weight: 400;
line-height: normal;
border: 1px solid #dde1eb;
cursor: pointer;
user-select: none;
}
&-item + &-item {
border-left: none;
}
&-item:first-of-type {
border-radius: 12px 0 0 12px;
}
&-item:last-of-type {
border-radius: 0 12px 12px 0;
}
.active-tab {
color: #ffffff;
background: linear-gradient(90deg, #789efe 0.02%, #647fff 75.28%);
font-weight: 500;
}
.disabled-tab {
cursor: not-allowed;
border-color: #dde1eb;
color: #d1d1d1;
}
}
&-switch {
flex: 1;
display: flex;
align-items: center;
justify-content: flex-end;
.change-language {
display: flex;
align-items: center;
&-item {
width: 80px;
height: 32px;
display: flex;
justify-content: center;
align-items: center;
border: 1px solid #dde1eb;
background: #ffffff;
color: #252525;
font-family: PingFang SC;
font-size: 14px;
font-weight: 400;
line-height: normal;
cursor: pointer;
user-select: none;
}
&-item:first-of-type {
border-right: none;
border-radius: 12px 0 0 12px;
}
&-item:last-of-type {
border-radius: 0 12px 12px 0;
}
&-item.active-language {
color: #ffffff;
background: linear-gradient(90deg, #789efe 0.02%, #647fff 75.28%);
}
}
}
}
&-content {
flex: 1;
height: 0;
border-radius: 12px;
margin-top: 16px;
background: #ffffff;
padding: 18px;
display: flex;
.config-box {
width: 322px;
margin-left: 16px;
// border-left: 1px solid black;
box-shadow: -0.5px 0 0 0 #e0e0e0;
overflow: auto;
}
}
.no-padding {
padding: 0;
overflow: hidden;
background: #ffffff;
}
}
</style>
<style lang="less">
.el-popover.el-popper.config-popover {
padding: 18px;
border-radius: 12px;
}
</style>