- 前端 SDK 接入时,出于安全考虑,需要业务方服务端支持临时鉴权,并返回临时鉴权 token 供前端 SDK 使用。服务端获取 token 的方式如下:
/**
 * Requests a temporary JWT from the openspeech STS token endpoint.
 *
 * Sends a POST with the app id and a 300-second duration, authenticated with
 * the access key in the `Authorization` header, and returns the `jwt_token`
 * field of the JSON response.
 *
 * @param appid - Application id sent in the request body.
 * @param accessKey - Access key placed in the `Authorization` header.
 * @returns The `jwt_token` value of the parsed response (`undefined` when the
 *   response body has no such field).
 */
export async function getToken(appid: string, accessKey: string) {
  const endpoint = 'https://openspeech.bytedance.com/api/v1/sts/token';
  const payload = {
    appid,
    duration: 300, // in seconds; the original note says the default is 1 hour
  };

  const response = await fetch(endpoint, {
    method: 'POST',
    headers: {
      Authorization: `Bearer; ${accessKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(payload),
  });

  const data = await response.json();
  return data.jwt_token;
}
- 由于浏览器端 WebSocket 不支持自定义 header,需要通过 query 参数进行鉴权。
- 不同服务所需的 auth 参数不同,具体可查看对应服务的 demo。
/**
 * Appends the given auth parameters to a URL as a query string.
 *
 * Both keys and values are percent-encoded with `encodeURIComponent`. When
 * `auth` is empty the URL is returned unchanged (no dangling `?`), and when
 * the URL already carries a query string the parameters are joined with `&`
 * instead of a second `?`.
 *
 * @param url - Base URL, with or without an existing query string.
 * @param auth - Query parameters to append (own enumerable properties only).
 * @returns The URL with the encoded parameters appended.
 */
export function buildFullUrl(url: string, auth: Record<string, string>): string {
  const query = Object.entries(auth)
    .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(value)}`)
    .join('&');

  if (!query) {
    return url;
  }

  let separator = '?';
  if (url.includes('?')) {
    // An existing query needs '&', unless the URL already ends with a separator.
    separator = url.endsWith('?') || url.endsWith('&') ? '' : '&';
  }
  return `${url}${separator}${query}`;
}
// For example: build the TTS WebSocket URL with the temporary JWT passed as
// the `api_jwt` query parameter.
// NOTE(review): `token` is not defined in this snippet; presumably it is the
// value returned by getToken — confirm before copying.
const url = buildFullUrl('wss://openspeech.bytedance.com/api/v1/tts', {
api_jwt: token,
})
小模型和大模型使用同一个类进行调用,但支持的参数不同,具体参数请查看火山 API 文档。
import { useRef, useState } from 'react';
import { LabASR } from 'byted-ailab-speech-sdk';
import { getToken } from './helper';
import { buildFullUrl } from './helps';
/**
 * Demo component for speech recognition with `LabASR`.
 *
 * Renders start/stop buttons, a status header, the recognized text and the
 * last full response payload. `startASR` fetches a temporary token, builds the
 * WebSocket URL with query-parameter auth, connects the client and starts
 * recording; `stopASR` stops the recording once per session.
 *
 * NOTE(review): `uuid` is called below but is not among the imports shown for
 * this snippet; it must be provided by the integrator.
 */
const ASRStory = () => {
  const [header, setHeader] = useState('');
  const [content, setContent] = useState('');
  const [fullResponse, setFullResponse] = useState({});
  // Guards against calling stopRecord more than once per recording session.
  const recordStopping = useRef(false);
  // Lazy initializer so the client is created once instead of on every render.
  const [asrClient] = useState(() =>
    LabASR({
      onMessage: async (text, fullData) => {
        setContent(text);
        setFullResponse(fullData);
      },
      onStart() {
        setHeader('正在录音');
        setContent('');
      },
      onClose() {
        setHeader('连接关闭');
        // Custom socket-close logic goes here.
        // stopASR();
      },
      onError() {
        setHeader('连接异常');
        // Custom socket-error logic goes here.
        // stopASR();
      },
    })
  );

  const startASR = async () => {
    recordStopping.current = false;
    const appid = '';
    const accessKey = '';
    const cluster = '';
    const auth: Record<string, string> = {};
    const workflowPunctuation = 'audio_in,resample,partition,vad,fe,decode,nlu_punctuate';

    try {
      // Small model
      const token = await getToken(appid, accessKey);
      if (token) {
        auth.api_jwt = token;
      }
      const fullUrl = buildFullUrl('wss://openspeech.bytedance.com/api/v2/asr', auth);
      const params = {
        url: fullUrl,
        config: {
          app: {
            appid: appid,
            // NOTE(review): placeholder value kept from the original demo;
            // confirm what the service expects here.
            token: 'access token',
            cluster: cluster,
          },
          user: {
            uid: 'xxx', // Defined by the business side; helps with troubleshooting.
          },
          audio: {
            format: 'pcm',
            rate: 16000,
            bits: 16,
            channel: 1,
          },
          request: {
            reqid: uuid(),
            workflow: workflowPunctuation,
            sequence: 1,
          },
        },
      };

      // Large model: use the following instead of the small-model section above.
      // const token = await getToken(appid, accessKey);
      // if (token) {
      //   auth.api_resource_id = 'volc.bigasr.sauc.duration';
      //   auth.api_app_key = appid;
      //   auth.api_access_key = `Jwt; ${token}`;
      // }
      // const fullUrl = buildFullUrl(`wss://openspeech.bytedance.com/api/v3/sauc/bigmodel`, auth);
      // const params = {
      //   url: fullUrl,
      //   config: {
      //     user: {
      //       uid: 'byted sdk demo',
      //     },
      //     audio: {
      //       format: 'pcm',
      //       rate: 16000,
      //       bits: 16,
      //       channel: 1,
      //     },
      //     request: {
      //       model_name: 'bigmodel',
      //       show_utterances: true,
      //     },
      //   },
      // };

      asrClient.connect(params);
      await asrClient.startRecord({}, async (recordResult: Blob) => {
        // Captured audio data; can be used for audio visualization if needed.
        // const rawData = await recordResult.arrayBuffer();
        // console.warn('rawData:', rawData);
      });
    } catch (err) {
      // startASR runs from a click handler, so a rejection would otherwise go
      // unhandled; surface it with the same status text the onError callback uses.
      console.warn('err:', err);
      setHeader('连接异常');
    }
  };

  const stopASR = () => {
    // Already stopping; ignore repeated clicks.
    if (recordStopping.current) {
      return;
    }
    recordStopping.current = true;
    asrClient.stopRecord();
  };

  return (
    <div>
      <button id='start' onClick={startASR}>
        开始说话
      </button>
      <button id='stop' onClick={stopASR}>
        结束说话
      </button>
      <div id='text-header'>{header}</div>
      <div id='text-content'>{content}</div>
      <pre>{JSON.stringify(fullResponse, null, 2)}</pre>
    </div>
  );
};
- 双向流式服务可以在合成过程中持续发送文本。
- 没有更多文本需要发送时,需要结束 session 并关闭连接。
import { useRef, useState } from 'react';
import { BidirectionalTTS } from 'byted-ailab-speech-sdk';
import { getToken } from './helper';
/**
 * Demo component for bidirectional streaming TTS with `BidirectionalTTS`.
 *
 * `startTTS` fetches a temporary token, builds the WebSocket URL with
 * query-parameter auth and starts the client; received audio chunks are
 * accumulated in `downloadCache` so they can be downloaded afterwards. Further
 * text can be sent while the session is open, and the session/connection are
 * finished through the dedicated buttons.
 *
 * NOTE(review): `buildFullUrl` is used below but is not among the imports
 * shown for this snippet; the other snippets import it from './helps'.
 */
const BidirectionalTTSStory = () => {
  const [audioUrl, setAudioUrl] = useState('');
  const [text, setText] = useState('今天天气怎么样?');
  // Accumulates every received audio chunk for the download button.
  const downloadCache = useRef(new Uint8Array(0));
  const isServerError = useRef(false);
  // Lazy initializer so the client is created once instead of on every render.
  const [client] = useState(() => BidirectionalTTS());

  const startTTS = async () => {
    setAudioUrl('');
    downloadCache.current = new Uint8Array(0);
    // Cloned-voice service
    // const speaker = 'xxx'; // see the console
    // const resourceId = 'volc.megatts.default';
    // Standard-voice service
    const speaker = 'xxx';
    const resourceId = 'volc.service_type.10029';
    const appid = '';
    const accessKey = '';
    const auth: Record<string, string> = {};

    try {
      // Authenticate through query parameters.
      const token = await getToken(appid, accessKey);
      if (token) {
        auth.api_resource_id = resourceId;
        auth.api_app_key = appid;
        auth.api_access_key = `Jwt; ${token}`;
      }
      const fullUrl = buildFullUrl(`wss://openspeech.bytedance.com/api/v3/tts/bidirection`, auth);
      // Named differently from the `audioUrl` state to avoid shadowing it.
      const streamUrl = client.start({
        debug: true,
        url: fullUrl,
        config: {
          user: {
            uid: 'bidirectional tts sdk DEMO', // Defined by the business side.
          },
          namespace: 'BidirectionalTTS',
          req_params: {
            speaker,
            audio_params: {
              format: 'mp3',
              sample_rate: 24000,
            },
          },
        },
        onStart: () => {
          isServerError.current = false;
        },
        onMessage: (audioBuffer: ArrayBuffer) => {
          // Append the binary audio packet to the download cache.
          const merged = new Uint8Array(downloadCache.current.byteLength + audioBuffer.byteLength);
          merged.set(downloadCache.current, 0);
          merged.set(new Uint8Array(audioBuffer), downloadCache.current.byteLength);
          downloadCache.current = merged;
        },
        onSessionStarted: () => {
          // sendText must only be called after onSessionStarted;
          // the first sentence can be sent from here.
          client.sendText('测试文本。');
          // When there is no more text to send, the session can be finished right away.
          // client.finishSession();
        },
        onError: err => {
          console.warn('err:', err);
        },
        onClose: () => {
          // Automatically download the audio once finished.
          // downloadAudio();
        },
        onTTSSentenceStart(val) {
          console.info('onTTSSentenceStart:', val);
        },
        onTTSSentenceEnd(val) {
          console.info('onTTSSentenceEnd:', val);
        },
      });
      setAudioUrl(streamUrl);
    } catch (err) {
      // startTTS runs from a click handler; log instead of leaving the
      // rejection unhandled.
      console.warn('err:', err);
    }
  };

  // Sends additional text to the client. The original stored this function in
  // a ref that it then called itself through, which recursed without end.
  function sendText(val: string) {
    client.sendText(val);
  }

  // Downloads everything accumulated in downloadCache as a timestamped .mp3 file.
  function downloadAudio() {
    const blob = new Blob([downloadCache.current]);
    const blobUrl = URL.createObjectURL(blob);
    const aTag = document.createElement('a');
    aTag.download = `${Date.now()}.mp3`;
    aTag.href = blobUrl;
    aTag.click();
    URL.revokeObjectURL(blobUrl);
  }

  return (
    <div>
      <audio src={audioUrl} controls />
      <div style={{ display: 'flex', gap: 10, marginTop: 10 }}>
        <button onClick={() => startTTS()}>开始合成</button>
        <button onClick={() => client.finishSession()}>finishSession</button>
        <button onClick={() => client.finishConnection()}>finishConnection</button>
        <button onClick={() => downloadAudio()}>下载音频</button>
      </div>
      <div style={{ marginTop: 10 }}>
        <input value={text} onChange={e => setText(e.target.value)} />
        <button onClick={() => sendText(text)}>持续合成</button>
      </div>
    </div>
  );
};
import { useRef, useState } from 'react';
import { LabTTS } from 'byted-ailab-speech-sdk';
import { getToken } from './helper'; // 接入方自定义
import { buildFullUrl } from './helps';
/**
 * Demo component for one-shot TTS with `LabTTS`.
 *
 * `startTTS` fetches a temporary token, builds the WebSocket URL with
 * query-parameter auth and starts a synthesis request; received audio chunks
 * are accumulated in `downloadCache` so they can be downloaded afterwards. The
 * request config that was last sent is rendered above the player.
 *
 * NOTE(review): `uuid` is called below but is not among the imports shown for
 * this snippet; it must be provided by the integrator.
 */
const TTSStory = () => {
  const [audioUrl, setAudioUrl] = useState('');
  // Last request config, kept in state so the JSX can render it. The original
  // JSX read an undefined `config` identifier and threw on render.
  const [config, setConfig] = useState({});
  // Audio data can be cached here if needed.
  const downloadCache = useRef(new Uint8Array(0));
  const isServerError = useRef(false);

  const startTTS = async () => {
    setAudioUrl('');
    downloadCache.current = new Uint8Array(0);
    const text_type = 'plain';
    const submitText = '你好啊,我是字节跳动的人工智能实验室语音合成技术负责人';
    const speaker = 'BV001_streaming';
    const auth: Record<string, string> = {};
    const appid = '';
    const accessKey = '';
    const cluster = '';

    try {
      // Authenticate through query parameters.
      const token = await getToken(appid, accessKey);
      if (token) {
        auth.api_jwt = token;
      }
      const url = 'wss://openspeech.bytedance.com/api/v1/tts/ws_binary';
      const serviceUrl = buildFullUrl(url, auth);

      // `as const` keeps the literal types the inline object had, so hoisting
      // the config into a variable does not widen what is passed to start().
      const requestConfig = {
        app: {
          appid: appid,
          // NOTE(review): placeholder value kept from the original demo;
          // confirm what the service expects here.
          token: 'access_token',
          cluster: cluster,
        },
        user: {
          uid: 'byted sdk DEMO', // Defined by the business side.
        },
        audio: {
          encoding: 'mp3',
          rate: 24000,
          voice_type: speaker,
        },
        request: {
          reqid: uuid(),
          text: submitText,
          text_type,
          operation: 'submit',
        },
      } as const;
      setConfig(requestConfig);

      // Named differently from the `audioUrl` state to avoid shadowing it.
      const streamUrl = LabTTS().start({
        debug: true,
        url: serviceUrl,
        config: requestConfig,
        onStart: () => {
          isServerError.current = false;
        },
        onMessage: (audioBuffer: ArrayBuffer) => {
          // Append the binary audio packet to the download cache.
          const merged = new Uint8Array(downloadCache.current.byteLength + audioBuffer.byteLength);
          merged.set(downloadCache.current, 0);
          merged.set(new Uint8Array(audioBuffer), downloadCache.current.byteLength);
          downloadCache.current = merged;
        },
        onError: err => {
          console.warn('err:', err);
        },
        onClose: () => {
          // downloadAudio();
        },
      });
      setAudioUrl(streamUrl);
    } catch (err) {
      // startTTS runs from a click handler; log instead of leaving the
      // rejection unhandled.
      console.warn('err:', err);
    }
  };

  // Downloads everything accumulated in downloadCache as test.mp3.
  function downloadAudio() {
    const blob = new Blob([downloadCache.current]);
    const blobUrl = URL.createObjectURL(blob);
    const aTag = document.createElement('a');
    aTag.download = 'test.mp3';
    aTag.href = blobUrl;
    aTag.click();
    URL.revokeObjectURL(blobUrl);
  }

  return (
    <div>
      <pre>{JSON.stringify(config, null, 2)}</pre>
      <audio src={audioUrl} controls />
      <button onClick={() => startTTS()}>开始合成</button>
      <button onClick={() => downloadAudio()}>下载音频</button>
    </div>
  );
};