Skip to content

Commit

Permalink
✨ feat: Add openai tts
Browse files Browse the repository at this point in the history
  • Loading branch information
canisminor1990 committed Nov 7, 2023
1 parent a115c3b commit 3dfcf6b
Show file tree
Hide file tree
Showing 30 changed files with 301 additions and 36 deletions.
1 change: 0 additions & 1 deletion .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ __test__
# production
dist
es
lib
logs

# misc
Expand Down
1 change: 0 additions & 1 deletion .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ __snapshots__
# production
dist
es
lib
logs

# umi
Expand Down
4 changes: 2 additions & 2 deletions api/index.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import qs from 'query-string';

import cors from '../lib/cors';
import { fetchMicrosoftSpeech } from '../lib/fetchMicrosoftSpeech';
import { SsmlOptions } from '../lib/genSSML';
import { postMicrosoftSpeech } from '../lib/postMicrosoftSpeech';

export const config = {
runtime: 'edge',
Expand All @@ -11,7 +11,7 @@ export const config = {
export default async (req: Request) => {
const { text, ...options }: SsmlOptions & { text: string } = qs.parseUrl(req.url).query as any;

const res = await fetch(...postMicrosoftSpeech(text, options));
const res = await fetchMicrosoftSpeech(text, options);

const newResponse = new Response(res.body, res);

Expand Down
22 changes: 10 additions & 12 deletions lib/postMicrosoftSpeech.ts → lib/fetchMicrosoftSpeech.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { type SsmlOptions, genSSML } from './genSSML';
const API =
'https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak';

export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, any] => {
export const fetchMicrosoftSpeech = async (text: string, options: SsmlOptions) => {
const data = JSON.stringify({
offsetInPlainText: 0,
properties: {
Expand All @@ -15,7 +15,7 @@ export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, a
ttsAudioFormat: 'audio-24khz-160kbitrate-mono-mp3',
});

const DEFAULT_HEADERS = {
const DEFAULT_HEADERS = new Headers({
'accept': '*/*',
'accept-language': 'zh-CN,zh;q=0.9',
'authority': 'southeastasia.api.speech.microsoft.com',
Expand All @@ -30,15 +30,13 @@ export const postMicrosoftSpeech = (text: string, options: SsmlOptions): [any, a
'sec-fetch-site': 'same-site',
'user-agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
};
});

return [
API,
{
body: data,
headers: DEFAULT_HEADERS,
method: 'POST',
responseType: 'arraybuffer',
},
];
return await fetch(API, {
body: data,
headers: DEFAULT_HEADERS,
method: 'POST',
// @ts-ignore
responseType: 'arraybuffer',
});
};
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"docs:dev": "dumi dev",
"doctor": "father doctor",
"postinstall": "npm run setup",
"lint": "eslint \"{src,tests}/**/*.{js,jsx,ts,tsx}\" --fix",
"lint": "eslint \"{src,api,lib}/**/*.{js,jsx,ts,tsx}\" --fix",
"lint:md": "remark . --quiet --frail --output",
"prepare": "husky install",
"prepublishOnly": "npm run build",
Expand Down Expand Up @@ -65,6 +65,7 @@
"query-string": "^8",
"ssml-document": "^1",
"swr": "^2",
"url-join": "^5.0.0",
"uuid": "^9"
},
"devDependencies": {
Expand Down
10 changes: 10 additions & 0 deletions src/const/api.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import urlJoin from 'url-join';

// Environment-driven defaults for the speech services.
// `||` (not `??`) is used on purpose so that an env var set to an empty string
// is treated the same as an unset one and the fallback applies.

/** Proxy endpoint for the Microsoft Speech service; empty string when the env var is unset. */
export const MICROSOFT_SPEECH_PROXY_URL = process.env.MICROSOFT_SPEECH_PROXY_URL || '';

/** Azure Speech subscription key read from the environment; empty string when unset. */
export const AZURE_SPEECH_KEY = process.env.AZURE_SPEECH_KEY || '';

/** Azure Speech region read from the environment; empty string when unset. */
export const AZURE_SPEECH_REGION = process.env.AZURE_SPEECH_REGION || '';

/** OpenAI API key read from the environment; empty string when unset. */
export const OPENAI_API_KEY = process.env.OPENAI_API_KEY || '';

/** Base URL for OpenAI requests; defaults to the public OpenAI v1 endpoint. */
export const OPENAI_PROXY_URL = process.env.OPENAI_PROXY_URL || 'https://api.openai.com/v1';

/**
 * Builds the text-to-speech endpoint URL.
 *
 * @param api - Optional base URL; when omitted or empty, `OPENAI_PROXY_URL` is used.
 * @returns The base URL joined with `audio/speech`.
 */
export const OPENAI_TTS_URL = (api?: string) => urlJoin(api || OPENAI_PROXY_URL, 'audio/speech');

/**
 * Builds the speech-to-text (transcription) endpoint URL.
 *
 * @param api - Optional base URL; when omitted or empty, `OPENAI_PROXY_URL` is used.
 *   Optional to match `OPENAI_TTS_URL`, since the body already handles a missing value.
 * @returns The base URL joined with `audio/transcriptions`.
 */
export const OPENAI_STT_URL = (api?: string) =>
  urlJoin(api || OPENAI_PROXY_URL, 'audio/transcriptions');
1 change: 1 addition & 0 deletions src/data/openaiVoiceList.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
7 changes: 7 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
export { fetchAzureSpeech } from './services/fetchAzureSpeech';
export { fetchEdgeSpeech } from './services/fetchEdgeSpeech';
export { fetchMicrosoftSpeech } from './services/fetchMicrosoftSpeech';
export { fetchOpenaiSTT } from './services/fetchOpenaiSTT';
export { fetchOpenaiTTS } from './services/fetchOpenaiTTS';
export { useAzureSpeech } from './useAzureSpeech';
export { useEdgeSpeech } from './useEdgeSpeech';
export { useMicrosoftSpeech } from './useMicrosoftSpeech';
export { useOpenaiTTS } from './useOpenaiTTS';
export { usePersistedSpeechRecognition } from './useSpeechRecognition/usePersistedSpeechRecognition';
export { useSpeechRecognition } from './useSpeechRecognition/useSpeechRecognition';
export { useSpeechSynthes } from './useSpeechSynthes';
export {
getAzureVoiceList,
getEdgeVoiceList,
getOpenaiVoiceList,
getSpeechSynthesVoiceList,
} from './utils/getVoiceList';
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import {
SpeechSynthesizer,
} from 'microsoft-cognitiveservices-speech-sdk';

import { AZURE_SPEECH_KEY, AZURE_SPEECH_REGION } from '@/const/api';

import { type SsmlOptions, genSSML } from '../utils/genSSML';

export interface AzureSpeechOptions extends SsmlOptions {
Expand All @@ -18,12 +20,12 @@ export interface AzureSpeechOptions extends SsmlOptions {
}

// 纯文本生成语音
export const postAzureSpeech = async (
export const fetchAzureSpeech = async (
text: string,
{ api, ...options }: AzureSpeechOptions,
): Promise<AudioBufferSourceNode> => {
const key = api.key || process.env.AZURE_SPEECH_KEY || '';
const region = api.key || process.env.AZURE_SPEECH_REGION || '';
const key = api.key || AZURE_SPEECH_KEY;
const region = api.key || AZURE_SPEECH_REGION;
const speechConfig = SpeechConfig.fromSubscription(key, region);
speechConfig.setProperty(PropertyId.SpeechServiceResponse_RequestSentenceBoundary, 'true');
speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Webm24Khz16BitMonoOpus;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { getHeadersAndData } from '../utils/getHeadersAndData';
const API = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1';
const TOKEN = '6A5AA1D4EAFF4E9FB37E23D68491D6F4';

export const postEdgeSpeech = async (text: string, options: SsmlOptions) => {
export const fetchEdgeSpeech = async (text: string, options: SsmlOptions) => {
const connectId = uuidv4().replaceAll('-', '');
const date = new Date().toString();
const audioContext = new AudioContext();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
import qs from 'query-string';

import { MICROSOFT_SPEECH_PROXY_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';

export interface MicrosoftSpeechOptions extends SsmlOptions {
api?: string;
}

export const postMicrosoftSpeech = async (
export const fetchMicrosoftSpeech = async (
text: string,
{ api, ...options }: MicrosoftSpeechOptions,
): Promise<AudioBufferSourceNode> => {
const response: Response = await fetch(
qs.stringifyUrl({
query: { text, ...options },
url: api || process.env.MICROSOFT_SPEECH_PROXY_URL || '',
url: api || MICROSOFT_SPEECH_PROXY_URL,
}),
);

if (!response.ok) {
throw new Error('Network response was not ok');
}

const audioData = await response.arrayBuffer();
const audioContext = new AudioContext();
const audioBufferSource = audioContext.createBufferSource();
Expand Down
39 changes: 39 additions & 0 deletions src/services/fetchOpenaiSTT.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { v4 as uuidv4 } from 'uuid';

import { OPENAI_API_KEY, OPENAI_STT_URL } from '@/const/api';

/**
 * Options accepted by `fetchOpenaiSTT`.
 *
 * NOTE(review): the name says "Tts" although this file implements speech-to-text, and
 * `fetchOpenaiTTS.ts` exports an interface with the same name — confirm whether a rename
 * (e.g. to an "Stt" name) is intended.
 */
export interface OpenaiTtsOptions {
// Connection settings for the OpenAI endpoint.
api: {
// API key; `fetchOpenaiSTT` falls back to `OPENAI_API_KEY` when this is empty.
key: string;
// Base URL passed to `OPENAI_STT_URL`, which falls back to `OPENAI_PROXY_URL` when empty.
proxy: string;
};
// Transcription model; `fetchOpenaiSTT` defaults it to 'whisper-1'.
model?: 'whisper-1';
}

/**
 * Transcribes recorded speech to text by uploading it to the OpenAI transcription endpoint.
 *
 * @param speech - Audio blob to upload; it is sent as a file named `<random uuid>.webm`.
 * @param options - `api.key` falls back to `OPENAI_API_KEY` when empty, `api.proxy` is
 *   handed to `OPENAI_STT_URL` to build the endpoint, and `model` defaults to `'whisper-1'`.
 * @returns The `text` field of the JSON response.
 * @throws Error when the response status is not ok.
 */
export const fetchOpenaiSTT = async (
  speech: Blob,
  { api, model = 'whisper-1' }: OpenaiTtsOptions,
): Promise<string> => {
  const key = api.key || OPENAI_API_KEY;
  const url = OPENAI_STT_URL(api.proxy);

  // Do NOT set Content-Type here. With a FormData body, fetch generates
  // `multipart/form-data; boundary=...` itself; a hand-written header has no boundary
  // parameter, so the server cannot split the parts and rejects the upload.
  const headers = new Headers({
    Authorization: `Bearer ${key}`,
  });

  const body = new FormData();
  body.append('file', speech, `${uuidv4()}.webm`);
  body.append('model', model);

  const response: Response = await fetch(url, { body, headers, method: 'POST' });

  if (!response.ok) {
    throw new Error('Network response was not ok');
  }

  const json = await response.json();

  return json?.text;
};
47 changes: 47 additions & 0 deletions src/services/fetchOpenaiTTS.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import { OPENAI_API_KEY, OPENAI_TTS_URL } from '@/const/api';

import { type SsmlOptions } from '../utils/genSSML';

/** Voice identifiers accepted as `name` in the TTS options; `fetchOpenaiTTS` sends the value as `voice`. */
export type OpenaiVoice = 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';

/**
 * Options accepted by `fetchOpenaiTTS`.
 *
 * Extends `SsmlOptions`; of the inherited/declared fields, `fetchOpenaiTTS` only reads `name`.
 */
export interface OpenaiTtsOptions extends SsmlOptions {
// Connection settings for the OpenAI endpoint.
api: {
// API key; `fetchOpenaiTTS` falls back to `OPENAI_API_KEY` when this is empty.
key: string;
// Base URL passed to `OPENAI_TTS_URL`, which falls back to `OPENAI_PROXY_URL` when empty.
proxy: string;
};
// Speech model; `fetchOpenaiTTS` defaults it to 'tts-1'.
model?: 'tts-1' | 'tts-1-hd';
// Voice to synthesize with; sent as the `voice` field of the request body.
name: OpenaiVoice;
}

/**
 * Synthesizes speech from plain text via the OpenAI TTS endpoint.
 *
 * @param text - Text to speak; sent as the `input` field.
 * @param options - `api.key` falls back to `OPENAI_API_KEY` when empty, `api.proxy` is
 *   handed to `OPENAI_TTS_URL` to build the endpoint, `model` defaults to `'tts-1'`, and
 *   `name` is sent as the `voice` field.
 * @returns A buffer source holding the decoded audio, already connected to a new
 *   `AudioContext`'s destination. This function does not start playback.
 * @throws Error when the response status is not ok.
 */
export const fetchOpenaiTTS = async (
  text: string,
  { api, model = 'tts-1', ...options }: OpenaiTtsOptions,
): Promise<AudioBufferSourceNode> => {
  const apiKey = api.key || OPENAI_API_KEY;
  const endpoint = OPENAI_TTS_URL(api.proxy);

  const response: Response = await fetch(endpoint, {
    body: JSON.stringify({ input: text, model, voice: options.name }),
    headers: new Headers({
      'Authorization': `Bearer ${apiKey}`,
      'Content-Type': 'application/json',
    }),
    method: 'POST',
  });

  if (!response.ok) throw new Error('Network response was not ok');

  // Read the payload before creating the AudioContext, as the original did, so a failed
  // body read never leaves a context behind.
  const encodedAudio = await response.arrayBuffer();
  const context = new AudioContext();
  const sourceNode = context.createBufferSource();
  sourceNode.buffer = await context.decodeAudioData(encodedAudio);
  sourceNode.connect(context.destination);

  return sourceNode;
};
File renamed without changes.
2 changes: 1 addition & 1 deletion src/useAzureSpeech/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ title: useAzureSpeech

- ENV: `AZURE_SPEECH_KEY` `AZURE_SPEECH_REGION`

<code src="./demos/AzureSpeech.tsx" nopadding></code>
<code src="./demos/index.tsx" nopadding></code>
4 changes: 2 additions & 2 deletions src/useAzureSpeech/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { useState } from 'react';
import useSWR from 'swr';

import { AzureSpeechOptions, postAzureSpeech } from '../services/postAzureSpeech';
import { AzureSpeechOptions, fetchAzureSpeech } from '../services/fetchAzureSpeech';

export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions) => {
const [data, setDate] = useState<AudioBufferSourceNode>();
Expand All @@ -11,7 +11,7 @@ export const useAzureSpeech = (defaultText: string, options: AzureSpeechOptions)

const { isLoading } = useSWR(
shouldFetch ? [options.name, text].join('-') : null,
() => postAzureSpeech(text, options),
() => fetchAzureSpeech(text, options),
{
onError: () => setShouldFetch(false),
onSuccess: (audioBufferSource) => {
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/useEdgeSpeech/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ title: useEdgeSpeech

## hooks

<code src="./demos/EdgeSpeech.tsx" nopadding></code>
<code src="./demos/index.tsx" nopadding></code>
4 changes: 2 additions & 2 deletions src/useEdgeSpeech/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { useState } from 'react';
import useSWR from 'swr';

import { postEdgeSpeech } from '../services/postEdgeSpeech';
import { fetchEdgeSpeech } from '../services/fetchEdgeSpeech';
import { SsmlOptions } from '../utils/genSSML';

export const useEdgeSpeech = (defaultText: string, options: SsmlOptions) => {
Expand All @@ -12,7 +12,7 @@ export const useEdgeSpeech = (defaultText: string, options: SsmlOptions) => {

const { isLoading } = useSWR(
shouldFetch ? [options.name, text].join('-') : null,
() => postEdgeSpeech(text, options),
() => fetchEdgeSpeech(text, options),
{
onError: () => setShouldFetch(false),
onSuccess: (audioBufferSource) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ import { Flexbox } from 'react-layout-kit';

const defaultText = '这是一段使用 Microsoft Speech 的语音演示';

const API = 'https://lobe-tts-preview.vercel.app/api/index';
export default () => {
const store = useCreateStore();
const options: any = useControls(
{
api: {
label: 'MICROSOFT_SPEECH_PROXY_URL',
value: API,
value: '',
},
name: {
options: getEdgeVoiceList(),
Expand Down
2 changes: 1 addition & 1 deletion src/useMicrosoftSpeech/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ title: useMicrosoftSpeech

> ex: <https://lobe-tts-preview.vercel.app/api/index>
<code src="./demos/MicrosoftSpeech.tsx" nopadding></code>
<code src="./demos/index.tsx" nopadding></code>
7 changes: 5 additions & 2 deletions src/useMicrosoftSpeech/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { useState } from 'react';
import useSWR from 'swr';

import { type MicrosoftSpeechOptions, postMicrosoftSpeech } from '../services/postMicrosoftSpeech';
import {
type MicrosoftSpeechOptions,
fetchMicrosoftSpeech,
} from '../services/fetchMicrosoftSpeech';

export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeechOptions) => {
const [data, setDate] = useState<AudioBufferSourceNode>();
Expand All @@ -11,7 +14,7 @@ export const useMicrosoftSpeech = (defaultText: string, options: MicrosoftSpeech

const { isLoading } = useSWR(
shouldFetch ? [options.name, text].join('-') : null,
() => postMicrosoftSpeech(text, options),
() => fetchMicrosoftSpeech(text, options),
{
onError: () => setShouldFetch(false),
onSuccess: (audioBufferSource) => {
Expand Down
Loading

0 comments on commit 3dfcf6b

Please sign in to comment.