@@ -4,34 +4,9 @@ import RateLimiter from '@/common/utils/RateLimiter';
44import StrUtil from '@/common/utils/str-util' ;
55import { Cancelable } from '@/common/interfaces' ;
66import OpenAI from 'openai' ;
7-
8- import { z } from 'zod' ;
97import dpLog from '@/backend/ioc/logger' ;
10-
11- const WhisperResponseVerifySchema = z . object ( {
12- language : z . string ( ) ,
13- duration : z . union ( [ z . number ( ) , z . string ( ) ] ) ,
14- text : z . string ( ) ,
15- segments : z . array ( z . object ( {
16- seek : z . number ( ) ,
17- start : z . number ( ) ,
18- end : z . number ( ) ,
19- text : z . string ( )
20- } ) )
21- } ) ;
22-
23- export interface WhisperResponse {
24- language : string ;
25- duration : number ;
26- text : string ;
27- offset : number ;
28- segments : {
29- seek : number ;
30- start : number ;
31- end : number ;
32- text : string ;
33- } [ ] ;
34- }
8+ import { WhisperResponseFormatError } from '@/backend/errors/errors' ;
9+ import { WhisperResponse , WhisperResponseVerifySchema } from '@/common/types/video-info' ;
3510
3611class OpenAiWhisperRequest implements Cancelable {
3712 private readonly file : string ;
@@ -55,33 +30,41 @@ class OpenAiWhisperRequest implements Cancelable {
/**
 * Runs one Whisper transcription for this request and returns it as a
 * {@link WhisperResponse}.
 *
 * Calls `this.cancel()` first, then waits on the `'whisper'` rate limiter
 * before the transcription is requested.
 *
 * @returns The language, duration, full text and per-segment timing/text
 *          taken from the transcription.
 * @throws WhisperResponseFormatError when the transcription does not pass
 *         `WhisperResponseVerifySchema`.
 */
public async invoke(): Promise<WhisperResponse> {
    this.cancel();
    await RateLimiter.wait('whisper');

    const transcription = await this.doTranscription();

    // Use zod to check that the transcription has the expected verbose shape.
    const validation = WhisperResponseVerifySchema.safeParse(transcription);
    if (!validation.success) {
        // Log the validation issues so it is clear why the response did not match.
        dpLog.error('Invalid response from OpenAI', validation.error.errors);
        throw new WhisperResponseFormatError();
    }

    const { language, duration, text } = transcription;
    // A missing segment list is reported as an empty array.
    const segments = (transcription.segments ?? []).map(({ seek, start, end, text: segmentText }) => ({
        seek,
        start,
        end,
        text: segmentText
    }));

    return { language, duration, text, segments };
}
8454
/**
 * Sends `this.file` to the `whisper-1` transcription endpoint, asking for
 * `verbose_json` output with segment-level timestamps.
 *
 * A new AbortController is stored on the instance before the call and its
 * signal is passed with the request. Any error raised while building or
 * sending the request is logged through `dpLog` and then rethrown unchanged.
 */
private async doTranscription() {
    const controller = new AbortController();
    this.abortController = controller;
    const { signal } = controller;

    try {
        const result = await this.openAi.audio.transcriptions.create(
            {
                file: fs.createReadStream(this.file),
                model: 'whisper-1',
                response_format: 'verbose_json',
                timestamp_granularities: ['segment']
            },
            { signal }
        );
        return result;
    } catch (failure) {
        dpLog.error(failure);
        throw failure;
    }
}
8669
8770 public cancel ( ) : void {
0 commit comments