Skip to content

Commit 1920904

Browse files
committed
OpenAI audio transcription ready
1 parent 5212d6c commit 1920904

5 files changed

Lines changed: 79 additions & 37 deletions

File tree

src/main/java/com/github/jlangch/venice/util/openai/ChatCompletionTraditionalRequest.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ public static ChatCompletionTraditionalRequest of (
7777
client,
7878
model,
7979
functionDispatcher == null
80-
? new DefaultFunctionDispatcher()
80+
? new DefaultFunctionDispatcher()
8181
: functionDispatcher);
8282
}
8383

src/main/java/com/github/jlangch/venice/util/openai/TemporaryFile.java

Lines changed: 27 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,8 @@
3333

3434
public class TemporaryFile implements AutoCloseable {
3535

36-
public TemporaryFile(final File tmpDir, final File file) {
36+
public TemporaryFile(final Mode mode, final File tmpDir, final File file) {
37+
this.mode = mode;
3738
this.tmpDir = tmpDir;
3839
this.file = file;
3940
}
@@ -42,6 +43,8 @@ public static TemporaryFile of(final byte[] data, final String fileName) {
4243
Objects.requireNonNull(data);
4344
Objects.requireNonNull(fileName);
4445

46+
final Mode mode = Mode.DataMapping;
47+
4548
final String normalizedFileName = new File(fileName).getName();
4649

4750
File tmpDir = null;
@@ -51,10 +54,10 @@ public static TemporaryFile of(final byte[] data, final String fileName) {
5154
tmpDir = Files.createTempDirectory("openai-upload").toFile();
5255
file = new File(tmpDir, normalizedFileName);
5356
FileUtil.save(data, file, true);
54-
return new TemporaryFile(tmpDir, file);
57+
return new TemporaryFile(mode, tmpDir, file);
5558
}
5659
catch (Exception ex) {
57-
cleanup(tmpDir, file);
60+
cleanup(mode, tmpDir, file);
5861
throw new VncException("Failed to create TemporaryFile '" + normalizedFileName + "'");
5962
}
6063
}
@@ -63,21 +66,28 @@ public static TemporaryFile of(final InputStream is, final String fileName) {
6366
Objects.requireNonNull(is);
6467
Objects.requireNonNull(fileName);
6568

69+
final Mode mode = Mode.DataMapping;
70+
6671
File tmpDir = null;
6772
File file = null;
6873

6974
try {
7075
tmpDir = Files.createTempDirectory("openai-upload").toFile();
7176
file = new File(tmpDir, fileName);
7277
FileUtil.save(is, file, true);
73-
return new TemporaryFile(tmpDir, file);
78+
return new TemporaryFile(mode, tmpDir, file);
7479
}
7580
catch (Exception ex) {
76-
cleanup(tmpDir, file);
81+
cleanup(mode, tmpDir, file);
7782
throw new VncException("Failed to create TemporaryFile '" + fileName + "'");
7883
}
7984
}
8085

86+
public static TemporaryFile of(final File file) {
87+
Objects.requireNonNull(file);
88+
89+
return new TemporaryFile(Mode.Passthrough, null, file);
90+
}
8191

8292
public File getFile() {
8393
return file;
@@ -89,24 +99,29 @@ public Path getPath() {
8999

90100
@Override
91101
public void close() throws Exception {
92-
cleanup(tmpDir, file);
102+
cleanup(mode, tmpDir, file);
93103
}
94104

95105

96-
private static void cleanup(final File tmpDir, final File file) {
106+
private static void cleanup(final Mode mode, final File tmpDir, final File file) {
97107
try {
98-
if (file.exists()) {
99-
file.delete();
100-
}
101-
if (tmpDir.exists()) {
102-
tmpDir.delete();
108+
if (mode == Mode.DataMapping) {
109+
if (file.exists()) {
110+
file.delete();
111+
}
112+
if (tmpDir.exists()) {
113+
tmpDir.delete();
114+
}
103115
}
104116
}
105117
catch (Exception ignore) {
106118
}
107119
}
108120

121+
private static enum Mode { Passthrough, DataMapping }
122+
109123

124+
private final Mode mode;
110125
private final File tmpDir;
111126
private final File file;
112127
}

src/main/resources/com/github/jlangch/venice/openai-java.venice

Lines changed: 18 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1531,24 +1531,27 @@
15311531
"""
15321532
(do
15331533
(load-module :openai-java)
1534-
(let [client (openai-java/client)
1535-
is (openai-java/create-speech
1534+
(let [demo-file (io/file "./audio.wav")
1535+
client (openai-java/client)
1536+
is (openai-java/create-speech
15361537
client
15371538
:GPT_4O_MINI_TTS
15381539
"Today is a wonderful day to build something people love!"
1539-
:format :MP3
1540+
:format :WAV
15401541
:voice "cedar")]
15411542
(->> (io/slurp-stream is :binary true)
1542-
(io/spit (io/file "./audio.mp3")))
1543-
(println "Audio saved to ./audio.mp3")
1543+
(io/spit demo-file))
1544+
(println "Audio saved to" demo-file)
15441545

1545-
(openai-java/transcribe client
1546-
:GTP_4O_TRANSCRIBE
1547-
(io/file "./audio.mp3")
1546+
(println "Transcribing" demo-file)
1547+
(let [response (openai-java/transcribe client
1548+
:GPT_4O_TRANSCRIBE
1549+
demo-file
15481550
:language "en"
15491551
:temperature 0.1
1550-
:response-format :TEXT)
1551-
))
1552+
:response-format :TEXT)]
1553+
(println "Transcription:")
1554+
(println (openai-java/transcription-text response)))))
15521555
""")
15531556
:see-also '(
15541557
"openai-java/transcription-text"
@@ -1559,20 +1562,20 @@
15591562

15601563
{ :pre [(instance-of? :OpenAIClient client)
15611564
(keyword? model)
1562-
(io/file? file)] }
1565+
(or (io/file? file) (bytebuf? file) (io/in-stream? file))] }
15631566

15641567
(let [opts (apply hash-map options)
15651568
chunking-strategy (:chunking-strategy opts)
15661569
language (:language opts)
15671570
prompt (:prompt opts)
15681571
response-format (:response-format opts)
15691572
temperature (:temperature opts)]
1570-
(let [params (. :SpeechCreateParams :builder)]
1571-
(. params :model (. :SpeechModel model))
1573+
(let [params (. :TranscriptionCreateParams :builder)]
1574+
(. params :model (. :AudioModel model))
15721575
(. params :file (io/->path file))
15731576
(when language (. params :language language))
15741577
(when prompt (. params :prompt prompt))
1575-
(when response-format (. params :responseFormat (. :SpeechCreateParams$ResponseFormat response-format)))
1578+
(when response-format (. params :responseFormat (. :AudioResponseFormat response-format)))
15761579
(when temperature (. params :temperature temperature))
15771580

15781581
(-> (. client :audio)
@@ -1592,7 +1595,7 @@
15921595
transcription-text [response]
15931596

15941597
{ :pre [(instance-of? :TranscriptionCreateResponse response) ] }
1595-
(if-let [transcription (. response :asTranscription)]
1598+
(if-let [transcription (java-unwrap-optional (. response :transcription))]
15961599
(. transcription :text)
15971600
nil))
15981601

src/test/java/com/github/jlangch/venice/util/openai/CreateSpeech.java

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,9 @@
2222
package com.github.jlangch.venice.util.openai;
2323

2424

25+
import java.io.File;
2526
import java.io.InputStream;
27+
import java.nio.file.Files;
2628

2729
import com.github.jlangch.venice.impl.util.io.IOStreamUtil;
2830
import com.openai.client.OpenAIClient;
@@ -33,10 +35,14 @@
3335
import com.openai.models.audio.speech.SpeechModel;
3436

3537

36-
public final class CreateSpeech {
38+
public class CreateSpeech {
39+
3740
private CreateSpeech() {}
3841

3942
public static void main(String[] args) {
43+
File veniceHomeDir = new File(System.getProperty("user.home"), "Desktop/venice");
44+
File audioFile = new File(veniceHomeDir, "audio.wav");
45+
4046
OpenAIClient client = OpenAIOkHttpClient.fromEnv();
4147

4248
SpeechCreateParams params = SpeechCreateParams.builder()
@@ -50,9 +56,12 @@ public static void main(String[] args) {
5056
HttpResponse speech = client.audio().speech().create(params);
5157
try (final InputStream is = speech.body()) {
5258
final byte[] audio = IOStreamUtil.copyIStoByteArray(is);
59+
60+
Files.write(audioFile.toPath(), audio);
61+
System.out.println("Created audio file " + audioFile);
5362
}
5463
catch(Exception ex) {
55-
64+
ex.printStackTrace();
5665
}
5766
}
5867
}

src/test/java/com/github/jlangch/venice/util/openai/CreateTranscription.java

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,10 @@
2222
package com.github.jlangch.venice.util.openai;
2323

2424

25+
import java.io.File;
26+
import java.io.FileInputStream;
2527
import java.io.InputStream;
28+
import java.nio.file.Files;
2629

2730
import com.github.jlangch.venice.impl.util.io.IOStreamUtil;
2831
import com.openai.client.OpenAIClient;
@@ -38,12 +41,15 @@
3841
import com.openai.models.audio.transcriptions.TranscriptionCreateResponse;
3942

4043

41-
public final class CreateTranscription {
44+
public class CreateTranscription {
4245
private CreateTranscription() {}
4346

4447
// See https://developers.openai.com/api/reference/java/resources/audio/subresources/transcriptions/methods/create
4548

4649
public static void main(String[] args) throws Exception {
50+
File veniceHomeDir = new File(System.getProperty("user.home"), "Desktop/venice");
51+
File audioFile = new File(veniceHomeDir, "audio.wav");
52+
4753
OpenAIClient client = OpenAIOkHttpClient.fromEnv();
4854

4955
SpeechCreateParams paramsSpeech = SpeechCreateParams.builder()
@@ -58,26 +64,35 @@ public static void main(String[] args) throws Exception {
5864
byte[] audio;
5965
try (final InputStream is = speech.body()) {
6066
audio = IOStreamUtil.copyIStoByteArray(is);
67+
68+
Files.write(audioFile.toPath(), audio);
69+
System.out.println("Created audio file " + audioFile);
6170
}
6271

72+
System.out.println("Transcribing audio file " + audioFile);
73+
6374
TranscriptionCreateParams paramsTranscribe = TranscriptionCreateParams.builder()
64-
.file(audio)
75+
.file(audioFile.toPath())
76+
.language("en")
77+
.temperature(0.2)
6578
.model(AudioModel.GPT_4O_TRANSCRIBE)
6679
.responseFormat(AudioResponseFormat.TEXT)
6780
.build();
6881

6982
TranscriptionCreateResponse response = client.audio().transcriptions().create(paramsTranscribe);
7083

7184

72-
Transcription transcription = response.asTranscription();
85+
Transcription transcription = response.transcription().orElse(null);
7386
if (transcription == null) {
7487
System.out.println("<no transcription>");
7588
}
7689
else {
77-
Transcription.Usage usage = transcription.usage().orElseGet(null);
78-
Transcription.Usage.Duration duration = usage.asDuration();
79-
Transcription.Usage.Tokens tokens = usage.asTokens();
80-
90+
Transcription.Usage usage = transcription.usage().orElse(null);
91+
if (usage != null) {
92+
Transcription.Usage.Duration duration = usage.duration().orElse(null);
93+
Transcription.Usage.Tokens tokens = usage.tokens().orElse(null);
94+
}
95+
System.out.println("\nTranscription:");
8196
System.out.println(transcription.text());
8297
}
8398
}

0 commit comments

Comments
 (0)