|
696 | 696 |
|
697 | 697 | | [![width: 15%]] | [![width: 85%]] | |
698 | 698 | | client | An OpenAI client | |
699 | | - | data | A data `bytebuf` | |
| 699 | + | data | The data. A `bytebuf` or a Java `:InputStream`| |
700 | 700 | | filename | A filename. E.g.: "planning.pdf" | |
701 | 701 | | purpose | A purpose: `:USER_DATA`, `:VISION`, `:BATCH`, \ |
702 | 702 | `:ASSISTANTS`, `:FINE_TUNE` | |
|
729 | 729 |
|
730 | 730 | ([client data filename purpose expires-after-seconds] |
731 | 731 | { :pre [(instance-of? :OpenAIClient client) |
732 | | - (bytebuf? data) |
| 732 | + (or (bytebuf? data) (io/in-stream? data)) |
733 | 733 | (string? filename) |
734 | 734 | (keyword? purpose) |
735 | 735 | (long? expires-after-seconds)] } |
|
1482 | 1482 |
|
1483 | 1483 | (defn |
1484 | 1484 | ^{ :arglists '( |
1485 | | - "(transcribe client model file & options)" ) |
| 1485 | + "(transcribe client model audio & options)" ) |
1486 | 1486 | :doc """ |
1487 | | - Transcribes audio from the input file. |
| 1487 | + Transcribes audio data. The audio data can be supplied as an |
| 1488 | + `io/file`, a `bytebuf`, or a Java `:InputStream`. |
1488 | 1489 |
|
1489 | 1490 | Returns a transcription object in json, diarized_json, or verbose_json |
1490 | 1491 | format, or a stream of transcript events. |
|
1496 | 1497 | | :model m | The model to use for transcription. One of \ |
1497 | 1498 | `:GPT_4O_TRANSCRIBE`, `:GPT_4O_MINI_TRANSCRIBE`, \ |
1498 | 1499 | `:WHISPER_1`, or `:GPT_4O_TRANSCRIBE_DIARIZE` | |
1499 | | - | :file f | The audio file object (not file name) to transcribe, in one \ |
| 1500 | + | :audio a | The audio data to transcribe, in one \ |
1500 | 1501 | of these formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, \ |
1501 | | - or webm. | |
| 1502 | + or webm.¶\ |
| 1503 | + The audio data can be supplied as an `io/file`, a `bytebuf`, \ |
| 1504 | + or a Java `:InputStream`. | |
1502 | 1505 |
|
1503 | 1506 |
|
1504 | 1507 | ¶**Parameter «options»** |
1505 | 1508 |
|
1506 | 1509 | | [![width: 15%]] | [![width: 85%]] | |
| 1510 | + | :filename f | If the passed audio is a `bytebuf` or a Java `:InputStream`, \ |
| 1511 | + an explicit file name must be passed, e.g. 'audio.wav'. \ |
| 1512 | + The filename's extension must match the audio type \ |
| 1513 | + (`wav`, `mp3`, ...). | |
1507 | 1514 | | :language l | The language of the input audio. Supplying the input \ |
1508 | 1515 | language in ISO-639-1 (e.g. "en", "de", "fr", "it", ...) \ |
1509 | 1516 | format will improve accuracy and latency.| |
|
1552 | 1559 | :response-format :TEXT)] |
1553 | 1560 | (println "Transcription:") |
1554 | 1561 | (println (openai-java/transcription-text response))))) |
| 1562 | + """, |
| 1563 | + """ |
| 1564 | + (do |
| 1565 | + (load-module :openai-java) |
| 1566 | + (let [client (openai-java/client) |
| 1567 | + is (openai-java/create-speech |
| 1568 | + client |
| 1569 | + :GPT_4O_MINI_TTS |
| 1570 | + "Today is a wonderful day to build something people love!" |
| 1571 | + :format :WAV |
| 1572 | + :voice "cedar")] |
| 1573 | + |
| 1574 | + (println "Transcribing..." ) |
| 1575 | + (let [response (openai-java/transcribe client |
| 1576 | + :GPT_4O_TRANSCRIBE |
| 1577 | + is |
| 1578 | + :filename "audio.wav" |
| 1579 | + :language "en" |
| 1580 | + :temperature 0.1 |
| 1581 | + :response-format :TEXT)] |
| 1582 | + (println "Transcription:") |
| 1583 | + (println (openai-java/transcription-text response))))) |
1555 | 1584 | """) |
1556 | 1585 | :see-also '( |
1557 | 1586 | "openai-java/transcription-text" |
1558 | 1587 | "openai-java/usage" |
1559 | 1588 | "openai-java/client") } |
1560 | 1589 |
|
1561 | | - transcribe [client model file & options] |
| 1590 | + transcribe [client model audio & options] |
1562 | 1591 |
|
1563 | 1592 | { :pre [(instance-of? :OpenAIClient client) |
1564 | 1593 | (keyword? model) |
1565 | | - (or (io/file? file) (bytebuf? file) (io/in-stream? file))] } |
| 1594 | + (or (io/file? audio) (bytebuf? audio) (io/in-stream? audio))] } |
1566 | 1595 |
|
1567 | 1596 | (let [opts (apply hash-map options) |
| 1597 | + filename (:filename opts) |
1568 | 1598 | chunking-strategy (:chunking-strategy opts) |
1569 | 1599 | language (:language opts) |
1570 | 1600 | prompt (:prompt opts) |
1571 | 1601 | response-format (:response-format opts) |
1572 | 1602 | temperature (:temperature opts)] |
1573 | | - (let [params (. :TranscriptionCreateParams :builder)] |
1574 | | - (. params :model (. :AudioModel model)) |
1575 | | - (. params :file (io/->path file)) |
1576 | | - (when language (. params :language language)) |
1577 | | - (when prompt (. params :prompt prompt)) |
1578 | | - (when response-format (. params :responseFormat (. :AudioResponseFormat response-format))) |
1579 | | - (when temperature (. params :temperature temperature)) |
1580 | | - (-> (. client :audio) |
1581 | | - (. :transcriptions) |
1582 | | - (. :create (. params :build)))))) |
| 1603 | + (when (and (not (io/file? audio)) (nil? filename)) |
| 1604 | + (throw (ex :VncException |
| 1605 | + """ |
| 1606 | + If the passed audio data is a `bytebuf` or a Java |
| 1607 | + `:InputStream` an explicit 'filename' option must be passed! |
| 1608 | + The filename's extensions must match the audio |
| 1609 | + type `wav`, `mp3`, ... |
| 1610 | + E.g.: `:filename "audio.wav"`. |
| 1611 | + """))) |
| 1612 | + ;; The :TranscriptionCreateParams :file method throws an exception |
| 1613 | + ;; when passing a byte array or an input stream. |
| 1614 | + ;; The :TemporaryFile is a workaround until OpenAI fixes the API |
| 1615 | + (try-with [audio-file (if (io/file? audio) |
| 1616 | + (. :TemporaryFile :of audio) |
| 1617 | + (. :TemporaryFile :of audio filename))] |
| 1618 | + (let [params (. :TranscriptionCreateParams :builder)] |
| 1619 | + (. params :model (. :AudioModel model)) |
| 1620 | + (. params :file (. audio-file :getPath)) |
| 1621 | + (when language (. params :language language)) |
| 1622 | + (when prompt (. params :prompt prompt)) |
| 1623 | + (when response-format (. params :responseFormat (. :AudioResponseFormat response-format))) |
| 1624 | + (when temperature (. params :temperature temperature)) |
| 1625 | + (-> (. client :audio) |
| 1626 | + (. :transcriptions) |
| 1627 | + (. :create (. params :build))))))) |
1583 | 1628 |
|
1584 | 1629 |
|
1585 | 1630 | (defn |
|
0 commit comments