Add HPUs (Intel® Gaudi®) support #2450

Open — wants to merge 29 commits into main

Commits (29)
All 29 commits are by PiotrBLL.

- b7069e5 (Oct 31, 2024) — Add initial imports and usage of `wrap_in_hpu_graph`
- 8c3680c (Oct 31, 2024) — Add requires_hpu marker in tests
- c7fe328 (Oct 31, 2024) — Add testing hpu file
- 2365157 (Oct 31, 2024) — Update requirements
- 9269b2a (Oct 31, 2024) — Add HPU support in transcribe and timing + tests
- 52062dd (Oct 31, 2024) — Add hpu_utils
- bee2865 (Nov 5, 2024) — Fix importing habana-frameworks library conditionally
- c479ff3 (Nov 5, 2024) — Add hpu transcribe test
- 6e66088 (Nov 5, 2024) — Add Dockerfile_hpu + README.md section of hpu usage
- 1be8041 (Nov 5, 2024) — Add hpu usage info in jupyter notebook files
- a49838c (Nov 6, 2024) — Add Pytorch Docker images info in README.md
- 3825dd0 (Nov 7, 2024) — Rename and Fix Dockerfile.hpu, add .dockerignore, update in README.md
- 9e39365 (Nov 7, 2024) — Add optimum[habana]==1.13.1 in Dockerfile.hpu
- 82a5380 (Nov 8, 2024) — Fix Dockerfile.hpu `ffmpeg` package install, update README.md whisper…
- 2d5a552 (Nov 8, 2024) — Fix hpu device_name typo
- 50adb76 (Nov 9, 2024) — Fix warnings
- f6ccd01 (Nov 11, 2024) — Wrap model to HPU + update Dockerfile packages
- 32ec355 (Nov 11, 2024) — Upgrade the Dockerfile.hpu ubuntu image
- e1d4b7b (Nov 11, 2024) — Add checking sparse tensors in Whisper model
- d36696f (Nov 11, 2024) — Add util checking hpu tensor
- b0cf21b (Nov 11, 2024) — Add HPU checking in decoding and transcribe handle
- e1545f4 (Nov 11, 2024) — Fix: compute_device name in Whisper model
- adaec5a (Nov 12, 2024) — Add requirements_hpu.txt
- 866fece (Nov 12, 2024) — Delete redundant linux packages in Dockerfile.hpu + update README.md
- 3e826a2 (Nov 12, 2024) — Delete timestamps test
- 6770610 (Nov 12, 2024) — Update Dockerfile.hpu and README.md files:
- 8c4e659 (Nov 19, 2024) — Add word_timestamps fix in model.transcribe
- 1a42019 (Nov 20, 2024) — Merge pull request #2 from BlueLabelLabs/feat/Add-HPU-support
- cd48514 (Nov 20, 2024) — Merge branch 'openai:main' into main
1 change: 1 addition & 0 deletions .dockerignore
@@ -0,0 +1 @@
.graph_dumps
34 changes: 34 additions & 0 deletions Dockerfile.hpu
@@ -0,0 +1,34 @@
# Use the official Gaudi Docker image with PyTorch
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest

# Set environment variables for Habana
ENV HABANA_VISIBLE_DEVICES=all
ENV OMPI_MCA_btl_vader_single_copy_mechanism=none
ENV PT_HPU_LAZY_ACC_PAR_MODE=0
ENV PT_HPU_ENABLE_LAZY_COLLECTIVES=1

# Set timezone to UTC and install essential packages
ENV DEBIAN_FRONTEND="noninteractive" TZ=Etc/UTC
RUN apt-get update && apt-get install -y \
    tzdata \
    python3-pip \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Download and install the static build of ffmpeg
RUN mkdir -p /usr/local/bin/ffmpeg && \
    cd /usr/local/bin/ffmpeg && \
    wget https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz && \
    tar -xf ffmpeg-release-amd64-static.tar.xz && \
    cp -a ffmpeg-*-static/ffmpeg /usr/bin/ffmpeg && \
    cp -a ffmpeg-*-static/ffprobe /usr/bin/ffprobe && \
    rm -rf /usr/local/bin/ffmpeg

COPY . /workspace/whisper
WORKDIR /workspace/whisper

# Copy HPU requirements
COPY requirements_hpu.txt /workspace/requirements_hpu.txt

# Install Python packages
RUN pip install --upgrade pip \
&& pip install -r requirements_hpu.txt
59 changes: 59 additions & 0 deletions README.md
@@ -93,6 +93,10 @@
Adding `--task translate` will translate the speech into English:

whisper japanese.wav --language Japanese --task translate

The following command will transcribe speech in audio files, using the Intel® Gaudi® HPU (`--device hpu` option):

whisper audio.flac audio.mp3 audio.wav --model turbo --device hpu

Run the following to view all available options:

whisper --help
@@ -140,6 +144,61 @@
result = whisper.decode(model, mel, options)
print(result.text)
```

## Intel® Gaudi® HPU usage

### Build the Docker Image

```bash
docker build -t whisper_hpu:latest -f Dockerfile.hpu .
```

`Dockerfile.hpu` uses the `vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest` base image; replace it with the appropriate version for your environment if needed.
See the [PyTorch Docker Images for the Intel® Gaudi® Accelerator](https://developer.habana.ai/catalog/pytorch-container/) for more information.

### Run the Container

```bash
docker run -it --runtime=habana whisper_hpu:latest
```

Mounting a volume (`-v`) is optional, but it lets you access your local Whisper repository from inside the container.
To do this, add `-v /path/to/your/whisper:/workspace/whisper` to the `docker run` command, replacing `/path/to/your/whisper` with the path to the Whisper repository on your local machine.
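
For example, a full `docker run` invocation with the bind mount might look like this (the host path is a placeholder to substitute with your own):

```shell
# Run the HPU container with the local Whisper checkout mounted at /workspace/whisper.
# /path/to/your/whisper is a placeholder; use the path to your local clone.
docker run -it --runtime=habana \
    -v /path/to/your/whisper:/workspace/whisper \
    whisper_hpu:latest
```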

### Command-line usage with Intel® Gaudi® HPU

To run the `transcribe` process with Intel® Gaudi® HPU, you can use the `--device hpu` option:

```bash
python3 -m whisper.transcribe audio_file.wav --model turbo --device hpu
```

* Note: Replace `audio_file.wav` with the path to the audio file you want to transcribe. (Example file: https://www.kaggle.com/datasets/pavanelisetty/sample-audio-files-for-speech-recognition?resource=download)

To run the `transcribe` tests with Intel® Gaudi® HPU, make sure to install the `pytest` package:

```bash
pip install pytest
```

and run the following command:

```bash
PYTHONPATH=. pytest -s tests/test_transcribe.py::test_transcribe_hpu
```
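
The commit history mentions a `requires_hpu` test marker. As a sketch only (the PR's actual implementation may differ, and `hpu_available` and the test body here are illustrative), such a marker could be built on a pytest `skipif`:

```python
import importlib.util

import pytest


def hpu_available() -> bool:
    # Availability probe: the habana_frameworks package ships with the Gaudi SDK,
    # so its absence means the test host has no HPU stack installed.
    return importlib.util.find_spec("habana_frameworks") is not None


# Skip HPU tests automatically on machines without the Gaudi software stack.
requires_hpu = pytest.mark.skipif(
    not hpu_available(), reason="HPU runtime not available"
)


@requires_hpu
def test_transcribe_hpu_smoke():
    import whisper

    model = whisper.load_model("tiny", device="hpu")
    assert model is not None
```

Tests decorated with `requires_hpu` then run on Gaudi machines and skip cleanly everywhere else.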

### Python usage with Intel® Gaudi® HPU

To use the Intel® Gaudi® HPU within Python, you can specify the device when loading the model:

```python
import whisper

model = whisper.load_model("turbo", device="hpu")
result = model.transcribe("audio.mp3")
print(result["text"])
```
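
Since the PR imports the habana-frameworks library conditionally, a script meant to run on both HPU and non-HPU machines can probe for the runtime before choosing a device. The helper below is a hypothetical sketch, not part of the PR; the package name `habana_frameworks` matches the conditional import described in the commit history:

```python
import importlib.util


def pick_device() -> str:
    """Return "hpu" when the Habana PyTorch bridge is importable, else "cpu".

    Illustrative helper (not from the PR): find_spec checks importability
    without actually importing the package.
    """
    if importlib.util.find_spec("habana_frameworks") is not None:
        return "hpu"
    return "cpu"


# Usage sketch:
# model = whisper.load_model("turbo", device=pick_device())
```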

## More examples

Please use the [🙌 Show and tell](https://github.com/openai/whisper/discussions/categories/show-and-tell) category in Discussions for sharing more example usages of Whisper and third-party extensions such as web demos, integrations with other tools, ports for different platforms, etc.
36 changes: 29 additions & 7 deletions notebooks/LibriSpeech.ipynb
