From d4558170fe731571473220fe00d31bf58e30b330 Mon Sep 17 00:00:00 2001
From: Xiaodong Ye
Date: Mon, 25 Nov 2024 18:49:52 +0800
Subject: [PATCH] Add musa_simple Dockerfile for supporting Moore Threads GPU

Signed-off-by: Xiaodong Ye
---
 docker/README.md              | 23 +++++++++++++++++++----
 docker/musa_simple/Dockerfile | 27 +++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)
 create mode 100644 docker/musa_simple/Dockerfile

diff --git a/docker/README.md b/docker/README.md
index 474503fdf..52544d42b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,5 +1,5 @@
 ### Install Docker Server
-> [!IMPORTANT] 
+> [!IMPORTANT]
 > This was tested with Docker running on Linux.
 If you can get it working on Windows or MacOS, please update this `README.md` with a PR!
 [Install Docker Engine](https://docs.docker.com/engine/install)
@@ -16,7 +16,7 @@ docker run --cap-add SYS_RESOURCE -e USE_MLOCK=0 -e MODEL=/var/model/<model-path>
 ```
 where `<model-root-path>/<model-path>` is the full path to the model file on the Docker host system.
 
 ### cuda_simple
-> [!WARNING] 
+> [!WARNING]
 > Nvidia GPU CuBLAS support requires an Nvidia GPU with sufficient VRAM (approximately as much as the size in the table below) and Docker Nvidia support (see [container-toolkit/install-guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
 A simple Dockerfile for CUDA-accelerated CuBLAS, where the model is located outside the Docker image:
@@ -30,6 +30,21 @@ where `<model-root-path>/<model-path>` is the full path to the model file on the
 
 --------------------------------------------------------------------------
 
+### musa_simple
+> [!WARNING]
+> Moore Threads GPU MuBLAS support requires an MTT GPU with sufficient VRAM (approximately as much as the size in the table below) and MT CloudNative Toolkits support (see [download](https://developer.mthreads.com/sdk/download/CloudNative))
+
+A simple Dockerfile for MUSA-accelerated MuBLAS, where the model is located outside the Docker image:
+
+```
+cd ./musa_simple
+docker build -t musa_simple .
+docker run --cap-add SYS_RESOURCE -e USE_MLOCK=0 -e MODEL=/var/model/<model-path> -v <model-root-path>:/var/model -t musa_simple
+```
+where `<model-root-path>/<model-path>` is the full path to the model file on the Docker host system.
+
+--------------------------------------------------------------------------
+
 ### "Open-Llama-in-a-box"
 Download an Apache V2.0 licensed 3B params Open LLaMA model and install into a Docker image that runs an OpenBLAS-enabled llama-cpp-python server:
 ```
@@ -47,7 +62,7 @@ docker $ ls -lh *.bin
 lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> q5_1.bin
 ```
-> [!NOTE] 
+> [!NOTE]
 > Make sure you have enough disk space to download the model. As the model is then copied into the image you will need at least **TWICE** as much disk space as the size of the model:
@@ -60,5 +75,5 @@ lrwxrwxrwx 1 user user 24 May 23 18:30 model.bin -> q5_1.bin
 | 65B | 50 GB |
 
-> [!NOTE] 
+> [!NOTE]
 > If you want to pass or tune additional parameters, customise `./start_server.sh` before running `docker build ...`

diff --git a/docker/musa_simple/Dockerfile b/docker/musa_simple/Dockerfile
new file mode 100644
index 000000000..2856ae5f3
--- /dev/null
+++ b/docker/musa_simple/Dockerfile
@@ -0,0 +1,27 @@
+ARG MUSA_IMAGE="rc3.1.0-devel-ubuntu22.04"
+FROM mthreads/musa:${MUSA_IMAGE}
+
+# We need to set the host to 0.0.0.0 to allow outside access
+ENV HOST 0.0.0.0
+
+RUN apt-get update && apt-get upgrade -y \
+    && apt-get install -y git build-essential \
+    python3 python3-pip gcc wget \
+    ocl-icd-opencl-dev opencl-headers clinfo \
+    libclblast-dev libopenblas-dev \
+    && mkdir -p /etc/OpenCL/vendors && cp /driver/etc/OpenCL/vendors/MT.icd /etc/OpenCL/vendors/MT.icd
+
+COPY . .
+
+# Set build-related env vars
+ENV MUSA_DOCKER_ARCH=all
+ENV GGML_MUSA=1
+
+# Install server dependencies
+RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context
+
+# Install llama-cpp-python (build with MUSA)
+RUN CMAKE_ARGS="-DGGML_MUSA=on" pip install llama-cpp-python
+
+# Run the server
+CMD python3 -m llama_cpp.server
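
To smoke-test the patch end to end, a minimal sketch follows. The patch filename `musa_simple.patch` and the model path `/models/llama-2-7b.Q4_K_M.gguf` are hypothetical placeholders, and the `-p 8000:8000` flag is an addition here: the README's `docker run` above does not publish a port, while `llama_cpp.server` listens on 8000 by default.

```
# Apply the patch to a llama-cpp-python checkout (filename is hypothetical)
git am musa_simple.patch

# Build the image, as in the README section added above
cd docker/musa_simple
docker build -t musa_simple .

# Run the server; -p 8000:8000 is added so the host can reach it
# (the README's docker run does not pass -p; 8000 is the server default)
docker run --cap-add SYS_RESOURCE -e USE_MLOCK=0 \
    -e MODEL=/var/model/llama-2-7b.Q4_K_M.gguf \
    -v /models:/var/model -p 8000:8000 -t musa_simple

# From the host, list the loaded model via the OpenAI-compatible API
curl http://localhost:8000/v1/models
```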
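Separately, the `mkdir -p /etc/OpenCL/vendors && cp ... MT.icd` step in the Dockerfile registers the Moore Threads OpenCL ICD inside the image. One way to sanity-check that step is to run `clinfo` (which the Dockerfile installs) in place of the server's CMD; whether an actual GPU device appears also depends on the host's MT CloudNative Toolkits / container runtime setup, which is outside the scope of this patch.

```
# Override the image's CMD to run clinfo instead of llama_cpp.server;
# the Moore Threads platform should be listed if MT.icd was copied correctly
docker run --rm musa_simple clinfo
```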