# .github/workflows/autotp.yml

name: autotp

# Triggers: pull requests touching the listed paths, manual dispatch,
# merge-queue runs for master, and a cron schedule.
on:
  pull_request:
    paths:
      - ".github/workflows/autotp.yml"
      - "requirements/**"
      - "deepspeed/__init__.py"
      - "deepspeed/module_inject/**"
      # exclude container dir
      - "!deepspeed/module_inject/containers/**"
  workflow_dispatch:
  merge_group:
    branches:
      - master
  schedule:
    # minute 0, hour 0, any day-of-month, any month, day-of-week 0 (Sunday)
    - cron: "0 0 * * 0"

# Runs are grouped by workflow name + ref; a newer run in the same group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref }}"

jobs:
  # Single job: sets up the toolchain and Python stack, then runs the AutoTP
  # example scripts for each model in turn.
  unit-tests:
    # The runner must carry both of these labels.
    runs-on:
      - self-hosted
      - cpu

    steps:
      # Check out the repository under test.
      - uses: actions/checkout@v3

      # Local action referenced by path inside the checked-out repository.
      # NOTE(review): presumably creates/activates the Python virtualenv used
      # by the later pip commands; confirm against the action definition.
      - uses: ./.github/workflows/setup-venv
        id: setup-venv

      # Install gcc-9/g++-9 from the ubuntu-toolchain-r/test PPA and register
      # them with update-alternatives (priority 99) for /usr/bin/gcc and
      # /usr/bin/g++.
      - name: Install gcc-9
        run: |
          # -y: never wait for an interactive confirmation on the CI runner
          sudo add-apt-repository -y -u ppa:ubuntu-toolchain-r/test
          # apt-get (not apt) is the interface intended for scripts
          sudo apt-get install -y gcc-9 g++-9
          # set gcc-9 and g++-9 to default
          sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 99
          sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 99

      # Print the versions of the gcc and g++ found on PATH so the job log
      # records which compiler the following build steps will use.
      - name: Check gcc version
        run: |
          # Get gcc version
          gcc --version
          g++ --version

      # Log host CPU and memory details (lscpu, /proc/meminfo), then clone
      # intel-extension-for-pytorch and build + run the `cpu_features` program
      # from its tests/cpu/isa directory. cmake is installed through pip for
      # that build.
      # NOTE(review): cpu_features presumably prints the ISA extensions the
      # host supports; confirm against the upstream test source.
      - name: Detect instruction sets on instance
        run: |
          lscpu
          cat /proc/meminfo
          pip install cmake
          git clone https://github.com/intel/intel-extension-for-pytorch
          cd intel-extension-for-pytorch/tests/cpu/isa
          cmake .
          make
          ./cpu_features

      # Install the numactl package with apt-get.
      # NOTE(review): presumably required by the `--bind_cores_to_rank` option
      # used in the AutoTP test steps; confirm.
      - name: Install numactl
        run: |
          sudo apt-get install -y numactl

      # Install torch, intel_extension_for_pytorch, oneccl_bind_pt (resolved
      # with Intel's wheel index as an extra index) and py-cpuinfo, then grep
      # `pip list` to log the installed versions of the three key packages.
      # The `\\\<torch\\\>` pattern reaches grep as `\<torch\>` after shell
      # unescaping, i.e. a whole-word match on "torch".
      # NOTE(review): each grep is the last command of its pipeline, so a
      # missing package should fail the step under the runner's default
      # `bash -e`; confirm this is intended.
      - name: Install oneCCL Bindings for PyTorch
        run: |
          pip install torch
          python -m pip install intel_extension_for_pytorch
          # the curl line is for troubleshooting
          curl -L https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
          python -m pip install oneccl_bind_pt --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/
          pip install py-cpuinfo
          # check installed version
          pip list |grep \\\<torch\\\>
          pip list |grep intel-extension-for-pytorch
          pip list |grep oneccl-bind-pt

      # Build oneCCL from source in oneCCL/build and run its install target.
      # Later steps source oneCCL/build/_install/env/setvars.sh from this tree.
      - name: Install oneCCL
        run: |
          git clone https://github.com/oneapi-src/oneCCL
          cd oneCCL
          mkdir build
          cd build
          cmake ..
          # Bound parallelism to the core count: a bare `-j` puts no limit on
          # concurrent compile jobs and can exhaust memory on the runner.
          make -j"$(nproc)" install

      # Install Hugging Face transformers from a fresh clone of its repository
      # (no revision is pinned). The short commit hash is printed so the log
      # records exactly which revision was tested.
      - name: Install transformers
        run: |
          git clone https://github.com/huggingface/transformers
          cd transformers
          git rev-parse --short HEAD
          pip install .

      # Check that torch and intel_extension_for_pytorch import (printing
      # ipex._C._has_xpu()), install the package in the current directory with
      # the dev, 1bit, autotuning and inf extras, then run ds_report.
      # NOTE(review): the current directory is presumably the DeepSpeed
      # checkout root; confirm.
      - name: Install deepspeed
        run: |
          python -c "import torch;import intel_extension_for_pytorch as ipex;print(ipex._C._has_xpu())"
          # check why the host does not have AVX2 support
          pip install .[dev,1bit,autotuning,inf]
          ds_report

      # Dump the complete list of installed Python packages into the job log.
      - name: Python environment check
        run: |
          pip list

      # Clone DeepSpeedExamples into the workspace; the AutoTP test steps run
      # scripts from its inference/huggingface/text-generation directory.
      - name: Download DeepSpeedExamples
        run: |
          git clone https://github.com/Microsoft/DeepSpeedExamples

      # Smoke test: with libstdc++ preloaded and the oneCCL environment sourced
      # from the locally built install tree, check that torch and
      # intel_extension_for_pytorch still import.
      # NOTE(review): the LD_PRELOAD of the system libstdc++ presumably works
      # around a C++ runtime mismatch; confirm.
      - name: Sanity check minimal
        run: |
          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
          source oneCCL/build/_install/env/setvars.sh
          python -c "import torch;import intel_extension_for_pytorch as ipex;print(ipex._C._has_xpu())"

      # Run the DeepSpeedExamples text-generation scripts for
      # facebook/opt-1.3b through the deepspeed launcher with `--num_gpus 2
      # --bind_cores_to_rank` and bfloat16: first inference-test.py with
      # --use_meta_tensor, then ds-hf-compare.py with one input.
      - name: AutoTP test (facebook/opt-1.3b)
        run: |
          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
          source oneCCL/build/_install/env/setvars.sh
          # modify MODEL to change the model name, other lines are the same
          export MODEL=facebook/opt-1.3b
          cd DeepSpeedExamples/inference/huggingface/text-generation
          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1

      # Run the DeepSpeedExamples text-generation scripts for
      # bigscience/bloom-3b through the deepspeed launcher with `--num_gpus 2
      # --bind_cores_to_rank` and bfloat16: first inference-test.py with
      # --use_meta_tensor, then ds-hf-compare.py with one input.
      - name: AutoTP test (bigscience/bloom-3b)
        run: |
          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
          source oneCCL/build/_install/env/setvars.sh
          # modify MODEL to change the model name, other lines are the same
          export MODEL=bigscience/bloom-3b
          cd DeepSpeedExamples/inference/huggingface/text-generation
          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1

      # Run the DeepSpeedExamples text-generation scripts for
      # EleutherAI/gpt-j-6b through the deepspeed launcher with `--num_gpus 2
      # --bind_cores_to_rank` and bfloat16: first inference-test.py with
      # --use_meta_tensor, then ds-hf-compare.py with one input.
      - name: AutoTP test (EleutherAI/gpt-j-6b)
        run: |
          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
          source oneCCL/build/_install/env/setvars.sh
          # modify MODEL to change the model name, other lines are the same
          export MODEL=EleutherAI/gpt-j-6b
          cd DeepSpeedExamples/inference/huggingface/text-generation
          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1

      # Run the DeepSpeedExamples text-generation scripts for
      # baichuan-inc/Baichuan-7B through the deepspeed launcher with
      # `--num_gpus 2 --bind_cores_to_rank` and bfloat16: first
      # inference-test.py with --use_meta_tensor, then ds-hf-compare.py with
      # one input.
      - name: AutoTP test (baichuan-inc/Baichuan-7B)
        run: |
          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
          source oneCCL/build/_install/env/setvars.sh
          # modify MODEL to change the model name, other lines are the same
          export MODEL=baichuan-inc/Baichuan-7B
          cd DeepSpeedExamples/inference/huggingface/text-generation
          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1

#      - name: AutoTP test (bigcode/starcoder)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=bigcode/starcoder
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (tiiuae/falcon-7b)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=tiiuae/falcon-7b
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (google/flan-t5-xl)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=google/flan-t5-xl
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (mistralai/Mistral-7B-v0.1)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=mistralai/Mistral-7B-v0.1
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (mosaicml/mpt-7b)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=mosaicml/mpt-7b
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (meta-llama/Llama-2-7b-hf)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=meta-llama/Llama-2-7b-hf
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1
#
#      - name: AutoTP test (EleutherAI/gpt-neox-20b)
#        run: |
#          export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6
#          source oneCCL/build/_install/env/setvars.sh
#          # modify MODEL to change the model name, other lines are the same
#          export MODEL=EleutherAI/gpt-neox-20b
#          cd DeepSpeedExamples/inference/huggingface/text-generation
#          deepspeed --num_gpus 2 --bind_cores_to_rank inference-test.py --model $MODEL --dtype bfloat16 --use_meta_tensor
#          deepspeed --num_gpus 2 --bind_cores_to_rank ds-hf-compare.py --model $MODEL --dtype bfloat16 --num_inputs 1