Skip to content

Commit

Permalink
Add custom build environment
Browse files Browse the repository at this point in the history
In order to enable the MPI backend we need MPI libraries in our build environment.
To make an MPI-aware build environment we use the official pytorch/manylinux2_28-builder
image as a base and install EFA/Open MPI on top of it.

Usage: make -C poolside/build_env build
  • Loading branch information
dmitry-monakhov committed Jan 29, 2025
1 parent 21bf6ca commit fa2a8de
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 0 deletions.
42 changes: 42 additions & 0 deletions poolside/build_env/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Base image coordinates. Both can be overridden with --build-arg; they are
# declared before FROM so that they can be used in the FROM instruction.
ARG IMAGE_NAME=pytorch/manylinux2_28-builder
ARG IMAGE_BASE_TAG=cuda12.6-main

# Start from the selected builder image.
FROM ${IMAGE_NAME}:${IMAGE_BASE_TAG}

#
# Build trick #1
# AlmaLinux is not officially supported by aws-efa-installer,
# but in a nutshell it is just an EL8-based distro, so we masquerade as Rocky Linux
#
# Build trick #2 (workaround for FindMPI.cmake bug)
# For some unknown reason FindMPI.cmake[1] tries to detect the presence of the
# MPI library by looking up the mpi-${LANG} pkg-config package, but Open MPI is
# known to use the ompi-${LANG} naming convention. So we have to create the
# symlinks manually and provide an explicit PKG_CONFIG_PATH
#
#Footnotes
#[1]https://github.com/Kitware/CMake/blob/master/Modules/FindMPI.cmake#L1592-L1597
# Install EFA libs
#
# Steps performed by the single RUN layer below:
#   1. Download and unpack aws-efa-installer-${AWS_EFA_INSTALLER} into /tmp/efa.
#      curl runs with --fail (-f) so that an HTTP error aborts the download
#      instead of feeding an error page to tar.
#   2. Back up /etc/os-release, rewrite "AlmaLinux" to "Rocky Linux" in it,
#      run the installer, then restore the original file.
#   3. Register /opt/amazon/openmpi/lib64 in /etc/ld.so.conf.d.
#   4. Create mpi*.pc symlinks pointing at the ompi*.pc files. This is done in
#      a subshell with plain `cd`, so it does not depend on the bash-only
#      pushd/popd builtins and does not change the outer working directory.
#   5. Refresh the linker cache and remove the temporary directory.
ARG AWS_EFA_INSTALLER=1.34.0
RUN mkdir -p /tmp/efa \
    && cd /tmp/efa \
    && curl -fsSL https://efa-installer.amazonaws.com/aws-efa-installer-${AWS_EFA_INSTALLER}.tar.gz | tar -xz \
    && cd aws-efa-installer \
    && cp /etc/os-release /etc/os-release.orig \
    && sed -i 's/AlmaLinux/Rocky Linux/g' /etc/os-release \
    && ./efa_installer.sh -y --skip-kmod --no-verify --enable-gdr \
    && mv /etc/os-release.orig /etc/os-release \
    && echo "/opt/amazon/openmpi/lib64" >> /etc/ld.so.conf.d/000_ompi.conf \
    && ( cd /opt/amazon/openmpi/lib64/pkgconfig \
         && ln -s ompi.pc mpi.pc \
         && ln -s ompi-c.pc mpi-c.pc \
         && ln -s ompi-cxx.pc mpi-cxx.pc ) \
    && ldconfig \
    && cd / \
    && rm -rf /tmp/efa

# Make the Open MPI binaries installed under /opt/amazon/openmpi available on PATH.
ENV PATH=$PATH:/opt/amazon/openmpi/bin
# Append the Open MPI library directory. The ${VAR:+...} form only emits the
# existing value and the ':' separator when LD_LIBRARY_PATH is already set and
# non-empty; this avoids a leading empty entry, which the dynamic loader would
# interpret as the current working directory.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/opt/amazon/openmpi/lib64
# Point pkg-config at the directory that holds the ompi*.pc files and the
# mpi*.pc symlinks.
ENV PKG_CONFIG_PATH=/opt/amazon/openmpi/lib64/pkgconfig
29 changes: 29 additions & 0 deletions poolside/build_env/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Base image (repository and tag) that the build starts from.
IMAGE_NAME     ?= 939990436136.dkr.ecr.us-east-2.amazonaws.com/pytorch/manylinux2_28-builder
IMAGE_BASE_TAG ?= cuda12.6-main

# Version of aws-efa-installer; handed to `docker build` as the
# AWS_EFA_INSTALLER build argument.
AWS_EFA_INSTALLER_VER ?= 1.34.0

# Image export settings used by the tar-img target: destination directory
# prefix and the options passed to zstdmt.
EXPORT_PATH           ?= ..
ZSTD_COMPRESS_OPTIONS ?= --ultra -22

# Tag of the resulting image: the base tag with the EFA installer version
# appended.
IMAGE_TAG = $(IMAGE_BASE_TAG)-efa$(AWS_EFA_INSTALLER_VER)

# Build the image and tag it as ${IMAGE_NAME}:${IMAGE_TAG}.
# The base image coordinates and the EFA installer version are forwarded to
# the Dockerfile as build arguments. The Dockerfile is taken from the current
# directory; the parent directory is used as the build context.
# Declared phony so that a file or directory named "build" cannot make this
# target look up to date.
.PHONY: build
build:
	docker build --progress plain --rm \
		--tag "${IMAGE_NAME}:${IMAGE_TAG}" \
		--build-arg IMAGE_NAME="${IMAGE_NAME}" \
		--build-arg IMAGE_BASE_TAG="${IMAGE_BASE_TAG}" \
		--build-arg AWS_EFA_INSTALLER="${AWS_EFA_INSTALLER_VER}" \
		-f Dockerfile ..

# Export the built image as a zstd-compressed tarball at
# ${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst.
#
# - IMAGE_NAME may contain '/' (the default value does), so the archive path
#   includes intermediate directories; they are created first, otherwise
#   zstdmt cannot open the output file.
# - The recipe runs under bash with pipefail so that a failing `docker save`
#   fails the target instead of being masked by the exit status of zstdmt.
# - Declared phony: the target name is a command, not a file.
.PHONY: tar-img
tar-img: SHELL := bash
tar-img:
	mkdir -p "$(dir ${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst)"
	set -o pipefail && \
	docker save "${IMAGE_NAME}:${IMAGE_TAG}" | \
		zstdmt ${ZSTD_COMPRESS_OPTIONS} -v -f -o "${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst"

# Push ${IMAGE_NAME}:${IMAGE_TAG} to its registry.
# Declared phony so that a file named "push" cannot make this target look
# up to date.
.PHONY: push
push:
	docker push "${IMAGE_NAME}:${IMAGE_TAG}"

0 comments on commit fa2a8de

Please sign in to comment.