forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
In order to enable the MPI backend we need MPI libraries in our build environment. To make an MPI-aware build environment we use the official pytorch/manylinux2_28-builder image as a base and install EFA/Open MPI on top of it. Usage: make -C poolside/build_env build
- Loading branch information
1 parent
21bf6ca
commit fa2a8de
Showing
2 changed files
with
71 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Base image selection. Both values can be overridden with --build-arg;
# they are declared before FROM so that they can be used in the FROM line.
ARG IMAGE_NAME=pytorch/manylinux2_28-builder
ARG IMAGE_BASE_TAG=cuda12.6-main

FROM ${IMAGE_NAME}:${IMAGE_BASE_TAG}
|
||
#
# Install the AWS EFA libraries (the steps below rely on the Open MPI that
# ends up under /opt/amazon/openmpi after the installer has run).
#
# Build trick #1
#   AlmaLinux is not officially supported by aws-efa-installer, but in a
#   nutshell it is just an EL8-based distro, so /etc/os-release is temporarily
#   rewritten to look like Rocky Linux while the installer runs and is
#   restored right afterwards.
#
# Build trick #2 (workaround for a FindMPI.cmake quirk)
#   For an unknown reason FindMPI.cmake [1] tries to detect the MPI library by
#   looking up the mpi-${LANG} pkg-config packages, while Open MPI is known to
#   use the ompi-${LANG} naming convention. So we manually create mpi*.pc
#   symlinks and provide an explicit PKG_CONFIG_PATH.
#
# Footnotes
#   [1] https://github.com/Kitware/CMake/blob/master/Modules/FindMPI.cmake#L1592-L1597
#
# Notes on the RUN step:
#   * The tarball is downloaded to a file with `curl --fail` and extracted in
#     a separate command, so an HTTP error or a truncated download stops the
#     build at the download step instead of being hidden inside a pipe.
#   * A subshell `cd` is used instead of pushd/popd, which are bash-only.
#   * /tmp/efa is removed in the same layer that created it.
#
ARG AWS_EFA_INSTALLER=1.34.0
RUN mkdir -p /tmp/efa \
    && cd /tmp/efa \
    && curl -fsSL -o aws-efa-installer.tar.gz \
        "https://efa-installer.amazonaws.com/aws-efa-installer-${AWS_EFA_INSTALLER}.tar.gz" \
    && tar -xzf aws-efa-installer.tar.gz \
    && cd aws-efa-installer \
    && cp /etc/os-release /etc/os-release.orig \
    && sed -i 's/AlmaLinux/Rocky Linux/g' /etc/os-release \
    && ./efa_installer.sh -y --skip-kmod --no-verify --enable-gdr \
    && mv /etc/os-release.orig /etc/os-release \
    && echo "/opt/amazon/openmpi/lib64" >> /etc/ld.so.conf.d/000_ompi.conf \
    && ( cd /opt/amazon/openmpi/lib64/pkgconfig \
        && ln -s ompi.pc mpi.pc \
        && ln -s ompi-c.pc mpi-c.pc \
        && ln -s ompi-cxx.pc mpi-cxx.pc ) \
    && ldconfig \
    && cd / \
    && rm -rf /tmp/efa
|
||
# Make the Open MPI installation under /opt/amazon/openmpi visible to later
# build steps and to containers: binaries on PATH, shared libraries on
# LD_LIBRARY_PATH, and its pkg-config directory as PKG_CONFIG_PATH.
ENV PATH=${PATH}:/opt/amazon/openmpi/bin \
    LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/amazon/openmpi/lib64 \
    PKG_CONFIG_PATH=/opt/amazon/openmpi/lib64/pkgconfig
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Base image: repository and tag of the image the build starts FROM.
# All `?=` variables can be overridden from the environment or the command line.
IMAGE_NAME ?= 939990436136.dkr.ecr.us-east-2.amazonaws.com/pytorch/manylinux2_28-builder
IMAGE_BASE_TAG ?= cuda12.6-main

# Build dependencies: version of the aws-efa-installer passed to the build.
AWS_EFA_INSTALLER_VER ?=1.34.0

# Export options: destination directory and zstd flags used by `tar-img`.
EXPORT_PATH ?= ..
ZSTD_COMPRESS_OPTIONS ?= --ultra -22

# Output options: tag of the produced image (base tag + EFA installer version).
IMAGE_TAG=$(IMAGE_BASE_TAG)-efa$(AWS_EFA_INSTALLER_VER)
|
||
# Build the image from ./Dockerfile, using the parent directory as the build
# context, and tag it as ${IMAGE_NAME}:${IMAGE_TAG}. The base image name/tag
# and the EFA installer version are forwarded as build arguments.
# Declared phony so that a file or directory named `build` cannot make this
# target look up to date.
.PHONY: build
build:
	docker build --progress plain --rm \
		--tag "${IMAGE_NAME}:${IMAGE_TAG}" \
		--build-arg IMAGE_NAME="${IMAGE_NAME}" \
		--build-arg IMAGE_BASE_TAG="${IMAGE_BASE_TAG}" \
		--build-arg AWS_EFA_INSTALLER="${AWS_EFA_INSTALLER_VER}" \
		-f Dockerfile ..
|
||
# Export the built image with `docker save` and compress it with zstdmt into
# ${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst.
# IMAGE_NAME normally contains `/` (registry/repository), so the output path
# has intermediate directories; they are created first, otherwise zstd cannot
# open the output file.
# Declared phony so that a file named `tar-img` cannot shadow the target.
.PHONY: tar-img
tar-img:
	mkdir -p "$(dir ${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst)"
	docker save \
		"${IMAGE_NAME}:${IMAGE_TAG}" | \
		zstdmt ${ZSTD_COMPRESS_OPTIONS} -v -f -o "${EXPORT_PATH}/${IMAGE_NAME}-${IMAGE_TAG}.tar.zst"
|
||
# Push the image tagged ${IMAGE_NAME}:${IMAGE_TAG} to its registry.
# Declared phony so that a file named `push` cannot shadow the target.
.PHONY: push
push:
	docker push "${IMAGE_NAME}:${IMAGE_TAG}"