Skip to content

Commit

Permalink
Create a shell script to install Docker Compose on Linux systems.
Browse files Browse the repository at this point in the history
Clean up Makefile to remove unnecessary operations.
Update python requirements to get the extra index url working for all cases.
Update Docker Compose file to fix $HOME bug where the ~ expands to the image home instead of the host home.
Build is not working because of GPG key rotation. See https://developer.nvidia.com/blog/updating-the-cuda-linux-gpg-repository-key for details.
The requirements installation still needs to be verified to confirm that PyTorch installs as intended.
  • Loading branch information
veritas9872 committed Apr 29, 2022
1 parent fea7fe6 commit d9d66df
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ FROM ${BUILD_IMAGE} AS train-builds-include
# This stage does not have anything installed. No variables are specified either.
# This stage is simply the `BUILD_IMAGE` with additional files and directories.
# All pip wheels are located in `/tmp/dist`.
# Using an image other than `BUILD_IMAGE` may contaminate the `/opt/conda` and other key directories.
# Using an image other than `BUILD_IMAGE` may contaminate `/opt/conda` and other key directories.

# The `train` image is the one actually used for training.
# It is designed to be separate from the `build` image,
Expand Down
30 changes: 15 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
.PHONY: env di up exec rebuild start down run ls

# Creates a `.env` file in PWD if it does not exist already or is empty.
# This will help prevent UID/GID bugs in `docker-compose.yaml`,
# which unfortunately cannot use shell outputs in the file.
# Image names have the usernames appended to them to prevent
# name collisions between different users.
ENV_FILE = .env
# Use simple expansion (`:=`) so that each `id` call runs exactly once
# at parse time instead of on every expansion of the variable.
GID := $(shell id -g)
UID := $(shell id -u)
GRP := $(shell id -gn)
USR := $(shell id -un)
# Kept recursive (`=`) on purpose: `SERVICE` is assigned further down
# in this file, so it must be resolved when `IMAGE_NAME` is used.
IMAGE_NAME = "${SERVICE}-${USR}"
# Writes the values to `ENV_FILE` only when the file is missing or empty
# (`test -s` fails in both cases), so an existing configuration is kept.
env:
	test -s ${ENV_FILE} || printf "GID=${GID}\nUID=${UID}\nGRP=${GRP}\nUSR=${USR}\nIMAGE_NAME=${IMAGE_NAME}\n" >> ${ENV_FILE}

# Create a `.dockerignore` file in PWD if it does not exist already or is empty.
# Set to ignore all files except requirements files at project root or `reqs`.
DI_FILE = .dockerignore
Expand All @@ -14,7 +28,7 @@ di:
# Change `SERVICE` to specify other services and projects.
SERVICE = full
COMMAND = /bin/zsh
PROJECT = "${SERVICE}-$(shell id -un)"
PROJECT = "${SERVICE}-${USR}"
up: # Start service. Creates a new container from the image.
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker compose -p ${PROJECT} up -d ${SERVICE}
rebuild: # Start service. Rebuilds the image from the Dockerfile before creating a new container.
Expand All @@ -29,17 +43,3 @@ run: # Used for debugging cases where service will not start.
docker compose -p ${PROJECT} run ${SERVICE}
ls: # List all Compose projects, including stopped ones (`-a`).
docker compose ls -a

# Creates a `.env` file in PWD if it does not exist already or is empty.
# This will help prevent UID/GID bugs in `docker-compose.yaml`,
# which unfortunately cannot use shell outputs in the file.
# Image names have the usernames appended to them to prevent
# name collisions between different users.
ENV_FILE = .env
GID = $(shell id -g)
UID = $(shell id -u)
GRP = $(shell id -gn)
USR = $(shell id -un)
IMAGE_NAME = "${SERVICE}-${USR}"
env:
test -s ${ENV_FILE} || printf "GID=${GID}\nUID=${UID}\nGRP=${GRP}\nUSR=${USR}\nIMAGE_NAME=${IMAGE_NAME}\n" >> ${ENV_FILE}
28 changes: 15 additions & 13 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ services:
# Use different image names for different users and projects.
# Otherwise, images will be repeatedly removed and recreated.
# The removed images will remain cached, however.
image: pytorch_source:${IMAGE_NAME:-train}
image: cresset:${IMAGE_NAME:-train}
# `ipc: host` removes the shared memory cap but is a known security vulnerability.
# ipc: host # Equivalent to `--ipc=host` in `docker run`. Disable this for WSL.
# shm_size: 1GB # Explicit shared memory limit. No security issues this way.
Expand All @@ -39,10 +39,13 @@ services:
# Setting `HOST_PATH:CONTAINER_PATH` allows the container to access `HOST_PATH` as `CONTAINER_PATH`.
# See https://docs.docker.com/storage/volumes for details.
# Current working directory `.` is connected to `PROJECT_ROOT`.
# Always use the ${HOME} variable to specify the host home directory.
# The `~` expands to the home directory inside the image, not to the home directory of the user on the host.
# See https://github.com/docker/compose/issues/6506 for details.
volumes: # Add volumes as necessary. Equivalent to `-v` flag in `docker run`.
- .:${PROJECT_ROOT:-/opt/project} # Use this if the docker-compose.yaml file is at the project root.
# - ..:${PROJECT_ROOT:-/opt/project} # Use this if the docker-compose.yaml file is in a subdirectory.
# - ~/.ssh:/home/${USR}/.ssh # Bind host SSH configurations to the container for SSH port local forwarding.
# - ${HOME}/.ssh:/home/${USR}/.ssh # Bind host SSH configurations to the container for SSH port local forwarding.
# - ${DATA_PATH_1}:/mnt/data1 # Configurable data path settings for different host data locations.
build: # Options for building. Used when `--build` is called in `docker compose`.
target: train # Specify build target.
Expand All @@ -61,10 +64,10 @@ services:
GRP: ${GRP:-user} # `id -gn`
USR: ${USR:-user} # `id -un`
TZ: ${TZ:-Asia/Seoul} # Timezone settings used during the build.
DEB_OLD: ${DEB_OLD:-http://archive.ubuntu.com}
DEB_NEW: ${DEB_NEW:-http://mirror.kakao.com}
INDEX_URL: ${INDEX_URL:-http://mirror.kakao.com/pypi/simple}
TRUSTED_HOST: ${TRUSTED_HOST:-mirror.kakao.com}
# DEB_OLD: ${DEB_OLD:-http://archive.ubuntu.com}
# DEB_NEW: ${DEB_NEW:-http://mirror.kakao.com}
# INDEX_URL: ${INDEX_URL:-http://mirror.kakao.com/pypi/simple}
# TRUSTED_HOST: ${TRUSTED_HOST:-mirror.kakao.com}
working_dir: ${PROJECT_ROOT:-/opt/project}
# ports: # Uncomment if ports are necessary.
# - ${PORT:-8080}:22
Expand All @@ -83,7 +86,7 @@ services:

full: # Default service name. Change the name as necessary for each project.
hostname: full
image: pytorch_source:${IMAGE_NAME:-full}
image: cresset:${IMAGE_NAME:-full}
# ipc: host
# shm_size: 1GB
tty: true
Expand All @@ -94,8 +97,8 @@ services:
volumes:
- .:${PROJECT_ROOT:-/opt/project}
# - ..:${PROJECT_ROOT:-/opt/project} # Use this if docker-compose.yaml is in a subdirectory of another project.
# - ~/.vscode-server:/home/${USR}/.vscode-server # VSCode extensions can be preserved between containers this way.
# - ~/.ssh:/home/${USR}/.ssh # Bind host SSH configurations to the container for SSH port local forwarding.
# - ${HOME}/.vscode-server:/home/${USR}/.vscode-server # VSCode extensions can be preserved between containers this way.
# - ${HOME}/.ssh:/home/${USR}/.ssh # Bind host SSH configurations to the container for SSH port local forwarding.
# - ${DATA_PATH_1}:/mnt/data1 # Configurable data path settings for different host data locations.
build:
# Set `TARGET_STAGE` to `train-builds` to get just the wheels in `/tmp/dist`.
Expand Down Expand Up @@ -148,7 +151,7 @@ services:

deploy:
hostname: deploy
image: pytorch_source:${IMAGE_NAME:-deploy}
image: cresset:${IMAGE_NAME:-deploy}
tty: true
init: true
stdin_open: true
Expand Down Expand Up @@ -196,7 +199,7 @@ services:
# This layer may be useful for PyTorch contributors.
devel: # Skeleton service for development and debugging.
hostname: devel
image: pytorch_source:${IMAGE_NAME:-devel}
image: cresset:${IMAGE_NAME:-devel}
tty: true
init: true
stdin_open: true
Expand All @@ -212,9 +215,8 @@ services:

ngc: # NGC image service. Demonstrates the generality of the template.
hostname: ngc
image: pytorch_source:ngc-${YEAR:-22}.${MONTH:-02}
image: cresset:ngc-${YEAR:-22}.${MONTH:-02}
# ipc: host
# shm_size: 1GB
tty: true
init: true
stdin_open: true
Expand Down
19 changes: 19 additions & 0 deletions install_compose.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/sh

# Utility for installing Docker Compose on Linux systems.
# Visit https://docs.docker.com/compose/install for the full documentation.
# This script is separate from the Makefile because downloads are very slow in `make` commands.
#
# Exit status: 0 if the plugin is already present or was installed,
# non-zero if the directory could not be created or the download failed.

# Abort on the first failing command so that a failed `mkdir`
# never falls through to the download step.
set -e

COMPOSE_VERSION=v2.4.1
COMPOSE_OS_ARCH=linux-x86_64
COMPOSE_URL=https://github.com/docker/compose/releases/download/${COMPOSE_VERSION}/docker-compose-${COMPOSE_OS_ARCH}
COMPOSE_PATH=${HOME}/.docker/cli-plugins
COMPOSE_FILE=${COMPOSE_PATH}/docker-compose
# Download target. It is renamed to ${COMPOSE_FILE} only after a complete download,
# so an interrupted or failed download is never mistaken for an installed plugin.
COMPOSE_TMP=${COMPOSE_FILE}.tmp

if [ -s "${COMPOSE_FILE}" ]; then
    echo "${COMPOSE_FILE} already exists!"
else
    mkdir -p "${COMPOSE_PATH}"
    # `-f` makes curl exit non-zero on HTTP errors (e.g., 404)
    # instead of saving the server's error page as the binary.
    if curl -fSL "${COMPOSE_URL}" -o "${COMPOSE_TMP}"; then
        chmod +x "${COMPOSE_TMP}"
        mv -f "${COMPOSE_TMP}" "${COMPOSE_FILE}"
    else
        rm -f "${COMPOSE_TMP}"
        echo "Failed to download ${COMPOSE_URL}" >&2
        exit 1
    fi
fi
6 changes: 5 additions & 1 deletion reqs/pip-train.requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Packages to be installed in `Dockerfile` in `train` or `full` modes.
# torch==1.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 # Use if BUILD_MODE=exclude and CUDA_VERSION is 11.2+.

--extra-index-url https://download.pytorch.org/whl/cu113 # Use if BUILD_MODE=exclude and CUDA_VERSION is 11.x.
# torch==1.11.0 # Requires `extra-index-url` to be specified on a separate line to work on CUDA 11.
# torchvision==0.12.0

mkl # Essential package if BUILD_MODE=include and MKL_MODE=include. Unnecessary if MKL_MODE=exclude.
pytorch-lightning
torchmetrics
Expand Down

0 comments on commit d9d66df

Please sign in to comment.