# syntax=docker/dockerfile:1

# Base image tag is supplied by the build pipeline; there is deliberately no
# default so a build cannot silently fall back to an unpinned tag.
ARG TAG_FOR_BASE_MICROMAMBA_IMAGE
FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE

# Build-time inputs. CUDA_MAJOR_MINOR_VERSION stays empty for CPU-only images;
# ENV_IN_FILENAME / ARG_BASED_ENV_IN_FILENAME name the conda env spec files.
ARG CUDA_MAJOR_MINOR_VERSION=''
ARG ENV_IN_FILENAME
ARG ARG_BASED_ENV_IN_FILENAME
ARG IMAGE_VERSION
LABEL "org.amazon.sagemaker-distribution.image.version"=$IMAGE_VERSION

# Filesystem layout used by later stages of this file.
ARG AMZN_BASE="/opt/amazon/sagemaker"
ARG DB_ROOT_DIR="/opt/db"
ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging"

# Identity the base image's mamba user is remapped to below.
ARG NB_USER="sagemaker-user"
ARG NB_UID=1000
ARG NB_GID=100

# https://www.openssl.org/source/
ARG FIPS_VALIDATED_SSL=3.0.8
ARG MIN_REQUIRED_MICROMAMBA_VERSION=1.5.11

ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/"
ENV STUDIO_LOGGING_DIR="/var/log/studio/"
ENV EDITOR="nano"
# Re-export the build arg so the version is visible at runtime as well.
ENV IMAGE_VERSION=$IMAGE_VERSION
ENV PINNED_MICROMAMBA_MINOR_VERSION="1.5.*"
ENV SAGEMAKER_RECOVERY_MODE_HOME=/tmp/sagemaker-recovery-mode-home
USER root
# Upgrade micromamba to the latest patch version in the pinned minor version range, if applicable.
# NOTE(review): `[[ ... ]]` and the unquoted glob on the right-hand side require bash;
# this presumably relies on the bash-based SHELL wrapper shipped by the
# mambaorg/micromamba base image — TODO confirm against the base image's SHELL.
RUN CURRENT_MICROMAMBA_VERSION=$(micromamba --version) && \
    echo "Current micromamba version: $CURRENT_MICROMAMBA_VERSION" && \
    # Glob match: only self-update when the current version is inside the pinned 1.5.* range.
    if [[ "$CURRENT_MICROMAMBA_VERSION" == $PINNED_MICROMAMBA_MINOR_VERSION ]]; then \
    echo "Upgrading micromamba to the latest $PINNED_MICROMAMBA_MINOR_VERSION version..." && \
    micromamba self-update -c conda-forge --version "$MIN_REQUIRED_MICROMAMBA_VERSION" && \
    # Drop caches in the same layer so the upgrade does not bloat the image.
    micromamba clean --all --yes --force-pkgs-dirs; \
    else \
    echo "Micromamba is already at version $CURRENT_MICROMAMBA_VERSION (outside $PINNED_MICROMAMBA_MINOR_VERSION). No upgrade performed."; \
    fi
| 40 | + |
# Re-map the base image's default mamba user to the SageMaker identity:
# rename the login, relocate the home directory, and pin the UID/GID that
# Studio expects (NB_UID/NB_GID from the build args above).
RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \
    groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \
    # Update the expected value of MAMBA_USER for the
    # _entrypoint.sh consistency check.
    echo "${NB_USER}" > "/etc/arg_mamba_user" && \
    # `:` is a no-op terminator so every real command above can end in `&& \`.
    :
# Make the rename visible to the rest of the build and to the runtime container.
ENV MAMBA_USER=$NB_USER
ENV USER=$NB_USER
| 49 | + |
# OS-level setup: apt packages, sudoers, AWS CLI v2, and the directory skeleton
# used by CodeEditor. NOTE(review): `apt-get upgrade -y` is kept intentionally
# (the distribution patches the base OS at build time); hadolint DL3005 would
# prefer bumping the base image instead.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential openssh-client nano cron less mandoc && \
    # We just install tzdata below but leave default time zone as UTC. This helps packages like Pandas to function correctly.
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata krb5-user libkrb5-dev libsasl2-dev libsasl2-modules && \
    chmod g+w /etc/passwd && \
    # Passwordless sudo for everyone is intentional in this interactive-notebook image.
    echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
    touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \
    # Note that we do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our images will
    # not be able to run any `apt-get install` commands and that would hamper customizability of the images.
    # Install the AWS CLI v2. `-f` makes curl fail loudly on HTTP errors instead of
    # silently saving an error page as the zip; `-L` follows redirects.
    curl -fSL "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    # Already running as root here, so `sudo` is unnecessary (hadolint DL3004).
    ./aws/install && \
    rm -rf aws awscliv2.zip && \
    : && \
    echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile && \
    # CodeEditor - create server, user data dirs
    mkdir -p /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data \
    && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data && \
    # create dir to store user data files
    mkdir -p /opt/amazon/sagemaker/user-data \
    && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/user-data && \
    # Merge in OS directory tree contents.
    mkdir -p ${DIRECTORY_TREE_STAGE_DIR}
# Stage the repo's `dirs/` overlay and merge it into the root filesystem.
COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/
RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \
    rm -rf ${DIRECTORY_TREE_STAGE_DIR} && \
    # CodeEditor - download the extensions listed one-per-line in extensions.txt
    # (`|| [ -n "$url" ]` also processes a final line that lacks a trailing newline).
    mkdir -p /etc/code-editor/extensions && \
    while IFS= read -r url || [ -n "$url" ]; do \
    echo "Downloading extension from ${url}..." && \
    # SECURITY: `--no-check-certificate` was removed — it disabled TLS verification
    # on downloaded, executable editor extensions, allowing MITM substitution.
    wget -P /etc/code-editor/extensions "${url}"; \
    done < /etc/code-editor/extensions.txt
| 82 | + |
# Everything below runs as the non-root notebook user.
USER $MAMBA_USER
# Stage the conda env spec(s) from the build context; $ENV_IN_FILENAME is a build arg.
COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/
# micromamba image convention: activate the base env inside subsequent RUN steps.
ARG MAMBA_DOCKERFILE_ACTIVATE=1
# Lets the conda solver target a CUDA build even though the build host has no GPU driver.
ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION
| 87 | + |
# Make sure that $ENV_IN_FILENAME has a newline at the end before the `tee` command runs. Otherwise, nasty things
# will happen.
# This single layer performs, in order: (1) env-file templating, (2) base env install,
# (3) a minimal "recovery mode" env, (4) CodeEditor extension install + settings,
# (5) Glue kernel install/patch, (6) OpenSSL FIPS provider build, (7) krb5 pip install,
# (8) sparkmagic patches. The steps are order-dependent; do not reorder.
RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \
    then echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \
    else envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \
    fi && \
    # Enforce dependencies are all installed from conda-forge
    micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
    mkdir -p $SAGEMAKER_RECOVERY_MODE_HOME && \
    chown $MAMBA_USER:$MAMBA_USER $SAGEMAKER_RECOVERY_MODE_HOME && \
    # Extract the pinned spec lines (e.g. `conda-forge::pkg[version=...]`) from the env file
    # so recovery mode installs exactly the same versions as the base env.
    SUPERVISOR_VERSION=$(grep "^conda-forge::supervisor\[" /tmp/$ENV_IN_FILENAME) && \
    JUPYTERLAB_VERSION=$(grep "^conda-forge::jupyterlab\[" /tmp/$ENV_IN_FILENAME) && \
    SAGEMAKER_JUPYTERLAB_VERSION=$(grep "^conda-forge::sagemaker-jupyterlab-extension" /tmp/$ENV_IN_FILENAME) && \
    echo "Installing in sagemaker-recovery-mode micromamba environment: $JUPYTERLAB_VERSION $SAGEMAKER_JUPYTERLAB_VERSION" && \
    micromamba create -n sagemaker-recovery-mode && \
    micromamba install -n sagemaker-recovery-mode -y $JUPYTERLAB_VERSION $SAGEMAKER_JUPYTERLAB_VERSION $SUPERVISOR_VERSION && \
    micromamba clean --all --yes --force-pkgs-dirs && \
    rm -rf /tmp/*.in && \
    sudo ln -s $(which python3) /usr/bin/python && \
    # Update npm version
    npm i -g npm && \
    # Enforce to use `conda-forge` as only channel, by removing `defaults`
    conda config --remove channels defaults && \
    micromamba config append channels conda-forge --env && \
    # Configure CodeEditor - Install extensions and set preferences
    extensionloc=/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions && mkdir -p "${extensionloc}" \
    # Loop through all vsix files in /etc/code-editor/extensions and install them
    && for ext in /etc/code-editor/extensions/*.vsix; do \
    echo "Installing extension ${ext}..."; \
    sagemaker-code-editor --install-extension "${ext}" --extensions-dir "${extensionloc}" --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data; \
    done \
    # Copy the settings
    && cp /etc/code-editor/code_editor_machine_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json && \
    cp /etc/code-editor/code_editor_user_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/User/settings.json && \
    # Install glue kernels, and move to shared directory
    # Also patching base kernel so Studio background code doesn't start session silently
    install-glue-kernels && \
    SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \
    # Inject an early-return guard into the Glue base kernel before its
    # `if not store_history and (` line (sed `i\` inserts the text above the match).
    sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \
    "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py" && \
    # Install FIPS Provider for OpenSSL, on top of existing OpenSSL installation
    # v3.0.8 is latest FIPS validated provider, so this is the one we install
    # But we need to run tests against the installed version.
    # see https://github.com/openssl/openssl/blob/master/README-FIPS.md https://www.openssl.org/source/
    INSTALLED_SSL=$(micromamba list | grep openssl | tr -s ' ' | cut -d ' ' -f 3 | head -n 1) && \
    # download source code for installed, and FIPS validated openssl versions
    curl -L https://github.com/openssl/openssl/releases/download/openssl-$FIPS_VALIDATED_SSL/openssl-$FIPS_VALIDATED_SSL.tar.gz > openssl-$FIPS_VALIDATED_SSL.tar.gz && \
    curl -L https://github.com/openssl/openssl/releases/download/openssl-$INSTALLED_SSL/openssl-$INSTALLED_SSL.tar.gz > openssl-$INSTALLED_SSL.tar.gz && \
    tar -xf openssl-$FIPS_VALIDATED_SSL.tar.gz && tar -xf openssl-$INSTALLED_SSL.tar.gz && cd openssl-$FIPS_VALIDATED_SSL && \
    # Configure both versions to enable FIPS and build
    ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
    cd ../openssl-$INSTALLED_SSL && \
    ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
    # Copy validated provider to installed version for testing
    cp ../openssl-$FIPS_VALIDATED_SSL/providers/fips.so providers/. && \
    cp ../openssl-$FIPS_VALIDATED_SSL/providers/fipsmodule.cnf providers/. && \
    make tests && cd ../openssl-$FIPS_VALIDATED_SSL && \
    # After tests pass, install FIPS provider and remove source code
    make install_fips && cd .. && rm -rf ./openssl-* && \
    # Create new config file with fips-enabled. Then user can override OPENSSL_CONF to enable FIPS
    # e.g. export OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf
    cp /opt/conda/ssl/openssl.cnf /opt/conda/ssl/openssl-fips.cnf && \
    sed -i "s:# .include fipsmodule.cnf:.include /opt/conda/ssl/fipsmodule.cnf:" /opt/conda/ssl/openssl-fips.cnf && \
    sed -i 's:# fips = fips_sect:fips = fips_sect:' /opt/conda/ssl/openssl-fips.cnf && \
    # Install Kerberos.
    # Make sure no dependency is added/updated
    pip install "krb5>=0.5.1,<0.6" && \
    # Assert the installed krb5 wheel declares zero requirements; a non-empty
    # `Requires:` list makes the `[ ... -eq 0 ]` test (and thus the build) fail.
    pip show krb5 | grep Require | xargs -i sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] ' && \
    # https://stackoverflow.com/questions/122327
    SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \
    # Remove SparkRKernel as it's not supported \
    jupyter-kernelspec remove -f -y sparkrkernel && \
    # Patch Sparkmagic lib to support Custom Certificates \
    # https://github.com/jupyter-incubator/sparkmagic/pull/435/files \
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \
    sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json && \
    # Configure RTC - disable jupyter_collaboration by default
    jupyter labextension disable @jupyter/collaboration-extension
| 173 | + |
# Patch glue kernels to use kernel wrapper
# (overwrites the kernel.json files installed by `install-glue-kernels` above).
COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json
COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json
| 177 | + |
USER root

# Create logging directories for supervisor, clean up build artifacts, and
# generate OSS license/compliance files into the user's home directory.
# FIX: removed the blank line that sat inside the `\` continuation chain
# (empty continuation lines are deprecated and break older Dockerfile parsers),
# and corrected the copy-pasted comment above the DB root directory.
RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \
    chmod a+rw $SAGEMAKER_LOGGING_DIR && \
    mkdir -p ${STUDIO_LOGGING_DIR} && \
    chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR} && \
    # Clean up CodeEditor artifacts
    rm -rf /etc/code-editor && \
    # Create supervisord runtime directory
    mkdir -p /var/run/supervisord && \
    chmod a+rw /var/run/supervisord && \
    # Create root directory for DB
    mkdir -p $DB_ROOT_DIR && \
    chmod a+rw $DB_ROOT_DIR && \
    # Fetch and run the OSS compliance generator; `-f` makes curl fail on HTTP errors
    # instead of feeding an error page to unzip.
    HOME_DIR="/home/${NB_USER}/licenses" \
    && mkdir -p ${HOME_DIR} \
    && curl -fSL -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python \
    && rm -rf ${HOME_DIR}/oss_compliance*
| 204 | + |
# Explicitly disable BuildKit for SM Studio Docker functionality
ENV DOCKER_BUILDKIT=0
# Put the conda env first on PATH for interactive shells and child processes.
ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH"
WORKDIR "/home/${NB_USER}"
ENV SHELL=/bin/bash
# Point OpenSSL at the provider modules installed by the FIPS build earlier
# (`make install_fips` with --prefix=/opt/conda).
ENV OPENSSL_MODULES=/opt/conda/lib64/ossl-modules/
# Final image runs as the non-root SageMaker user.
USER $MAMBA_USER