ARG TAG_FOR_BASE_MICROMAMBA_IMAGE
FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE

ARG CUDA_MAJOR_MINOR_VERSION=''
ARG ENV_IN_FILENAME
ARG ARG_BASED_ENV_IN_FILENAME
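
# Illustrative build invocation; the tag, file names, and CUDA version below are
# placeholders, not the values any particular build pipeline uses:
#   docker build . \
#     --build-arg TAG_FOR_BASE_MICROMAMBA_IMAGE=jammy \
#     --build-arg ENV_IN_FILENAME=gpu.env.in \
#     --build-arg ARG_BASED_ENV_IN_FILENAME=gpu.arg_based_env.in \
#     --build-arg CUDA_MAJOR_MINOR_VERSION=11.8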

ARG AMZN_BASE="/opt/amazon/sagemaker"
ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging"

ARG NB_USER="sagemaker-user"
ARG NB_UID=1000
ARG NB_GID=100

ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/"
ENV STUDIO_LOGGING_DIR="/var/log/studio/"

USER root
RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \
    groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \
    # Update the expected value of MAMBA_USER for the
    # _entrypoint.sh consistency check.
    echo "${NB_USER}" > "/etc/arg_mamba_user" && \
    :
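# (The trailing ":" is a shell no-op; it lets every real command in the RUN
# chain end uniformly with "&& \".)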
ENV MAMBA_USER=$NB_USER
ENV USER=$NB_USER

RUN apt-get update && \
    apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential openssh-client nano && \
    # Install tzdata but leave the default time zone as UTC; packages such as pandas need the tzdata database to handle time zones correctly.
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata krb5-user libkrb5-dev libsasl2-dev libsasl2-modules && \
    chmod g+w /etc/passwd && \
    # Grant password-less sudo to all users.
    echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
    touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \
    # Note that we deliberately do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our
    # images could not run `apt-get install` without first running `apt-get update`, which would hamper customizability.
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    sudo ./aws/install && \
    rm -rf aws awscliv2.zip && \
    :
RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile
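# _activate_current_env.sh ships with the micromamba base image; sourcing it
# from /etc/profile activates the current conda environment in login shells.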

USER $MAMBA_USER
COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/

# Make sure that $ENV_IN_FILENAME ends with a newline before the `tee` command runs. Otherwise, the first
# appended line will be joined onto the file's last line and the merged env spec will be corrupted.
RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \
    then echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \
    else envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \
    fi
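# For illustration (hypothetical file contents): if the ARG-based file held a line such as
#   cuda-version=${CUDA_MAJOR_MINOR_VERSION}
# envsubst would render it with the build arg's value (e.g. `cuda-version=11.8`)
# before it is appended to /tmp/$ENV_IN_FILENAME.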

# CONDA_OVERRIDE_CUDA makes the solver act as if a driver for this CUDA version is present
# (it overrides conda's __cuda virtual package), so GPU builds can resolve CUDA-enabled packages.
ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION
RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
    micromamba clean --all --yes --force-pkgs-dirs && \
    rm -rf /tmp/*.in


# Tell the micromamba image to activate the base environment for the remaining RUN instructions.
ARG MAMBA_DOCKERFILE_ACTIVATE=1
RUN sudo ln -s $(which python3) /usr/bin/python

# Install the Glue kernels and move them to the shared kernel directory.
# Also patch the Glue base kernel so that Studio's background code does not silently start a session.
RUN install-glue-kernels && \
    SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \
    mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \
    sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \
    "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py"
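# The sed above inserts a guard into BaseKernel.py just before its
# `if not store_history and (` line: when the submitted code contains
# "sm_analytics_runtime_check" (apparently Studio's background runtime probe),
# the kernel completes the cell immediately instead of provisioning a Glue session.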

# Patch the Glue kernel specs to use the kernel wrapper.
COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json
COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json

USER root
# Fetch the AWS OSS-compliance tooling and generate license attribution files under the user's home.
RUN HOME_DIR="/home/${NB_USER}/licenses" \
    && mkdir -p ${HOME_DIR} \
    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
    && chmod +x /usr/local/bin/testOSSCompliance \
    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python \
    && rm -rf ${HOME_DIR}/oss_compliance*

# Merge in OS directory tree contents.
RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR}
COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/
RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \
    rm -rf ${DIRECTORY_TREE_STAGE_DIR}
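# rsync -a overlays the staged tree onto / while preserving the permissions,
# ownership, and symlinks recorded under dirs/.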

# Create logging directories for supervisor.
RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \
    chmod a+rw $SAGEMAKER_LOGGING_DIR && \
    mkdir -p ${STUDIO_LOGGING_DIR} && \
    chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR}

# Create the supervisord runtime directory.
RUN mkdir -p /var/run/supervisord && \
    chmod a+rw /var/run/supervisord

USER $MAMBA_USER
ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH"
WORKDIR "/home/${NB_USER}"

# Install the krb5 Python bindings.
# Make sure installing them adds or updates no other dependencies.
RUN pip install "krb5>=0.5.1,<0.6" && \
    pip show krb5 | grep Require | xargs -i sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] '
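# `pip show krb5` emits `Requires:` and `Required-by:` lines; for each matched
# line the test asserts that the field after the colon is empty, so the build
# fails (xargs exits non-zero) if either field is ever non-empty.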

# https://stackoverflow.com/questions/122327
RUN SYSTEM_PYTHON_PATH=$(python3 -c "import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \
    # Remove SparkRKernel as it's not supported
    jupyter-kernelspec remove -f -y sparkrkernel && \
    # Patch the Sparkmagic lib to support custom certificates
    # https://github.com/jupyter-incubator/sparkmagic/pull/435/files
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \
    cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \
    sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
    sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json
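# The sed edits above point both SparkMagic kernel specs at the conda
# interpreter and rename their display names to "SparkMagic Spark" and
# "SparkMagic PySpark", likely to keep them distinguishable from the Glue kernels.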

ENV SHELL=/bin/bash