-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
160 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/usr/bin/env python | ||
|
||
from __future__ import print_function | ||
|
||
import moxing as mox | ||
import time | ||
import os | ||
|
||
if __name__ == '__main__': | ||
data_dir = '/cache/data_dir' | ||
start = time.time() | ||
data_url = os.environ['DLS_DATA_URL'] | ||
print('INFO: Start copying data from the blob storage ' + data_url + ' into SSD under ' + data_dir) | ||
mox.file.copy_parallel(data_url, data_dir) | ||
print('INFO: Copying completes! The copy task takes: ' + str(time.time() - start) + ' seconds') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash
#
# Job entry point: fetch the raw data set, run the ImageNet pre-processing
# script on it, and upload the contents of /cache/train_dir.
#
# Steps run strictly in sequence; the script stops at the first failing step so
# that a failed download or pre-processing run is reported through the exit
# status instead of being masked by a later step that happens to succeed.
set -euo pipefail

CROSSBOW_HOME=/home/work/user-job-dir/Crossbow

# -p creates missing parents and is a no-op when the directory already exists.
mkdir -p /cache/train_dir

# Presumably populates /cache/data_dir (the input of the next step) -- confirm
# against download_data.py.
python "$CROSSBOW_HOME/scripts/huawei/download_data.py"

# Arguments: <input directory> <output directory>.
bash "$CROSSBOW_HOME/scripts/datasets/imagenet/prepare-imagenet.sh" /cache/data_dir /cache/train_dir

python "$CROSSBOW_HOME/scripts/huawei/upload_data.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash
#
# Job entry point: refresh and rebuild Crossbow inside the container, fetch the
# data set, install the ImageNet metadata files, run the ResNet-50 script and
# upload the results.

CROSSBOW_HOME=/crossbow
JOB_SCRIPTS=/home/work/user-job-dir/Crossbow-scripts

# Update the checkout and rebuild the Java and native parts. The chain stops at
# the first failing command, but the script itself carries on afterwards.
cd "${CROSSBOW_HOME}" &&
    git pull &&
    mvn package &&
    cd clib-multigpu &&
    ./genmakefile.sh &&
    make -j "$(nproc)" &&
    cd .. &&
    ./scripts/build.sh

python "${CROSSBOW_HOME}/scripts/huawei/download_data.py"

# Move the test and train metadata files shipped with the job scripts into place.
for split in test train; do
    mv "${JOB_SCRIPTS}/imagenet-${split}.metadata" "${CROSSBOW_HOME}/data/imagenet/imagenet-${split}.metadata"
done

bash "${JOB_SCRIPTS}/resnet-50.sh"

python "${CROSSBOW_HOME}/scripts/huawei/upload_data.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/usr/bin/env python | ||
|
||
from __future__ import print_function | ||
|
||
import moxing as mox | ||
import os | ||
|
||
if __name__ == '__main__': | ||
train_dir = '/cache/train_dir' | ||
train_url = os.environ['DLS_TRAIN_URL'] | ||
print('INFO: Copy trained model to ' + train_url) | ||
mox.file.copy_parallel(train_dir, train_url) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Crossbow image for Huawei ModelArts custom-image jobs.
# ModelArts example: https://github.com/huawei-clouds/modelarts-example/blob/master/CustomImage
FROM swr.cn-north-1.myhuaweicloud.com/eiwizard/custom-gpu-cuda9-inner-moxing-cp36:1.1 AS base

# The pip source has been pre-configured to an internal source. Roll back to public sources.
# -f keeps the build going if the base image ships no pip.conf.
RUN rm -f "$HOME/.pip/pip.conf"

# Fix the source lists
RUN sed -i 's/cmc-cd-mirror.rnd.huawei.com/security.ubuntu.com/g' /etc/apt/sources.list

# Replace the standard Ubuntu sources with Aliyun mirrors if building in mainland China
RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list \
    && sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list

# Add the NVIDIA package repo and fetch key
# Reference: https://gitlab.com/nvidia/cuda/blob/ubuntu16.04/9.0/base/Dockerfile#L4
# The exported key is checked against a known SHA-256 sum before the repos are added.
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates apt-transport-https gnupg-curl && \
    rm -rf /var/lib/apt/lists/* && \
    NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \
    NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \
    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \
    apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \
    echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \
    echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
    echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list

# CUDA 9.0 libraries, cuDNN/NCCL (version-pinned) and the Crossbow build tool chain.
# apt-get is used instead of apt: the latter's CLI is not meant for scripts.
RUN apt-get update && apt-get install -y --no-install-recommends \
        apt-utils \
        build-essential \
        cuda9.0 \
        cuda-cublas-9-0 \
        cuda-cufft-9-0 \
        cuda-curand-9-0 \
        cuda-cusolver-9-0 \
        cuda-cusparse-9-0 \
        libcudnn7=7.2.1.38-1+cuda9.0 \
        libcudnn7-dev=7.2.1.38-1+cuda9.0 \
        libnccl2=2.2.13-1+cuda9.0 \
        libnccl-dev=2.2.13-1+cuda9.0 \
        cuda-command-line-tools-9-0 \
        libfreetype6-dev \
        libhdf5-serial-dev \
        libpng12-dev \
        libzmq3-dev \
        pkg-config \
        software-properties-common \
        unzip \
        git \
        wget \
        openjdk-8-jdk \
        maven \
        libboost-all-dev \
        graphviz \
        cmake \
        nasm \
    && rm -rf /var/lib/apt/lists/*

ENV CUDA_HOME=/usr/local/cuda

# OpenBLAS (TODO: install using apt install)
# Cloned to an absolute path so the step does not depend on the base image's working directory.
RUN git clone --progress https://github.com/xianyi/OpenBLAS.git /openblas \
    && cd /openblas \
    && make -j "$(nproc)" \
    && make install
ENV BLAS_HOME=/opt/OpenBLAS
ENV LD_LIBRARY_PATH=$BLAS_HOME/lib:$LD_LIBRARY_PATH

# libjpeg-turbo (TODO: install using apt install)
# Built in place under /libjpeg-turbo, which is what JPEG_HOME points at.
RUN git clone --progress https://github.com/libjpeg-turbo/libjpeg-turbo.git /libjpeg-turbo \
    && cd /libjpeg-turbo \
    && cmake -G"Unix Makefiles" && make -j "$(nproc)"
ENV JPEG_HOME=/libjpeg-turbo
# Both the checkout root and its lib/ directory are put on the library search path.
ENV LD_LIBRARY_PATH=$JPEG_HOME:$JPEG_HOME/lib:$LD_LIBRARY_PATH

# Crossbow
# COPY rather than ADD: this is a plain copy of the build context.
COPY . /crossbow
ENV CROSSBOW_HOME=/crossbow
RUN cd "$CROSSBOW_HOME" \
    && mvn package \
    && cd clib-multigpu \
    && ./genmakefile.sh \
    && make -j "$(nproc)" \
    && cd ../ \
    && ./scripts/build.sh

# Install tensorflow-gpu 1.12.0 in the conda environment (pip has been redirected to conda pip).
# The Tsinghua index below is for builds in mainland China; elsewhere, use instead:
# RUN pip install --no-cache-dir tensorflow-gpu==1.12.0
RUN pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple tensorflow-gpu==1.12.0

WORKDIR /