-
Notifications
You must be signed in to change notification settings - Fork 1
/
newjob.sh
executable file
·126 lines (110 loc) · 2.91 KB
/
newjob.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#!/bin/bash
# (bash, not sh: this script relies on [[ ]], the "function" keyword and
#  {1..80} brace expansion, none of which a strict POSIX sh provides)
# =============================================================================
#
# This file creates a new job script whose header is a block of #SBATCH
# directives.
# It looks inside "./jobs" subdir for the last job and increments it
# call it like this:
#
# $ ./newjob.sh <script_name> [hours epochs split mem]
#
#   script_name  python script to run, relative to the current directory
#   hours        wall time in whole hours        (default 24)
#   epochs       value stored in EPOCHS          (default 10)
#   split        value stored in S_TRAIN         (default 0)
#   mem          value for --mem-per-cpu         (default 4000)
#
# =============================================================================
# PYTHON SCRIPT FILENAME GOES IN ARGUMENT 1
if [[ -z $1 ]]; then
  # Report on stderr and exit non-zero so callers can detect the failure.
  echo "Add input script. Exiting." >&2
  exit 1
fi
script=$1
# WALL TIME: argument 2 is a number of hours; anything empty or not greater
# than zero keeps the 24 hour default
walltime=24:00:00
if [[ -n $2 && $2 -gt 0 ]]; then
  walltime=$2:00:00
fi
# EPOCHS: argument 3, or 10 when it is missing/empty
EPOCHS=${3:-10}
# S_TRAIN: argument 4, or 0 when it is missing/empty
S_TRAIN=${4:-0}
# MEM (memory per cpu): argument 5 when it is greater than zero, else 4000
MEM=4000
if [[ -n $5 && $5 -gt 0 ]]; then
  MEM=$5
fi
# ---- fixed job settings (edit here) -----------------------------------------
# which job events trigger a mail message
recalls="ALL"
# cuda module that the job loads
cuda="cuda/10.1.168"
# requested resources: node count, tasks per node, gpus per node
nodes="1"
ppn="28"
gpus="1"
# name of the virtual environment directory (kept under '.venvs')
which_venv="tf-gpu2.1-3.7"
# =============================================================================
# Shouldn't edit what follows unless we change stuff
# =============================================================================
# MATCH REPOSITORY TREE HERE
# Every location below is derived from the directory the script is run from.
path=$(pwd)
job_logs=${path}/logs
job_path=${path}/jobs
job_suffix="-train"
modelpath=${path}/saved_model
DATADIR=~/nsynth

#######################################
# Print the number to use for the next job.
# Arguments: $1 - directory holding the existing job files
# Outputs:   one more than the highest N among files named "N-*.sh" in $1,
#            or 0 when there is no such file (or no such directory)
#######################################
next_job_prefix() {
  local dir=$1
  local f base num
  local max=-1
  for f in "${dir}"/*-*.sh; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e $f ]] || continue
    base=${f##*/}
    num=${base%%-*}
    # Ignore files whose prefix is not a plain number (readme and the like).
    [[ $num =~ ^[0-9]+$ ]] || continue
    # 10# forces base 10 so a prefix such as "08" is not read as octal.
    if (( 10#$num > max )); then
      max=$((10#$num))
    fi
  done
  printf '%s\n' "$((max + 1))"
}

# GET LATEST PREFIX AND INCREMENT IT
job_prefix=$(next_job_prefix "${job_path}")

env=/bin/bash # job environment
job_name=${job_prefix}${job_suffix}
job_output=${job_name}
venv=${path}/../.venvs/${which_venv}/bin/activate
script_path=${path}/${script}
# Write one directive line: the "#SBATCH" marker, a space, then the option
# text given as the first argument.
printSBATCH() {
  printf '#SBATCH %s\n' "$1"
}
# SET CLUSTER PATH NAME (written into the job file as the --account value)
name=PAS1309
j="${job_path}/${job_name}.sh"

# Nothing else in this script creates the job or log directories, so make
# sure they exist before writing into them.
if ! mkdir -p -- "${job_path}" "${job_logs}"; then
  echo "Could not create ${job_path} or ${job_logs}. Exiting." >&2
  exit 1
fi

# Write the whole job file through a single quoted redirection: the path may
# contain spaces, and a failure to open the file is caught once here instead
# of being ignored on every line.
if ! {
  printf "%s\n" "#!/bin/bash"
  printSBATCH "--time=${walltime}"
  printSBATCH "--nodes=${nodes} --ntasks-per-node=${ppn} --gpus-per-node=${gpus} --gpu_cmode=shared"
  printSBATCH "--account=${name} "
  printSBATCH "--job-name=${job_output}"
  printSBATCH "--mem-per-cpu=${MEM}"
  printSBATCH "--mail-type=${recalls}"
  # EMAIL is not assigned anywhere in this script; it comes from the
  # environment of the caller.
  printSBATCH "--mail-user=${EMAIL}"
  printSBATCH "--error=${job_logs}/${job_name}-e.txt"
  printSBATCH "--output=${job_logs}/${job_name}-o.txt"
  # separator line: "#" followed by 80 "=" characters
  printf "#"
  printf "=%.0s" {1..80}
  printf "\n"
  printf "%s %s\n" "# job created on" "$(date)"
  printf "%s\n" "# with these arguments:"
  printf "%s\n" "# $*"
  printf "%s\n" "#"
  printf "%s %s\n" "source" "${venv}"
  printf "%s %s %s\n" "module" "load" "${cuda}"
  printf "%s %s %s %s %s %s %s\n" "python" "${script_path}" "$job_prefix" "$modelpath" "$EPOCHS" "$S_TRAIN" "$DATADIR"
} > "$j"; then
  echo "Could not write job file: $j" >&2
  exit 1
fi

echo "Finished making job: $j"
echo "with these arguments: $*"