Skip to content

Commit cdc5362

Browse files
committed
Updates to Jeff's test scripts on Falcon - supports numactl bindings.
1 parent 48283d0 commit cdc5362

File tree

3 files changed

+42
-12
lines changed

3 files changed

+42
-12
lines changed

eval/jy_scripts_falcon/1_multi_test.sh

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,32 @@ fi
88
gpus=(1)
99
#scale factors
1010
#msize=(15)
11-
msize=(15 16 17 18 19 20 21 22 23 24 25 26 27)
11+
#msize=(15 16 17 18 19 20 21 22 23 24 25 26 27)
12+
msize=(22 27)
1213
#square root of nodes
13-
snodes=(1 2 3 4 5 6 7 8)
14+
snodes=(8 7 6 5)
1415
#snodes=(3 4)
1516
#BTR
1617
btrs=(64)
1718
#btcs=(128 256 512)
1819

19-
declare -a g500ver=("g500_CompressionNoOptimizedNoOMP" "g500_CompressionOptimizedNoOMP" "g500_NoCompressionNoOptimizedNoOMP" "g500_NoCompressionOptimizedNoOMP" "g500_CompressionNoOptimizeOMP" "g500_CompressionOptimizedOMP" "g500_NoCompressionNoOptimizedOMP" "g500_NoCompressionOptimizedOMP")
20+
#All test variations
21+
#declare -a g500ver=("g500_CompressionNoOptimizedNoOMP" "g500_CompressionOptimizedNoOMP" "g500_NoCompressionNoOptimizedNoOMP" "g500_NoCompressionOptimizedNoOMP" "g500_CompressionNoOptimizeOMP" "g500_CompressionOptimizedOMP" "g500_NoCompressionNoOptimizedOMP" "g500_NoCompressionOptimizedOMP")
22+
23+
#Restrict tests for ScoreP
24+
#declare -a g500ver=("g500_CompressionNoOptimizedNoOMP" "g500_CompressionOptimizedNoOMP" "g500_NoCompressionNoOptimizedNoOMP" "g500_NoCompressionOptimizedNoOMP")
25+
#declare -a g500ver=("scp_CompOptOMP" "scp_CompOptNoOMP" "scp_NoCompOptNoOMP" "scp_NoCompOptOMP" "scp_CompNoOptOMP" "scp_CompNoOptNoOMP" "scp_NoCompNoOptNoOMP" "scp_NoCompNoOptOMP")
26+
declare -a g500ver=("scp_CompOptOMP")
2027

2128
for g500 in "${g500ver[@]}"
2229
do
2330
# Report the version being installed in THIS iteration. The loop variable is
# $g500; $g500ver is the whole array, and an unsubscripted array reference
# expands to element 0 only, so it would print the first version every time.
echo "Copying over a new g500 version, ${g500}"
2431
cd ../cpu_2d/
2532
cp -f g500_versions/${g500} g500
33+
#Sleep to give NFS time to sync the newly copied g500 binary across nodes
34+
sleep 20
2635
cd -
27-
log="${g500}_mar16"
36+
log="${g500}_may16_numa"
2837

2938
for msc in ${msize[@]}
3039
do
@@ -53,15 +62,27 @@ do
5362

5463
for btr in ${btrs[@]}
5564
do
65+
# score-p
66+
# before script execution:
67+
export G500_ENABLE_RUNTIME_SCALASCA=yes
68+
69+
# pass score-p's $SCOREP_EXPERIMENT_DIRECTORY to mpirun
70+
71+
#mpirun -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -x SCOREP_EXPERIMENT_DIRECTORY=$SCOREP_EXPERIMENT_DIRECTORY -np ${np} -hostfile ${hostfile} --display-map -bynode ./test2.sh ${sf} ${c} "s4-bp128-d4" 64 | tee ${file}
5672
#for btc in ${btcs[@]}
5773
#do
5874
srp=$i
5975
# Total process count passed to 2_test_falcon.sh: the square of $i.
# Uses builtin arithmetic expansion instead of forking `expr` in backticks.
# NOTE(review): assumes $i is a plain decimal integer with no leading zeros
# (a value such as 08 would be parsed as invalid octal) -- confirm against
# the loop that sets $i.
n=$(( i * i ))
6076
logfile="result/falcon_${log}_nd${n}_scale${msc}_btr${btr}_btc${btr}.log"
61-
echo "./2_test_falcon.sh nd$n srp$srp msc$msc gpu$ngpus btr$btr btc$btr $logfile"
77+
scorepdir="scorep_results/falcon_${log}_nd${n}_scale${msc}_btr${btr}.scorep"
78+
if [ "x$G500_ENABLE_RUNTIME_SCALASCA" = "xyes" ]; then
79+
export SCOREP_EXPERIMENT_DIRECTORY=${scorepdir}
80+
echo "SCOREPDIR is ${SCOREP_EXPERIMENT_DIRECTORY}"
81+
fi
82+
echo "./2_test_falcon.sh nd$n srp$srp msc$msc gpu$ngpus btr$btr btc$btr $logfile $scorepdir"
6283
echo ""
63-
./2_test_falcon.sh $n $srp $msc $ngpus $btr $btr $logfile
64-
sleep 2
84+
./2_test_falcon.sh $n $srp $msc $ngpus $btr $btr $logfile $scorepdir
85+
sleep 1
6586
#done
6687

6788
done

eval/jy_scripts_falcon/2_test_falcon.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,21 @@ GPUS=$4
66
BTR=$5
77
BTC=$6
88
LOGFILE=$7
9+
SCOREPDIR=$8
910
HOSTS=~/scripts/hostfile_falcon_64
1011

1112

1213
LD_LIBRARY_PATH=/opt/rh/devtoolset-2/root/usr/lib64:/opt/rh/devtoolset-2/root/usr/lib:/usr/lib64:/usr/lib:/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/lib:/usr/lib64/boost148/
1314
PATH=/opt/rh/devtoolset-2/root/usr/bin:/net/rd6/jyoung9/mpi/ompi184/bin:/usr/bin:/usr/sbin:/bin:/usr/local/bin:/bin:/usr/local/sbin:/usr/local/cuda/bin:/usr/include/boost148/
1415
MPIRUN=/net/rd6/jyoung9/mpi/ompi184/bin/mpirun
1516

17+
#Control for GPU and socket
18+
#CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
19+
#Pin each rank to GPU 0 here; the matching CPU/memory binding to socket 0 is applied via numactl in 3_launch.sh
20+
cudavisible="CUDA_VISIBLE_DEVICES=0"
21+
22+
1623
source /net/rd6/jyoung9/.bashrc
17-
echo "$MPIRUN -v -x PATH=$PATH -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -hostfile $HOSTS -np $N -map-by node --display-map -mca btl openib,self -mca mpi_warn_on_fork 0 ./3_launch.sh $SF $SRP $GPUS $BTR $BTC &> ${LOGFILE}"
18-
$MPIRUN -v -x PATH=$PATH -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -hostfile $HOSTS -np $N -map-by node --display-map -mca btl openib,self -mca mpi_warn_on_fork 0 ./3_launch.sh $SF $SRP $GPUS $BTR $BTC &> ${LOGFILE}
24+
echo "$MPIRUN -v -x $cudavisible -x PATH=$PATH -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -x SCOREP_EXPERIMENT_DIRECTORY=$SCOREPDIR -hostfile $HOSTS -np $N -map-by node --display-map -mca btl openib,self -mca mpi_warn_on_fork 0 ./3_launch.sh $SF $SRP $GPUS $BTR $BTC &> ${LOGFILE}"
25+
$MPIRUN -v -x $cudavisible -x PATH=$PATH -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -x SCOREP_EXPERIMENT_DIRECTORY=$SCOREPDIR -hostfile $HOSTS -np $N -map-by node --display-map -mca btl openib,self -mca mpi_warn_on_fork 0 ./3_launch.sh $SF $SRP $GPUS $BTR $BTC &> ${LOGFILE}
1926
#$MPIRUN -v -x PATH=$PATH -x LD_LIBRARY_PATH=$LD_LIBRARY_PATH -hostfile $HOSTS -np $N -map-by slot --display-map -mca btl openib,self -mca mpi_warn_on_fork 0 ./3_launch.sh $SF $SRP $GPUS $BTR $BTC &> ${LOGFILE}

eval/jy_scripts_falcon/3_launch.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ GPUS=$3
55
BTR=$4
66
BTC=$5
77

8-
export CUDA_VISIBLE_DEVICES=$OMPI_COMM_WORLD_LOCAL_RANK
9-
#echo "../cpu_2d/g500 -s $SF -C $SRP -gpus $GPUS -qs 1 -be s4-bp128-d4 -btr $BTR -btc $BTC"
10-
../cpu_2d/g500 -s $SF -C $SRP -gpus $GPUS -qs 1 -be s4-bp128-d4 -btr $BTR -btc $BTC
8+
export CUDA_VISIBLE_DEVICES=0
9+
NUMAEXEC="numactl --membind=0 --cpunodebind=0"
10+
11+
#echo "$NUMAEXEC ../cpu_2d/g500 -s $SF -C $SRP -gpus $GPUS -qs 1 -be s4-bp128-d4 -btr $BTR -btc $BTC"
12+
$NUMAEXEC ../cpu_2d/g500 -s $SF -C $SRP -gpus $GPUS -qs 1 -be s4-bp128-d4 -btr $BTR -btc $BTC

0 commit comments

Comments
 (0)