diff --git a/cluster_setup.md b/cluster_setup.md
new file mode 100644
index 0000000..5e97f34
--- /dev/null
+++ b/cluster_setup.md
@@ -0,0 +1,19 @@
+Our cluster configuration uses Docker host networking. A series of scripts brings up the Docker containers that make up our cluster. You will likely need to tailor these scripts to the needs of your configuration.
+
+We have several scripts:
+spark/docker/start_master_host.sh brings up the Spark master container using host networking.
+spark/docker/start_worker_host.sh brings up the Spark worker container using host networking.
+spark/docker/start_launcher_host.sh brings up the Spark launcher container using host networking. This is the container from which our run_tpch.sh launches the benchmark.
+dikeHDFS/start_server_host.sh brings up the container with HDFS and NDP.
+
+There is a config file called spark/spark.config. It holds the addresses and hostnames needed by the scripts above. You need to modify it for your configuration. There is an example in our repo.
+
+You also need to configure dikeHDFS/start_server_host.sh with your IP address. Change the line with --add-host=dikehdfs to include your storage server's IP address.
+
+As an example, in our configuration we typically follow this sequence:
+1) From the master node, run start_master_host.sh and start_launcher_host.sh.
+2) On each worker node, run start_worker_host.sh 1 8.
+3) Note that the "1 8" above is the number of workers followed by the number of cores to use.
+4) Launch the NDP server via dikeHDFS/start_server_host.sh.
+
+
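Note on the spark.config referenced above: the key=value parsing removed from the bridge-network scripts further down in this patch (start-launcher.sh, start-worker.sh) greps for DOCKER_HOSTS, LAUNCHER_IP, and WORKER_IP entries, and expands the comma-separated DOCKER_HOSTS list into one --add-host flag per entry, which means each entry should use Docker's name:ip form. A minimal sketch, assuming that same format holds for the host-networking scripts; the hostnames and addresses below are placeholders, not values from this repo (take the real layout from the example in the repo):

    # spark/spark.config (sketch -- hostnames and addresses are placeholders)
    DOCKER_HOSTS=sparkmaster:192.168.1.10,sparkworker:192.168.1.11,dikehdfs:192.168.1.20
    LAUNCHER_IP=192.168.1.10
    WORKER_IP=192.168.1.11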
diff --git a/demo.sh b/demo.sh
index 2cf381f..2886671 100755
--- a/demo.sh
+++ b/demo.sh
@@ -4,22 +4,22 @@
 printf "\nNext Test: Spark TPC-H query with HDFS storage and with no pushdown\n"
 read -n 1 -s -r -p "Press any key to continue with test."
 cd benchmark/tpch
-./run_tpch.sh -t 6 -ds ndp --protocol ndphdfs
+./run_tpch.sh --local -t 6 -ds ndp --protocol ndphdfs
 printf "\nTest Complete: Spark TPC-H query with HDFS storage and with no pushdown\n"
 
 printf "\nNext Test: Spark TPC-H query with HDFS storage and with pushdown enabled.\n"
 read -n 1 -s -r -p "Press any key to continue with test."
-./run_tpch.sh -t 6 -ds ndp --protocol ndphdfs --pushdown
+./run_tpch.sh --local -t 6 -ds ndp --protocol ndphdfs --pushdown
 printf "\nTest Complete: Spark TPC-H query with HDFS storage and with pushdown enabled.\n"
 
-printf "\nNext Test: Spark TPC-H query with S3 storage and with no pushdown\n"
-read -n 1 -s -r -p "Press any key to continue with test."
-./run_tpch.sh -t 6 -ds ndp --protocol s3
-printf "Test Complete: Spark TPC-H query with S3 storage and with no pushdown\n"
+#printf "\nNext Test: Spark TPC-H query with S3 storage and with no pushdown\n"
+#read -n 1 -s -r -p "Press any key to continue with test."
+#./run_tpch.sh --local -t 6 -ds ndp --protocol s3
+#printf "Test Complete: Spark TPC-H query with S3 storage and with no pushdown\n"
 
-printf "\nNext Test: Spark TPC-H query with S3 and with pushdown enabled.\n"
-read -n 1 -s -r -p "Press any key to continue with test."
-./run_tpch.sh -t 6 -ds ndp --protocol s3 --pushdown
-printf "\nTest Complete: Spark TPC-H query with S3 and with pushdown enabled.\n"
+#printf "\nNext Test: Spark TPC-H query with S3 and with pushdown enabled.\n"
+#read -n 1 -s -r -p "Press any key to continue with test."
+#./run_tpch.sh --local -t 6 -ds ndp --protocol s3 --pushdown
+#printf "\nTest Complete: Spark TPC-H query with S3 and with pushdown enabled.\n"
 
 
diff --git a/dikeHDFS b/dikeHDFS
index 5c7ac77..2a788cf 160000
--- a/dikeHDFS
+++ b/dikeHDFS
@@ -1 +1 @@
-Subproject commit 5c7ac77effe5f7350699061c7bdae23d565ed2dd
+Subproject commit 2a788cf9bb52dbe4115a3526ca20d4cb32d35094
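With the S3 legs commented out, the demo now exercises only the HDFS path, and every remaining run_tpch.sh call gains the --local flag (presumably selecting the single-node, bridge-network setup rather than the host-network cluster). The two surviving legs can also be run by hand with exactly the flags demo.sh uses:

    cd benchmark/tpch
    # TPC-H query 6 against HDFS via NDP, baseline (no pushdown)
    ./run_tpch.sh --local -t 6 -ds ndp --protocol ndphdfs
    # Same query with pushdown enabled, for comparison
    ./run_tpch.sh --local -t 6 -ds ndp --protocol ndphdfs --pushdown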
diff --git a/spark/docker/start-launcher.sh b/spark/docker/start-launcher.sh
index cebaa0e..43e5417 100755
--- a/spark/docker/start-launcher.sh
+++ b/spark/docker/start-launcher.sh
@@ -11,78 +11,25 @@
 rm -f "${ROOT_DIR}/volume/status/MASTER*"
 CMD="sleep 365d"
 RUNNING_MODE="daemon"
-START_LOCAL="NO"
-if [ ! -d spark.config ]; then
-  START_LOCAL="YES"
-else
-  DOCKER_HOSTS="$(cat spark.config | grep DOCKER_HOSTS)"
-  IFS='=' read -a IP_ARRAY <<< "$DOCKER_HOSTS"
-  DOCKER_HOSTS=${IP_ARRAY[1]}
-  HOSTS=""
-  IFS=',' read -a IP_ARRAY <<< "$DOCKER_HOSTS"
-  for i in "${IP_ARRAY[@]}"
-  do
-    HOSTS="$HOSTS --add-host=$i"
-  done
-  DOCKER_HOSTS=$HOSTS
-  echo "Docker Hosts: $DOCKER_HOSTS"
-  LAUNCHER_IP="$(cat spark.config | grep LAUNCHER_IP)"
-  IFS='=' read -a IP_ARRAY <<< "$LAUNCHER_IP"
-  LAUNCHER_IP=${IP_ARRAY[1]}
-  echo "LAUNCHER_IP: $LAUNCHER_IP"
-fi
-DOCKER_ID=""
 if [ $RUNNING_MODE = "interactive" ]; then
   DOCKER_IT="-i -t"
 fi
 
 # --cpuset-cpus="9-12" \
-if [ ${START_LOCAL} == "YES" ]; then
-  DOCKER_RUN="docker run ${DOCKER_IT} --rm \
+DOCKER_RUN="docker run ${DOCKER_IT} --rm \
     -p 5006:5006 \
     --name sparklauncher \
     --network dike-net \
     -e MASTER=spark://sparkmaster:7077 \
     -e SPARK_CONF_DIR=/conf \
     -e SPARK_PUBLIC_DNS=localhost \
-    --mount type=bind,source=$(pwd)/spark,target=/spark \
-    --mount type=bind,source=$(pwd)/build,target=/build \
-    --mount type=bind,source=$(pwd)/examples,target=/examples \
-    --mount type=bind,source=$(pwd)/../data,target=/tpch-data \
-    --mount type=bind,source=$(pwd)/../dikeHDFS,target=/dikeHDFS \
-    --mount type=bind,source=$(pwd)/../benchmark/tpch,target=/tpch \
-    --mount type=bind,source=$(pwd)/../pyNdp,target=/pyNdp \
-    --mount type=bind,source=$(pwd)/../pushdown-datasource/pushdown-datasource,target=/pushdown-datasource \
-    -v $(pwd)/conf/master:/conf \
-    -v ${ROOT_DIR}/build/.m2:${DOCKER_HOME_DIR}/.m2 \
-    -v ${ROOT_DIR}/build/.gnupg:${DOCKER_HOME_DIR}/.gnupg \
-    -v ${ROOT_DIR}/build/.sbt:${DOCKER_HOME_DIR}/.sbt \
-    -v ${ROOT_DIR}/build/.cache:${DOCKER_HOME_DIR}/.cache \
-    -v ${ROOT_DIR}/build/.ivy2:${DOCKER_HOME_DIR}/.ivy2 \
-    -v ${ROOT_DIR}/volume/status:/opt/volume/status \
-    -v ${ROOT_DIR}/volume/logs:/opt/volume/logs \
-    -v ${ROOT_DIR}/bin/:${DOCKER_HOME_DIR}/bin \
-    -e "AWS_ACCESS_KEY_ID=${USER_NAME}" \
-    -e "AWS_SECRET_ACCESS_KEY=admin123" \
-    -e "AWS_EC2_METADATA_DISABLED=true" \
-    -e RUNNING_MODE=${RUNNING_MODE} \
-    -u ${USER_ID} \
-    spark-run-${USER_NAME} ${CMD}"
-else
-  DOCKER_RUN="docker run ${DOCKER_IT} --rm \
-    -p 5006:5006 \
-    --name sparklauncher \
-    --network dike-net --ip ${LAUNCHER_IP} ${DOCKER_HOSTS} \
-    -e MASTER=spark://sparkmaster:7077 \
-    -e SPARK_CONF_DIR=/conf \
-    -e SPARK_PUBLIC_DNS=localhost \
     -e SPARK_MASTER="spark://sparkmaster:7077" \
-    -e SPARK_DRIVER_HOST=${LAUNCHER_IP} \
     --mount type=bind,source=$(pwd)/spark,target=/spark \
     --mount type=bind,source=$(pwd)/build,target=/build \
     --mount type=bind,source=$(pwd)/examples,target=/examples \
     --mount type=bind,source=$(pwd)/../dikeHDFS,target=/dikeHDFS \
     --mount type=bind,source=$(pwd)/../benchmark/tpch,target=/tpch \
+    --mount type=bind,source=$(pwd)/../data,target=/tpch-data \
     --mount type=bind,source=$(pwd)/../pushdown-datasource/pushdown-datasource,target=/pushdown-datasource \
     -v $(pwd)/conf/master:/conf \
     -v ${ROOT_DIR}/build/.m2:${DOCKER_HOME_DIR}/.m2 \
@@ -98,11 +45,10 @@ else
     -e "AWS_EC2_METADATA_DISABLED=true" \
     -e RUNNING_MODE=${RUNNING_MODE} \
     -u ${USER_ID} \
-    spark-run-${USER_NAME} ${CMD}"
-fi
-echo "mode: $RUNNING_MODE"
+    v${DIKE_VERSION}-spark-run-${USER_NAME} ${CMD}"
+
 if [ $RUNNING_MODE = "interactive" ]; then
     eval "${DOCKER_RUN}"
 else
     eval "${DOCKER_RUN}" &
-fi
\ No newline at end of file
+fi
diff --git a/spark/docker/start-master.sh b/spark/docker/start-master.sh
index 975e69b..265f6b3 100755
--- a/spark/docker/start-master.sh
+++ b/spark/docker/start-master.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 # Include the setup for our cached local directories. (.m2, .ivy2, etc)
+source docker/spark_version
 source docker/setup.sh
 
 mkdir -p "${ROOT_DIR}/volume/logs"
@@ -37,8 +38,8 @@ else
     fi
 fi
 echo "removing work and logs"
-rm -rf build/spark-3.1.2/work/
-rm -rf build/spark-3.1.2/logs/
+rm -rf build/spark-$SPARK_VERSION/work/
+rm -rf build/spark-$SPARK_VERSION/logs/
 
 # --cpuset-cpus="9-12" \
 if [ ${START_LOCAL} == "YES" ]; then
@@ -67,7 +68,7 @@
     -v ${ROOT_DIR}/bin/:${DOCKER_HOME_DIR}/bin \
     -e RUNNING_MODE=${RUNNING_MODE} \
     -u ${USER_ID} \
-    spark-run-${USER_NAME} ${CMD}"
+    v${DIKE_VERSION}-spark-run-${USER_NAME} ${CMD}"
 else
     DOCKER_RUN="docker run ${DOCKER_IT} --rm \
     -p 4040:4040 -p 6066:6066 -p 7077:7077 -p 8080:8080 -p 5005:5005 -p 18080:18080 \
@@ -98,7 +99,7 @@
     -e "AWS_EC2_METADATA_DISABLED=true" \
     -e RUNNING_MODE=${RUNNING_MODE} \
     -u ${USER_ID} \
-    spark-run-${USER_NAME} ${CMD}"
+    v${DIKE_VERSION}-spark-run-${USER_NAME} ${CMD}"
 fi
 if [ $RUNNING_MODE = "interactive" ]; then
     eval "${DOCKER_RUN}"
diff --git a/spark/docker/start-worker-host.sh b/spark/docker/start-worker-host.sh
index f5036f8..2989b3c 100755
--- a/spark/docker/start-worker-host.sh
+++ b/spark/docker/start-worker-host.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+source docker/spark_version
 source docker/setup.sh
 
 mkdir -p "${ROOT_DIR}/volume/logs"
@@ -20,8 +20,8 @@ if [ "$#" -ge 2 ] ; then
     CORES=$2
 fi
 echo "removing work and logs"
-rm -rf build/spark-3.1.2/work/
-rm -rf build/spark-3.1.2/logs/
+rm -rf build/spark-$SPARK_VERSION/work/
+rm -rf build/spark-$SPARK_VERSION/logs/
 
 echo "Workers: $WORKERS"
 echo "Cores: $CORES"
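The scripts above and below now source docker/spark_version instead of hard-coding spark-3.1.2 paths, and the run image tag gains a v${DIKE_VERSION}- prefix. That file is not shown in this patch; judging from the variables it must define, it is a sourceable shell fragment along these lines. The SPARK_VERSION value matches the previously hard-coded 3.1.2; the DIKE_VERSION value is an illustrative assumption, not taken from the repo:

    # docker/spark_version (sketch -- DIKE_VERSION value is an assumption)
    SPARK_VERSION=3.1.2   # selects build/spark-$SPARK_VERSION/ work and log dirs
    DIKE_VERSION=1.0      # image tag becomes v${DIKE_VERSION}-spark-run-${USER_NAME}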
diff --git a/spark/docker/start-worker.sh b/spark/docker/start-worker.sh
index 634dd5a..643b215 100755
--- a/spark/docker/start-worker.sh
+++ b/spark/docker/start-worker.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+source docker/spark_version
 source docker/setup.sh
 
 mkdir -p "${ROOT_DIR}/volume/logs"
@@ -20,27 +20,11 @@ if [ "$#" -ge 2 ] ; then
    CORES=$2
 fi
 echo "removing work and logs"
-rm -rf build/spark-3.1.2/work/
-rm -rf build/spark-3.1.2/logs/
+rm -rf build/spark-$SPARK_VERSION/work/
+rm -rf build/spark-$SPARK_VERSION/logs/
 
 echo "Workers: $WORKERS"
 echo "Cores: $CORES"
-DOCKER_HOSTS="$(cat spark.config | grep DOCKER_HOSTS)"
-IFS='=' read -a IP_ARRAY <<< "$DOCKER_HOSTS"
-DOCKER_HOSTS=${IP_ARRAY[1]}
-HOSTS=""
-IFS=',' read -a IP_ARRAY <<< "$DOCKER_HOSTS"
-for i in "${IP_ARRAY[@]}"
-do
-  HOSTS="$HOSTS --add-host=$i"
-done
-DOCKER_HOSTS=$HOSTS
-echo "Docker Hosts: $DOCKER_HOSTS"
-
-WORKER_IP="$(cat spark.config | grep WORKER_IP)"
-IFS='=' read -a IP_ARRAY <<< "$WORKER_IP"
-WORKER_IP=${IP_ARRAY[1]}
-echo "WORKER_IP: $WORKER_IP"
 
 if [ $RUNNING_MODE = "interactive" ]; then
   DOCKER_IT="-i -t"
@@ -50,7 +34,7 @@ fi
 DOCKER_RUN="docker run ${DOCKER_IT} --rm -p 8081:8081 \
     --expose 7012 --expose 7013 --expose 7014 --expose 7015 --expose 8881 \
     --name sparkworker \
-    --network dike-net --ip ${WORKER_IP} ${DOCKER_HOSTS} \
+    --network dike-net \
     -e SPARK_CONF_DIR=/conf \
     -e SPARK_WORKER_INSTANCES=$WORKERS \
     -e SPARK_WORKER_CORES=$CORES \
@@ -72,7 +56,7 @@
     -v ${ROOT_DIR}/bin/:${DOCKER_HOME_DIR}/bin \
     -e RUNNING_MODE=${RUNNING_MODE} \
     -u ${USER_ID} \
-    spark-run-${USER_NAME} ${CMD}"
+    v${DIKE_VERSION}-spark-run-${USER_NAME} ${CMD}"
 
 
 if [ $RUNNING_MODE = "interactive" ]; then
diff --git a/spark/start.sh b/spark/start.sh
index e554734..f57bed3 100755
--- a/spark/start.sh
+++ b/spark/start.sh
@@ -2,4 +2,7 @@
 
 ./docker/start-master.sh && sleep 5 && ./docker/start-worker.sh
 
-sleep 5
\ No newline at end of file
+sleep 5
+./docker/start-launcher.sh
+
+sleep 5
diff --git a/start_hdfs.sh b/start_hdfs.sh
index ea1e4d3..ee46ff9 100755
--- a/start_hdfs.sh
+++ b/start_hdfs.sh
@@ -19,7 +19,7 @@
 echo $CMDSTATUS
 if [ $CMDSTATUS -ne 0 ]; then
     pushd benchmark/tpch
     echo "Initialize tpch CSV database in hdfs"
-    ./run_tpch.sh --mode initCsv --protocol hdfs || (echo "*** failed tpch init of CSV for hdfs $?" ; exit 1)
+    ./run_tpch.sh --local --mode initCsv --protocol hdfs || (echo "*** failed tpch init of CSV for hdfs $?" ; exit 1)
     popd
 fi
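Taken together, spark/start.sh now brings up master, worker, and launcher on one node, and start_hdfs.sh initializes the TPC-H CSV data in HDFS on its first run, with --local matching the other run_tpch.sh call sites in this patch. A plausible single-node bring-up sequence, assuming each script is run from the directory it expects (start.sh from spark/, the others from the repo root):

    cd spark && ./start.sh && cd ..   # master, worker, then launcher, 5 s apart
    ./start_hdfs.sh                   # HDFS/NDP server; runs initCsv if the data is absent
    ./demo.sh                         # interactive TPC-H pushdown demo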