From 2c1fb2277a235db6b11fac54d082753e44d35b3e Mon Sep 17 00:00:00 2001 From: Rob Foley Date: Sat, 5 Feb 2022 19:21:50 -0500 Subject: [PATCH] Updated to use spark 3.2.1. Also changed to use a file spark/docker/spark_version to indicate which spark version to download and use. Just change this one file and it will change our spark version. --- benchmark/tpch/build_tpch.sh | 1 + benchmark/tpch/run_tpch.sh | 1 + pushdown-datasource/build.sh | 9 +++++---- pushdown-datasource/pushdown-datasource | 2 +- spark/docker/Dockerfile | 4 +++- spark/docker/build.sh | 7 ++++--- spark/docker/setup.sh | 1 - spark/docker/spark_version | 1 + spark/scripts/build.sh | 4 ++-- 9 files changed, 18 insertions(+), 12 deletions(-) create mode 100644 spark/docker/spark_version diff --git a/benchmark/tpch/build_tpch.sh b/benchmark/tpch/build_tpch.sh index fb68597..c6087a6 100755 --- a/benchmark/tpch/build_tpch.sh +++ b/benchmark/tpch/build_tpch.sh @@ -1,5 +1,6 @@ #!/bin/bash # Bring in environment including ${ROOT_DIR} etc. +source ../../spark/docker/spark_version source ../../spark/docker/setup.sh if [ ! -d tpch-spark/lib ]; then mkdir tpch-spark/lib diff --git a/benchmark/tpch/run_tpch.sh b/benchmark/tpch/run_tpch.sh index 16dc936..159ff00 100755 --- a/benchmark/tpch/run_tpch.sh +++ b/benchmark/tpch/run_tpch.sh @@ -1,5 +1,6 @@ #!/bin/bash +source ../../spark/docker/spark_version source ../../spark/docker/setup.sh if [ "$#" -lt 1 ]; then diff --git a/pushdown-datasource/build.sh b/pushdown-datasource/build.sh index 54c8c6c..6906181 100755 --- a/pushdown-datasource/build.sh +++ b/pushdown-datasource/build.sh @@ -1,4 +1,5 @@ #!/bin/bash +source ../spark/docker/spark_version source ../spark/docker/setup.sh SPARK_JAR_DIR=../spark/build/spark-${SPARK_VERSION}/jars/ if [ ! -d $SPARK_JAR_DIR ]; then @@ -14,10 +15,10 @@ fi echo "Copy over spark jars" cp $SPARK_JAR_DIR/*.jar pushdown-datasource/lib -SPARK_TEST_JAR_DIR=../spark/spark/ -cp $SPARK_TEST_JAR_DIR/sql/core/target/spark-sql_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib -cp $SPARK_TEST_JAR_DIR/sql/catalyst/target/spark-catalyst_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib -cp $SPARK_TEST_JAR_DIR/core/target/spark-core_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib +#SPARK_TEST_JAR_DIR=../spark/spark/ +#cp $SPARK_TEST_JAR_DIR/sql/core/target/spark-sql_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib +#cp $SPARK_TEST_JAR_DIR/sql/catalyst/target/spark-catalyst_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib +#cp $SPARK_TEST_JAR_DIR/core/target/spark-core_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib DIKECLIENTJAR=../dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar diff --git a/pushdown-datasource/pushdown-datasource b/pushdown-datasource/pushdown-datasource index fcbbe97..8f877e3 160000 --- a/pushdown-datasource/pushdown-datasource +++ b/pushdown-datasource/pushdown-datasource @@ -1 +1 @@ -Subproject commit fcbbe973b5d23071d29e239f85c9744f2da1f857 +Subproject commit 8f877e3a2f624820104888082b57fd5965c2dec1 diff --git a/spark/docker/Dockerfile b/spark/docker/Dockerfile index 958b447..13af474 100644 --- a/spark/docker/Dockerfile +++ b/spark/docker/Dockerfile @@ -63,8 +63,10 @@ RUN echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /e && sudo apt-get install sbt # Setup Spark Environment -ENV SPARK_VERSION 3.2.0 +ARG SPARK_VERSION= +ENV SPARK_VERSION ${SPARK_VERSION} ENV SPARK_PACKAGE spark-${SPARK_VERSION}-bin-hadoop2.7.tgz +ENV SPARK_PACKAGE_FOLDER spark-${SPARK_VERSION}-bin-hadoop2.7 ENV SPARK_PACKAGE_URL https://downloads.apache.org/spark/spark-${SPARK_VERSION}/$SPARK_PACKAGE ENV SPARK_SRC /spark ENV SPARK_BUILD /build diff --git a/spark/docker/build.sh b/spark/docker/build.sh index d3dc337..2283ba2 100755 --- a/spark/docker/build.sh +++ b/spark/docker/build.sh @@ -14,7 +14,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +source spark_version +echo "SPARK_VERSION $SPARK_VERSION" ROOT_DIR=$(pwd) DOCKER_DIR=${ROOT_DIR} @@ -61,10 +62,10 @@ fi echo "User id is: $USER_ID" echo "Group id is: $GROUP_ID" -docker build -f Dockerfile --target builder -t spark_build . +docker build -f Dockerfile --target builder --build-arg SPARK_VERSION=$SPARK_VERSION -t spark_build . echo "Done building spark_build docker" -docker build -f Dockerfile -t spark_run . +docker build -f Dockerfile --build-arg SPARK_VERSION=$SPARK_VERSION -t spark_run . echo "Done building spark_run docker" # Set the home directory in the Docker container. diff --git a/spark/docker/setup.sh b/spark/docker/setup.sh index af052bd..b322353 100755 --- a/spark/docker/setup.sh +++ b/spark/docker/setup.sh @@ -18,7 +18,6 @@ ROOT_DIR=$(pwd) DOCKER_DIR=docker DOCKER_FILE="${DOCKER_DIR}/Dockerfile" -SPARK_VERSION="3.2.0" USER_NAME=${SUDO_USER:=$USER} USER_ID=$(id -u "${USER_NAME}") diff --git a/spark/docker/spark_version b/spark/docker/spark_version new file mode 100644 index 0000000..ff2c63a --- /dev/null +++ b/spark/docker/spark_version @@ -0,0 +1 @@ +SPARK_VERSION=3.2.1 diff --git a/spark/scripts/build.sh b/spark/scripts/build.sh index 250f21e..f1c3e2d 100755 --- a/spark/scripts/build.sh +++ b/spark/scripts/build.sh @@ -30,8 +30,8 @@ if [ "$1" == "spark" ]; then fi # Extract our built package into our install directory. echo "Extracting $SPARK_PACKAGE to $SPARK_HOME" - tar -xzf spark-3.2.0-bin-hadoop2.7.tgz -C /build \ - && mv $SPARK_BUILD/spark-3.2.0-bin-hadoop2.7 $SPARK_HOME + tar -xzf $SPARK_PACKAGE -C /build \ + && mv $SPARK_BUILD/$SPARK_PACKAGE_FOLDER $SPARK_HOME popd else echo "Building spark"