Skip to content

Commit

Permalink
Updated to use Spark 3.2.1.
Browse files Browse the repository at this point in the history
Also changed to use a file, spark/docker/spark_version, to indicate which
Spark version to download and use. Changing that one file is now all
that is needed to change our Spark version.
  • Loading branch information
rf972 committed Feb 6, 2022
1 parent d257cae commit 2c1fb22
Show file tree
Hide file tree
Showing 9 changed files with 18 additions and 12 deletions.
1 change: 1 addition & 0 deletions benchmark/tpch/build_tpch.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash
# Bring in environment including ${ROOT_DIR} etc.
source ../../spark/docker/spark_version
source ../../spark/docker/setup.sh
if [ ! -d tpch-spark/lib ]; then
mkdir tpch-spark/lib
Expand Down
1 change: 1 addition & 0 deletions benchmark/tpch/run_tpch.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/bin/bash

source ../../spark/docker/spark_version
source ../../spark/docker/setup.sh

if [ "$#" -lt 1 ]; then
Expand Down
9 changes: 5 additions & 4 deletions pushdown-datasource/build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
source ../spark/docker/spark_version
source ../spark/docker/setup.sh
SPARK_JAR_DIR=../spark/build/spark-${SPARK_VERSION}/jars/
if [ ! -d $SPARK_JAR_DIR ]; then
Expand All @@ -14,10 +15,10 @@ fi
echo "Copy over spark jars"
cp $SPARK_JAR_DIR/*.jar pushdown-datasource/lib

SPARK_TEST_JAR_DIR=../spark/spark/
cp $SPARK_TEST_JAR_DIR/sql/core/target/spark-sql_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib
cp $SPARK_TEST_JAR_DIR/sql/catalyst/target/spark-catalyst_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib
cp $SPARK_TEST_JAR_DIR/core/target/spark-core_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib
#SPARK_TEST_JAR_DIR=../spark/spark/
#cp $SPARK_TEST_JAR_DIR/sql/core/target/spark-sql_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib
#cp $SPARK_TEST_JAR_DIR/sql/catalyst/target/spark-catalyst_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib
#cp $SPARK_TEST_JAR_DIR/core/target/spark-core_2.12-${SPARK_VERSION}-tests.jar pushdown-datasource/lib

DIKECLIENTJAR=../dikeHDFS/client/ndp-hdfs/target/ndp-hdfs-1.0.jar

Expand Down
4 changes: 3 additions & 1 deletion spark/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ RUN echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /e
&& sudo apt-get install sbt

# Setup Spark Environment
ENV SPARK_VERSION 3.2.0
ARG SPARK_VERSION=
ENV SPARK_VERSION ${SPARK_VERSION}
ENV SPARK_PACKAGE spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
ENV SPARK_PACKAGE_FOLDER spark-${SPARK_VERSION}-bin-hadoop2.7
ENV SPARK_PACKAGE_URL https://downloads.apache.org/spark/spark-${SPARK_VERSION}/$SPARK_PACKAGE
ENV SPARK_SRC /spark
ENV SPARK_BUILD /build
Expand Down
7 changes: 4 additions & 3 deletions spark/docker/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

source spark_version
echo "SPARK_VERSION $SPARK_VERSION"
ROOT_DIR=$(pwd)

DOCKER_DIR=${ROOT_DIR}
Expand Down Expand Up @@ -61,10 +62,10 @@ fi
echo "User id is: $USER_ID"
echo "Group id is: $GROUP_ID"

docker build -f Dockerfile --target builder -t spark_build .
docker build -f Dockerfile --target builder --build-arg SPARK_VERSION=$SPARK_VERSION -t spark_build .
echo "Done building spark_build docker"

docker build -f Dockerfile -t spark_run .
docker build -f Dockerfile --build-arg SPARK_VERSION=$SPARK_VERSION -t spark_run .
echo "Done building spark_run docker"

# Set the home directory in the Docker container.
Expand Down
1 change: 0 additions & 1 deletion spark/docker/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
ROOT_DIR=$(pwd)
DOCKER_DIR=docker
DOCKER_FILE="${DOCKER_DIR}/Dockerfile"
SPARK_VERSION="3.2.0"
USER_NAME=${SUDO_USER:=$USER}
USER_ID=$(id -u "${USER_NAME}")

Expand Down
1 change: 1 addition & 0 deletions spark/docker/spark_version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
SPARK_VERSION=3.2.1
4 changes: 2 additions & 2 deletions spark/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ if [ "$1" == "spark" ]; then
fi
# Extract our built package into our install directory.
echo "Extracting $SPARK_PACKAGE to $SPARK_HOME"
tar -xzf spark-3.2.0-bin-hadoop2.7.tgz -C /build \
&& mv $SPARK_BUILD/spark-3.2.0-bin-hadoop2.7 $SPARK_HOME
tar -xzf $SPARK_PACKAGE -C /build \
&& mv $SPARK_BUILD/$SPARK_PACKAGE_FOLDER $SPARK_HOME
popd
else
echo "Building spark"
Expand Down

0 comments on commit 2c1fb22

Please sign in to comment.