
Commit ee37612

1) Add support for HADOOP_CONF_DIR (and/or YARN_CONF_DIR; either works), which specifies the client-side configuration directory that needs to be part of the CLASSPATH.
2) Move from var+="..." to var="$var..."; the former unfortunately does not work on older bash shells.
1 parent: b05c9d2
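
The second change is needed because the `+=` string-append operator only exists in bash 3.1 and later; older shells do not parse `VAR+=value` as an assignment at all. A minimal sketch of the two forms (variable and value are illustrative):

    # bash 3.1+ only: not recognized as an assignment by older shells
    OPTS+=" -Dfoo=bar"

    # portable form adopted by this commit: works in any Bourne-compatible shell
    OPTS="$OPTS -Dfoo=bar"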

3 files changed: 54 additions, 27 deletions

docs/running-on-yarn.md

Lines changed: 3 additions & 0 deletions
@@ -30,6 +30,9 @@ If you want to test out the YARN deployment mode, you can use the current Spark
 
 # Launching Spark on YARN
 
+Ensure that HADOOP_CONF_DIR or YARN_CONF_DIR points to the directory which contains the (client-side) configuration files for the Hadoop cluster.
+This directory is used to connect to the cluster, write to the DFS, and submit jobs to the resource manager.
+
 The command to launch the YARN Client is as follows:
 
     SPARK_JAR=<SPARK_YAR_FILE> ./run spark.deploy.yarn.Client \
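
Concretely, a launch would look something like this (the /etc/hadoop/conf path is an illustrative assumption, not part of the commit):

    # Illustrative path: use the directory holding your cluster's client-side *-site.xml files
    export HADOOP_CONF_DIR=/etc/hadoop/conf
    ./run spark.deploy.yarn.Client ...   # run picks the directory up and adds it to the CLASSPATH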

run

Lines changed: 38 additions & 27 deletions
@@ -22,27 +22,27 @@ fi
 # values for that; it doesn't need a lot
 if [ "$1" = "spark.deploy.master.Master" -o "$1" = "spark.deploy.worker.Worker" ]; then
   SPARK_MEM=${SPARK_DAEMON_MEMORY:-512m}
-  SPARK_DAEMON_JAVA_OPTS+=" -Dspark.akka.logLifecycleEvents=true"
+  SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.akka.logLifecycleEvents=true"
   SPARK_JAVA_OPTS=$SPARK_DAEMON_JAVA_OPTS # Empty by default
 fi
 
 
 # Add java opts for master, worker, executor. The opts maybe null
 case "$1" in
   'spark.deploy.master.Master')
-    SPARK_JAVA_OPTS+=" $SPARK_MASTER_OPTS"
+    SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_MASTER_OPTS"
     ;;
   'spark.deploy.worker.Worker')
-    SPARK_JAVA_OPTS+=" $SPARK_WORKER_OPTS"
+    SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_WORKER_OPTS"
     ;;
   'spark.executor.StandaloneExecutorBackend')
-    SPARK_JAVA_OPTS+=" $SPARK_EXECUTOR_OPTS"
+    SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
     ;;
   'spark.executor.MesosExecutorBackend')
-    SPARK_JAVA_OPTS+=" $SPARK_EXECUTOR_OPTS"
+    SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_EXECUTOR_OPTS"
     ;;
   'spark.repl.Main')
-    SPARK_JAVA_OPTS+=" $SPARK_REPL_OPTS"
+    SPARK_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_REPL_OPTS"
     ;;
 esac
 
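
Each entry point thus appends its own dedicated opts variable onto SPARK_JAVA_OPTS. A usage sketch for passing extra JVM flags to the master only (the property name is made up for illustration):

    # Property name is illustrative; any JVM flags work here
    SPARK_MASTER_OPTS="-Dspark.example.flag=true" ./run spark.deploy.master.Master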

@@ -85,11 +85,11 @@ export SPARK_MEM
 
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="$SPARK_JAVA_OPTS"
-JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH"
-JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+JAVA_OPTS="$JAVA_OPTS -Djava.library.path=$SPARK_LIBRARY_PATH"
+JAVA_OPTS="$JAVA_OPTS -Xms$SPARK_MEM -Xmx$SPARK_MEM"
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e $FWDIR/conf/java-opts ] ; then
-  JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`"
+  JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
 export JAVA_OPTS
 
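
Since the script splices the contents of conf/java-opts into JAVA_OPTS via backticks, extra flags can be added without editing run itself. A small sketch (the flag chosen is illustrative):

    # Run from the Spark directory; the file's flags are appended verbatim to JAVA_OPTS
    echo "-verbose:gc" > conf/java-opts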

@@ -110,30 +110,30 @@ fi
 
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH"
-CLASSPATH+=":$FWDIR/conf"
-CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$FWDIR/conf"
+CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/classes"
 if [ -n "$SPARK_TESTING" ] ; then
-  CLASSPATH+=":$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
-  CLASSPATH+=":$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$CORE_DIR/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/test-classes"
 fi
-CLASSPATH+=":$CORE_DIR/src/main/resources"
-CLASSPATH+=":$REPL_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH+=":$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH+=":$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
-CLASSPATH+=":$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
+CLASSPATH="$CLASSPATH:$CORE_DIR/src/main/resources"
+CLASSPATH="$CLASSPATH:$REPL_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$EXAMPLES_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$STREAMING_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$STREAMING_DIR/lib/org/apache/kafka/kafka/0.7.2-spark/*" # <-- our in-project Kafka Jar
 if [ -e "$FWDIR/lib_managed" ]; then
-  CLASSPATH+=":$FWDIR/lib_managed/jars/*"
-  CLASSPATH+=":$FWDIR/lib_managed/bundles/*"
+  CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/jars/*"
+  CLASSPATH="$CLASSPATH:$FWDIR/lib_managed/bundles/*"
 fi
-CLASSPATH+=":$REPL_DIR/lib/*"
+CLASSPATH="$CLASSPATH:$REPL_DIR/lib/*"
 if [ -e $REPL_BIN_DIR/target ]; then
   for jar in `find "$REPL_BIN_DIR/target" -name 'spark-repl-*-shaded-hadoop*.jar'`; do
-    CLASSPATH+=":$jar"
+    CLASSPATH="$CLASSPATH:$jar"
   done
 fi
-CLASSPATH+=":$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
+CLASSPATH="$CLASSPATH:$BAGEL_DIR/target/scala-$SCALA_VERSION/classes"
 for jar in `find $PYSPARK_DIR/lib -name '*jar'`; do
-  CLASSPATH+=":$jar"
+  CLASSPATH="$CLASSPATH:$jar"
 done
 
 # Figure out the JAR file that our examples were packaged into. This includes a bit of a hack
@@ -147,6 +147,17 @@ if [ -e "$EXAMPLES_DIR/target/spark-examples-"*hadoop[12].jar ]; then
 export SPARK_EXAMPLES_JAR=`ls "$EXAMPLES_DIR/target/spark-examples-"*hadoop[12].jar`
 fi
 
+# Add hadoop conf dir - else FileSystem.*, etc fail !
+# Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
+# the configuration files.
+if [ "x" != "x$HADOOP_CONF_DIR" ]; then
+  CLASSPATH="$CLASSPATH:$HADOOP_CONF_DIR"
+fi
+if [ "x" != "x$YARN_CONF_DIR" ]; then
+  CLASSPATH="$CLASSPATH:$YARN_CONF_DIR"
+fi
+
+
 # Figure out whether to run our class with java or with the scala launcher.
 # In most cases, we'd prefer to execute our process with java because scala
 # creates a shell script as the parent of its Java process, which makes it
@@ -156,9 +167,9 @@ fi
 if [ "$SPARK_LAUNCH_WITH_SCALA" == "1" ]; then
   EXTRA_ARGS="" # Java options will be passed to scala as JAVA_OPTS
 else
-  CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-library.jar"
-  CLASSPATH+=":$SCALA_LIBRARY_PATH/scala-compiler.jar"
-  CLASSPATH+=":$SCALA_LIBRARY_PATH/jline.jar"
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-library.jar"
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/scala-compiler.jar"
+  CLASSPATH="$CLASSPATH:$SCALA_LIBRARY_PATH/jline.jar"
   # The JVM doesn't read JAVA_OPTS by default so we need to pass it in
   EXTRA_ARGS="$JAVA_OPTS"
 fi

run2.cmd

Lines changed: 13 additions & 0 deletions
@@ -63,6 +63,19 @@ set CLASSPATH=%CLASSPATH%;%FWDIR%repl\lib\*
 set CLASSPATH=%CLASSPATH%;%FWDIR%python\lib\*
 set CLASSPATH=%CLASSPATH%;%BAGEL_DIR%\target\scala-%SCALA_VERSION%\classes
 
+rem Add hadoop conf dir - else FileSystem.*, etc fail
+rem Note, this assumes that there is either a HADOOP_CONF_DIR or YARN_CONF_DIR which hosts
+rem the configuration files.
+if "x%HADOOP_CONF_DIR%"=="x" goto no_hadoop_conf_dir
+  set CLASSPATH=%CLASSPATH%;%HADOOP_CONF_DIR%
+:no_hadoop_conf_dir
+
+if "x%YARN_CONF_DIR%"=="x" goto no_yarn_conf_dir
+  set CLASSPATH=%CLASSPATH%;%YARN_CONF_DIR%
+:no_yarn_conf_dir
+
+
+
 rem Figure out the JAR file that our examples were packaged into.
 rem First search in the build path from SBT:
 for %%d in ("examples/target/scala-%SCALA_VERSION%/spark-examples*.jar") do (
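
The batch file mirrors the shell logic with goto-based guards in classic batch style. A Windows usage sketch (the path is an illustrative assumption):

    rem Illustrative path; point at the client-side Hadoop configuration directory
    set HADOOP_CONF_DIR=C:\hadoop\conf
    rem run2.cmd will now append it to %CLASSPATH% as shown above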
