Update #3

Merged 163 commits on Aug 20, 2014.

Commits

4878911
[SPARK-2875] [PySpark] [SQL] handle null in schemaRDD()
davies Aug 6, 2014
a6cd311
[SPARK-2678][Core][SQL] A workaround for SPARK-2678
liancheng Aug 6, 2014
d614967
[SPARK-2627] [PySpark] have the build enforce PEP 8 automatically
nchammas Aug 6, 2014
4e98236
SPARK-2566. Update ShuffleWriteMetrics incrementally
sryza Aug 6, 2014
25cff10
[SPARK-2852][MLLIB] API consistency for `mllib.feature`
mengxr Aug 6, 2014
e537b33
[PySpark] Add blanklines to Python docstrings so example code renders…
rnowling Aug 6, 2014
c6889d2
[HOTFIX][Streaming] Handle port collisions in flume polling test
andrewor14 Aug 6, 2014
4e00833
SPARK-2882: Spark build now checks local maven cache for dependencies
GregOwen Aug 6, 2014
17caae4
[SPARK-2583] ConnectionManager error reporting
sarutak Aug 7, 2014
4201d27
SPARK-2879 [BUILD] Use HTTPS to access Maven Central and other repos
srowen Aug 7, 2014
a263a7e
HOTFIX: Support custom Java 7 location
pwendell Aug 7, 2014
ffd1f59
[SPARK-2887] fix bug of countApproxDistinct() when have more than one…
davies Aug 7, 2014
47ccd5e
[SPARK-2851] [mllib] DecisionTree Python consistency update
jkbradley Aug 7, 2014
75993a6
SPARK-2879 part 2 [BUILD] Use HTTPS to access Maven Central and other…
srowen Aug 7, 2014
8d1dec4
[mllib] DecisionTree Strategy parameter checks
jkbradley Aug 7, 2014
b9e9e53
[SPARK-2852][MLLIB] Separate model from IDF/StandardScaler algorithms
mengxr Aug 7, 2014
80ec5ba
SPARK-2905 Fixed path sbin => bin
dosoft Aug 7, 2014
32096c2
SPARK-2899 Doc generation is back to working in new SBT Build.
ScrapCodes Aug 7, 2014
6906b69
SPARK-2787: Make sort-based shuffle write files directly when there's…
mateiz Aug 8, 2014
4c51098
SPARK-2565. Update ShuffleReadMetrics as blocks are fetched
sryza Aug 8, 2014
9de6a42
[SPARK-2904] Remove non-used local variable in SparkSubmitArguments
sarutak Aug 8, 2014
9a54de1
[SPARK-2911]: provide rdd.parent[T](j) to obtain jth parent RDD
erikerlandson Aug 8, 2014
9016af3
[SPARK-2888] [SQL] Fix addColumnMetadataToConf in HiveTableScan
yhuai Aug 8, 2014
0489cee
[SPARK-2908] [SQL] JsonRDD.nullTypeToStringType does not convert all …
yhuai Aug 8, 2014
c874723
[SPARK-2877] [SQL] MetastoreRelation should use SparkClassLoader when…
yhuai Aug 8, 2014
45d8f4d
[SPARK-2919] [SQL] Basic support for analyze command in HiveQl
yhuai Aug 8, 2014
b7c89a7
[SPARK-2700] [SQL] Hidden files (such as .impala_insert_staging) shou…
chutium Aug 8, 2014
74d6f62
[SPARK-1997][MLLIB] update breeze to 0.9
mengxr Aug 8, 2014
ec79063
[SPARK-2897][SPARK-2920]TorrentBroadcast does use the serializer clas…
witgo Aug 8, 2014
1c84dba
[Web UI]Make decision order of Worker's WebUI port consistent with Ma…
WangTaoTheTonic Aug 9, 2014
43af281
[SPARK-2911] apply parent[T](j) to clarify UnionRDD code
erikerlandson Aug 9, 2014
28dbae8
[SPARK-2635] Fix race condition at SchedulerBackend.isReady in standa…
li-zhihui Aug 9, 2014
b431e67
[SPARK-2861] Fix Doc comment of histogram method
Aug 9, 2014
e45daf2
[SPARK-1766] sorted functions to meet pedantic requirements
Aug 10, 2014
4f4a988
[SPARK-2894] spark-shell doesn't accept flags
sarutak Aug 10, 2014
5b6585d
Updated Spark SQL README to include the hive-thriftserver module
rxin Aug 10, 2014
482c5af
Turn UpdateBlockInfo into case class.
rxin Aug 10, 2014
3570119
Remove extra semicolon in Task.scala
witgo Aug 10, 2014
1d03a26
[SPARK-2950] Add gc time and shuffle write time to JobLogger
shivaram Aug 10, 2014
28dcbb5
[SPARK-2898] [PySpark] fix bugs in deamon.py
davies Aug 10, 2014
b715aa0
[SPARK-2937] Separate out samplyByKeyExact as its own API in PairRDDF…
dorx Aug 10, 2014
ba28a8f
[SPARK-2936] Migrate Netty network module from Java to Scala
rxin Aug 11, 2014
db06a81
[PySpark] [SPARK-2954] [SPARK-2948] [SPARK-2910] [SPARK-2101] Python …
JoshRosen Aug 11, 2014
3733866
[SPARK-2952] Enable logging actor messages at DEBUG level
rxin Aug 11, 2014
7712e72
[SPARK-2931] In TaskSetManager, reset currentLocalityIndex after reco…
JoshRosen Aug 12, 2014
32638b5
[SPARK-2515][mllib] Chi Squared test
dorx Aug 12, 2014
6fab941
[SPARK-2934][MLlib] Adding LogisticRegressionWithLBFGS Interface
Aug 12, 2014
490ecfa
[SPARK-2844][SQL] Correctly set JVM HiveContext if it is passed into …
ahirreddy Aug 12, 2014
21a95ef
[SPARK-2590][SQL] Added option to handle incremental collection, disa…
liancheng Aug 12, 2014
e83fdcd
[sql]use SparkSQLEnv.stop() in ShutdownHook
scwf Aug 12, 2014
647aeba
[SQL] A tiny refactoring in HiveContext#analyze
yhuai Aug 12, 2014
c9c89c3
[SPARK-2965][SQL] Fix HashOuterJoin output nullabilities.
ueshin Aug 12, 2014
c686b7d
[SPARK-2968][SQL] Fix nullabilities of Explode.
ueshin Aug 12, 2014
bad21ed
[SPARK-2650][SQL] Build column buffers in smaller batches
marmbrus Aug 12, 2014
5d54d71
[SQL] [SPARK-2826] Reduce the memory copy while building the hashmap …
chenghao-intel Aug 12, 2014
9038d94
[SPARK-2923][MLLIB] Implement some basic BLAS routines
mengxr Aug 12, 2014
f0060b7
[MLlib] Correctly set vectorSize and alpha
Ishiihara Aug 12, 2014
882da57
fix flaky tests
davies Aug 12, 2014
c235b83
SPARK-2830 [MLlib]: re-organize mllib documentation
atalwalkar Aug 13, 2014
676f982
[SPARK-2953] Allow using short names for io compression codecs
rxin Aug 13, 2014
246cb3f
Use transferTo when copy merge files in ExternalSorter
colorant Aug 13, 2014
2bd8126
[SPARK-1777 (partial)] bugfix: make size of requested memory correctly
liyezhang556520 Aug 13, 2014
fe47359
[SPARK-2993] [MLLib] colStats (wrapper around MultivariateStatistical…
dorx Aug 13, 2014
869f06c
[SPARK-2963] [SQL] There no documentation about building to use HiveS…
sarutak Aug 13, 2014
c974a71
[SPARK-3013] [SQL] [PySpark] convert array into list
davies Aug 13, 2014
434bea1
[SPARK-2983] [PySpark] improve performance of sortByKey()
davies Aug 13, 2014
7ecb867
[MLLIB] use Iterator.fill instead of Array.fill
mengxr Aug 13, 2014
bdc7a1a
[SPARK-3004][SQL] Added null checking when retrieving row set
liancheng Aug 13, 2014
13f54e2
[SPARK-2817] [SQL] add "show create table" support
tianyi Aug 13, 2014
9256d4a
[SPARK-2994][SQL] Support for udfs that take complex types
marmbrus Aug 14, 2014
376a82e
[SPARK-2650][SQL] More precise initial buffer size estimation for in-…
liancheng Aug 14, 2014
9fde1ff
[SPARK-2935][SQL]Fix parquet predicate push down bug
marmbrus Aug 14, 2014
905dc4b
[SPARK-2970] [SQL] spark-sql script ends with IOException when EventL…
sarutak Aug 14, 2014
63d6777
[SPARK-2986] [SQL] fixed: setting properties does not effect
Aug 14, 2014
0c7b452
SPARK-3020: Print completed indices rather than tasks in web UI
pwendell Aug 14, 2014
9497b12
[SPARK-3006] Failed to execute spark-shell in Windows OS
tsudukim Aug 14, 2014
e424565
[Docs] Add missing <code> tags (minor)
andrewor14 Aug 14, 2014
69a57a1
[SPARK-2995][MLLIB] add ALS.setIntermediateRDDStorageLevel
mengxr Aug 14, 2014
d069c5d
[SPARK-3029] Disable local execution of Spark jobs by default
aarondav Aug 14, 2014
6b8de0e
SPARK-2893: Do not swallow Exceptions when running a custom kryo regi…
GrahamDennis Aug 14, 2014
078f3fb
[SPARK-3011][SQL] _temporary directory should be filtered out by sqlC…
josephsu Aug 14, 2014
add75d4
[SPARK-2927][SQL] Add a conf to configure if we always read Binary co…
yhuai Aug 14, 2014
fde692b
[SQL] Python JsonRDD UTF8 Encoding Fix
ahirreddy Aug 14, 2014
267fdff
[SPARK-2925] [sql]fix spark-sql and start-thriftserver shell bugs whe…
scwf Aug 14, 2014
eaeb0f7
Minor cleanup of metrics.Source
rxin Aug 14, 2014
9622106
[SPARK-2979][MLlib] Improve the convergence rate by minimizing the co…
Aug 14, 2014
a7f8a4f
Revert [SPARK-3011][SQL] _temporary directory should be filtered out…
marmbrus Aug 14, 2014
a75bc7a
SPARK-3009: Reverted readObject method in ApplicationInfo so that App…
jacek-lewandowski Aug 14, 2014
fa5a08e
Make dev/mima runnable on Mac OS X.
rxin Aug 14, 2014
655699f
[SPARK-3027] TaskContext: tighten visibility and provide Java friendl…
rxin Aug 15, 2014
3a8b68b
[SPARK-2468] Netty based block server / client module
rxin Aug 15, 2014
9422a9b
[SPARK-2736] PySpark converter and example script for reading Avro files
kanzhang Aug 15, 2014
500f84e
[SPARK-2912] [Spark QA] Include commit hash in Spark QA messages
nchammas Aug 15, 2014
e1b85f3
SPARK-2955 [BUILD] Test code fails to compile with "mvn compile" with…
srowen Aug 15, 2014
fba8ec3
Add caching information to rdd.toDebugString
Aug 15, 2014
7589c39
[SPARK-2924] remove default args to overloaded methods
avati Aug 15, 2014
fd9fcd2
Revert "[SPARK-2468] Netty based block server / client module"
pwendell Aug 15, 2014
0afe5cb
SPARK-3028. sparkEventToJson should support SparkListenerExecutorMetr…
sryza Aug 15, 2014
c703229
[SPARK-3022] [SPARK-3041] [mllib] Call findBins once per level + unor…
jkbradley Aug 15, 2014
cc36487
[SPARK-3046] use executor's class loader as the default serializer cl…
rxin Aug 16, 2014
5d25c0b
[SPARK-3078][MLLIB] Make LRWithLBFGS API consistent with others
mengxr Aug 16, 2014
2e069ca
[SPARK-3001][MLLIB] Improve Spearman's correlation
mengxr Aug 16, 2014
c9da466
[SPARK-3015] Block on cleaning tasks to prevent Akka timeouts
andrewor14 Aug 16, 2014
a83c772
[SPARK-3045] Make Serializer interface Java friendly
rxin Aug 16, 2014
20fcf3d
[SPARK-2977] Ensure ShuffleManager is created before ShuffleBlockManager
JoshRosen Aug 16, 2014
b4a0592
[SQL] Using safe floating-point numbers in doctest
liancheng Aug 16, 2014
4bdfaa1
[SPARK-3076] [Jenkins] catch & report test timeouts
nchammas Aug 16, 2014
76fa0ea
[SPARK-2677] BasicBlockFetchIterator#next can wait forever
sarutak Aug 16, 2014
7e70708
[SPARK-3048][MLLIB] add LabeledPoint.parse and remove loadStreamingLa…
mengxr Aug 16, 2014
ac6411c
[SPARK-3081][MLLIB] rename RandomRDDGenerators to RandomRDDs
mengxr Aug 16, 2014
379e758
[SPARK-3035] Wrong example with SparkContext.addFile
iAmGhost Aug 16, 2014
2fc8aca
[SPARK-1065] [PySpark] improve supporting for large broadcast
davies Aug 16, 2014
bc95fe0
In the stop method of ConnectionManager to cancel the ackTimeoutMonitor
witgo Aug 17, 2014
fbad722
[SPARK-3077][MLLIB] fix some chisq-test
mengxr Aug 17, 2014
73ab7f1
[SPARK-3042] [mllib] DecisionTree Filter top-down instead of bottom-up
jkbradley Aug 17, 2014
318e28b
SPARK-2881. Upgrade snappy-java to 1.1.1.3.
pwendell Aug 18, 2014
5ecb08e
Revert "[SPARK-2970] [SQL] spark-sql script ends with IOException whe…
marmbrus Aug 18, 2014
bfa09b0
[SQL] Improve debug logging and toStrings.
marmbrus Aug 18, 2014
9924328
[SPARK-1981] updated streaming-kinesis.md
cfregly Aug 18, 2014
95470a0
[HOTFIX][STREAMING] Allow the JVM/Netty to decide which port to bind …
harishreedharan Aug 18, 2014
c77f406
[SPARK-3087][MLLIB] fix col indexing bug in chi-square and add a chec…
mengxr Aug 18, 2014
5173f3c
SPARK-2884: Create binary builds in parallel with release script.
pwendell Aug 18, 2014
df652ea
SPARK-2900. aggregate inputBytes per stage
sryza Aug 18, 2014
3c8fa50
[SPARK-3097][MLlib] Word2Vec performance improvement
Ishiihara Aug 18, 2014
eef779b
[SPARK-2842][MLlib]Word2Vec documentation
Ishiihara Aug 18, 2014
9306b8c
[MLlib] Remove transform(dataset: RDD[String]) from Word2Vec public API
Ishiihara Aug 18, 2014
c0cbbde
SPARK-3093 : masterLock in Worker is no longer need
CrazyJvm Aug 18, 2014
f45efbb
[SPARK-2862] histogram method fails on some choices of bucketCount
Aug 18, 2014
7ae28d1
SPARK-3096: Include parquet hive serde by default in build
pwendell Aug 18, 2014
6a13dca
[SPARK-3084] [SQL] Collect broadcasted tables in parallel in joins
mateiz Aug 18, 2014
4bf3de7
[SPARK-3085] [SQL] Use compact data structures in SQL joins
mateiz Aug 18, 2014
6bca889
SPARK-3025 [SQL]: Allow JDBC clients to set a fair scheduler pool
pwendell Aug 18, 2014
9eb74c7
[SPARK-3091] [SQL] Add support for caching metadata on Parquet files
mateiz Aug 18, 2014
3abd0c1
[SPARK-2406][SQL] Initial support for using ParquetTableScan to read …
marmbrus Aug 18, 2014
66ade00
[SPARK-2169] Don't copy appName / basePath everywhere.
Aug 18, 2014
3a5962f
Removed .travis.yml file since we are not using Travis.
rxin Aug 18, 2014
d1d0ee4
[SPARK-3103] [PySpark] fix saveAsTextFile() with utf-8
davies Aug 18, 2014
6201b27
[SPARK-2718] [yarn] Handle quotes and other characters in user args.
Aug 18, 2014
115eeb3
[mllib] DecisionTree: treeAggregate + Python example bug fix
jkbradley Aug 18, 2014
c8b16ca
[SPARK-2850] [SPARK-2626] [mllib] MLlib stats examples + small fixes
jkbradley Aug 19, 2014
217b5e9
[SPARK-3108][MLLIB] add predictOnValues to StreamingLR and fix predictOn
mengxr Aug 19, 2014
1f1819b
[SPARK-3114] [PySpark] Fix Python UDFs in Spark SQL.
JoshRosen Aug 19, 2014
8257733
[SPARK-3116] Remove the excessive lockings in TorrentBroadcast
rxin Aug 19, 2014
cd0720c
Fix typo in decision tree docs
emef Aug 19, 2014
7eb9cbc
[SPARK-3072] YARN - Exit when reach max number failed executors
tgravescs Aug 19, 2014
cbfc26b
[SPARK-3089] Fix meaningless error message in ConnectionManager
sarutak Aug 19, 2014
31f0b07
[SPARK-3128][MLLIB] Use streaming test suite for StreamingLR
freeman-lab Aug 19, 2014
94053a7
SPARK-2333 - spark_ec2 script should allow option for existing securi…
vidaha Aug 19, 2014
76eaeb4
Move a bracket in validateSettings of SparkConf
SaintBacchus Aug 19, 2014
d7e80c2
[SPARK-2790] [PySpark] fix zip with serializers which have different …
davies Aug 19, 2014
825d4fe
[SPARK-3136][MLLIB] Create Java-friendly methods in RandomRDDs
mengxr Aug 19, 2014
8b9dc99
[SPARK-2468] Netty based block server / client module
rxin Aug 20, 2014
1870dba
[MLLIB] minor update to word2vec
mengxr Aug 20, 2014
c7252b0
[SPARK-3112][MLLIB] Add documentation and example for StreamingLR
freeman-lab Aug 20, 2014
0e3ab94
[SQL] add note of use synchronizedMap in SQLConf
scwf Aug 20, 2014
068b6fe
[SPARK-3130][MLLIB] detect negative values in naive Bayes
mengxr Aug 20, 2014
fce5c0f
[HOTFIX][Streaming][MLlib] use temp folder for checkpoint
mengxr Aug 20, 2014
8adfbc2
[SPARK-3119] Re-implementation of TorrentBroadcast.
rxin Aug 20, 2014
0a984aa
[SPARK-3142][MLLIB] output shuffle data directly in Word2Vec
mengxr Aug 20, 2014
ebcb94f
[SPARK-2974] [SPARK-2975] Fix two bugs related to spark.local.dirs
JoshRosen Aug 20, 2014
8a74e4b
[DOCS] Fixed wrong links
giwa Aug 20, 2014
0a7ef63
[SPARK-3141] [PySpark] fix sortByKey() with take()
davies Aug 20, 2014
8c5a222
[SPARK-3054][STREAMING] Add unit tests for Spark Sink.
harishreedharan Aug 20, 2014
1 change: 1 addition & 0 deletions .rat-excludes

@@ -25,6 +25,7 @@ log4j-defaults.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
 sorttable.js
+.*avsc
 .*txt
 .*json
 .*data
9 changes: 9 additions & 0 deletions README.md

@@ -115,6 +115,15 @@ If your project is built with Maven, add this to your POM file's `<dependencies>`
 </dependency>


+## A Note About Thrift JDBC server and CLI for Spark SQL
+
+Spark SQL supports Thrift JDBC server and CLI.
+See sql-programming-guide.md for more information about those features.
+You can use those features by setting `-Phive-thriftserver` when building Spark as follows.
+
+    $ sbt/sbt -Phive-thriftserver assembly
+
+
 ## Configuration

 Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)
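
For context (not part of this changeset): once Spark is built with `-Phive-thriftserver`, the server and CLI are started through the scripts touched below. A hedged sketch, assuming the default HiveServer2 port of 10000:

    $ ./sbin/start-thriftserver.sh                    # start the Thrift JDBC server
    $ ./bin/beeline -u jdbc:hive2://localhost:10000   # connect to it with BeeLine
    $ ./bin/spark-sql                                 # or run the SQL CLI directly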
29 changes: 7 additions & 22 deletions bin/beeline

@@ -17,29 +17,14 @@
 # limitations under the License.
 #

-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+#
+# Shell script for starting BeeLine

-# Find the java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
+# Enter posix mode for bash
+set -o posix

-# Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
-  echo "$classpath_output"
-  exit 1
-else
-  CLASSPATH=$classpath_output
-fi
+# Figure out where Spark is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"

 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
+exec "$FWDIR/bin/spark-class" $CLASS "$@"
18 changes: 14 additions & 4 deletions bin/pyspark

@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"

+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi

 # Exit if the user hasn't compiled Spark

@@ -66,10 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"

@@ -90,7 +97,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
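
For illustration (hypothetical invocation, not part of the diff), the deprecated .py path above now splits arguments around the primary resource:

    # app.py becomes $primary; --master local[2] is gathered into SUBMISSION_OPTS;
    # the remaining arg1 stays in APPLICATION_OPTS
    $ ./bin/pyspark app.py --master local[2] arg1
    # effectively runs: spark-submit --master local[2] app.py arg1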
20 changes: 14 additions & 6 deletions bin/spark-shell

@@ -31,13 +31,21 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"

+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi

-function main(){
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).

@@ -46,11 +54,11 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
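
For illustration (hypothetical invocation, not part of the diff): recognized flags are now gathered by gatherSparkSubmitOpts (defined in bin/utils.sh, below) and placed before the spark-shell primary resource, where spark-submit actually parses them:

    $ ./bin/spark-shell --master local[4]
    # effectively runs: spark-submit --class org.apache.spark.repl.Main --master local[4] spark-shell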
2 changes: 1 addition & 1 deletion bin/spark-shell.cmd

@@ -19,4 +19,4 @@ rem

 set SPARK_HOME=%~dp0..

-cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell --class org.apache.spark.repl.Main %*
+cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell
66 changes: 62 additions & 4 deletions bin/spark-sql

@@ -23,14 +23,72 @@
 # Enter posix mode for bash
 set -o posix

+CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/spark-sql [options]"
+function usage {
+  echo "Usage: ./bin/spark-sql [options] [cli option]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|--help"
+  pattern+="\|======="
+
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "CLI options:"
+  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+function ensure_arg_number {
+  arg_number=$1
+  at_least=$2
+
+  if [[ $arg_number -lt $at_least ]]; then
+    usage
+    exit 1
+  fi
+}
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  usage
   exit 0
 fi

-CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+CLI_ARGS=()
+SUBMISSION_ARGS=()
+
+while (($#)); do
+  case $1 in
+    -d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
+      ensure_arg_number $# 2
+      CLI_ARGS+=("$1"); shift
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -e)
+      ensure_arg_number $# 2
+      CLI_ARGS+=("$1"); shift
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -s | --silent)
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -v | --verbose)
+      # Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
+      CLI_ARGS+=("$1")
+      SUBMISSION_ARGS+=("$1"); shift
+      ;;
+
+    *)
+      SUBMISSION_ARGS+=("$1"); shift
+      ;;
+  esac
+done
+
+exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"
59 changes: 59 additions & 0 deletions bin/utils.sh

@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Gather all all spark-submit options into SUBMISSION_OPTS
+function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
+  # NOTE: If you add or remove spark-sumbmit options,
+  # modify NOT ONLY this script but also SparkSubmitArgument.scala
+  SUBMISSION_OPTS=()
+  APPLICATION_OPTS=()
+  while (($#)); do
+    case "$1" in
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
+      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
+        if [[ $# -lt 2 ]]; then
+          "$SUBMIT_USAGE_FUNCTION"
+          exit 1;
+        fi
+        SUBMISSION_OPTS+=("$1"); shift
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      --verbose | -v | --supervise)
+        SUBMISSION_OPTS+=("$1"); shift
+        ;;
+
+      *)
+        APPLICATION_OPTS+=("$1"); shift
+        ;;
+    esac
+  done
+
+  export SUBMISSION_OPTS
+  export APPLICATION_OPTS
+}
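
For context (not part of the diff), a minimal sketch of how the client scripts use this helper, with hypothetical arguments:

    source "$FWDIR/bin/utils.sh"
    SUBMIT_USAGE_FUNCTION=usage   # callers must point this at their own usage() function
    gatherSparkSubmitOpts --master local[2] --name demo app.py --input data.txt
    # SUBMISSION_OPTS  -> (--master local[2] --name demo)
    # APPLICATION_OPTS -> (app.py --input data.txt)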
100 changes: 0 additions & 100 deletions core/src/main/java/org/apache/spark/network/netty/FileClient.java

This file was deleted.
