
Commit 478d9fb

Merge pull request apache#64 from markhamstra/csd-1.4

SKIPME Spark 1.4.1

2 parents: 8f39e2f + 5819266

File tree

59 files changed, 374 additions(+), 141 deletions(-)


assembly/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

bagel/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-csd-5-SNAPSHOT</version>
+    <version>1.4.1-csd-1-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

core/src/main/scala/org/apache/spark/ui/JettyUtils.scala

Lines changed: 8 additions & 2 deletions
@@ -210,10 +210,16 @@ private[spark] object JettyUtils extends Logging {
       conf: SparkConf,
       serverName: String = ""): ServerInfo = {
 
-    val collection = new ContextHandlerCollection
-    collection.setHandlers(handlers.toArray)
     addFilters(handlers, conf)
 
+    val collection = new ContextHandlerCollection
+    val gzipHandlers = handlers.map { h =>
+      val gzipHandler = new GzipHandler
+      gzipHandler.setHandler(h)
+      gzipHandler
+    }
+    collection.setHandlers(gzipHandlers.toArray)
+
     // Bind to the given port, or throw a java.net.BindException if the port is occupied
     def connect(currentPort: Int): (Server, Int) = {
       val server = new Server(new InetSocketAddress(hostName, currentPort))
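The JettyUtils.scala change above wraps each UI context handler in a Jetty GzipHandler before registering it with the ContextHandlerCollection, so UI responses are gzip-compressed for clients that advertise support. Below is a minimal standalone sketch of the same wrapping pattern; it assumes the Jetty 8-style org.eclipse.jetty.server.handler.GzipHandler location and a caller-supplied handler list, and the server setup is illustrative rather than Spark's actual code.

import java.net.InetSocketAddress

import org.eclipse.jetty.server.Server
import org.eclipse.jetty.server.handler.{ContextHandlerCollection, GzipHandler}
import org.eclipse.jetty.servlet.ServletContextHandler

object GzipWrappingSketch {
  // Wrap each context handler in a GzipHandler so responses are compressed
  // for clients that send "Accept-Encoding: gzip", then register the wrapped
  // handlers on a plain Jetty server.
  def startGzippedServer(handlers: Seq[ServletContextHandler], port: Int): Server = {
    val collection = new ContextHandlerCollection
    val gzipHandlers = handlers.map { h =>
      val gzipHandler = new GzipHandler
      gzipHandler.setHandler(h)  // delegate to the original handler
      gzipHandler
    }
    collection.setHandlers(gzipHandlers.toArray)

    val server = new Server(new InetSocketAddress("0.0.0.0", port))
    server.setHandler(collection)
    server.start()
    server
  }
}

GzipHandler only compresses when the request advertises gzip support, so clients that do not send Accept-Encoding: gzip are served unchanged.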

dev/create-release/create-release.sh

Lines changed: 2 additions & 2 deletions
@@ -118,13 +118,13 @@ if [[ ! "$@" =~ --skip-publish ]]; then
 
   rm -rf $SPARK_REPO
 
-  build/mvn -DskipTests -Pyarn -Phive \
+  build/mvn -DskipTests -Pyarn -Phive -Prelease\
     -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 
   ./dev/change-version-to-2.11.sh
 
-  build/mvn -DskipTests -Pyarn -Phive \
+  build/mvn -DskipTests -Pyarn -Phive -Prelease\
     -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
     clean install
 

docs/configuration.md

Lines changed: 2 additions & 2 deletions
@@ -1007,9 +1007,9 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.rpc.numRetries</code></td>
   <td>3</td>
+  <td>
     Number of times to retry before an RPC task gives up.
     An RPC task will run at most times of this number.
-  <td>
   </td>
 </tr>
 <tr>
@@ -1029,8 +1029,8 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.rpc.lookupTimeout</code></td>
   <td>120s</td>
-    Duration for an RPC remote endpoint lookup operation to wait before timing out.
   <td>
+    Duration for an RPC remote endpoint lookup operation to wait before timing out.
   </td>
 </tr>
 </table>

docs/sparkr.md

Lines changed: 2 additions & 2 deletions
@@ -68,7 +68,7 @@ you can specify the packages with the `packages` argument.
 
 <div data-lang="r" markdown="1">
 {% highlight r %}
-sc <- sparkR.init(packages="com.databricks:spark-csv_2.11:1.0.3")
+sc <- sparkR.init(sparkPackages="com.databricks:spark-csv_2.11:1.0.3")
 sqlContext <- sparkRSQL.init(sc)
 {% endhighlight %}
 </div>
@@ -116,7 +116,7 @@ sql(hiveContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
 sql(hiveContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
 
 # Queries can be expressed in HiveQL.
-results <- hiveContext.sql("FROM src SELECT key, value")
+results <- sql(hiveContext, "FROM src SELECT key, value")
 
 # results is now a DataFrame
 head(results)

docs/sql-programming-guide.md

Lines changed: 2 additions & 2 deletions
@@ -828,7 +828,7 @@ using this syntax.
 
 {% highlight scala %}
 val df = sqlContext.read.format("json").load("examples/src/main/resources/people.json")
-df.select("name", "age").write.format("json").save("namesAndAges.json")
+df.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
 {% endhighlight %}
 
 </div>
@@ -1518,7 +1518,7 @@ sql(sqlContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
 sql(sqlContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
 
 # Queries can be expressed in HiveQL.
-results = sqlContext.sql("FROM src SELECT key, value").collect()
+results <- collect(sql(sqlContext, "FROM src SELECT key, value"))
 
 {% endhighlight %}
 
docs/streaming-programming-guide.md

Lines changed: 2 additions & 0 deletions
@@ -854,6 +854,8 @@ it with new information. To use this, you will have to do two steps.
 1. Define the state update function - Specify with a function how to update the state using the
    previous state and the new values from an input stream.
 
+In every batch, Spark will apply the state update function for all existing keys, regardless of whether they have new data in a batch or not. If the update function returns `None` then the key-value pair will be eliminated.
+
 Let's illustrate this with an example. Say you want to maintain a running count of each word
 seen in a text data stream. Here, the running count is the state and it is an integer. We
 define the update function as:
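The two lines added to the streaming guide describe updateStateByKey semantics: the update function runs each batch for every key with existing state, whether or not that key received new data, and returning None drops the key. A compact sketch of the running word count it refers to follows; the socket source, host/port, and checkpoint directory are illustrative assumptions, not part of this diff.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object RunningWordCountSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RunningWordCountSketch")
    val ssc = new StreamingContext(conf, Seconds(1))
    ssc.checkpoint("checkpoint")  // stateful transformations require a checkpoint directory

    // Assumed input for illustration: lines of text from a local socket.
    val lines = ssc.socketTextStream("localhost", 9999)
    val pairs = lines.flatMap(_.split(" ")).map(word => (word, 1))

    // Called every batch per key with the new values and the previous state.
    // Returning None here instead would eliminate the key-value pair.
    def updateFunction(newValues: Seq[Int], runningCount: Option[Int]): Option[Int] = {
      Some(runningCount.getOrElse(0) + newValues.sum)
    }

    val runningCounts = pairs.updateStateByKey[Int](updateFunction _)
    runningCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}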

ec2/spark_ec2.py

Lines changed: 3 additions & 3 deletions
@@ -127,7 +127,7 @@ def setup_external_libs(libs):
             )
             with open(tgz_file_path, "wb") as tgz_file:
                 tgz_file.write(download_stream.read())
-            with open(tgz_file_path) as tar:
+            with open(tgz_file_path, "rb") as tar:
                 if hashlib.md5(tar.read()).hexdigest() != lib["md5"]:
                     print("ERROR: Got wrong md5sum for {lib}.".format(lib=lib["name"]), file=stderr)
                     sys.exit(1)
@@ -1111,8 +1111,8 @@ def ssh(host, opts, command):
                 # If this was an ssh failure, provide the user with hints.
                 if e.returncode == 255:
                     raise UsageError(
-                        "Failed to SSH to remote host {0}.\n" +
-                        "Please check that you have provided the correct --identity-file and " +
+                        "Failed to SSH to remote host {0}.\n"
+                        "Please check that you have provided the correct --identity-file and "
                         "--key-pair parameters and try again.".format(host))
                 else:
                     raise e
