Improved build configuration

1, Fix SPARK-1441: compile spark core error with hadoop 0.23.x 2, Fix SPARK-1491: maven hadoop-provided profile fails to build 3, Fix org.scala-lang: * ,org.apache.avro:* inconsistent versions dependency 4, A modified on the sql/catalyst/pom.xml,sql/hive/pom.xml,sql/core/pom.xml (Four spaces formatted into two spaces) Author: witgo <witgo@qq.com> Closes #480 from witgo/format_pom and squashes the following commits: 03f652f [witgo] review commit b452680 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom bee920d [witgo] revert fix SPARK-1629: Spark Core missing commons-lang dependence 7382a07 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom 6902c91 [witgo] fix SPARK-1629: Spark Core missing commons-lang dependence 0da4bc3 [witgo] merge master d1718ed [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom e345919 [witgo] add avro dependency to yarn-alpha 77fad08 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom 62d0862 [witgo] Fix org.scala-lang: * inconsistent versions dependency 1a162d7 [witgo] Merge branch 'master' of https://github.com/apache/spark into format_pom 934f24d [witgo] review commit cf46edc [witgo] exclude jruby 06e7328 [witgo] Merge branch 'SparkBuild' into format_pom 99464d2 [witgo] fix maven hadoop-provided profile fails to build 0c6c1fc [witgo] Fix compile spark core error with hadoop 0.23.x 6851bec [witgo] Maintain consistent SparkBuild.scala, pom.xml (cherry picked from commit 030f2c2) Conflicts: sql/catalyst/pom.xml sql/core/pom.xml sql/hive/pom.xml
apache · Apr 29, 2014 · ee96460 · ee96460
1 parent 42ba706
commit ee96460
Show file tree

Hide file tree

Showing 23 changed files with 295 additions and 466 deletions.
diff --git a/bagel/pom.xml b/bagel/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project Bagel</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/core/pom.xml b/core/pom.xml
@@ -30,19 +30,6 @@
   <packaging>jar</packaging>
   <name>Spark Project Core</name>
   <url>http://spark.apache.org/</url>
-  <!-- SPARK-1121: Adds an explicit dependency on Avro to work around a Hadoop 0.23.X issue -->
-  <profiles>
-    <profile>
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
@@ -147,15 +134,6 @@
       <groupId>org.json4s</groupId>
       <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
       <version>3.2.6</version>
-      <!-- see also exclusion for lift-json; this is necessary since it depends on
-         scala-library and scalap 2.10.0, but we use 2.10.4, and only override
-         scala-library -->
-      <exclusions>
-        <exclusion>
-        <groupId>org.scala-lang</groupId>
-        <artifactId>scalap</artifactId>
-        </exclusion>
-      </exclusions>
     </dependency>
     <dependency>
       <groupId>colt</groupId>

diff --git a/docs/building-with-maven.md b/docs/building-with-maven.md
@@ -39,17 +39,23 @@ For Apache Hadoop versions 1.x, Cloudera CDH MRv1, and other Hadoop versions wit
     # Cloudera CDH 4.2.0 with MapReduce v1
     $ mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
 
-For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you should enable the "yarn-alpha" or "yarn" profile and set the "hadoop.version", "yarn.version" property:
+    # Apache Hadoop 0.23.x
+    $ mvn -Phadoop-0.23 -Dhadoop.version=0.23.7 -DskipTests clean package
+
+For Apache Hadoop 2.x, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, you can enable the "yarn-alpha" or "yarn" profile and set the "hadoop.version" and "yarn.version" properties. Note that Hadoop 0.23.x additionally requires the `-Phadoop-0.23` profile:
 
     # Apache Hadoop 2.0.5-alpha
     $ mvn -Pyarn-alpha -Dhadoop.version=2.0.5-alpha -Dyarn.version=2.0.5-alpha -DskipTests clean package
 
     # Cloudera CDH 4.2.0 with MapReduce v2
     $ mvn -Pyarn-alpha -Dhadoop.version=2.0.0-cdh4.2.0 -Dyarn.version=2.0.0-cdh4.2.0 -DskipTests clean package
 
-    # Apache Hadoop 2.2.X ( e.g. 2.2.0 as below ) and newer
+    # Apache Hadoop 2.2.X (e.g. 2.2.0 as below) and newer
     $ mvn -Pyarn -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 -DskipTests clean package
 
+    # Apache Hadoop 0.23.x
+    $ mvn -Pyarn-alpha -Phadoop-0.23 -Dhadoop.version=0.23.7 -Dyarn.version=0.23.7 -DskipTests clean package
+
 ## Spark Tests in Maven ##
 
 Tests are run by default via the [ScalaTest Maven plugin](http://www.scalatest.org/user_guide/using_the_scalatest_maven_plugin). Some of them require Spark to be packaged first, so always run `mvn package` with `-DskipTests` the first time. You can then run the tests with `mvn -Dhadoop.version=... test`.

diff --git a/examples/pom.xml b/examples/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project Examples</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>
@@ -124,6 +110,10 @@
           <groupId>commons-logging</groupId>
           <artifactId>commons-logging</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>org.jruby</groupId>
+          <artifactId>jruby-complete</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
     <dependency>

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Flume</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Kafka</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External MQTT</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-         <dependency>
-           <groupId>org.apache.avro</groupId>
-           <artifactId>avro</artifactId>
-         </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External Twitter</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project External ZeroMQ</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/graphx/pom.xml b/graphx/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project GraphX</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>

diff --git a/make-distribution.sh b/make-distribution.sh
@@ -123,10 +123,19 @@ else
 fi
 
 if [ "$SPARK_YARN" == "true" ]; then
-  mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
-    -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  if [[ "$SPARK_HADOOP_VERSION" =~ "0.23." ]]; then
+    mvn clean package -DskipTests -Pyarn-alpha -Dhadoop.version=$SPARK_HADOOP_VERSION \
+      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE -Phadoop-0.23
+  else
+    mvn clean package -DskipTests -Pyarn -Dhadoop.version=$SPARK_HADOOP_VERSION \
+      -Dyarn.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  fi
 else
-  mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  if [[ "$SPARK_HADOOP_VERSION" =~ "0.23." ]]; then
+    mvn clean package -Phadoop-0.23 -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  else
+    mvn clean package -DskipTests -Dhadoop.version=$SPARK_HADOOP_VERSION $MAYBE_HIVE
+  fi
 fi
 
 # Make directories

diff --git a/mllib/pom.xml b/mllib/pom.xml
@@ -31,20 +31,6 @@
   <name>Spark Project ML Library</name>
   <url>http://spark.apache.org/</url>
 
-  <profiles>
-    <profile>
-      <!-- SPARK-1121: SPARK-1121: Adds an explicit dependency on Avro to work around
-           a Hadoop 0.23.X issue -->
-      <id>yarn-alpha</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.avro</groupId>
-          <artifactId>avro</artifactId>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.spark</groupId>