Commit bcf943f

Merge branch 'master' of https://github.com/apache/spark into shim-thriftserver1

2 parents: c359822 + 0ac52e3

160 files changed, +2536 -826 lines

LICENSE

Lines changed: 0 additions & 12 deletions
@@ -712,18 +712,6 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-========================================================================
-For colt:
-========================================================================
-
-Copyright (c) 1999 CERN - European Organization for Nuclear Research.
-Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose is hereby granted without fee, provided that the above copyright notice appear in all copies and that both that copyright notice and this permission notice appear in supporting documentation. CERN makes no representations about the suitability of this software for any purpose. It is provided "as is" without expressed or implied warranty.
-
-Packages hep.aida.*
-
-Written by Pavel Binko, Dino Ferrero Merlino, Wolfgang Hoschek, Tony Johnson, Andreas Pfeiffer, and others. Check the FreeHEP home page for more info. Permission to use and/or redistribute this work is granted under the terms of the LGPL License, with the exception that any usage related to military applications is expressly forbidden. The software and documentation made available under the terms of this license are provided with no warranty.
-
-
 ========================================================================
 For SnapTree:
 ========================================================================

README.md

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ storage systems. Because the protocols have changed in different versions of
 Hadoop, you must build Spark against the same version that your cluster runs.
 
 Please refer to the build documentation at
-["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version)
+["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-with-maven.html#specifying-the-hadoop-version)
 for detailed guidance on building for a particular distribution of Hadoop, including
 building for particular Hive and Hive Thriftserver distributions. See also
 ["Third Party Hadoop Distributions"](http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html)

assembly/pom.xml

Lines changed: 4 additions & 0 deletions
@@ -146,6 +146,10 @@
           <exclude>com/google/common/base/Present*</exclude>
         </excludes>
       </relocation>
+      <relocation>
+        <pattern>org.apache.commons.math3</pattern>
+        <shadedPattern>org.spark-project.commons.math3</shadedPattern>
+      </relocation>
     </relocations>
     <transformers>
       <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
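
This relocation complements the core/pom.xml change below that makes commons-math3 a compile-scope dependency: the shade plugin rewrites Spark's own references to org.apache.commons.math3 into org.spark-project.commons.math3 inside the assembly jar, so an application can ship its own commons-math3 version without conflicting with Spark's copy. As a hedged illustration (the probe class is hypothetical, not part of this commit), one can check which copies are visible on an assembly classpath:

// Hypothetical probe (not from this commit): reports which copies of
// commons-math3 are loadable from the current classpath.
public final class ShadeProbe {
  private static boolean present(String className) {
    try {
      Class.forName(className);
      return true;
    } catch (ClassNotFoundException e) {
      return false;
    }
  }

  public static void main(String[] args) {
    // Spark's relocated copy, produced by the <relocation> rule above.
    System.out.println("shaded copy:   "
        + present("org.spark-project.commons.math3.random.RandomGenerator"));
    // An application-provided copy, if the user ships their own commons-math3.
    System.out.println("original copy: "
        + present("org.apache.commons.math3.random.RandomGenerator"));
  }
}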

core/pom.xml

Lines changed: 5 additions & 6 deletions
@@ -85,8 +85,6 @@
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-math3</artifactId>
-      <version>3.3</version>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>com.google.code.findbugs</groupId>
@@ -162,10 +160,6 @@
       <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
       <version>3.2.10</version>
     </dependency>
-    <dependency>
-      <groupId>colt</groupId>
-      <artifactId>colt</artifactId>
-    </dependency>
     <dependency>
       <groupId>org.apache.mesos</groupId>
       <artifactId>mesos</artifactId>
@@ -247,6 +241,11 @@
         </exclusion>
       </exclusions>
     </dependency>
+    <dependency>
+      <groupId>org.seleniumhq.selenium</groupId>
+      <artifactId>selenium-java</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.binary.version}</artifactId>
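
The new selenium-java test dependency supports browser-level tests of the Spark web UI. A minimal sketch of such a test, not taken from this commit (the class name and URL are illustrative, and it assumes an application UI already running on the default port 4040):

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;

public final class UiSmokeTest {
  public static void main(String[] args) {
    // Headless in-process browser bundled with selenium-java.
    WebDriver driver = new HtmlUnitDriver();
    try {
      driver.get("http://localhost:4040/stages/");
      // The stages page should render a title and at least one table.
      System.out.println("page title: " + driver.getTitle());
      System.out.println("has table:  "
          + !driver.findElements(By.tagName("table")).isEmpty());
    } finally {
      driver.quit();
    }
  }
}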

core/src/main/java/org/apache/spark/JobExecutionStatus.java (new file)

Lines changed: 25 additions & 0 deletions

@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark;
+
+public enum JobExecutionStatus {
+  RUNNING,
+  SUCCEEDED,
+  FAILED,
+  UNKNOWN
+}
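
For illustration, a hypothetical helper (not part of this commit) that branches on the new enum; treating UNKNOWN as non-terminal is an assumption of this sketch:

import org.apache.spark.JobExecutionStatus;

public final class StatusUtil {
  // Returns true once a job has reached a terminal state.
  public static boolean isFinished(JobExecutionStatus status) {
    switch (status) {
      case SUCCEEDED:
      case FAILED:
        return true;
      case RUNNING:
      case UNKNOWN:
      default:
        return false;
    }
  }
}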

core/src/main/java/org/apache/spark/SparkJobInfo.java (new file)

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark;
+
+/**
+ * Exposes information about Spark Jobs.
+ *
+ * This interface is not designed to be implemented outside of Spark. We may add additional methods
+ * which may break binary compatibility with outside implementations.
+ */
+public interface SparkJobInfo {
+  int jobId();
+  int[] stageIds();
+  JobExecutionStatus status();
+}

core/src/main/java/org/apache/spark/SparkStageInfo.java (new file)

Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark;
+
+/**
+ * Exposes information about Spark Stages.
+ *
+ * This interface is not designed to be implemented outside of Spark. We may add additional methods
+ * which may break binary compatibility with outside implementations.
+ */
+public interface SparkStageInfo {
+  int stageId();
+  int currentAttemptId();
+  String name();
+  int numTasks();
+  int numActiveTasks();
+  int numCompletedTasks();
+  int numFailedTasks();
+}
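
A hedged sketch of consuming these two interfaces: how instances are obtained depends on the accessors this commit wires into SparkContext through SparkStatusAPI (see the SparkContext.scala diff below), while the formatter itself relies only on the methods declared above.

import org.apache.spark.SparkJobInfo;
import org.apache.spark.SparkStageInfo;

public final class ProgressFormatter {
  // Summarizes a job: id, current status, and how many stages it spans.
  public static String describe(SparkJobInfo job) {
    return String.format("job %d [%s], stages: %d",
        job.jobId(), job.status(), job.stageIds().length);
  }

  // Summarizes a stage, including a completion percentage derived
  // from the task counters exposed by the interface.
  public static String describe(SparkStageInfo stage) {
    int done = stage.numCompletedTasks();
    int total = stage.numTasks();
    double pct = total == 0 ? 100.0 : 100.0 * done / total;
    return String.format(
        "stage %d (%s) attempt %d: %d/%d tasks (%.1f%%), %d active, %d failed",
        stage.stageId(), stage.name(), stage.currentAttemptId(),
        done, total, pct, stage.numActiveTasks(), stage.numFailedTasks());
  }
}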

core/src/main/java/org/apache/spark/TaskContext.java

Lines changed: 0 additions & 2 deletions
@@ -71,15 +71,13 @@ static void unset() {
   /**
    * Add a (Java friendly) listener to be executed on task completion.
    * This will be called in all situations - success, failure, or cancellation.
-   * <p/>
    * An example use is for HadoopRDD to register a callback to close the input stream.
    */
   public abstract TaskContext addTaskCompletionListener(TaskCompletionListener listener);
 
   /**
    * Add a listener in the form of a Scala closure to be executed on task completion.
    * This will be called in all situations - success, failure, or cancellation.
-   * <p/>
    * An example use is for HadoopRDD to register a callback to close the input stream.
    */
   public abstract TaskContext addTaskCompletionListener(final Function1<TaskContext, Unit> f);
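
A minimal usage sketch of the Java-friendly overload, mirroring the stream-closing use case the doc comment mentions. The helper class is hypothetical, and it assumes TaskCompletionListener resides in org.apache.spark.util as in this version of Spark:

import java.io.IOException;
import java.io.InputStream;

import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

public final class StreamCleanup {
  // Registers a callback so the stream is closed when the task ends.
  public static void closeOnCompletion(TaskContext context, final InputStream in) {
    context.addTaskCompletionListener(new TaskCompletionListener() {
      @Override
      public void onTaskCompletion(TaskContext ctx) {
        // Runs on success, failure, or cancellation alike.
        try {
          in.close();
        } catch (IOException e) {
          // Best-effort cleanup; swallow to avoid masking the task's outcome.
        }
      }
    });
  }
}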

core/src/main/java/org/apache/spark/api/java/function/PairFunction.java

Lines changed: 2 additions & 1 deletion
@@ -22,7 +22,8 @@
 import scala.Tuple2;
 
 /**
- * A function that returns key-value pairs (Tuple2<K, V>), and can be used to construct PairRDDs.
+ * A function that returns key-value pairs (Tuple2&lt;K, V&gt;), and can be used to
+ * construct PairRDDs.
  */
 public interface PairFunction<T, K, V> extends Serializable {
   public Tuple2<K, V> call(T t) throws Exception;
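
The escaped &lt;/&gt; entities keep javadoc from parsing Tuple2<K, V> as an HTML tag. As a usage sketch (the class and helper names of the sketch itself are illustrative), the interface is typically passed to mapToPair to build a pair RDD:

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

public final class WordPairs {
  public static JavaPairRDD<String, Integer> ones(JavaSparkContext jsc) {
    JavaRDD<String> words = jsc.parallelize(Arrays.asList("a", "b", "a"));
    // Each call returns a Tuple2<K, V>, turning JavaRDD<T> into JavaPairRDD<K, V>.
    return words.mapToPair(new PairFunction<String, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(String w) {
        return new Tuple2<String, Integer>(w, 1);
      }
    });
  }
}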

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 9 additions & 67 deletions
@@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicInteger
 import java.util.{Properties, UUID}
 import java.util.UUID.randomUUID
 import scala.collection.{Map, Set}
-import scala.collection.JavaConversions._
 import scala.collection.generic.Growable
 import scala.collection.mutable.HashMap
 import scala.reflect.{ClassTag, classTag}
@@ -51,6 +50,7 @@ import org.apache.spark.scheduler.cluster.mesos.{CoarseMesosSchedulerBackend, Me
 import org.apache.spark.scheduler.local.LocalBackend
 import org.apache.spark.storage._
 import org.apache.spark.ui.SparkUI
+import org.apache.spark.ui.jobs.JobProgressListener
 import org.apache.spark.util.{CallSite, ClosureCleaner, MetadataCleaner, MetadataCleanerType, TimeStampedWeakValueHashMap, Utils}
 
 /**
@@ -61,7 +61,7 @@ import org.apache.spark.util.{CallSite, ClosureCleaner, MetadataCleaner, Metadat
  * this config overrides the default configs as well as system properties.
  */
 
-class SparkContext(config: SparkConf) extends Logging {
+class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging {
 
   // This is used only by YARN for now, but should be relevant to other cluster types (Mesos,
   // etc) too. This is typically generated from InputFormatInfo.computePreferredLocations. It
@@ -224,10 +224,15 @@ class SparkContext(config: SparkConf) extends Logging {
   private[spark] val metadataCleaner =
     new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, conf)
 
-  // Initialize the Spark UI, registering all associated listeners
+
+  private[spark] val jobProgressListener = new JobProgressListener(conf)
+  listenerBus.addListener(jobProgressListener)
+
+  // Initialize the Spark UI
   private[spark] val ui: Option[SparkUI] =
     if (conf.getBoolean("spark.ui.enabled", true)) {
-      Some(new SparkUI(this))
+      Some(SparkUI.createLiveUI(this, conf, listenerBus, jobProgressListener,
+        env.securityManager, appName))
     } else {
       // For tests, do not enable the UI
       None
@@ -854,69 +859,6 @@ class SparkContext(config: SparkConf) extends Logging {
   /** The version of Spark on which this application is running. */
   def version = SPARK_VERSION
 
-  /**
-   * Return a map from the slave to the max memory available for caching and the remaining
-   * memory available for caching.
-   */
-  def getExecutorMemoryStatus: Map[String, (Long, Long)] = {
-    env.blockManager.master.getMemoryStatus.map { case (blockManagerId, mem) =>
-      (blockManagerId.host + ":" + blockManagerId.port, mem)
-    }
-  }
-
-  /**
-   * :: DeveloperApi ::
-   * Return information about what RDDs are cached, if they are in mem or on disk, how much space
-   * they take, etc.
-   */
-  @DeveloperApi
-  def getRDDStorageInfo: Array[RDDInfo] = {
-    val rddInfos = persistentRdds.values.map(RDDInfo.fromRdd).toArray
-    StorageUtils.updateRddInfo(rddInfos, getExecutorStorageStatus)
-    rddInfos.filter(_.isCached)
-  }
-
-  /**
-   * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call.
-   * Note that this does not necessarily mean the caching or computation was successful.
-   */
-  def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap
-
-  /**
-   * :: DeveloperApi ::
-   * Return information about blocks stored in all of the slaves
-   */
-  @DeveloperApi
-  def getExecutorStorageStatus: Array[StorageStatus] = {
-    env.blockManager.master.getStorageStatus
-  }
-
-  /**
-   * :: DeveloperApi ::
-   * Return pools for fair scheduler
-   */
-  @DeveloperApi
-  def getAllPools: Seq[Schedulable] = {
-    // TODO(xiajunluan): We should take nested pools into account
-    taskScheduler.rootPool.schedulableQueue.toSeq
-  }
-
-  /**
-   * :: DeveloperApi ::
-   * Return the pool associated with the given name, if one exists
-   */
-  @DeveloperApi
-  def getPoolForName(pool: String): Option[Schedulable] = {
-    Option(taskScheduler.rootPool.schedulableNameToSchedulable.get(pool))
-  }
-
-  /**
-   * Return current scheduling mode
-   */
-  def getSchedulingMode: SchedulingMode.SchedulingMode = {
-    taskScheduler.schedulingMode
-  }
-
   /**
   * Clear the job's list of files added by `addFile` so that they do not get downloaded to
   * any new nodes.