
Commit 9d48cbf

Initial pass
1 parent d666053 commit 9d48cbf


48 files changed (+166 −35 lines)

core/src/main/scala/org/apache/spark/Aggregator.scala

Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,8 @@ package org.apache.spark
 import org.apache.spark.util.collection.{AppendOnlyMap, ExternalAppendOnlyMap}

 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * A set of functions used to aggregate data.
  *
  * @param createCombiner function to create the initial value of the aggregation.

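For context, these three functions are the same createCombiner / mergeValue / mergeCombiners triple that user code normally supplies through the public combineByKey API. A minimal Scala sketch (spark-shell style, assuming an existing SparkContext `sc` and an illustrative per-key averaging use case):

import org.apache.spark.SparkContext._   // pair-RDD functions via implicit conversion

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
val sums = pairs.combineByKey[(Int, Int)](
  (v: Int) => (v, 1),                                            // createCombiner
  (acc: (Int, Int), v: Int) => (acc._1 + v, acc._2 + 1),         // mergeValue
  (a: (Int, Int), b: (Int, Int)) => (a._1 + b._1, a._2 + b._2))  // mergeCombiners
sums.collect().foreach { case (k, (sum, n)) => println(k + " -> " + sum.toDouble / n) }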
core/src/main/scala/org/apache/spark/Dependency.scala

Lines changed: 10 additions & 0 deletions
@@ -21,12 +21,16 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.serializer.Serializer

 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Base class for dependencies.
  */
 abstract class Dependency[T](val rdd: RDD[T]) extends Serializable


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Base class for dependencies where each partition of the parent RDD is used by at most one
  * partition of the child RDD. Narrow dependencies allow for pipelined execution.
  */
@@ -41,6 +45,8 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) {


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Represents a dependency on the output of a shuffle stage.
  * @param rdd the parent RDD
  * @param partitioner partitioner used to partition the shuffle output
@@ -59,6 +65,8 @@ class ShuffleDependency[K, V](


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Represents a one-to-one dependency between partitions of the parent and child RDDs.
  */
 class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {
@@ -67,6 +75,8 @@ class OneToOneDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Represents a one-to-one dependency between ranges of partitions in the parent and child RDDs.
  * @param rdd the parent RDD
  * @param inStart the start of the range in the parent RDD

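To make the narrow-dependency contract above concrete: a narrow dependency only has to answer getParents(partitionId) with the parent partitions that one child partition needs. A minimal sketch (the class name IdentityDependency is hypothetical; OneToOneDependency in this file behaves exactly like this, and RangeDependency does the same with an offset):

import org.apache.spark.NarrowDependency
import org.apache.spark.rdd.RDD

// Each child partition i depends only on parent partition i, so stages can be pipelined.
class IdentityDependency[T](rdd: RDD[T]) extends NarrowDependency[T](rdd) {
  override def getParents(partitionId: Int): Seq[Int] = List(partitionId)
}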
core/src/main/scala/org/apache/spark/FutureAction.scala

Lines changed: 4 additions & 0 deletions
@@ -25,6 +25,8 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}

 /**
+ * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+ *
  * A future for the result of an action to support cancellation. This is an extension of the
  * Scala Future interface to support cancellation.
  */
@@ -148,6 +150,8 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:


 /**
+ * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+ *
  * A [[FutureAction]] for actions that could trigger multiple Spark jobs. Examples include take,
  * takeSample. Cancellation works by setting the cancelled flag to true and interrupting the
  * action thread if it is being blocked by a job.

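As a usage sketch of the cancellation described above (spark-shell style, assuming an existing SparkContext `sc`; countAsync is provided by AsyncRDDActions through the SparkContext._ implicit):

import org.apache.spark.SparkContext._   // brings the async actions into scope

val future = sc.parallelize(1 to 1000000).countAsync()   // a FutureAction[Long]
// Give up on the result: sets the cancelled flag and interrupts the action if it is blocked.
future.cancel()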
core/src/main/scala/org/apache/spark/SerializableWritable.scala

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.io.ObjectWritable
 import org.apache.hadoop.io.Writable

-class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable {
+private[spark] class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable {
   def value = t
   override def toString = t.toString

core/src/main/scala/org/apache/spark/SparkEnv.scala

Lines changed: 1 addition & 1 deletion
@@ -41,7 +41,7 @@ import org.apache.spark.util.{AkkaUtils, Utils}
  * objects needs to have the right SparkEnv set. You can get the current environment with
  * SparkEnv.get (e.g. after creating a SparkContext) and set it with SparkEnv.set.
  */
-class SparkEnv private[spark] (
+private[spark] class SparkEnv private[spark] (
     val executorId: String,
     val actorSystem: ActorSystem,
     val serializer: Serializer,

core/src/main/scala/org/apache/spark/TaskContext.scala

Lines changed: 5 additions & 0 deletions
@@ -21,6 +21,11 @@ import scala.collection.mutable.ArrayBuffer

 import org.apache.spark.executor.TaskMetrics

+/**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
+ * Contextual information about a task which can be read or mutated during execution.
+ */
 class TaskContext(
   val stageId: Int,
   val partitionId: Int,

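A hedged sketch of reading that per-task information from user code, assuming the mapPartitionsWithContext API of this era, which hands the running task's TaskContext to the closure:

// Assumes an existing SparkContext `sc`.
val rdd = sc.parallelize(1 to 100, 4)
val tagged = rdd.mapPartitionsWithContext { (context, iter) =>
  // stageId and partitionId are the fields documented on TaskContext above.
  iter.map(x => (context.stageId, context.partitionId, x))
}
tagged.take(5).foreach(println)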
core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ package org.apache.spark.api.java

 import com.google.common.base.Optional

-object JavaUtils {
+private[spark] object JavaUtils {
   def optionToOptional[T](option: Option[T]): Optional[T] =
     option match {
       case Some(value) => Optional.of(value)

core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ import org.apache.hadoop.fs.FileSystem

 import org.apache.spark.metrics.source.Source

-class ExecutorSource(val executor: Executor, executorId: String) extends Source {
+private[spark] class ExecutorSource(val executor: Executor, executorId: String) extends Source {
   private def fileStats(scheme: String) : Option[FileSystem.Statistics] =
     FileSystem.getAllStatistics().filter(s => s.getScheme.equals(scheme)).headOption

core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala

Lines changed: 15 additions & 0 deletions
@@ -19,6 +19,11 @@ package org.apache.spark.executor

 import org.apache.spark.storage.{BlockId, BlockStatus}

+/**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
+ * Metrics tracked during the execution of a task.
+ */
 class TaskMetrics extends Serializable {
   /**
    * Host's name the task runs on
@@ -82,6 +87,11 @@ object TaskMetrics {
 }


+/**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
+ * Metrics pertaining to shuffle data read in a given task.
+ */
 class ShuffleReadMetrics extends Serializable {
   /**
    * Absolute time when this task finished reading shuffle data
@@ -116,6 +126,11 @@ class ShuffleReadMetrics extends Serializable {
   var remoteBytesRead: Long = _
 }

+/**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
+ * Metrics pertaining to shuffle data written in a given task.
+ */
 class ShuffleWriteMetrics extends Serializable {
   /**
    * Number of bytes written for the shuffle by this task

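One way to observe these metrics from the driver is a SparkListener whose onTaskEnd callback carries the finished task's TaskMetrics; a sketch under that assumption (hostname and executorRunTime are fields of this class; the event and listener names are from the scheduler listener API of this era):

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Assumes an existing SparkContext `sc`.
sc.addSparkListener(new SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
    val m = taskEnd.taskMetrics
    if (m != null) {
      println("task on " + m.hostname + " ran for " + m.executorRunTime + " ms")
    }
  }
})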
core/src/main/scala/org/apache/spark/io/CompressionCodec.scala

Lines changed: 18 additions & 0 deletions
@@ -25,8 +25,14 @@ import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream}
 import org.apache.spark.SparkConf

 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * CompressionCodec allows the customization of choosing different compression implementations
  * to be used in block storage.
+ *
+ * Note: The wire protocol for a codec is not guaranteed compatible across versions of Spark.
+ * This is intended for use as an internal compression utility within a single
+ * Spark application.
  */
 trait CompressionCodec {

@@ -52,7 +58,13 @@ private[spark] object CompressionCodec {


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * LZF implementation of [[org.apache.spark.io.CompressionCodec]].
+ *
+ * Note: The wire protocol for this codec is not guaranteed to be compatible across versions
+ * of Spark. This is intended for use as an internal compression utility within a single Spark
+ * application.
  */
 class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec {

@@ -65,8 +77,14 @@ class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec {


 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * Snappy implementation of [[org.apache.spark.io.CompressionCodec]].
  * Block size can be configured by spark.io.compression.snappy.block.size.
+ *
+ * Note: The wire protocol for this codec is not guaranteed to be compatible across versions
+ * of Spark. This is intended for use as an internal compression utility within a single Spark
+ * application.
  */
 class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec {

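To illustrate the trait's contract, a minimal custom codec sketch. The two stream-wrapping methods are the ones LZFCompressionCodec and SnappyCompressionCodec implement above; that a codec is picked up by fully qualified class name through spark.io.compression.codec and must accept a SparkConf constructor argument is an assumption about the configuration mechanism:

import java.io.{InputStream, OutputStream}
import java.util.zip.{GZIPInputStream, GZIPOutputStream}

import org.apache.spark.SparkConf
import org.apache.spark.io.CompressionCodec

// Hypothetical codec that wraps block streams in plain JDK gzip.
class GzipCompressionCodec(conf: SparkConf) extends CompressionCodec {
  override def compressedOutputStream(s: OutputStream): OutputStream = new GZIPOutputStream(s)
  override def compressedInputStream(s: InputStream): InputStream = new GZIPInputStream(s)
}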
core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ import com.codahale.metrics.{ConsoleReporter, MetricRegistry}
 import org.apache.spark.SecurityManager
 import org.apache.spark.metrics.MetricsSystem

-class ConsoleSink(val property: Properties, val registry: MetricRegistry,
+private[spark] class ConsoleSink(val property: Properties, val registry: MetricRegistry,
     securityMgr: SecurityManager) extends Sink {
   val CONSOLE_DEFAULT_PERIOD = 10
   val CONSOLE_DEFAULT_UNIT = "SECONDS"

core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ import com.codahale.metrics.{CsvReporter, MetricRegistry}
 import org.apache.spark.SecurityManager
 import org.apache.spark.metrics.MetricsSystem

-class CsvSink(val property: Properties, val registry: MetricRegistry,
+private[spark] class CsvSink(val property: Properties, val registry: MetricRegistry,
     securityMgr: SecurityManager) extends Sink {
   val CSV_KEY_PERIOD = "period"
   val CSV_KEY_UNIT = "unit"

core/src/main/scala/org/apache/spark/metrics/sink/GraphiteSink.scala

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ import com.codahale.metrics.graphite.{Graphite, GraphiteReporter}
 import org.apache.spark.SecurityManager
 import org.apache.spark.metrics.MetricsSystem

-class GraphiteSink(val property: Properties, val registry: MetricRegistry,
+private[spark] class GraphiteSink(val property: Properties, val registry: MetricRegistry,
     securityMgr: SecurityManager) extends Sink {
   val GRAPHITE_DEFAULT_PERIOD = 10
   val GRAPHITE_DEFAULT_UNIT = "SECONDS"

core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ import java.util.Properties
 import com.codahale.metrics.{JmxReporter, MetricRegistry}
 import org.apache.spark.SecurityManager

-class JmxSink(val property: Properties, val registry: MetricRegistry,
+private[spark] class JmxSink(val property: Properties, val registry: MetricRegistry,
     securityMgr: SecurityManager) extends Sink {

   val reporter: JmxReporter = JmxReporter.forRegistry(registry).build()

core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ import org.eclipse.jetty.servlet.ServletContextHandler
 import org.apache.spark.SecurityManager
 import org.apache.spark.ui.JettyUtils._

-class MetricsServlet(val property: Properties, val registry: MetricRegistry,
+private[spark] class MetricsServlet(val property: Properties, val registry: MetricRegistry,
     securityMgr: SecurityManager) extends Sink {
   val SERVLET_KEY_PATH = "path"
   val SERVLET_KEY_SAMPLE = "sample"

core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@

 package org.apache.spark.metrics.sink

-trait Sink {
+private[spark] trait Sink {
   def start: Unit
   def stop: Unit
 }

core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ package org.apache.spark.metrics.source
 import com.codahale.metrics.MetricRegistry
 import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet}

-class JvmSource extends Source {
+private[spark] class JvmSource extends Source {
   val sourceName = "jvm"
   val metricRegistry = new MetricRegistry()

core/src/main/scala/org/apache/spark/metrics/source/Source.scala

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ package org.apache.spark.metrics.source

 import com.codahale.metrics.MetricRegistry

-trait Source {
+private[spark] trait Source {
   def sourceName: String
   def metricRegistry: MetricRegistry
 }

core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala

Lines changed: 3 additions & 1 deletion
@@ -18,7 +18,9 @@
 package org.apache.spark.partial

 /**
- * A Double with error bars on it.
+ * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+ *
+ * A Double value with error bars and associated confidence.
  */
 class BoundedDouble(val mean: Double, val confidence: Double, val low: Double, val high: Double) {
   override def toString(): String = "[%.3f, %.3f]".format(low, high)

core/src/main/scala/org/apache/spark/partial/PartialResult.scala

Lines changed: 3 additions & 0 deletions
@@ -17,6 +17,9 @@

 package org.apache.spark.partial

+/**
+ * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+ */
 class PartialResult[R](initialVal: R, isFinal: Boolean) {
   private var finalValue: Option[R] = if (isFinal) Some(initialVal) else None
   private var failure: Option[Exception] = None

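As a usage sketch for the two experimental classes above: approximate actions such as countApprox return a PartialResult whose values are BoundedDoubles (spark-shell style, assuming an existing SparkContext `sc`; the timeout and confidence figures are illustrative):

val rdd = sc.parallelize(1 to 1000000, 100)

// Ask for an answer within 500 ms at 95% confidence.
val partial = rdd.countApprox(timeout = 500, confidence = 0.95)

val estimate = partial.initialValue   // a BoundedDouble
println("count in [" + estimate.low + ", " + estimate.high + "], mean " + estimate.mean)

val exact = partial.getFinalValue()   // blocks until the job finishes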
core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,8 @@ import scala.reflect.ClassTag

 import org.apache.spark.{ComplexFutureAction, FutureAction, Logging}

 /**
+ * <span class="badge badge-red" style="float: right;">EXPERIMENTAL</span>
+ *
  * A set of asynchronous RDD actions available through an implicit conversion.
  * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions.
  */

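A sketch of the implicit-conversion usage described in that comment: after the import, the *Async actions return FutureActions, which extend the Scala Future interface and so work with the usual Future machinery (assumes an existing SparkContext `sc`; the Await timeout is illustrative):

import scala.concurrent.Await
import scala.concurrent.duration._

import org.apache.spark.SparkContext._   // enables collectAsync, countAsync, foreachAsync, ...

val future = sc.parallelize(1 to 1000).collectAsync()
val result = Await.result(future, 30.seconds)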
core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala

Lines changed: 1 addition & 0 deletions
@@ -57,6 +57,7 @@ private[spark] class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]
  * @param rdds parent RDDs.
  * @param part partitioner used to partition the shuffle output.
  */
+private[spark]
 class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner)
   extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) {

core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala

Lines changed: 2 additions & 2 deletions
@@ -32,7 +32,7 @@ import org.apache.spark._
  * @param parentsIndices list of indices in the parent that have been coalesced into this partition
  * @param preferredLocation the preferred location for this partition
  */
-case class CoalescedRDDPartition(
+private[spark] case class CoalescedRDDPartition(
     index: Int,
     @transient rdd: RDD[_],
     parentsIndices: Array[Int],
@@ -70,7 +70,7 @@ case class CoalescedRDDPartition(
  * @param maxPartitions number of desired partitions in the coalesced RDD
  * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance
  */
-class CoalescedRDD[T: ClassTag](
+private[spark] class CoalescedRDD[T: ClassTag](
     @transient var prev: RDD[T],
     maxPartitions: Int,
     balanceSlack: Double = 0.10)

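Since CoalescedRDD and its partition class become private[spark] here, the supported route to the same behaviour is RDD.coalesce, which builds a CoalescedRDD internally; a minimal sketch (the input path and partition counts are illustrative):

// Assumes an existing SparkContext `sc`.
val fine = sc.textFile("hdfs:///some/path", 1000)
// Collapse to 10 partitions without a shuffle; balanceSlack stays at its default.
val coarse = fine.coalesce(10)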
core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ import org.apache.spark.{Partition, SparkContext, TaskContext}
 /**
  * An RDD that is empty, i.e. has no element in it.
  */
-class EmptyRDD[T: ClassTag](sc: SparkContext) extends RDD[T](sc, Nil) {
+private[spark] class EmptyRDD[T: ClassTag](sc: SparkContext) extends RDD[T](sc, Nil) {

   override def getPartitions: Array[Partition] = Array.empty

core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala

Lines changed: 5 additions & 0 deletions
@@ -70,9 +70,14 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
 }

 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
  * sources in HBase, or S3), using the older MapReduce API (`org.apache.hadoop.mapred`).
  *
+ * Note: Instantiating this class directly is not recommended, please use
+ * [[org.apache.spark.SparkContext.hadoopRDD()]]
+ *
  * @param sc The SparkContext to associate the RDD with.
  * @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
  * variabe references an instance of JobConf, then that JobConf will be used for the Hadoop job.

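A sketch of the recommended entry points named in the note: creating the RDD through SparkContext (hadoopRDD for an existing JobConf, or hadoopFile as below) rather than instantiating HadoopRDD directly. The path and split count are illustrative:

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat

// Assumes an existing SparkContext `sc`; uses the old mapred API, matching HadoopRDD.
val records = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///data/events", 8)
val lines = records.map { case (_, text) => text.toString }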
core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ import org.apache.spark.util.NextIterator
 private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition {
   override def index = idx
 }
-
+// TODO: Expose a jdbcRDD function in SparkContext and mark this as semi-private
 /**
  * An RDD that executes an SQL query on a JDBC connection and reads results.
  * For usage example, see test case JdbcRDDSuite.

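Until the jdbcRDD helper mentioned in the TODO exists, the class is used directly; a hedged sketch (the JDBC URL, query, and bounds are illustrative; the query needs the two '?' placeholders that the partition bounds are bound to):

import java.sql.{DriverManager, ResultSet}

import org.apache.spark.rdd.JdbcRDD

// Assumes an existing SparkContext `sc` and a reachable database.
val rows = new JdbcRDD(
  sc,
  () => DriverManager.getConnection("jdbc:h2:mem:testdb"),
  "SELECT id, name FROM users WHERE id >= ? AND id <= ?",
  1, 1000, 3,                                   // lowerBound, upperBound, numPartitions
  (rs: ResultSet) => (rs.getLong(1), rs.getString(2)))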
core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala

Lines changed: 5 additions & 0 deletions
@@ -36,9 +36,14 @@ class NewHadoopPartition(rddId: Int, val index: Int, @transient rawSplit: InputS
 }

 /**
+ * <span class="badge badge-red" style="float: right;">SEMI-PRIVATE</span>
+ *
  * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
  * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`).
  *
+ * Note: Instantiating this class directly is not recommended, please use
+ * [[org.apache.spark.SparkContext.newAPIHadoopRDD()]]
+ *
  * @param sc The SparkContext to associate the RDD with.
  * @param inputFormatClass Storage format of the data to be read.
  * @param keyClass Class of the key associated with the inputFormatClass.

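As with the old-API RDD above, the note points at the SparkContext entry points; a sketch using newAPIHadoopFile with the new mapreduce input format (the path is illustrative):

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat

// Assumes an existing SparkContext `sc`; uses the new mapreduce API, matching NewHadoopRDD.
val records = sc.newAPIHadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///data/events")
val lines = records.map { case (_, text) => text.toString }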