[SPARK-10231] [MLLIB] update @Since annotation for mllib.classification #8421

Status: Closed (wants to merge 1 commit).
Changes from all commits:
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ClassificationModel.scala
@@ -30,14 +30,15 @@ import org.apache.spark.rdd.RDD
  * belongs. The categories are represented by double values: 0.0, 1.0, 2.0, etc.
  */
 @Experimental
+@Since("0.8.0")
 trait ClassificationModel extends Serializable {
   /**
    * Predict values for the given data set using the model trained.
    *
    * @param testData RDD representing data points to be predicted
    * @return an RDD[Double] where each entry contains the corresponding prediction
    */
-  @Since("0.8.0")
+  @Since("1.0.0")
   def predict(testData: RDD[Vector]): RDD[Double]
 
   /**
@@ -46,15 +47,15 @@ trait ClassificationModel extends Serializable {
    * @param testData array representing a single data point
    * @return predicted category from the trained model
    */
-  @Since("0.8.0")
+  @Since("1.0.0")
   def predict(testData: Vector): Double
 
   /**
    * Predict values for examples stored in a JavaRDD.
    * @param testData JavaRDD representing data points to be predicted
    * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
    */
-  @Since("0.8.0")
+  @Since("1.0.0")
   def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
     predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
 }
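Not part of the patch, but as a reading aid: the trait above is the contract all of the annotated classifiers implement. A minimal sketch of a conforming implementation (the `ConstantModel` class is hypothetical, not in Spark):

```scala
import org.apache.spark.mllib.classification.ClassificationModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD

// Hypothetical model that predicts the same label for every input; the
// JavaRDD overload comes for free from the trait's default implementation.
class ConstantModel(label: Double) extends ClassificationModel {
  override def predict(testData: RDD[Vector]): RDD[Double] =
    testData.map(_ => label)

  override def predict(testData: Vector): Double = label
}
```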
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala
@@ -41,11 +41,12 @@ import org.apache.spark.rdd.RDD
  * Multinomial Logistic Regression. By default, it is binary logistic regression
  * so numClasses will be set to 2.
  */
-class LogisticRegressionModel (
-    override val weights: Vector,
-    override val intercept: Double,
-    val numFeatures: Int,
-    val numClasses: Int)
+@Since("0.8.0")
+class LogisticRegressionModel @Since("1.3.0") (
+    @Since("1.0.0") override val weights: Vector,
+    @Since("1.0.0") override val intercept: Double,
+    @Since("1.3.0") val numFeatures: Int,
+    @Since("1.3.0") val numClasses: Int)
   extends GeneralizedLinearModel(weights, intercept) with ClassificationModel with Serializable
   with Saveable with PMMLExportable {
 
@@ -75,6 +76,7 @@ class LogisticRegressionModel (
   /**
    * Constructs a [[LogisticRegressionModel]] with weights and intercept for binary classification.
    */
+  @Since("1.0.0")
   def this(weights: Vector, intercept: Double) = this(weights, intercept, weights.size, 2)
 
   private var threshold: Option[Double] = Some(0.5)
@@ -166,12 +168,12 @@ class LogisticRegressionModel (
 
   override protected def formatVersion: String = "1.0"
 
-  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}"
   }
 }
 
+@Since("1.3.0")
 object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
 
   @Since("1.3.0")
@@ -207,6 +209,7 @@ object LogisticRegressionModel extends Loader[LogisticRegressionModel] {
  * for k classes multi-label classification problem.
  * Using [[LogisticRegressionWithLBFGS]] is recommended over this.
  */
+@Since("0.8.0")
 class LogisticRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -216,6 +219,7 @@ class LogisticRegressionWithSGD private[mllib] (
 
   private val gradient = new LogisticGradient()
   private val updater = new SquaredL2Updater()
+  @Since("0.8.0")
   override val optimizer = new GradientDescent(gradient, updater)
     .setStepSize(stepSize)
     .setNumIterations(numIterations)
@@ -227,6 +231,7 @@ class LogisticRegressionWithSGD private[mllib] (
    * Construct a LogisticRegression object with default parameters: {stepSize: 1.0,
    * numIterations: 100, regParm: 0.01, miniBatchFraction: 1.0}.
    */
+  @Since("0.8.0")
   def this() = this(1.0, 100, 0.01, 1.0)
 
   override protected[mllib] def createModel(weights: Vector, intercept: Double) = {
@@ -238,6 +243,7 @@ class LogisticRegressionWithSGD private[mllib] (
  * Top-level methods for calling Logistic Regression using Stochastic Gradient Descent.
  * NOTE: Labels used in Logistic Regression should be {0, 1}
 */
+@Since("0.8.0")
 object LogisticRegressionWithSGD {
   // NOTE(shivaram): We use multiple train methods instead of default arguments to support
   // Java programs.
@@ -333,11 +339,13 @@ object LogisticRegressionWithSGD {
  * NOTE: Labels used in Logistic Regression should be {0, 1, ..., k - 1}
  * for k classes multi-label classification problem.
  */
+@Since("1.1.0")
 class LogisticRegressionWithLBFGS
   extends GeneralizedLinearAlgorithm[LogisticRegressionModel] with Serializable {
 
   this.setFeatureScaling(true)
 
+  @Since("1.1.0")
   override val optimizer = new LBFGS(new LogisticGradient, new SquaredL2Updater)
 
   override protected val validators = List(multiLabelValidator)
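Again not in the patch, but a short usage sketch showing which of the annotated entry points a typical caller touches. It assumes an existing SparkContext `sc` and a LIBSVM-format file at the given path:

```scala
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.util.MLUtils

// Load labeled data; `sc` and the data path are assumed to exist.
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

// LogisticRegressionWithLBFGS is tagged @Since("1.1.0") above; run() comes
// from GeneralizedLinearAlgorithm and returns a LogisticRegressionModel.
val model = new LogisticRegressionWithLBFGS()
  .setNumClasses(2) // binary by default; > 2 switches to multinomial
  .run(data)

// The two predict overloads retagged @Since("1.0.0") in ClassificationModel:
val allScores = model.predict(data.map(_.features))
val oneScore = model.predict(data.first().features)
```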
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala
@@ -41,11 +41,12 @@ import org.apache.spark.sql.{DataFrame, SQLContext}
  *              where D is number of features
  * @param modelType The type of NB model to fit can be "multinomial" or "bernoulli"
  */
+@Since("0.9.0")
 class NaiveBayesModel private[spark] (
-    val labels: Array[Double],
-    val pi: Array[Double],
-    val theta: Array[Array[Double]],
-    val modelType: String)
+    @Since("1.0.0") val labels: Array[Double],
+    @Since("0.9.0") val pi: Array[Double],
+    @Since("0.9.0") val theta: Array[Array[Double]],
+    @Since("1.4.0") val modelType: String)
   extends ClassificationModel with Serializable with Saveable {
 
   import NaiveBayes.{Bernoulli, Multinomial, supportedModelTypes}
@@ -83,6 +84,7 @@ class NaiveBayesModel private[spark] (
       throw new UnknownError(s"Invalid modelType: $modelType.")
   }
 
+  @Since("1.0.0")
   override def predict(testData: RDD[Vector]): RDD[Double] = {
     val bcModel = testData.context.broadcast(this)
     testData.mapPartitions { iter =>
@@ -91,6 +93,7 @@ class NaiveBayesModel private[spark] (
     }
   }
 
+  @Since("1.0.0")
   override def predict(testData: Vector): Double = {
     modelType match {
       case Multinomial =>
@@ -107,6 +110,7 @@ class NaiveBayesModel private[spark] (
    * @return an RDD[Vector] where each entry contains the predicted posterior class probabilities,
    *         in the same order as class labels
    */
+  @Since("1.5.0")
   def predictProbabilities(testData: RDD[Vector]): RDD[Vector] = {
     val bcModel = testData.context.broadcast(this)
     testData.mapPartitions { iter =>
@@ -122,6 +126,7 @@ class NaiveBayesModel private[spark] (
    * @return predicted posterior class probabilities from the trained model,
    *         in the same order as class labels
    */
+  @Since("1.5.0")
   def predictProbabilities(testData: Vector): Vector = {
     modelType match {
       case Multinomial =>
@@ -158,6 +163,7 @@ class NaiveBayesModel private[spark] (
     new DenseVector(scaledProbs.map(_ / probSum))
   }
 
+  @Since("1.3.0")
   override def save(sc: SparkContext, path: String): Unit = {
     val data = NaiveBayesModel.SaveLoadV2_0.Data(labels, pi, theta, modelType)
     NaiveBayesModel.SaveLoadV2_0.save(sc, path, data)
@@ -166,6 +172,7 @@ class NaiveBayesModel private[spark] (
 
   override protected def formatVersion: String = "2.0"
 }
 
+@Since("1.3.0")
 object NaiveBayesModel extends Loader[NaiveBayesModel] {
 
   import org.apache.spark.mllib.util.Loader._
@@ -199,6 +206,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
       dataRDD.write.parquet(dataPath(path))
     }
 
+    @Since("1.3.0")
     def load(sc: SparkContext, path: String): NaiveBayesModel = {
       val sqlContext = new SQLContext(sc)
       // Load Parquet data.
@@ -301,30 +309,35 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] {
  * document classification. By making every vector a 0-1 vector, it can also be used as
  * Bernoulli NB ([[http://tinyurl.com/p7c96j6]]). The input feature values must be nonnegative.
  */
-
+@Since("0.9.0")
 class NaiveBayes private (
     private var lambda: Double,
     private var modelType: String) extends Serializable with Logging {
 
   import NaiveBayes.{Bernoulli, Multinomial}
 
+  @Since("1.4.0")
   def this(lambda: Double) = this(lambda, NaiveBayes.Multinomial)
 
+  @Since("0.9.0")
   def this() = this(1.0, NaiveBayes.Multinomial)
 
   /** Set the smoothing parameter. Default: 1.0. */
+  @Since("0.9.0")
   def setLambda(lambda: Double): NaiveBayes = {
     this.lambda = lambda
     this
   }
 
   /** Get the smoothing parameter. */
+  @Since("1.4.0")
   def getLambda: Double = lambda
 
   /**
    * Set the model type using a string (case-sensitive).
    * Supported options: "multinomial" (default) and "bernoulli".
    */
+  @Since("1.4.0")
   def setModelType(modelType: String): NaiveBayes = {
     require(NaiveBayes.supportedModelTypes.contains(modelType),
       s"NaiveBayes was created with an unknown modelType: $modelType.")
@@ -333,13 +346,15 @@ class NaiveBayes private (
   }
 
   /** Get the model type. */
+  @Since("1.4.0")
   def getModelType: String = this.modelType
 
   /**
    * Run the algorithm with the configured parameters on an input RDD of LabeledPoint entries.
    *
    * @param data RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
   */
+  @Since("0.9.0")
   def run(data: RDD[LabeledPoint]): NaiveBayesModel = {
     val requireNonnegativeValues: Vector => Unit = (v: Vector) => {
       val values = v match {
@@ -423,6 +438,7 @@ class NaiveBayes private (
 /**
  * Top-level methods for calling naive Bayes.
  */
+@Since("0.9.0")
 object NaiveBayes {
 
   /** String name for multinomial model type. */
@@ -485,7 +501,7 @@ object NaiveBayes {
    * @param modelType The type of NB model to fit from the enumeration NaiveBayesModels, can be
    *                  multinomial or bernoulli
    */
-  @Since("0.9.0")
+  @Since("1.4.0")
   def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = {
     require(supportedModelTypes.contains(modelType),
       s"NaiveBayes was created with an unknown modelType: $modelType.")
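For illustration only: the `train` overload retagged to 1.4.0 above is the one that takes `modelType`, since that parameter only appeared in 1.4. A sketch, assuming an existing SparkContext `sc`:

```scala
import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Tiny made-up training set with nonnegative features, as the docs require.
val training = sc.parallelize(Seq(
  LabeledPoint(0.0, Vectors.dense(1.0, 0.0)),
  LabeledPoint(1.0, Vectors.dense(0.0, 1.0))))

// The 3-argument overload is @Since("1.4.0"); the 1- and 2-argument
// overloads without modelType keep their older tags.
val model = NaiveBayes.train(training, lambda = 1.0, modelType = "bernoulli")

model.predict(Vectors.dense(0.0, 1.0))              // @Since("1.0.0")
model.predictProbabilities(Vectors.dense(0.0, 1.0)) // @Since("1.5.0"), per the diff
```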
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala
@@ -33,9 +33,10 @@ import org.apache.spark.rdd.RDD
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
  */
-class SVMModel (
-    override val weights: Vector,
-    override val intercept: Double)
+@Since("0.8.0")
+class SVMModel @Since("1.1.0") (
+    @Since("1.0.0") override val weights: Vector,
+    @Since("0.8.0") override val intercept: Double)
Inline review comment from the PR author on the constructor above:
This means the class is since 0.8, the constructor is since 1.1, weights is since 1.0, and intercept is since 0.8.
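To make that concrete, a hedged sketch (the `Example` class is hypothetical, not from the patch) of how each annotation position is read independently:

```scala
import org.apache.spark.annotation.Since

@Since("0.8.0")                 // the class itself has existed since 0.8
class Example @Since("1.1.0") ( // this primary constructor shape dates to 1.1
    @Since("1.0.0") val a: Int, // the `a` accessor was added in 1.0
    @Since("0.8.0") val b: Int) // `b` has been public as long as the class
```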

   extends GeneralizedLinearModel(weights, intercept) with ClassificationModel with Serializable
   with Saveable with PMMLExportable {
 
@@ -47,7 +48,7 @@ class SVMModel (
    * with prediction score greater than or equal to this threshold is identified as an positive,
    * and negative otherwise. The default value is 0.0.
    */
-  @Since("1.3.0")
+  @Since("1.0.0")
   @Experimental
   def setThreshold(threshold: Double): this.type = {
     this.threshold = Some(threshold)
@@ -92,12 +93,12 @@ class SVMModel (
 
   override protected def formatVersion: String = "1.0"
 
-  @Since("1.4.0")
   override def toString: String = {
     s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}"
   }
 }
 
+@Since("1.3.0")
 object SVMModel extends Loader[SVMModel] {
 
   @Since("1.3.0")
@@ -132,6 +133,7 @@ object SVMModel extends Loader[SVMModel] {
  * regularization is used, which can be changed via [[SVMWithSGD.optimizer]].
  * NOTE: Labels used in SVM should be {0, 1}.
  */
+@Since("0.8.0")
 class SVMWithSGD private (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -141,6 +143,7 @@ class SVMWithSGD private (
 
   private val gradient = new HingeGradient()
   private val updater = new SquaredL2Updater()
+  @Since("0.8.0")
   override val optimizer = new GradientDescent(gradient, updater)
     .setStepSize(stepSize)
     .setNumIterations(numIterations)
@@ -152,6 +155,7 @@ class SVMWithSGD private (
    * Construct a SVM object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParm: 0.01, miniBatchFraction: 1.0}.
   */
+  @Since("0.8.0")
   def this() = this(1.0, 100, 0.01, 1.0)
 
   override protected def createModel(weights: Vector, intercept: Double) = {
@@ -162,6 +166,7 @@ class SVMWithSGD private (
 /**
  * Top-level methods for calling SVM. NOTE: Labels used in SVM should be {0, 1}.
  */
+@Since("0.8.0")
 object SVMWithSGD {
 
   /**
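A sketch (not from the patch) of how the annotated SVM entry points line up in use; it assumes an existing `training: RDD[LabeledPoint]` with {0, 1} labels:

```scala
import org.apache.spark.mllib.classification.SVMWithSGD

// SVMWithSGD.train lives in the @Since("0.8.0") companion object above.
val numIterations = 100
val model = SVMWithSGD.train(training, numIterations)

// setThreshold was retagged @Since("1.0.0"); with a threshold set, predict
// returns 0/1 labels, and after clearThreshold it returns raw margins.
model.setThreshold(0.5)
val labels = model.predict(training.map(_.features))

model.clearThreshold()
val margins = model.predict(training.map(_.features))
```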
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionWithSGD.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.mllib.classification
 
-import org.apache.spark.annotation.Experimental
+import org.apache.spark.annotation.{Experimental, Since}
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.mllib.regression.StreamingLinearAlgorithm
 
@@ -44,6 +44,7 @@ import org.apache.spark.mllib.regression.StreamingLinearAlgorithm
  * }}}
  */
 @Experimental
+@Since("1.3.0")
 class StreamingLogisticRegressionWithSGD private[mllib] (
     private var stepSize: Double,
     private var numIterations: Int,
@@ -58,6 +59,7 @@ class StreamingLogisticRegressionWithSGD private[mllib] (
    * Initial weights must be set before using trainOn or predictOn
    * (see `StreamingLinearAlgorithm`)
    */
+  @Since("1.3.0")
   def this() = this(0.1, 50, 1.0, 0.0)
 
   protected val algorithm = new LogisticRegressionWithSGD(
@@ -66,30 +68,35 @@ class StreamingLogisticRegressionWithSGD private[mllib] (
   protected var model: Option[LogisticRegressionModel] = None
 
   /** Set the step size for gradient descent. Default: 0.1. */
+  @Since("1.3.0")
   def setStepSize(stepSize: Double): this.type = {
     this.algorithm.optimizer.setStepSize(stepSize)
     this
   }
 
   /** Set the number of iterations of gradient descent to run per update. Default: 50. */
+  @Since("1.3.0")
   def setNumIterations(numIterations: Int): this.type = {
     this.algorithm.optimizer.setNumIterations(numIterations)
     this
   }
 
   /** Set the fraction of each batch to use for updates. Default: 1.0. */
+  @Since("1.3.0")
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
     this
   }
 
   /** Set the regularization parameter. Default: 0.0. */
+  @Since("1.3.0")
   def setRegParam(regParam: Double): this.type = {
     this.algorithm.optimizer.setRegParam(regParam)
     this
   }
 
   /** Set the initial weights. Default: [0.0, 0.0]. */
+  @Since("1.3.0")
   def setInitialWeights(initialWeights: Vector): this.type = {
     this.model = Some(algorithm.createModel(initialWeights, 0.0))
     this
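Finally, a sketch (not from the patch) of the streaming workflow these setters belong to. It assumes an existing StreamingContext `ssc`, a feature count `numFeatures`, and two DStreams, `trainingStream: DStream[LabeledPoint]` and `testStream: DStream[(Long, Vector)]`:

```scala
import org.apache.spark.mllib.classification.StreamingLogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors

// Every setter below is one of the members retagged @Since("1.3.0") above;
// initial weights must be set before trainOn, per the class docs.
val model = new StreamingLogisticRegressionWithSGD()
  .setStepSize(0.5)
  .setNumIterations(10)
  .setMiniBatchFraction(1.0)
  .setRegParam(0.0)
  .setInitialWeights(Vectors.zeros(numFeatures))

model.trainOn(trainingStream)             // update the model on each batch
model.predictOnValues(testStream).print() // score the value side of the pairs

ssc.start()
ssc.awaitTermination()
```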