change default numFeatures to 2^20 in HashingTF

mengxr · mengxr · commit 773c1a9a2d23 · 2014-08-06T10:58:56.000-07:00
change annotation from DeveloperApi to Experimental in Normalizer and StandardScaler
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala
@@ -32,12 +32,12 @@ import org.apache.spark.util.Utils
  * :: Experimental ::
  * Maps a sequence of terms to their term frequencies using the hashing trick.
  *
- * @param numFeatures number of features (default: 1000000)
+ * @param numFeatures number of features (default: 2^20^)
  */
 @Experimental
 class HashingTF(val numFeatures: Int) extends Serializable {
 
-  def this() = this(1000000)
+  def this() = this(1 << 20)
 
   /**
    * Returns the index of the input term.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala
@@ -19,11 +19,11 @@ package org.apache.spark.mllib.feature
 
 import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 
 /**
- * :: DeveloperApi ::
+ * :: Experimental ::
  * Normalizes samples individually to unit L^p^ norm
  *
  * For any 1 <= p < Double.PositiveInfinity, normalizes samples using
@@ -33,7 +33,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
  *
  * @param p Normalization in L^p^ space, p = 2 by default.
  */
-@DeveloperApi
+@Experimental
 class Normalizer(p: Double) extends VectorTransformer {
 
   def this() = this(2)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala
@@ -19,22 +19,22 @@ package org.apache.spark.mllib.feature
 
 import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg.{Vector, Vectors}
 import org.apache.spark.mllib.rdd.RDDFunctions._
 import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
 import org.apache.spark.rdd.RDD
 
 /**
- * :: DeveloperApi ::
+ * :: Experimental ::
  * Standardizes features by removing the mean and scaling to unit variance using column summary
  * statistics on the samples in the training set.
  *
  * @param withMean False by default. Centers the data with mean before scaling. It will build a
  *                 dense output, so this does not work on sparse input and will raise an exception.
  * @param withStd True by default. Scales the data to unit standard deviation.
  */
-@DeveloperApi
+@Experimental
 class StandardScaler(withMean: Boolean, withStd: Boolean) extends VectorTransformer {
 
   def this() = this(false, true)