Skip to content

Commit 773c1a9

Browse files
committed
change default numFeatures to 2^20 in HashingTF
change annotation from DeveloperApi to Experimental in Normalizer and StandardScaler
1 parent 883e122 commit 773c1a9

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

mllib/src/main/scala/org/apache/spark/mllib/feature/HashingTF.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,12 +32,12 @@ import org.apache.spark.util.Utils
3232
* :: Experimental ::
3333
* Maps a sequence of terms to their term frequencies using the hashing trick.
3434
*
35-
* @param numFeatures number of features (default: 1000000)
35+
* @param numFeatures number of features (default: 2^20^)
3636
*/
3737
@Experimental
3838
class HashingTF(val numFeatures: Int) extends Serializable {
3939

40-
def this() = this(1000000)
40+
def this() = this(1 << 20)
4141

4242
/**
4343
* Returns the index of the input term.

mllib/src/main/scala/org/apache/spark/mllib/feature/Normalizer.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,11 +19,11 @@ package org.apache.spark.mllib.feature
1919

2020
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
2121

22-
import org.apache.spark.annotation.DeveloperApi
22+
import org.apache.spark.annotation.Experimental
2323
import org.apache.spark.mllib.linalg.{Vector, Vectors}
2424

2525
/**
26-
* :: DeveloperApi ::
26+
* :: Experimental ::
2727
* Normalizes samples individually to unit L^p^ norm
2828
*
2929
* For any 1 <= p < Double.PositiveInfinity, normalizes samples using
@@ -33,7 +33,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
3333
*
3434
* @param p Normalization in L^p^ space, p = 2 by default.
3535
*/
36-
@DeveloperApi
36+
@Experimental
3737
class Normalizer(p: Double) extends VectorTransformer {
3838

3939
def this() = this(2)

mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,22 @@ package org.apache.spark.mllib.feature
1919

2020
import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
2121

22-
import org.apache.spark.annotation.DeveloperApi
22+
import org.apache.spark.annotation.Experimental
2323
import org.apache.spark.mllib.linalg.{Vector, Vectors}
2424
import org.apache.spark.mllib.rdd.RDDFunctions._
2525
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
2626
import org.apache.spark.rdd.RDD
2727

2828
/**
29-
* :: DeveloperApi ::
29+
* :: Experimental ::
3030
* Standardizes features by removing the mean and scaling to unit variance using column summary
3131
* statistics on the samples in the training set.
3232
*
3333
* @param withMean False by default. Centers the data with mean before scaling. It will build a
3434
* dense output, so this does not work on sparse input and will raise an exception.
3535
* @param withStd True by default. Scales the data to unit standard deviation.
3636
*/
37-
@DeveloperApi
37+
@Experimental
3838
class StandardScaler(withMean: Boolean, withStd: Boolean) extends VectorTransformer {
3939

4040
def this() = this(false, true)

0 commit comments

Comments
 (0)