@@ -27,22 +27,30 @@ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._
2727/**
2828 * :: Experimental ::
2929 * Stores all the configuration options for tree construction
30- * @param algo classification or regression
31- * @param impurity criterion used for information gain calculation
30+ * @param algo Learning goal. Supported:
31+ * [[org.apache.spark.mllib.tree.configuration.Algo.Classification]],
32+ * [[org.apache.spark.mllib.tree.configuration.Algo.Regression]]
33+ * @param impurity Criterion used for information gain calculation.
34+ * Supported for Classification: [[org.apache.spark.mllib.tree.impurity.Gini]],
35+ * [[org.apache.spark.mllib.tree.impurity.Entropy]].
36+ * Supported for Regression: [[org.apache.spark.mllib.tree.impurity.Variance]].
3237 * @param maxDepth Maximum depth of the tree.
3338 * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
34- * @param numClassesForClassification number of classes for classification. Default value is 2
35- * leads to binary classification
36- * @param maxBins maximum number of bins used for splitting features
37- * @param quantileCalculationStrategy algorithm for calculating quantiles
39+ * @param numClassesForClassification Number of classes for classification.
40+ * (Ignored for regression.)
41+ * Default value is 2 (binary classification).
42+ * @param maxBins Maximum number of bins used for discretizing continuous features and
43+ * for choosing how to split on features at each node.
44+ * More bins give higher granularity.
45+ * @param quantileCalculationStrategy Algorithm for calculating quantiles. Supported:
46+ * [[org.apache.spark.mllib.tree.configuration.QuantileStrategy.Sort]].
3847 * @param categoricalFeaturesInfo A map storing information about the categorical variables and the
3948 * number of discrete values they take. For example, an entry (n ->
4049 * k) implies the feature n is categorical with k categories 0,
4150 * 1, 2, ... , k-1. It's important to note that features are
4251 * zero-indexed.
43- * @param maxMemoryInMB maximum memory in MB allocated to histogram aggregation. Default value is
52+ * @param maxMemoryInMB Maximum memory in MB allocated to histogram aggregation. Default value is
4453 * 128 MB.
45- *
4654 */
4755@ Experimental
4856class Strategy (
@@ -64,20 +72,7 @@ class Strategy (
6472 = isMulticlassClassification && (categoricalFeaturesInfo.size > 0 )
6573
6674 /**
67- * Java-friendly constructor.
68- *
69- * @param algo classification or regression
70- * @param impurity criterion used for information gain calculation
71- * @param maxDepth Maximum depth of the tree.
72- * E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.
73- * @param numClassesForClassification number of classes for classification. Default value is 2
74- * leads to binary classification
75- * @param maxBins maximum number of bins used for splitting features
76- * @param categoricalFeaturesInfo A map storing information about the categorical variables and
77- * the number of discrete values they take. For example, an entry
78- * (n -> k) implies the feature n is categorical with k categories
79- * 0, 1, 2, ... , k-1. It's important to note that features are
80- * zero-indexed.
75+ * Java-friendly constructor for [[org.apache.spark.mllib.tree.configuration.Strategy]].
8176 */
8277 def this (
8378 algo : Algo ,
@@ -90,6 +85,10 @@ class Strategy (
9085 categoricalFeaturesInfo.asInstanceOf [java.util.Map [Int , Int ]].asScala.toMap)
9186 }
9287
88+ /**
89+ * Check validity of parameters.
90+ * Throws an exception if any parameter is invalid.
91+ */
9392 private [tree] def assertValid (): Unit = {
9493 algo match {
9594 case Classification =>
0 commit comments