
[SPARK-11938][PySpark] Expose numFeatures in all ML PredictionModel for PySpark #13922

Closed · wants to merge 3 commits
Changes from all commits
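Every model touched below gains the same read-only wrapper: a Python property that forwards to the underlying JVM model. A minimal sketch of that pattern, assuming the existing pyspark.ml wrapper machinery; the class name is a placeholder, not a real Spark class:

```python
from pyspark import since
from pyspark.ml.wrapper import JavaModel


class SomeJavaBackedModel(JavaModel):
    """Placeholder model class; stands in for the *Model classes below."""

    @property
    @since("2.0.0")
    def numFeatures(self):
        """
        Number of features the model was trained on.
        """
        # Delegates to the Scala model's numFeatures via the Java wrapper.
        return self._call_java("numFeatures")
```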
72 changes: 72 additions & 0 deletions python/pyspark/ml/classification.py
@@ -65,6 +65,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
DenseVector([5.5...])
>>> model.intercept
-2.68...
>>> model.numFeatures
1
>>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
>>> result = model.transform(test0).head()
>>> result.prediction
@@ -92,6 +94,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
True
>>> model.intercept == model2.intercept
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.3.0
"""
@@ -239,6 +243,14 @@ def intercept(self):
"""
return self._call_java("intercept")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")

@property
@since("2.0.0")
def summary(self):
@@ -524,6 +536,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
1
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> model.numFeatures
1
>>> print(model.toDebugString)
DecisionTreeClassificationModel (uid=...) of depth 1 with 3 nodes...
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
@@ -548,6 +562,8 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> model2 = DecisionTreeClassificationModel.load(model_path)
>>> model.featureImportances == model2.featureImportances
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -627,6 +643,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")


@inherit_doc
class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
@@ -668,6 +692,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
>>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
>>> model.transform(test1).head().prediction
1.0
>>> model.numFeatures
1
>>> model.trees
[DecisionTreeClassificationModel (uid=...) of depth..., DecisionTreeClassificationModel...]
>>> rfc_path = temp_path + "/rfc"
@@ -680,6 +706,8 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
>>> model2 = RandomForestClassificationModel.load(model_path)
>>> model.featureImportances == model2.featureImportances
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -752,6 +780,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")

@property
@since("2.0.0")
def trees(self):
@@ -804,6 +840,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
1.0
>>> model.totalNumNodes
15
>>> model.numFeatures
1
>>> print(model.toDebugString)
GBTClassificationModel (uid=...)...with 5 trees...
>>> gbtc_path = temp_path + "gbtc"
@@ -820,6 +858,8 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
True
>>> model.trees
[DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -907,6 +947,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")

@property
@since("2.0.0")
def trees(self):
@@ -952,6 +1000,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
>>> model.transform(test1).head().prediction
1.0
>>> model.numFeatures
2
>>> nb_path = temp_path + "/nb"
>>> nb.save(nb_path)
>>> nb2 = NaiveBayes.load(nb_path)
@@ -969,6 +1019,8 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
>>> result = model3.transform(test0).head()
>>> result.prediction
0.0
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.5.0
"""
@@ -1066,6 +1118,14 @@ def theta(self):
"""
return self._call_java("theta")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")


@inherit_doc
class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
@@ -1091,6 +1151,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
[2, 2, 2]
>>> model.weights.size
12
>>> model.numFeatures
2
>>> testDF = spark.createDataFrame([
... (Vectors.dense([1.0, 0.0]),),
... (Vectors.dense([0.0, 0.0]),)], ["features"])
@@ -1120,6 +1182,8 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
True
>>> model3.layers == model.layers
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.6.0
"""
@@ -1267,6 +1331,14 @@ def weights(self):
"""
return self._call_java("weights")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")


class OneVsRestParams(HasFeaturesCol, HasLabelCol, HasPredictionCol):
"""
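With these classification.py additions, any fitted classifier exposes numFeatures directly. A minimal usage sketch, assuming an active SparkSession bound to spark; the toy data and maxIter value are illustrative, not taken from the diff:

```python
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors

# Two-feature toy data, so the fitted model should report numFeatures == 2.
df = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0, 1.0)),
     (0.0, Vectors.dense(1.0, 0.0))],
    ["label", "features"])

model = LogisticRegression(maxIter=5).fit(df)
print(model.numFeatures)  # 2
```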
48 changes: 48 additions & 0 deletions python/pyspark/ml/regression.py
@@ -71,6 +71,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
True
>>> abs(model.intercept - 0.0) < 0.001
True
>>> model.numFeatures
1
>>> test1 = spark.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"])
>>> abs(model.transform(test1).head().prediction - 1.0) < 0.001
True
@@ -90,6 +92,8 @@ class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPrediction
True
>>> model.intercept == model2.intercept
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -153,6 +157,14 @@ def intercept(self):
"""
return self._call_java("intercept")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")

@property
@since("2.0.0")
def summary(self):
@@ -658,6 +670,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
3
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> model.numFeatures
1
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
>>> model.transform(test0).head().prediction
0.0
@@ -678,6 +692,8 @@ class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
True
>>> model.transform(test1).head().variance
0.0
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -829,6 +845,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")


@inherit_doc
class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed,
@@ -850,6 +874,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> model = rf.fit(df)
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> model.numFeatures
1
>>> allclose(model.treeWeights, [1.0, 1.0])
True
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
@@ -872,6 +898,8 @@ class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredi
>>> model2 = RandomForestRegressionModel.load(model_path)
>>> model.featureImportances == model2.featureImportances
True
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -951,6 +979,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")


@inherit_doc
class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,
@@ -974,6 +1010,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
>>> model = gbt.fit(df)
>>> model.featureImportances
SparseVector(1, {0: 1.0})
>>> model.numFeatures
1
>>> allclose(model.treeWeights, [1.0, 0.1, 0.1, 0.1, 0.1])
True
>>> test0 = spark.createDataFrame([(Vectors.dense(-1.0),)], ["features"])
@@ -996,6 +1034,8 @@ class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol,
True
>>> model.trees
[DecisionTreeRegressionModel (uid=...) of depth..., DecisionTreeRegressionModel...]
>>> model.numFeatures == model2.numFeatures
True

.. versionadded:: 1.4.0
"""
@@ -1087,6 +1127,14 @@ def featureImportances(self):
"""
return self._call_java("featureImportances")

@property
@since("2.0.0")
def numFeatures(self):
"""
Number of features the model was trained on.
"""
return self._call_java("numFeatures")

@property
@since("2.0.0")
def trees(self):
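The regression models gain the identical property, and the new doctests also check that it survives persistence. A rough sketch of that round trip, assuming spark is an active SparkSession and temp_path is a writable directory (both assumptions, mirroring the doctest fixtures):

```python
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression, LinearRegressionModel

# One-feature training data, so numFeatures should be 1.
df = spark.createDataFrame(
    [(0.0, Vectors.dense(0.0)),
     (1.0, Vectors.dense(1.0))],
    ["label", "features"])

model = LinearRegression(maxIter=5, regParam=0.0).fit(df)

# numFeatures is read from the underlying Java model, so a reloaded model
# reports the same value as the original.
model.save(temp_path + "/lr_model")
model2 = LinearRegressionModel.load(temp_path + "/lr_model")
print(model.numFeatures == model2.numFeatures)  # True
```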