Commit

fix
huaxingao committed Jul 21, 2020
1 parent 86e0579 commit 1586e30
Showing 9 changed files with 192 additions and 59 deletions.
@@ -64,7 +64,12 @@ trait DefaultReadWriteTest extends TempDirectory { self: Suite =>
       case (Array(values), Array(newValues)) =>
         assert(values === newValues, s"Values do not match on param ${p.name}.")
       case (value, newValue) =>
-        assert(value === newValue, s"Values do not match on param ${p.name}.")
+        if (value.isInstanceOf[Double] && value.asInstanceOf[Double].isNaN) {
+          assert(newValue.isInstanceOf[Double] && newValue.asInstanceOf[Double].isNaN,
+            s"Values do not match on param ${p.name}.")
+        } else {
+          assert(value === newValue, s"Values do not match on param ${p.name}.")
+        }
     }
   } else {
     assert(!newInstance.isDefined(p), s"Param ${p.name} shouldn't be defined.")
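Why this change: `Double.NaN` never compares equal to itself, so a param whose saved default is NaN would fail the plain `===` assertion after a save/load round trip. A minimal Python sketch of the same NaN-aware comparison (the `params_match` helper is illustrative, not part of this patch):

```python
import math

def params_match(value, new_value):
    # NaN compares unequal to everything, including itself, so a value of
    # float("nan") needs an explicit NaN-ness check, mirroring the
    # isInstanceOf[Double]/isNaN branch in the Scala test above.
    if isinstance(value, float) and math.isnan(value):
        return isinstance(new_value, float) and math.isnan(new_value)
    return value == new_value

assert float("nan") != float("nan")   # the motivating quirk
assert params_match(float("nan"), float("nan"))
assert params_match(0.5, 0.5)
```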
57 changes: 41 additions & 16 deletions python/pyspark/ml/classification.py
@@ -513,8 +513,8 @@ class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitInterce
                       " all predictions 0.0 and -Inf will make all predictions 1.0.",
                       typeConverter=TypeConverters.toFloat)
 
-    def __init__(self):
-        super(_LinearSVCParams, self).__init__()
+    def __init__(self, *args):
+        super(_LinearSVCParams, self).__init__(*args)
         self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True,
                          standardization=True, threshold=0.0, aggregationDepth=2,
                          blockSize=1)
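This `*args` pattern repeats across every `_*Params` mixin in the diff below: each `__init__` now accepts positional arguments and forwards them through `super().__init__`, so Python's cooperative multiple inheritance can thread constructor arguments down the MRO instead of silently dropping them. A stripped-down sketch of the mechanics (the class bodies are illustrative stand-ins, not pyspark's real hierarchy):

```python
class Params(object):
    def __init__(self, *args):
        super(Params, self).__init__()
        self._defaults = {}

    def _setDefault(self, **kwargs):
        self._defaults.update(kwargs)


class _HasMaxIter(Params):
    def __init__(self, *args):
        # Forward *args so every mixin in the MRO sees the same arguments.
        super(_HasMaxIter, self).__init__(*args)


class _LinearSVCParams(_HasMaxIter):
    def __init__(self, *args):
        super(_LinearSVCParams, self).__init__(*args)
        self._setDefault(maxIter=100, regParam=0.0)


print(_LinearSVCParams()._defaults)  # {'maxIter': 100, 'regParam': 0.0}
```

Without the forwarding, any class that passed positional arguments up through one of these mixins would hit a `TypeError`, since the old `def __init__(self)` accepted none.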
@@ -587,6 +587,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl
     True
     >>> model.intercept == model2.intercept
     True
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
 
     .. versionadded:: 2.2.0
     """
@@ -820,8 +822,8 @@ class _LogisticRegressionParams(_ProbabilisticClassifierParams, HasRegParam,
                   "classes for multinomial regression.",
                   typeConverter=TypeConverters.toVector)
 
-    def __init__(self):
-        super(_LogisticRegressionParams, self).__init__()
+    def __init__(self, *args):
+        super(_LogisticRegressionParams, self).__init__(*args)
         self._setDefault(maxIter=100, regParam=0.0, tol=1E-6, threshold=0.5, family="auto",
                          blockSize=1)
 
@@ -1017,7 +1019,8 @@ class LogisticRegression(_JavaProbabilisticClassifier, _LogisticRegressionParams
     >>> blorModel.intercept == model2.intercept
     True
     >>> model2
     LogisticRegressionModel: uid=..., numClasses=2, numFeatures=2
+    >>> blorModel.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
 
     .. versionadded:: 1.3.0
     """
@@ -1313,8 +1316,8 @@ class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams):
     Params for :py:class:`DecisionTreeClassifier` and :py:class:`DecisionTreeClassificationModel`.
     """
 
-    def __init__(self):
-        super(_DecisionTreeClassifierParams, self).__init__()
+    def __init__(self, *args):
+        super(_DecisionTreeClassifierParams, self).__init__(*args)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
                          impurity="gini", leafCol="", minWeightFractionPerNode=0.0)
@@ -1384,7 +1387,8 @@ class DecisionTreeClassifier(_JavaProbabilisticClassifier, _DecisionTreeClassifi
     >>> model2 = DecisionTreeClassificationModel.load(model_path)
     >>> model.featureImportances == model2.featureImportances
     True
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
     >>> df3 = spark.createDataFrame([
     ...     (1.0, 0.2, Vectors.dense(1.0)),
     ...     (1.0, 0.8, Vectors.dense(1.0)),
@@ -1550,8 +1554,8 @@ class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams):
     Params for :py:class:`RandomForestClassifier` and :py:class:`RandomForestClassificationModel`.
     """
 
-    def __init__(self):
-        super(_RandomForestClassifierParams, self).__init__()
+    def __init__(self, *args):
+        super(_RandomForestClassifierParams, self).__init__(*args)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
                          impurity="gini", numTrees=20, featureSubsetStrategy="auto",
@@ -1628,6 +1632,8 @@ class RandomForestClassifier(_JavaProbabilisticClassifier, _RandomForestClassifi
     >>> model2 = RandomForestClassificationModel.load(model_path)
     >>> model.featureImportances == model2.featureImportances
     True
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
 
     .. versionadded:: 1.4.0
     """
@@ -1893,8 +1899,8 @@ class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity):
                      "Supported options: " + ", ".join(supportedLossTypes),
                      typeConverter=TypeConverters.toString)
 
-    def __init__(self):
-        super(_GBTClassifierParams, self).__init__()
+    def __init__(self, *args):
+        super(_GBTClassifierParams, self).__init__(*args)
         self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0,
                          maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10,
                          lossType="logistic", maxIter=20, stepSize=0.1, subsamplingRate=1.0,
@@ -1992,6 +1998,8 @@ class GBTClassifier(_JavaProbabilisticClassifier, _GBTClassifierParams,
     True
     >>> model.treeWeights == model2.treeWeights
     True
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
     >>> model.trees
     [DecisionTreeRegressionModel...depth=..., DecisionTreeRegressionModel...]
     >>> validation = spark.createDataFrame([(0.0, Vectors.dense(-1.0),)],
@@ -2225,8 +2233,8 @@ class _NaiveBayesParams(_PredictorParams, HasWeightCol):
                       "and gaussian.",
                       typeConverter=TypeConverters.toString)
 
-    def __init__(self):
-        super(_NaiveBayesParams, self).__init__()
+    def __init__(self, *args):
+        super(_NaiveBayesParams, self).__init__(*args)
         self._setDefault(smoothing=1.0, modelType="multinomial")
 
     @since("1.5.0")
@@ -2312,6 +2320,8 @@ class NaiveBayes(_JavaProbabilisticClassifier, _NaiveBayesParams, HasThresholds,
     True
     >>> model.theta == model2.theta
     True
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
     >>> nb = nb.setThresholds([0.01, 10.00])
     >>> model3 = nb.fit(df)
     >>> result = model3.transform(test0).head()
@@ -2438,8 +2448,8 @@ class _MultilayerPerceptronParams(_ProbabilisticClassifierParams, HasSeed, HasMa
     initialWeights = Param(Params._dummy(), "initialWeights", "The initial weights of the model.",
                            typeConverter=TypeConverters.toVector)
 
-    def __init__(self):
-        super(_MultilayerPerceptronParams, self).__init__()
+    def __init__(self, *args):
+        super(_MultilayerPerceptronParams, self).__init__(*args)
         self._setDefault(maxIter=100, tol=1E-6, blockSize=128, stepSize=0.03, solver="l-bfgs")
 
     @since("1.6.0")
@@ -2521,6 +2531,8 @@ class MultilayerPerceptronClassifier(_JavaProbabilisticClassifier, _MultilayerPe
     True
     >>> model.weights == model2.weights
     True
+    >>> model.transform(testDF).take(1) == model2.transform(testDF).take(1)
+    True
     >>> mlp2 = mlp2.setInitialWeights(list(range(0, 12)))
     >>> model3 = mlp2.fit(df)
     >>> model3.weights != model2.weights
@@ -2695,6 +2707,8 @@ class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, JavaMLReadable, Jav
     >>> model2 = OneVsRestModel.load(model_path)
     >>> model2.transform(test0).head().newPrediction
     0.0
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
     >>> model.transform(test2).columns
     ['features', 'rawPrediction', 'newPrediction']
@@ -3120,6 +3134,17 @@ class FMClassifier(_JavaProbabilisticClassifier, _FactorizationMachinesParams, J
     DenseVector([14.8232])
     >>> model.factors
     DenseMatrix(1, 2, [0.0163, -0.0051], 1)
+    >>> model_path = temp_path + "/fm_model"
+    >>> model.save(model_path)
+    >>> model2 = FMClassificationModel.load(model_path)
+    >>> model2.intercept
+    -7.316665276826291
+    >>> model2.linear
+    DenseVector([14.8232])
+    >>> model2.factors
+    DenseMatrix(1, 2, [0.0163, -0.0051], 1)
+    >>> model.transform(test0).take(1) == model2.transform(test0).take(1)
+    True
 
     .. versionadded:: 3.0.0
     """
30 changes: 20 additions & 10 deletions python/pyspark/ml/clustering.py
@@ -109,8 +109,8 @@ class _GaussianMixtureParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC
     k = Param(Params._dummy(), "k", "Number of independent Gaussians in the mixture model. " +
               "Must be > 1.", typeConverter=TypeConverters.toInt)
 
-    def __init__(self):
-        super(_GaussianMixtureParams, self).__init__()
+    def __init__(self, *args):
+        super(_GaussianMixtureParams, self).__init__(*args)
         self._setDefault(k=2, tol=0.01, maxIter=100, aggregationDepth=2, blockSize=1)
 
     @since("2.0.0")
@@ -325,6 +325,8 @@ class GaussianMixture(JavaEstimator, _GaussianMixtureParams, JavaMLWritable, Jav
     Row(mean=DenseVector([0.825, 0.8675]))
     >>> model2.gaussiansDF.select("cov").head()
     Row(cov=DenseMatrix(2, 2, [0.0056, -0.0051, -0.0051, 0.0046], False))
+    >>> model.transform(df).take(1) == model2.transform(df).take(1)
+    True
     >>> gm2.setWeightCol("weight")
     GaussianMixture...
@@ -503,8 +505,8 @@ class _KMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionCol, HasTo
     initSteps = Param(Params._dummy(), "initSteps", "The number of steps for k-means|| " +
                       "initialization mode. Must be > 0.", typeConverter=TypeConverters.toInt)
 
-    def __init__(self):
-        super(_KMeansParams, self).__init__()
+    def __init__(self, *args):
+        super(_KMeansParams, self).__init__(*args)
         self._setDefault(k=2, initMode="k-means||", initSteps=2, tol=1e-4, maxIter=20,
                          distanceMeasure="euclidean")
 
@@ -637,6 +639,8 @@ class KMeans(JavaEstimator, _KMeansParams, JavaMLWritable, JavaMLReadable):
     array([ True, True], dtype=bool)
     >>> model.clusterCenters()[1] == model2.clusterCenters()[1]
     array([ True, True], dtype=bool)
+    >>> model.transform(df).take(1) == model2.transform(df).take(1)
+    True
 
     .. versionadded:: 1.5.0
     """
@@ -760,8 +764,8 @@ class _BisectingKMeansParams(HasMaxIter, HasFeaturesCol, HasSeed, HasPredictionC
                       "proportion of points (if < 1.0) of a divisible cluster.",
                       typeConverter=TypeConverters.toFloat)
 
-    def __init__(self):
-        super(_BisectingKMeansParams, self).__init__()
+    def __init__(self, *args):
+        super(_BisectingKMeansParams, self).__init__(*args)
         self._setDefault(maxIter=20, k=4, minDivisibleClusterSize=1.0)
 
     @since("2.0.0")
@@ -914,6 +918,8 @@ class BisectingKMeans(JavaEstimator, _BisectingKMeansParams, JavaMLWritable, Jav
     array([ True, True], dtype=bool)
     >>> model.clusterCenters()[1] == model2.clusterCenters()[1]
     array([ True, True], dtype=bool)
+    >>> model.transform(df).take(1) == model2.transform(df).take(1)
+    True
 
     .. versionadded:: 2.0.0
     """
@@ -1072,8 +1078,8 @@ class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval):
                           " partition is lost, so set this bit with care.",
                           TypeConverters.toBoolean)
 
-    def __init__(self):
-        super(_LDAParams, self).__init__()
+    def __init__(self, *args):
+        super(_LDAParams, self).__init__(*args)
         self._setDefault(maxIter=20, checkpointInterval=10,
                          k=10, optimizer="online", learningOffset=1024.0, learningDecay=0.51,
                          subsamplingRate=0.05, optimizeDocConcentration=True,
@@ -1389,6 +1395,8 @@ class LDA(JavaEstimator, _LDAParams, JavaMLReadable, JavaMLWritable):
     >>> local_model_path = temp_path + "/lda_local_model"
     >>> localModel.save(local_model_path)
     >>> sameLocalModel = LocalLDAModel.load(local_model_path)
+    >>> model.transform(df).take(1) == sameLocalModel.transform(df).take(1)
+    True
 
     .. versionadded:: 2.0.0
     """
@@ -1600,8 +1608,8 @@ class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol):
                    "Name of the input column for destination vertex IDs.",
                    typeConverter=TypeConverters.toString)
 
-    def __init__(self):
-        super(_PowerIterationClusteringParams, self).__init__()
+    def __init__(self, *args):
+        super(_PowerIterationClusteringParams, self).__init__(*args)
         self._setDefault(k=2, maxIter=20, initMode="random", srcCol="src", dstCol="dst")
 
     @since("2.4.0")
@@ -1677,6 +1685,8 @@ class PowerIterationClustering(_PowerIterationClusteringParams, JavaParams, Java
     2
     >>> pic2.getMaxIter()
     40
+    >>> pic2.assignClusters(df).take(6) == assignments.take(6)
+    True
 
     .. versionadded:: 2.4.0
     """
(Diff truncated: the remaining 6 of the 9 changed files are not shown.)