@@ -185,34 +185,33 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
>>> from pyspark.sql import Row
>>> from pyspark.ml.linalg import Vectors
>>> bdf = sc.parallelize([
- ... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
- ... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], []))]).toDF()
- >>> blor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight")
+ ... Row(label=1.0, weight=1.0, features=Vectors.dense(0.0, 5.0)),
+ ... Row(label=0.0, weight=2.0, features=Vectors.dense(1.0, 2.0)),
+ ... Row(label=1.0, weight=3.0, features=Vectors.dense(2.0, 1.0)),
+ ... Row(label=0.0, weight=4.0, features=Vectors.dense(3.0, 3.0))]).toDF()
+ >>> blor = LogisticRegression(regParam=0.01, weightCol="weight")
>>> blorModel = blor.fit(bdf)
>>> blorModel.coefficients
- DenseVector([5.4...])
+ DenseVector([-1.080..., -0.646...])
>>> blorModel.intercept
- -2.63...
- >>> mdf = sc.parallelize([
- ... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
- ... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], [])),
- ... Row(label=2.0, weight=2.0, features=Vectors.dense(3.0))]).toDF()
- >>> mlor = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight",
- ... family="multinomial")
+ 3.112...
+ >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
+ >>> mdf = spark.read.format("libsvm").load(data_path)
+ >>> mlor = LogisticRegression(regParam=0.1, elasticNetParam=1.0, family="multinomial")
>>> mlorModel = mlor.fit(mdf)
>>> mlorModel.coefficientMatrix
- DenseMatrix(3, 1, [-2.3..., 0.2..., 2.1...], 1)
+ SparseMatrix(3, 4, [0, 1, 2, 3], [3, 2, 1], [1.87..., -2.75..., -0.50...], 1)
>>> mlorModel.interceptVector
- DenseVector([2.1..., 0.6..., -2.8...])
- >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF()
+ DenseVector([0.04..., -0.42..., 0.37...])
+ >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 1.0))]).toDF()
>>> result = blorModel.transform(test0).head()
>>> result.prediction
- 0.0
+ 1.0
>>> result.probability
- DenseVector([0.99..., 0.00...])
+ DenseVector([0.02..., 0.97...])
>>> result.rawPrediction
- DenseVector([8.12..., -8.12...])
- >>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF()
+ DenseVector([-3.54..., 3.54...])
+ >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
>>> blorModel.transform(test1).head().prediction
1.0
>>> blor.setParams("vector")
@@ -222,8 +221,8 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
>>> lr_path = temp_path + "/lr"
>>> blor.save(lr_path)
>>> lr2 = LogisticRegression.load(lr_path)
- >>> lr2.getMaxIter()
- 5
+ >>> lr2.getRegParam()
+ 0.01
>>> model_path = temp_path + "/lr_model"
>>> blorModel.save(model_path)
>>> model2 = LogisticRegressionModel.load(model_path)
@@ -1480,31 +1479,33 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):

>>> from pyspark.sql import Row
>>> from pyspark.ml.linalg import Vectors
- >>> df = sc.parallelize([
- ... Row(label=0.0, features=Vectors.dense(1.0, 0.8)),
- ... Row(label=1.0, features=Vectors.sparse(2, [], [])),
- ... Row(label=2.0, features=Vectors.dense(0.5, 0.5))]).toDF()
- >>> lr = LogisticRegression(maxIter=5, regParam=0.01)
+ >>> data_path = "data/mllib/sample_multiclass_classification_data.txt"
+ >>> df = spark.read.format("libsvm").load(data_path)
+ >>> lr = LogisticRegression(regParam=0.01)
>>> ovr = OneVsRest(classifier=lr)
>>> model = ovr.fit(df)
- >>> [x.coefficients for x in model.models]
- [DenseVector([4.9791, 2.426]), DenseVector([-4.1198, -5.9326]), DenseVector([-3.314, 5.2423])]
+ >>> model.models[0].coefficients
+ DenseVector([0.5..., -1.0..., 3.4..., 4.2...])
+ >>> model.models[1].coefficients
+ DenseVector([-2.1..., 3.1..., -2.6..., -2.3...])
+ >>> model.models[2].coefficients
+ DenseVector([0.3..., -3.4..., 1.0..., -1.1...])
>>> [x.intercept for x in model.models]
- [-5.06544..., 2.30341..., -1.29133...]
- >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0))]).toDF()
+ [-2.7..., -2.5..., -1.3...]
+ >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0, 0.0, 1.0, 1.0))]).toDF()
>>> model.transform(test0).head().prediction
- 1.0
- >>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
- >>> model.transform(test1).head().prediction
0.0
- >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4))]).toDF()
- >>> model.transform(test2).head().prediction
+ >>> test1 = sc.parallelize([Row(features=Vectors.sparse(4, [0], [1.0]))]).toDF()
+ >>> model.transform(test1).head().prediction
2.0
+ >>> test2 = sc.parallelize([Row(features=Vectors.dense(0.5, 0.4, 0.3, 0.2))]).toDF()
+ >>> model.transform(test2).head().prediction
+ 0.0
>>> model_path = temp_path + "/ovr_model"
>>> model.save(model_path)
>>> model2 = OneVsRestModel.load(model_path)
>>> model2.transform(test0).head().prediction
- 1.0
+ 0.0

.. versionadded:: 2.0.0
"""