1717
1818package org .apache .spark .mllib .regression
1919
20- import breeze .linalg .{Vector => BV }
21-
2220import org .apache .spark .SparkContext
2321import org .apache .spark .rdd .RDD
2422import org .apache .spark .mllib .optimization ._
2523import org .apache .spark .mllib .util .MLUtils
26- import org .apache .spark .mllib .linalg .{ Vectors , Vector }
24+ import org .apache .spark .mllib .linalg .Vector
2725
2826/**
2927 * Regression model trained using RidgeRegression.
@@ -58,8 +56,7 @@ class RidgeRegressionWithSGD private (
5856 var numIterations : Int ,
5957 var regParam : Double ,
6058 var miniBatchFraction : Double )
61- extends GeneralizedLinearAlgorithm [RidgeRegressionModel ]
62- with Serializable {
59+ extends GeneralizedLinearAlgorithm [RidgeRegressionModel ] with Serializable {
6360
6461 val gradient = new LeastSquaresGradient ()
6562 val updater = new SquaredL2Updater ()
@@ -72,10 +69,6 @@ class RidgeRegressionWithSGD private (
7269 // We don't want to penalize the intercept in RidgeRegression, so set this to false.
7370 super .setIntercept(false )
7471
75- private var yMean = 0.0
76- private var xColMean : BV [Double ] = _
77- private var xColSd : BV [Double ] = _
78-
7972 /**
8073 * Construct a RidgeRegression object with default parameters
8174 */
@@ -88,35 +81,7 @@ class RidgeRegressionWithSGD private (
8881 }
8982
9083 override protected def createModel (weights : Vector , intercept : Double ) = {
91- val weightsMat = weights.toBreeze
92- val weightsScaled = weightsMat :/ xColSd
93- val interceptScaled = yMean - weightsMat.dot(xColMean :/ xColSd)
94-
95- new RidgeRegressionModel (Vectors .fromBreeze(weightsScaled), interceptScaled)
96- }
97-
98- override def run (
99- input : RDD [LabeledPoint ],
100- initialWeights : Vector )
101- : RidgeRegressionModel =
102- {
103- val nfeatures : Int = input.first().features.size
104- val nexamples : Long = input.count()
105-
106- // To avoid penalizing the intercept, we center and scale the data.
107- val stats = MLUtils .computeStats(input, nfeatures, nexamples)
108- yMean = stats._1
109- xColMean = stats._2.toBreeze
110- xColSd = stats._3.toBreeze
111-
112- val normalizedData = input.map { point =>
113- val yNormalized = point.label - yMean
114- val featuresMat = point.features.toBreeze
115- val featuresNormalized = (featuresMat - xColMean) :/ xColSd
116- LabeledPoint (yNormalized, Vectors .fromBreeze(featuresNormalized))
117- }
118-
119- super .run(normalizedData, initialWeights)
84+ new RidgeRegressionModel (weights, intercept)
12085 }
12186}
12287
@@ -145,9 +110,7 @@ object RidgeRegressionWithSGD {
145110 stepSize : Double ,
146111 regParam : Double ,
147112 miniBatchFraction : Double ,
148- initialWeights : Vector )
149- : RidgeRegressionModel =
150- {
113+ initialWeights : Vector ): RidgeRegressionModel = {
151114 new RidgeRegressionWithSGD (stepSize, numIterations, regParam, miniBatchFraction).run(
152115 input, initialWeights)
153116 }
@@ -168,9 +131,7 @@ object RidgeRegressionWithSGD {
168131 numIterations : Int ,
169132 stepSize : Double ,
170133 regParam : Double ,
171- miniBatchFraction : Double )
172- : RidgeRegressionModel =
173- {
134+ miniBatchFraction : Double ): RidgeRegressionModel = {
174135 new RidgeRegressionWithSGD (stepSize, numIterations, regParam, miniBatchFraction).run(input)
175136 }
176137
@@ -189,9 +150,7 @@ object RidgeRegressionWithSGD {
189150 input : RDD [LabeledPoint ],
190151 numIterations : Int ,
191152 stepSize : Double ,
192- regParam : Double )
193- : RidgeRegressionModel =
194- {
153+ regParam : Double ): RidgeRegressionModel = {
195154 train(input, numIterations, stepSize, regParam, 1.0 )
196155 }
197156
@@ -206,9 +165,7 @@ object RidgeRegressionWithSGD {
206165 */
207166 def train (
208167 input : RDD [LabeledPoint ],
209- numIterations : Int )
210- : RidgeRegressionModel =
211- {
168+ numIterations : Int ): RidgeRegressionModel = {
212169 train(input, numIterations, 1.0 , 1.0 , 1.0 )
213170 }
214171
0 commit comments