@@ -463,16 +463,11 @@ class LogisticRegression @Since("1.2.0") (
       }

       /*
-        The coefficients are laid out in column major order during training. e.g. for
-        `numClasses = 3` and `numFeatures = 2` and `fitIntercept = true` the layout is:
-
-        Array(beta_11, beta_21, beta_31, beta_12, beta_22, beta_32, intercept_1, intercept_2,
-          intercept_3)
-
-        where beta_jk corresponds to the coefficient for class `j` and feature `k`.
+        The coefficients are laid out in column major order during training. Here we initialize
+        a column major matrix of initial coefficients.
       */
-      val initialCoefficientsWithIntercept =
-        Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept)
+      val initialCoefWithInterceptMatrix =
+        Matrices.zeros(numCoefficientSets, numFeaturesPlusIntercept)

       val initialModelIsValid = optInitialModel match {
         case Some(_initialModel) =>
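Note: the old flat vector and the new matrix use the same column-major storage, so entry (classIndex, featureIndex) sits at flat offset featureIndex * numCoefficientSets + classIndex. A minimal plain-Scala sketch of that correspondence (illustrative names and sizes, not Spark API):

object ColumnMajorLayout {
  def main(args: Array[String]): Unit = {
    val numCoefficientSets = 3        // one coefficient set per class
    val numFeaturesPlusIntercept = 3  // 2 features + 1 intercept column
    val flat = Array.ofDim[Double](numCoefficientSets * numFeaturesPlusIntercept)

    // Column-major flat index, matching the old `flatIndex` computation below.
    def flatIndex(classIndex: Int, featureIndex: Int): Int =
      featureIndex * numCoefficientSets + classIndex

    // Write beta for (class 1, feature 0) through the flat view ...
    flat(flatIndex(1, 0)) = 42.0
    // ... and read it back through matrix-style (row, col) addressing.
    def at(classIndex: Int, featureIndex: Int): Double = flat(flatIndex(classIndex, featureIndex))
    assert(at(1, 0) == 42.0)
  }
}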
@@ -491,18 +486,15 @@ class LogisticRegression @Since("1.2.0") (
       }

       if (initialModelIsValid) {
-        val initialCoefWithInterceptArray = initialCoefficientsWithIntercept.toArray
         val providedCoef = optInitialModel.get.coefficientMatrix
-        providedCoef.foreachActive { (row, col, value) =>
-          // convert matrix to column major for training
-          val flatIndex = col * numCoefficientSets + row
+        providedCoef.foreachActive { (classIndex, featureIndex, value) =>
           // We need to scale the coefficients since they will be trained in the scaled space
-          initialCoefWithInterceptArray(flatIndex) = value * featuresStd(col)
+          initialCoefWithInterceptMatrix.update(classIndex, featureIndex,
+            value * featuresStd(featureIndex))
         }
         if ($(fitIntercept)) {
-          optInitialModel.get.interceptVector.foreachActive { (index, value) =>
-            val coefIndex = numCoefficientSets * numFeatures + index
-            initialCoefWithInterceptArray(coefIndex) = value
+          optInitialModel.get.interceptVector.foreachActive { (classIndex, value) =>
+            initialCoefWithInterceptMatrix.update(classIndex, numFeatures, value)
           }
         }
       } else if ($(fitIntercept) && isMultinomial) {
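Why the multiplication by featuresStd: training runs on standardized features x' = x / sigma, so a provided coefficient beta must become beta * sigma to yield the same margin, while the intercept is copied unchanged. A self-contained plain-Scala check with made-up values:

object ScaleToTrainingSpace {
  def main(args: Array[String]): Unit = {
    val beta = 2.5   // coefficient in the original feature space
    val sigma = 4.0  // the feature's standard deviation
    val x = 10.0     // a raw feature value

    val xScaled = x / sigma        // standardized feature
    val betaScaled = beta * sigma  // coefficient in the scaled space

    // The margins agree, so the initial model is preserved under standardization.
    assert(math.abs(beta * x - betaScaled * xScaled) < 1e-12)
  }
}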
@@ -532,8 +524,7 @@ class LogisticRegression @Since("1.2.0") (
         val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
         val rawMean = rawIntercepts.sum / rawIntercepts.length
         rawIntercepts.indices.foreach { i =>
-          initialCoefficientsWithIntercept.toArray(numClasses * numFeatures + i) =
-            rawIntercepts(i) - rawMean
+          initialCoefWithInterceptMatrix.update(i, numFeatures, rawIntercepts(i) - rawMean)
         }
       } else if ($(fitIntercept)) {
         /*
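The smoothed, mean-centered log counts make the initial softmax reproduce the (smoothed) empirical class priors, and the centering makes the intercepts sum to zero. A plain-Scala sketch with an assumed histogram:

object MultinomialInterceptInit {
  def main(args: Array[String]): Unit = {
    val histogram = Array(10.0, 30.0, 60.0)                  // assumed counts per class
    val rawIntercepts = histogram.map(c => math.log(c + 1))  // add 1 for smoothing
    val rawMean = rawIntercepts.sum / rawIntercepts.length
    val intercepts = rawIntercepts.map(_ - rawMean)

    // With zero coefficients, softmax of the intercepts gives the smoothed priors.
    val expSum = intercepts.map(math.exp).sum
    val priors = intercepts.map(b => math.exp(b) / expSum)
    priors.zip(histogram).foreach { case (p, c) =>
      assert(math.abs(p - (c + 1) / (histogram.sum + histogram.length)) < 1e-12)
    }
    assert(math.abs(intercepts.sum) < 1e-12) // centered
  }
}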
@@ -549,12 +540,12 @@ class LogisticRegression @Since("1.2.0") (
            b = \log{P(1) / P(0)} = \log{count_1 / count_0}
          }}}
        */
-        initialCoefficientsWithIntercept.toArray(numFeatures) = math.log(
-          histogram(1) / histogram(0))
+        initialCoefWithInterceptMatrix.update(0, numFeatures,
+          math.log(histogram(1) / histogram(0)))
       }

       val states = optimizer.iterations(new CachedDiffFunction(costFun),
-        initialCoefficientsWithIntercept.asBreeze.toDenseVector)
+        new BDV[Double](initialCoefWithInterceptMatrix.toArray))

       /*
          Note that in Logistic Regression, the objective history (loss + regularization)
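A quick check of the b = log(count_1 / count_0) identity above: with zero coefficients, sigmoid(b) equals the empirical positive rate. Plain Scala, made-up counts:

object BinaryInterceptInit {
  def main(args: Array[String]): Unit = {
    val histogram = Array(60.0, 40.0)              // counts of label 0 and label 1
    val b = math.log(histogram(1) / histogram(0))  // log-odds of the positive class

    val p = 1.0 / (1.0 + math.exp(-b))             // sigmoid(b)
    assert(math.abs(p - histogram(1) / histogram.sum) < 1e-12)
  }
}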
@@ -586,15 +577,24 @@ class LogisticRegression @Since("1.2.0") (
          Note that the intercept in scaled space and original space is the same;
          as a result, no scaling is needed.
        */
-      val rawCoefficients = state.x.toArray.clone()
-      val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i =>
-        val colMajorIndex = (i % numFeatures) * numCoefficientSets + i / numFeatures
-        val featureIndex = i % numFeatures
-        if (featuresStd(featureIndex) != 0.0) {
-          rawCoefficients(colMajorIndex) / featuresStd(featureIndex)
-        } else {
-          0.0
+      val allCoefficients = state.x.toArray.clone()
+      val allCoefMatrix = new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept,
+        allCoefficients)
+      val denseCoefficientMatrix = new DenseMatrix(numCoefficientSets, numFeatures,
+        new Array[Double](numCoefficientSets * numFeatures), isTransposed = true)
+      val interceptVec = if ($(fitIntercept) || !isMultinomial) {
+        Vectors.zeros(numCoefficientSets)
+      } else {
+        Vectors.sparse(numCoefficientSets, Seq())
+      }
+      // separate intercepts and coefficients from the combined matrix
+      allCoefMatrix.foreachActive { (classIndex, featureIndex, value) =>
+        val isIntercept = $(fitIntercept) && (featureIndex == numFeatures)
+        if (!isIntercept && featuresStd(featureIndex) != 0.0) {
+          denseCoefficientMatrix.update(classIndex, featureIndex,
+            value / featuresStd(featureIndex))
         }
+        if (isIntercept) interceptVec.toArray(classIndex) = value
       }

       if ($(regParam) == 0.0 && isMultinomial) {
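A plain-Scala sketch of the split-and-unscale loop above (illustrative sizes, not Spark API): the last column of the combined solution becomes the intercept, every other entry is divided by its feature's standard deviation, and zero-variance features keep a 0.0 coefficient:

object SplitCoefficientsAndIntercepts {
  def main(args: Array[String]): Unit = {
    val numClasses = 2
    val numFeatures = 2
    val featuresStd = Array(2.0, 0.0) // second feature is constant (std = 0)
    // Column-major: columns 0..numFeatures-1 are features, the last column intercepts.
    val all = Array(0.4, -0.4, 0.6, -0.6, 1.0, -1.0)

    val coef = Array.ofDim[Double](numClasses, numFeatures)
    val intercept = Array.ofDim[Double](numClasses)
    for (j <- 0 to numFeatures; i <- 0 until numClasses) {
      val value = all(j * numClasses + i)
      if (j == numFeatures) intercept(i) = value
      else if (featuresStd(j) != 0.0) coef(i)(j) = value / featuresStd(j)
      // zero-variance features keep coefficient 0.0
    }
    assert(coef(0)(0) == 0.2 && coef(0)(1) == 0.0 && intercept(1) == -1.0)
  }
}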
@@ -607,17 +607,16 @@ class LogisticRegression @Since("1.2.0") (
            Friedman, et al. "Regularization Paths for Generalized Linear Models via
              Coordinate Descent," https://core.ac.uk/download/files/153/6287975.pdf
          */
-        val coefficientMean = coefficientArray.sum / coefficientArray.length
-        coefficientArray.indices.foreach { i => coefficientArray(i) -= coefficientMean }
+        val denseValues = denseCoefficientMatrix.values
+        val coefficientMean = denseValues.sum / denseValues.length
+        denseCoefficientMatrix.update(_ - coefficientMean)
       }

-      val denseCoefficientMatrix =
-        new DenseMatrix(numCoefficientSets, numFeatures, coefficientArray, isTransposed = true)
       // TODO: use `denseCoefficientMatrix.compressed` after SPARK-17471
       val compressedCoefficientMatrix = if (isMultinomial) {
         denseCoefficientMatrix
       } else {
-        val compressedVector = Vectors.dense(coefficientArray).compressed
+        val compressedVector = Vectors.dense(denseCoefficientMatrix.values).compressed
         compressedVector match {
           case dv: DenseVector => denseCoefficientMatrix
           case sv: SparseVector =>
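The centering when regParam == 0 is safe because subtracting the same constant from every coefficient shifts all class margins by an equal amount, and softmax is invariant to a common shift; centering just selects one member of the unidentifiable solution family (Friedman et al.). A minimal check of that invariance:

object SoftmaxShiftInvariance {
  def main(args: Array[String]): Unit = {
    def softmax(m: Array[Double]): Array[Double] = {
      val e = m.map(math.exp); val s = e.sum; e.map(_ / s)
    }
    val margins = Array(1.0, 2.5, -0.5)
    val shifted = margins.map(_ - margins.sum / margins.length) // mean-centered
    softmax(margins).zip(softmax(shifted)).foreach { case (a, b) =>
      assert(math.abs(a - b) < 1e-12) // probabilities unchanged
    }
  }
}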
@@ -626,25 +625,13 @@ class LogisticRegression @Since("1.2.0") (
         }
       }

-      val interceptsArray: Array[Double] = if ($(fitIntercept)) {
-        Array.tabulate(numCoefficientSets) { i =>
-          val coefIndex = numFeatures * numCoefficientSets + i
-          rawCoefficients(coefIndex)
-        }
-      } else {
-        Array.empty[Double]
-      }
-      val interceptVector = if (interceptsArray.nonEmpty && isMultinomial) {
-        // The intercepts are never regularized, so we always center the mean.
-        val interceptMean = interceptsArray.sum / numClasses
-        interceptsArray.indices.foreach { i => interceptsArray(i) -= interceptMean }
-        Vectors.dense(interceptsArray)
-      } else if (interceptsArray.length == 1) {
-        Vectors.dense(interceptsArray)
-      } else {
-        Vectors.sparse(numCoefficientSets, Seq())
+      // center the intercepts when using multinomial algorithm
+      if ($(fitIntercept) && isMultinomial) {
+        val interceptArray = interceptVec.toArray
+        val interceptMean = interceptArray.sum / interceptArray.length
+        (0 until interceptVec.size).foreach { i => interceptArray(i) -= interceptMean }
       }
-      (compressedCoefficientMatrix, interceptVector.compressed, arrayBuilder.result())
+      (compressedCoefficientMatrix, interceptVec.compressed, arrayBuilder.result())
     }
   }
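For the `compressed` call used above: it returns whichever of the dense or sparse encodings of the vector is smaller, which is why the match covers both DenseVector and SparseVector outcomes. A small demo (assumes spark-mllib-local on the classpath):

import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors}

object CompressedVectorDemo {
  def main(args: Array[String]): Unit = {
    // Mostly-zero values: the sparse encoding wins and a SparseVector comes back.
    val mostlyZero = Vectors.dense(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0)
    mostlyZero.compressed match {
      case sv: SparseVector => println(s"sparse wins: $sv")
      case dv: DenseVector  => println(s"dense wins: $dv")
    }
  }
}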
@@ -1424,6 +1411,7 @@ private class LogisticAggregator(
   private val numFeatures = bcFeaturesStd.value.length
   private val numFeaturesPlusIntercept = if (fitIntercept) numFeatures + 1 else numFeatures
   private val coefficientSize = bcCoefficients.value.size
+  private val numCoefficientSets = if (multinomial) numClasses else 1
   if (multinomial) {
     require(numClasses == coefficientSize / numFeaturesPlusIntercept, s"The number of " +
       s"coefficients should be ${numClasses * numFeaturesPlusIntercept} but was $coefficientSize")
@@ -1633,12 +1621,12 @@ private class LogisticAggregator(
     lossSum / weightSum
   }

-  def gradient: Vector = {
+  def gradient: Matrix = {
     require(weightSum > 0.0, s"The effective number of instances should be " +
       s"greater than 0.0, but $weightSum.")
     val result = Vectors.dense(gradientSumArray.clone())
     scal(1.0 / weightSum, result)
-    result
+    new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, result.toArray)
   }
 }
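The reshape in `gradient` relies on DenseMatrix interpreting its values array as column-major, so wrapping the flat gradient sum reorders nothing. A small check (assumes spark-mllib-local on the classpath):

import org.apache.spark.ml.linalg.DenseMatrix

object GradientAsMatrix {
  def main(args: Array[String]): Unit = {
    val numCoefficientSets = 2
    val numFeaturesPlusIntercept = 3
    val flat = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0) // column-major values
    val m = new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, flat)
    // Column 0 holds the first numCoefficientSets entries, and so on.
    assert(m(0, 0) == 1.0 && m(1, 0) == 2.0 && m(0, 1) == 3.0)
  }
}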
@@ -1664,6 +1652,7 @@ private class LogisticCostFun(
     val featuresStd = bcFeaturesStd.value
     val numFeatures = featuresStd.length
     val numCoefficientSets = if (multinomial) numClasses else 1
+    val numFeaturesPlusIntercept = if (fitIntercept) numFeatures + 1 else numFeatures

     val logisticAggregator = {
       val seqOp = (c: LogisticAggregator, instance: Instance) => c.add(instance)
@@ -1675,32 +1664,34 @@ private class LogisticCostFun(
       )(seqOp, combOp, aggregationDepth)
     }

-    val totalGradientArray = logisticAggregator.gradient.toArray
+    val totalGradientMatrix = logisticAggregator.gradient
+    val coefMatrix = new DenseMatrix(numCoefficientSets, numFeaturesPlusIntercept, coeffs.toArray)
     // regVal is the sum of coefficients squares excluding intercept for L2 regularization.
     val regVal = if (regParamL2 == 0.0) {
       0.0
     } else {
       var sum = 0.0
-      coeffs.foreachActive { case (index, value) =>
+      coefMatrix.foreachActive { case (classIndex, featureIndex, value) =>
         // We do not apply regularization to the intercepts
-        val isIntercept = fitIntercept && index >= numCoefficientSets * numFeatures
+        val isIntercept = fitIntercept && (featureIndex == numFeatures)
         if (!isIntercept) {
           // The following code will compute the loss of the regularization; also
           // the gradient of the regularization, and add back to totalGradientArray.
           sum += {
             if (standardization) {
-              totalGradientArray(index) += regParamL2 * value
+              val gradValue = totalGradientMatrix(classIndex, featureIndex)
+              totalGradientMatrix.update(classIndex, featureIndex, gradValue + regParamL2 * value)
               value * value
             } else {
-              val featureIndex = index / numCoefficientSets
               if (featuresStd(featureIndex) != 0.0) {
                 // If `standardization` is false, we still standardize the data
                 // to improve the rate of convergence; as a result, we have to
                 // perform this reverse standardization by penalizing each component
                 // differently to get effectively the same objective function when
                 // the training dataset is not standardized.
                 val temp = value / (featuresStd(featureIndex) * featuresStd(featureIndex))
-                totalGradientArray(index) += regParamL2 * temp
+                val gradValue = totalGradientMatrix(classIndex, featureIndex)
+                totalGradientMatrix.update(classIndex, featureIndex, gradValue + regParamL2 * temp)
                 value * temp
               } else {
                 0.0
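A numeric check of the reverse standardization: training works with scaled coefficients betaScaled = beta * sigma, but the intended penalty is 0.5 * regParamL2 * beta^2 on the original-space beta = betaScaled / sigma. Its gradient with respect to betaScaled is regParamL2 * betaScaled / sigma^2, which is exactly regParamL2 * temp. Plain Scala, illustrative values:

object ReverseStandardizationCheck {
  def main(args: Array[String]): Unit = {
    val lambda = 0.1
    val sigma = 3.0
    val betaScaled = 1.2

    val analytic = lambda * betaScaled / (sigma * sigma) // regParamL2 * temp
    // Central-difference derivative of 0.5 * lambda * (b / sigma)^2 at betaScaled:
    def penalty(b: Double) = 0.5 * lambda * math.pow(b / sigma, 2)
    val h = 1e-6
    val numeric = (penalty(betaScaled + h) - penalty(betaScaled - h)) / (2 * h)
    assert(math.abs(analytic - numeric) < 1e-8)
  }
}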
@@ -1713,6 +1704,6 @@ private class LogisticCostFun(
     }
     bcCoeffs.destroy(blocking = false)

-    (logisticAggregator.loss + regVal, new BDV(totalGradientArray))
+    (logisticAggregator.loss + regVal, new BDV(totalGradientMatrix.toArray))
   }
 }
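The hand-off to Breeze above is lossless because Matrix.toArray and the Breeze vector share the same column-major value ordering, so the optimizer sees exactly the values the matrix holds. A round-trip sketch (assumes spark-mllib-local and breeze on the classpath):

import breeze.linalg.{DenseVector => BDV}
import org.apache.spark.ml.linalg.DenseMatrix

object MatrixBreezeRoundTrip {
  def main(args: Array[String]): Unit = {
    val m = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
    val bdv = new BDV[Double](m.toArray)          // what gets handed to the optimizer
    val back = new DenseMatrix(2, 3, bdv.toArray) // what the next iteration reconstructs
    assert((0 until 2).forall(i => (0 until 3).forall(j => m(i, j) == back(i, j))))
  }
}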