@@ -438,18 +438,14 @@ class LogisticRegression @Since("1.2.0") (
       val standardizationParam = $(standardization)
       def regParamL1Fun = (index: Int) => {
         // Remove the L1 penalization on the intercept
-        val isIntercept = $(fitIntercept) && ((index + 1) % numFeaturesPlusIntercept == 0)
+        val isIntercept = $(fitIntercept) && index >= numFeatures * numCoefficientSets
         if (isIntercept) {
           0.0
         } else {
           if (standardizationParam) {
             regParamL1
           } else {
-            val featureIndex = if ($(fitIntercept)) {
-              index % numFeaturesPlusIntercept
-            } else {
-              index % numFeatures
-            }
+            val featureIndex = index / numCoefficientSets
             // If `standardization` is false, we still standardize the data
             // to improve the rate of convergence; as a result, we have to
             // perform this reverse standardization by penalizing each component
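For a quick sanity check of the new arithmetic: with the column-major layout, all `numFeatures * numCoefficientSets` feature coefficients precede the intercepts, so the intercept test is a single comparison and the owning feature falls out of integer division (for multinomial, `numCoefficientSets == numClasses`; for binomial it is 1). A minimal standalone sketch, plain Scala with no Spark dependencies and illustrative values only:

```scala
object RegParamL1FunSketch {
  def main(args: Array[String]): Unit = {
    val numClasses = 3
    val numFeatures = 2
    val numCoefficientSets = numClasses  // multinomial; binomial would use 1
    val fitIntercept = true
    val regParamL1 = 0.1

    // Column-major layout: the numClasses coefficients for feature 0 come
    // first, then feature 1, ..., with all intercepts in a block at the end.
    def l1Reg(index: Int): Double = {
      val isIntercept = fitIntercept && index >= numFeatures * numCoefficientSets
      if (isIntercept) {
        0.0  // the intercepts are never L1-penalized
      } else {
        // The owning feature: used to undo standardization when the user
        // asked for standardization = false.
        val featureIndex = index / numCoefficientSets
        println(s"index $index -> feature $featureIndex")
        regParamL1
      }
    }

    val total = numCoefficientSets * (numFeatures + 1)
    val regs = (0 until total).map(l1Reg)
    println(regs.mkString(", "))  // 0.1 x 6 for the betas, then 0.0 x 3
  }
}
```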
@@ -466,6 +462,15 @@ class LogisticRegression @Since("1.2.0") (
         new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol))
       }

+      /*
+        The coefficients are laid out in column major order during training. e.g. for
+        `numClasses = 3` and `numFeatures = 2` and `fitIntercept = true` the layout is:
+
+        Array(beta_11, beta_21, beta_31, beta_12, beta_22, beta_32, intercept_1, intercept_2,
+          intercept_3)
+
+        where beta_jk corresponds to the coefficient for class `j` and feature `k`.
+      */
       val initialCoefficientsWithIntercept =
         Vectors.zeros(numCoefficientSets * numFeaturesPlusIntercept)

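The added comment can be reproduced mechanically from the two index formulas this patch uses everywhere; `betaIndex` and `interceptIndex` below are hypothetical helpers, not names from the patch:

```scala
object ColumnMajorLayoutSketch {
  def main(args: Array[String]): Unit = {
    val numClasses = 3   // classes j = 1..3
    val numFeatures = 2  // features k = 1..2

    // Flat index of beta_jk in the column-major training array
    // (0-based: row = j - 1, col = k - 1).
    def betaIndex(j: Int, k: Int): Int = (k - 1) * numClasses + (j - 1)
    // Intercepts sit in one contiguous block after all the betas.
    def interceptIndex(j: Int): Int = numClasses * numFeatures + (j - 1)

    val labels = new Array[String](numClasses * (numFeatures + 1))
    for (k <- 1 to numFeatures; j <- 1 to numClasses)
      labels(betaIndex(j, k)) = s"beta_$j$k"
    for (j <- 1 to numClasses)
      labels(interceptIndex(j)) = s"intercept_$j"

    println(labels.mkString("Array(", ", ", ")"))
    // Array(beta_11, beta_21, beta_31, beta_12, beta_22, beta_32,
    //       intercept_1, intercept_2, intercept_3)
  }
}
```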
@@ -489,13 +494,14 @@ class LogisticRegression @Since("1.2.0") (
         val initialCoefWithInterceptArray = initialCoefficientsWithIntercept.toArray
         val providedCoef = optInitialModel.get.coefficientMatrix
         providedCoef.foreachActive { (row, col, value) =>
-          val flatIndex = row * numFeaturesPlusIntercept + col
+          // convert matrix to column major for training
+          val flatIndex = col * numCoefficientSets + row
           // We need to scale the coefficients since they will be trained in the scaled space
           initialCoefWithInterceptArray(flatIndex) = value * featuresStd(col)
         }
         if ($(fitIntercept)) {
           optInitialModel.get.interceptVector.foreachActive { (index, value) =>
-            val coefIndex = (index + 1) * numFeaturesPlusIntercept - 1
+            val coefIndex = numCoefficientSets * numFeatures + index
             initialCoefWithInterceptArray(coefIndex) = value
           }
         }
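A sketch of the initial-model copy above, using a plain 2-D array in place of Spark's `Matrix`: each row-major entry `(row, col)` lands at column-major slot `col * numCoefficientSets + row`, multiplied by `featuresStd(col)` because a coefficient in the original space corresponds to `coef * std` in the standardized space. Illustrative values throughout:

```scala
object InitialCoefCopySketch {
  def main(args: Array[String]): Unit = {
    val numCoefficientSets = 3
    val numFeatures = 2
    val featuresStd = Array(0.5, 2.0)

    // Row-major provided coefficients: providedCoef(row)(col).
    val providedCoef = Array(
      Array(1.0, 2.0),
      Array(3.0, 4.0),
      Array(5.0, 6.0))

    val flat = new Array[Double](numCoefficientSets * (numFeatures + 1))
    for (row <- 0 until numCoefficientSets; col <- 0 until numFeatures) {
      // Convert to column major and move into the scaled training space.
      val flatIndex = col * numCoefficientSets + row
      flat(flatIndex) = providedCoef(row)(col) * featuresStd(col)
    }
    // Intercepts are appended unscaled after all feature coefficients.
    val intercepts = Array(-1.0, 0.0, 1.0)
    intercepts.indices.foreach { i =>
      flat(numCoefficientSets * numFeatures + i) = intercepts(i)
    }
    println(flat.mkString(", "))
    // 0.5, 1.5, 2.5, 4.0, 8.0, 12.0, -1.0, 0.0, 1.0
  }
}
```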
@@ -526,7 +532,7 @@ class LogisticRegression @Since("1.2.0") (
         val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
         val rawMean = rawIntercepts.sum / rawIntercepts.length
         rawIntercepts.indices.foreach { i =>
-          initialCoefficientsWithIntercept.toArray(i * numFeaturesPlusIntercept + numFeatures) =
+          initialCoefficientsWithIntercept.toArray(numClasses * numFeatures + i) =
             rawIntercepts(i) - rawMean
         }
       } else if ($(fitIntercept)) {
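The `rawIntercepts` initialization above encodes the class priors: with all coefficients zero, the softmax of the intercepts should match the label frequencies, and `log(count + 1)` with mean-centering achieves that up to smoothing (softmax is invariant to a shared shift). A standalone sketch with a hypothetical histogram:

```scala
object InterceptPriorSketch {
  def main(args: Array[String]): Unit = {
    // Per-class label counts (made-up histogram).
    val histogram = Array(40.0, 10.0, 50.0)

    val rawIntercepts = histogram.map(c => math.log(c + 1)) // add 1 for smoothing
    val rawMean = rawIntercepts.sum / rawIntercepts.length
    // Centering leaves the softmax probabilities unchanged while keeping the
    // intercepts summing to zero.
    val intercepts = rawIntercepts.map(_ - rawMean)

    println(intercepts.mkString(", "))
    // ~ 0.366, -0.950, 0.584 -- larger classes get larger intercepts.
    // In the training vector these land at numClasses * numFeatures + i.
  }
}
```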
@@ -572,16 +578,20 @@ class LogisticRegression @Since("1.2.0") (
       /*
          The coefficients are trained in the scaled space; we're converting them back to
          the original space.
+
+         Additionally, since the coefficients were laid out in column major order during training
+         to avoid extra computation, we convert them back to row major before passing them to the
+         model.
+
          Note that the intercept in scaled space and original space is the same;
          as a result, no scaling is needed.
        */
       val rawCoefficients = state.x.toArray.clone()
       val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i =>
-        // flatIndex will loop though rawCoefficients, and skip the intercept terms.
-        val flatIndex = if ($(fitIntercept)) i + i / numFeatures else i
+        val colMajorIndex = (i % numFeatures) * numCoefficientSets + i / numFeatures
         val featureIndex = i % numFeatures
         if (featuresStd(featureIndex) != 0.0) {
-          rawCoefficients(flatIndex) / featuresStd(featureIndex)
+          rawCoefficients(colMajorIndex) / featuresStd(featureIndex)
         } else {
           0.0
         }
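Unpacking works with the inverse index map: row-major position `i` belongs to class `i / numFeatures` and feature `i % numFeatures`, which sits at column-major slot `(i % numFeatures) * numCoefficientSets + i / numFeatures`; dividing by the feature's standard deviation leaves the scaled space. A self-contained sketch with made-up numbers:

```scala
object BackToRowMajorSketch {
  def main(args: Array[String]): Unit = {
    val numCoefficientSets = 3
    val numFeatures = 2
    val featuresStd = Array(0.5, 2.0)

    // Trained (column-major, scaled-space) coefficients, intercepts last.
    val rawCoefficients =
      Array(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, /* intercepts */ 7.0, 8.0, 9.0)

    val coefficientArray = Array.tabulate(numCoefficientSets * numFeatures) { i =>
      val colMajorIndex = (i % numFeatures) * numCoefficientSets + i / numFeatures
      val featureIndex = i % numFeatures
      if (featuresStd(featureIndex) != 0.0) {
        rawCoefficients(colMajorIndex) / featuresStd(featureIndex)
      } else {
        0.0  // constant feature: its coefficient stays zero
      }
    }
    println(coefficientArray.mkString(", "))
    // 0.2, 0.2, 0.4, 0.25, 0.6, 0.3  (row major: class 0's row first)
  }
}
```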
@@ -618,7 +628,7 @@ class LogisticRegression @Since("1.2.0") (

       val interceptsArray: Array[Double] = if ($(fitIntercept)) {
         Array.tabulate(numCoefficientSets) { i =>
-          val coefIndex = (i + 1) * numFeaturesPlusIntercept - 1
+          val coefIndex = numFeatures * numCoefficientSets + i
           rawCoefficients(coefIndex)
         }
       } else {
@@ -697,6 +707,7 @@ class LogisticRegressionModel private[spark] (
   /**
    * A vector of model coefficients for "binomial" logistic regression. If this model was trained
    * using the "multinomial" family then an exception is thrown.
+   *
    * @return Vector
    */
   @Since("2.0.0")
@@ -720,6 +731,7 @@ class LogisticRegressionModel private[spark] (
   /**
    * The model intercept for "binomial" logistic regression. If this model was fit with the
    * "multinomial" family then an exception is thrown.
+   *
    * @return Double
    */
   @Since("1.3.0")
@@ -1389,6 +1401,12 @@ class BinaryLogisticRegressionSummary private[classification] (
 * $$
 * </blockquote></p>
 *
+ * @note In order to avoid unnecessary computation during calculation of the gradient updates
+ * we lay out the coefficients in column major order during training. This allows us to
+ * perform feature standardization once, while still retaining sequential memory access
+ * for speed. We convert back to row major order when we create the model,
+ * since this form is optimal for the matrix operations used for prediction.
+ *
 * @param bcCoefficients The broadcast coefficients corresponding to the features.
 * @param bcFeaturesStd The broadcast standard deviation values of the features.
 * @param numClasses the number of possible outcomes for k classes classification problem in
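To make the @note concrete: in column-major order the `numClasses` coefficients for one feature are contiguous, so a single pass over the (possibly sparse) feature vector standardizes each active value once and then performs stride-1 updates. A schematic sketch with plain dense arrays standing in for Spark vectors:

```scala
object AccessPatternSketch {
  def main(args: Array[String]): Unit = {
    val numClasses = 3
    val numFeatures = 4
    val coefficients = Array.fill(numClasses * numFeatures)(0.1)
    val features = Array(1.0, 0.0, 2.0, 3.0)
    val featuresStd = Array(1.0, 1.0, 2.0, 3.0)

    val margins = new Array[Double](numClasses)
    features.indices.foreach { index =>
      val value = features(index)
      if (value != 0.0) {
        // Standardize once per active feature...
        val stdValue = value / featuresStd(index)
        var j = 0
        while (j < numClasses) {
          // ...then touch numClasses consecutive array slots (stride 1).
          margins(j) += coefficients(index * numClasses + j) * stdValue
          j += 1
        }
      }
    }
    println(margins.mkString(", "))
    // Row-major would need coefficients(j * numFeatures + index): either
    // numClasses strided passes over the vector, or numClasses divisions
    // per active value.
  }
}
```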
@@ -1486,57 +1504,65 @@ private class LogisticAggregator(
     var marginOfLabel = 0.0
     var maxMargin = Double.NegativeInfinity

-    val margins = Array.tabulate(numClasses) { i =>
-      var margin = 0.0
-      features.foreachActive { (index, value) =>
-        if (localFeaturesStd(index) != 0.0 && value != 0.0) {
-          margin += localCoefficients(i * numFeaturesPlusIntercept + index) *
-            value / localFeaturesStd(index)
-        }
+    val margins = new Array[Double](numClasses)
+    features.foreachActive { (index, value) =>
+      val stdValue = value / localFeaturesStd(index)
+      var j = 0
+      while (j < numClasses) {
+        margins(j) += localCoefficients(index * numClasses + j) * stdValue
+        j += 1
       }
-
+    }
+    var i = 0
+    while (i < numClasses) {
       if (fitIntercept) {
-        margin += localCoefficients(i * numFeaturesPlusIntercept + numFeatures)
+        margins(i) += localCoefficients(numClasses * numFeatures + i)
       }
-      if (i == label.toInt) marginOfLabel = margin
-      if (margin > maxMargin) {
-        maxMargin = margin
+      if (i == label.toInt) marginOfLabel = margins(i)
+      if (margins(i) > maxMargin) {
+        maxMargin = margins(i)
       }
-      margin
+      i += 1
     }

    /**
     * When maxMargin > 0, the original formula could cause overflow.
     * We address this by subtracting maxMargin from all the margins, so it's guaranteed
     * that all of the new margins will be smaller than zero to prevent arithmetic overflow.
     */
+    val multipliers = new Array[Double](numClasses)
     val sum = {
       var temp = 0.0
-      if (maxMargin > 0) {
-        for (i <- 0 until numClasses) {
-          margins(i) -= maxMargin
-          temp += math.exp(margins(i))
-        }
-      } else {
-        for (i <- 0 until numClasses) {
-          temp += math.exp(margins(i))
-        }
+      var i = 0
+      while (i < numClasses) {
+        if (maxMargin > 0) margins(i) -= maxMargin
+        val exp = math.exp(margins(i))
+        temp += exp
+        multipliers(i) = exp
+        i += 1
       }
       temp
     }

-    for (i <- 0 until numClasses) {
-      val multiplier = math.exp(margins(i)) / sum - {
-        if (label == i) 1.0 else 0.0
-      }
-      features.foreachActive { (index, value) =>
-        if (localFeaturesStd(index) != 0.0 && value != 0.0) {
-          localGradientArray(i * numFeaturesPlusIntercept + index) +=
-            weight * multiplier * value / localFeaturesStd(index)
+    margins.indices.foreach { i =>
+      multipliers(i) = multipliers(i) / sum - (if (label == i) 1.0 else 0.0)
+    }
+    features.foreachActive { (index, value) =>
+      if (localFeaturesStd(index) != 0.0 && value != 0.0) {
+        val stdValue = value / localFeaturesStd(index)
+        var j = 0
+        while (j < numClasses) {
+          localGradientArray(index * numClasses + j) +=
+            weight * multipliers(j) * stdValue
+          j += 1
         }
       }
-      if (fitIntercept) {
-        localGradientArray(i * numFeaturesPlusIntercept + numFeatures) += weight * multiplier
+    }
+    if (fitIntercept) {
+      var i = 0
+      while (i < numClasses) {
+        localGradientArray(numFeatures * numClasses + i) += weight * multipliers(i)
+        i += 1
       }
     }

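The restructured aggregator above folds the max-margin shift, the exponentials, and the softmax numerators into a single pass. The numerical trick itself is the standard shift-invariant softmax; a standalone sketch, with illustrative margins chosen so a naive `exp` would overflow:

```scala
object SoftmaxMultipliersSketch {
  def main(args: Array[String]): Unit = {
    val margins = Array(500.0, 502.0, 499.0)  // naive math.exp would overflow
    val label = 1

    val maxMargin = margins.max
    val multipliers = new Array[Double](margins.length)
    var sum = 0.0
    var i = 0
    while (i < margins.length) {
      // Shift so every exponent is <= 0; softmax is shift-invariant.
      if (maxMargin > 0) margins(i) -= maxMargin
      val exp = math.exp(margins(i))
      sum += exp
      multipliers(i) = exp  // reuse the exponential as the softmax numerator
      i += 1
    }
    margins.indices.foreach { i =>
      // d(loss)/d(margin_i) = softmax_i - 1{i == label}
      multipliers(i) = multipliers(i) / sum - (if (label == i) 1.0 else 0.0)
    }
    println(multipliers.mkString(", "))
  }
}
```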
@@ -1637,6 +1663,7 @@ private class LogisticCostFun(
     val bcCoeffs = instances.context.broadcast(coeffs)
     val featuresStd = bcFeaturesStd.value
     val numFeatures = featuresStd.length
+    val numCoefficientSets = if (multinomial) numClasses else 1

     val logisticAggregator = {
       val seqOp = (c: LogisticAggregator, instance: Instance) => c.add(instance)
@@ -1656,7 +1683,7 @@ private class LogisticCostFun(
     var sum = 0.0
     coeffs.foreachActive { case (index, value) =>
       // We do not apply regularization to the intercepts
-      val isIntercept = fitIntercept && ((index + 1) % (numFeatures + 1) == 0)
+      val isIntercept = fitIntercept && index >= numCoefficientSets * numFeatures
       if (!isIntercept) {
         // The following code will compute the loss of the regularization; also
         // the gradient of the regularization, and add back to totalGradientArray.
@@ -1665,11 +1692,7 @@ private class LogisticCostFun(
         totalGradientArray(index) += regParamL2 * value
         value * value
       } else {
-        val featureIndex = if (fitIntercept) {
-          index % (numFeatures + 1)
-        } else {
-          index % numFeatures
-        }
+        val featureIndex = index / numCoefficientSets
         if (featuresStd(featureIndex) != 0.0) {
           // If `standardization` is false, we still standardize the data
           // to improve the rate of convergence; as a result, we have to