4
4
5
5
using System ;
6
6
using System . Collections . Generic ;
7
+ using MathNet . Numerics . LinearAlgebra ;
7
8
using Microsoft . ML . Core . Data ;
8
9
using Microsoft . ML . Runtime ;
9
10
using Microsoft . ML . Runtime . CommandLine ;
@@ -40,11 +41,27 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase<LogisticRegres
40
41
41
42
public sealed class Arguments : ArgumentsBase
{
    /// <summary>
    /// If set to <value>true</value> training statistics will be generated at the end of training.
    /// If you have a large number of learned training parameters (more than 500),
    /// generating the training statistics might take a few seconds.
    /// More than 1000 weights might take a few minutes. For those cases consider using the instance of <see cref="ComputeLRTrainingStd"/>
    /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
    /// </summary>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Show statistics of training examples.", ShortName = "stat", SortOrder = 50)]
    public bool ShowTrainingStats = false;

    /// <summary>
    /// The instance of <see cref="ComputeLRTrainingStd"/> that computes the training statistics at the end of training.
    /// If you have a large number of learned training parameters (more than 500),
    /// generating the training statistics might take a few seconds.
    /// More than 1000 weights might take a few minutes. For those cases consider using the instance of <see cref="ComputeLRTrainingStd"/>
    /// present in the Microsoft.ML.HalLearners package. That computes the statistics using hardware acceleration.
    /// </summary>
    public ComputeLRTrainingStd StdComputer;
}
46
63
47
- private Double _posWeight ;
64
+ private double _posWeight ;
48
65
private LinearModelStatistics _stats ;
49
66
50
67
/// <summary>
@@ -78,6 +95,9 @@ public LogisticRegression(IHostEnvironment env,
78
95
79
96
_posWeight = 0 ;
80
97
ShowTrainingStats = Args . ShowTrainingStats ;
98
+
99
+ if ( ShowTrainingStats && Args . StdComputer == null )
100
+ Args . StdComputer = new ComputeLRTrainingStdImpl ( ) ;
81
101
}
82
102
83
103
/// <summary>
@@ -88,6 +108,9 @@ internal LogisticRegression(IHostEnvironment env, Arguments args)
88
108
{
89
109
_posWeight = 0 ;
90
110
ShowTrainingStats = Args . ShowTrainingStats ;
111
+
112
+ if ( ShowTrainingStats && Args . StdComputer == null )
113
+ Args . StdComputer = new ComputeLRTrainingStdImpl ( ) ;
91
114
}
92
115
93
116
public override PredictionKind PredictionKind => PredictionKind . BinaryClassification ;
@@ -330,7 +353,13 @@ protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.
330
353
}
331
354
}
332
355
333
- _stats = new LinearModelStatistics ( Host , NumGoodRows , numParams , deviance , nullDeviance ) ;
356
+ if ( Args . StdComputer == null )
357
+ _stats = new LinearModelStatistics ( Host , NumGoodRows , numParams , deviance , nullDeviance ) ;
358
+ else
359
+ {
360
+ var std = Args . StdComputer . ComputeStd ( hessian , weightIndices , numParams , CurrentWeights . Length , ch , L2Weight ) ;
361
+ _stats = new LinearModelStatistics ( Host , NumGoodRows , numParams , deviance , nullDeviance , std ) ;
362
+ }
334
363
}
335
364
336
365
protected override void ProcessPriorDistribution ( float label , float weight )
@@ -397,4 +426,125 @@ public static CommonOutputs.BinaryClassificationOutput TrainBinary(IHostEnvironm
397
426
( ) => LearnerEntryPointsUtils . FindColumn ( host , input . TrainingData . Schema , input . WeightColumn ) ) ;
398
427
}
399
428
}
429
+
430
/// <summary>
/// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
/// p-value and z-Score.
/// If you need fast calculations, use the <see cref="ComputeLRTrainingStd"/> implementation in the Microsoft.ML.HALLearners package,
/// which makes use of hardware acceleration.
/// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
/// </summary>
public abstract class ComputeLRTrainingStd
{
    /// <summary>
    /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
    /// p-value and z-Score.
    /// If you need fast calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration.
    /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
    /// </summary>
    public abstract VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int parametersCount, int currentWeightsCount, IChannel ch, float l2Weight);

    /// <summary>
    /// Adjust the variance for regularized cases.
    /// </summary>
    [BestFriend]
    internal void AdjustVariance(float inverseEntry, int iRow, int iCol, float l2Weight, float[] stdErrorValues2)
    {
        // Correction term contributed by the L2 regularizer for this inverse-Hessian entry.
        float correction = l2Weight * inverseEntry * inverseEntry;

        // The row's variance accumulator always receives the correction.
        stdErrorValues2[iRow] -= correction;

        // Entries strictly below the diagonal (and not in column 0) also adjust
        // the column's accumulator, mirroring the symmetric matrix entry.
        if (iCol > 0 && iCol < iRow)
            stdErrorValues2[iCol] -= correction;
    }
}
460
+
461
/// <summary>
/// Extends the <see cref="ComputeLRTrainingStd"/> implementing <see cref="ComputeLRTrainingStd.ComputeStd(double[], int[], int, int, IChannel, float)"/> making use of Math.NET Numerics.
/// If you need faster calculations (have non-sparse weight vectors of more than 300 features), use the instance of ComputeLRTrainingStd from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration
/// for those computations.
/// </summary>
public sealed class ComputeLRTrainingStdImpl : ComputeLRTrainingStd
{
    /// <summary>
    /// Computes the standard deviation matrix of each of the non-zero training weights, needed to calculate further the standard deviation,
    /// p-value and z-Score.
    /// If you need faster calculations, use the ComputeStd method from the Microsoft.ML.HALLearners package, which makes use of hardware acceleration.
    /// Due to the existence of regularization, an approximation is used to compute the variances of the trained linear coefficients.
    /// </summary>
    /// <param name="hessian">The flattened Hessian of the objective; its entries are consumed sequentially while unpacking into a dense symmetric matrix below.</param>
    /// <param name="weightIndices">Indices of the selected (non-zero) weights; used as the index array of the returned <see cref="VBuffer{T}"/>. May be null for dense weights — TODO confirm against callers.</param>
    /// <param name="numSelectedParams">Number of selected parameters; the dimension of the Hessian matrix and the count of computed standard errors.</param>
    /// <param name="currentWeightsCount">Logical length of the returned <see cref="VBuffer{T}"/> (the full weight vector length).</param>
    /// <param name="ch">The <see cref="IChannel"/> used for messaging.</param>
    /// <param name="l2Weight">The L2Weight used for training. (Supply the same one that got used during training.)</param>
    public override VBuffer<float> ComputeStd(double[] hessian, int[] weightIndices, int numSelectedParams, int currentWeightsCount, IChannel ch, float l2Weight)
    {
        Contracts.AssertValue(ch);
        Contracts.AssertValue(hessian, nameof(hessian));
        Contracts.Assert(numSelectedParams > 0);
        Contracts.Assert(currentWeightsCount > 0);
        Contracts.Assert(l2Weight > 0);

        // Unpack the flattened Hessian into a dense symmetric matrix. Entries are read
        // sequentially; each fills position (row, col) and, when not on the anti-diagonal,
        // its mirrored counterpart (dimension - col, dimension - row).
        double[,] matrixHessian = new double[numSelectedParams, numSelectedParams];

        int hessianLength = 0;
        int dimension = numSelectedParams - 1;

        for (int row = dimension; row >= 0; row--)
        {
            // Only positions with row + col <= dimension carry packed data, so the loop
            // condition replaces the original per-iteration check.
            for (int col = 0; row + col <= dimension; col++)
            {
                matrixHessian[row, col] = hessian[hessianLength];
                if (row + col != dimension)
                    matrixHessian[dimension - col, dimension - row] = hessian[hessianLength];
                hessianLength++;
            }
        }

        var h = Matrix<double>.Build.DenseOfArray(matrixHessian);
        var inverse = h.Inverse();

        float[] stdErrorValues = new float[numSelectedParams];
        // The first entry is finalized immediately; the remaining entries hold raw
        // variances until the square-root pass at the end.
        stdErrorValues[0] = (float)Math.Sqrt(inverse[0, numSelectedParams - 1]);

        for (int i = 1; i < numSelectedParams; i++)
        {
            // Initialize with the diagonal of the inverse Hessian, read along the
            // anti-diagonal because of the mirrored layout used when unpacking above.
            stdErrorValues[i] = (float)inverse[i, numSelectedParams - i - 1];
        }

        if (l2Weight > 0)
        {
            // Iterate through all entries of inverse Hessian to make adjustment to variance.
            // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
            // http://www.aloki.hu/pdf/0402_171179.pdf (Equations 11 and 25)
            // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf (Section "Significance testing in ridge logistic regression")
            for (int iRow = 1; iRow < numSelectedParams; iRow++)
            {
                for (int iCol = 0; iCol <= iRow; iCol++)
                {
                    float entry = (float)inverse[iRow, numSelectedParams - iCol - 1];
                    AdjustVariance(entry, iRow, iCol, l2Weight, stdErrorValues);
                }
            }
        }

        // Convert the accumulated variances into standard deviations.
        for (int i = 1; i < numSelectedParams; i++)
            stdErrorValues[i] = (float)Math.Sqrt(stdErrorValues[i]);

        return new VBuffer<float>(currentWeightsCount, numSelectedParams, stdErrorValues, weightIndices);
    }
}
400
550
}
0 commit comments