-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
MatrixFactorizationTrainer.cs
598 lines (533 loc) · 35.7 KB
/
MatrixFactorizationTrainer.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Internallearn;
using Microsoft.ML.Recommender;
using Microsoft.ML.Recommender.Internal;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.Recommender;
[assembly: LoadableClass(MatrixFactorizationTrainer.Summary, typeof(MatrixFactorizationTrainer), typeof(MatrixFactorizationTrainer.Options),
new Type[] { typeof(SignatureTrainer), typeof(SignatureMatrixRecommendingTrainer) },
"Matrix Factorization", MatrixFactorizationTrainer.LoadNameValue, "libmf", "mf")]
namespace Microsoft.ML.Trainers
{
/// <summary>
/// The <see cref="IEstimator{TTransformer}"/> to predict elements in a matrix using matrix factorization (also known as a type of <a href="https://en.wikipedia.org/wiki/Collaborative_filtering">collaborative filtering</a>).
/// </summary>
/// <remarks>
/// <format type="text/markdown"><![CDATA[
/// To create this trainer, use [MatrixFactorization](xref:Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(System.String,System.String,System.String,System.Int32,System.Double,System.Int32))
/// or [MatrixFactorization(Options)](xref:Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(Microsoft.ML.Trainers.MatrixFactorizationTrainer.Options)).
///
/// ### Input and Output Columns
/// There are three input columns required, one for matrix row indexes, one for matrix column indexes, and one for
/// values (i.e., labels) in matrix.
/// They together define a matrix in [COO](https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_(COO)) format.
/// The type for label column is a vector of <xref:System.Single> while the other two columns are
/// [key](xref:Microsoft.ML.Data.KeyDataViewType) type scalar.
///
/// | Output Column Name | Column Type | Description|
/// | -- | -- | -- |
/// | `Score` | <xref:System.Single> | The predicted matrix value at the location specified by input columns (row index column and column index column). |
///
/// ### Trainer Characteristics
/// | | |
/// | -- | -- |
/// | Machine learning task | Recommender systems |
/// | Is normalization required? | Yes |
/// | Is caching required? | Yes |
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.Recommender |
///
/// ### Background
/// The basic idea of matrix factorization is finding two low-rank factor matrices to approximate the training matrix.
/// In this module, the expected training data (the factorized matrix) is a list of tuples.
/// Every tuple consists of a column index, a row index,
/// and the value at the location specified by the two indices. For an example data structure of a tuple, one can use:
///
/// ```csharp
/// // The following variables defines the shape of a m-by-n matrix. Indexes start with 0; that is, our indexing system
/// // is 0-based.
/// const int m = 60;
/// const int n = 100;
///
/// // A tuple of row index, column index, and rating. It specifies a value in the rating matrix.
/// class MatrixElement
/// {
/// // Matrix column index starts from 0 and is at most n-1.
/// [KeyType(n)]
/// public uint MatrixColumnIndex;
/// // Matrix row index starts from 0 and is at most m-1.
/// [KeyType(m)]
/// public uint MatrixRowIndex;
/// // The rating at the MatrixColumnIndex-th column and the MatrixRowIndex-th row.
/// public float Value;
/// }
/// ```
///
/// Notice that it's not necessary to specify all entries in the training matrix, so matrix factorization can be used to fill <i>missing values</i>.
/// This behavior is very helpful when building recommender systems.
///
/// To provide a better understanding on practical uses of matrix factorization, let's consider music recommendation as an example.
/// Assume that user IDs and music IDs are used as row and column indexes, respectively, and matrix's values are ratings provided by those users.
/// That is, rating $r$ at row $u$ and column $v$ means that user $u$ give $r$ to item $v$.
/// An incomplete matrix is very common because not all users may provide their feedbacks to all products (for example, no one can rate ten million songs).
/// Assume that $R\in{\mathbb R}^{m\times n}$ is a m-by-n rating matrix and the [rank](https://en.wikipedia.org/wiki/Rank_(linear_algebra)) of the two factor matrices are $P\in {\mathbb R}^{k\times m}$ and $Q\in {\mathbb R}^{k\times n}$, where $k$ is the approximation rank.
/// The predicted rating at the $u$-th row and the $v$-th column in $R$ would be the inner product of the $u$-th row of $P$ and the $v$-th row of $Q$; that is, $R$ is approximated by the product of $P$'s transpose ($P^T$) and $Q$.
/// Note that $k$ is usually much smaller than $m$ and $n$, so $P^T Q$ is usually called a low-rank approximation of $R$.
///
/// This trainer includes a [stochastic gradient method](https://en.wikipedia.org/wiki/Stochastic_gradient_descent) and a [coordinate descent method](https://en.wikipedia.org/wiki/Coordinate_descent) for finding $P$ and $Q$ via minimizing the distance between (non-missing part of) $R$ and its approximation $P^T Q$.
/// The coordinate descent method included is specifically for one-class matrix factorization where all observed ratings are positive signals (that is, all rating values are 1).
/// Notice that the only way to invoke one-class matrix factorization is to assign [one-class squared loss](xref:Microsoft.ML.Trainers.MatrixFactorizationTrainer.LossFunctionType.SquareLossOneClass)
/// to [loss function](xref:Microsoft.ML.Trainers.MatrixFactorizationTrainer.Options.LossFunction)
/// when calling [MatrixFactorization(Options)](xref:Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(Microsoft.ML.Trainers.MatrixFactorizationTrainer.Options)).
/// See Page 6 and Page 28 [here](https://www.csie.ntu.edu.tw/~cjlin/talks/facebook.pdf) for a brief introduction to standard matrix factorization and one-class matrix factorization.
/// The [default setting](xref:Microsoft.ML.Trainers.MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression) induces standard matrix factorization.
/// The underlying library used in ML.NET matrix factorization can be found on [a Github repository](https://github.com/cjlin1/libmf).
///
/// For users interested in the mathematical details, please see the references below.
///
/// * For the multi-threading implementation of the used stochastic gradient method, see [A Fast Parallel Stochastic Gradient Method for Matrix Factorization in Shared Memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_journal.pdf).
/// * For the computation happening inside a single thread, see [A Learning-rate Schedule for Stochastic Gradient Methods to Matrix Factorization](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/mf_adaptive_pakdd.pdf).
/// * For the parallel coordinate descent method used and one-class matrix factorization formula, see [Selection of Negative Samples for One-class Matrix Factorization](https://www.csie.ntu.edu.tw/~cjlin/papers/one-class-mf/biased-mf-sdm-with-supp.pdf).
/// * For details in the underlying library used, see [LIBMF: A Library for Parallel Matrix Factorization in Shared-memory Systems](https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf_open_source.pdf).
///
/// Check the See Also section for links to usage examples.
/// ]]>
/// </format>
/// </remarks>
/// <seealso cref="Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(string, string, string, int, double, int)"/>
/// <seealso cref="Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(MatrixFactorizationTrainer.Options)"/>
/// <seealso cref="Options"/>
public sealed class MatrixFactorizationTrainer : ITrainer<MatrixFactorizationModelParameters>,
IEstimator<MatrixFactorizationPredictionTransformer>
{
/// <summary>
/// Type of loss function.
/// </summary>
/// <remarks>
/// The numeric values are forwarded to the underlying native trainer (they become the
/// <c>_fun</c> parameter passed to <c>SafeTrainingAndModelBuffer</c>), so they must not be changed.
/// </remarks>
public enum LossFunctionType
{
    /// <summary>
    /// Used in traditional collaborative filtering problem with squared loss.
    /// </summary>
    /// <remarks>
    /// See <a href="https://www.csie.ntu.edu.tw/~cjlin/papers/libmf/mf_adaptive_pakdd.pdf">Equation</a> (1).
    /// </remarks>
    SquareLossRegression = 0,

    /// <summary>
    /// Used in implicit-feedback recommendation problem.
    /// </summary>
    /// <remarks>
    /// See <a href="http://yifanhu.net/PUB/cf.pdf">Equation</a> (3).
    /// </remarks>
    SquareLossOneClass = 12
};
/// <summary>
/// Options for the <see cref="MatrixFactorizationTrainer"/> as used in [MatrixFactorization(Options)](xref:Microsoft.ML.RecommendationCatalog.RecommendationTrainers.MatrixFactorization(Microsoft.ML.Trainers.MatrixFactorizationTrainer.Options)).
/// </summary>
public sealed class Options
{
    /// <summary>
    /// The name of variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's column index.
    /// The column data must be <see cref="Microsoft.ML.Data.KeyDataViewType"/>.
    /// </summary>
    public string MatrixColumnIndexColumnName;

    /// <summary>
    /// The name of variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's row index.
    /// The column data must be <see cref="Microsoft.ML.Data.KeyDataViewType"/>.
    /// </summary>
    public string MatrixRowIndexColumnName;

    /// <summary>
    /// The name variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's element value.
    /// The column data must be <see cref="System.Single"/>.
    /// </summary>
    public string LabelColumnName;

    /// <summary>
    /// Loss function minimized for finding factor matrices.
    /// </summary>
    /// <remarks>
    /// Two values are allowed, <see cref="LossFunctionType.SquareLossRegression"/> or <see cref="LossFunctionType.SquareLossOneClass"/>.
    /// The <see cref="LossFunctionType.SquareLossRegression"/> means traditional collaborative filtering problem with squared loss.
    /// The <see cref="LossFunctionType.SquareLossOneClass"/> triggers one-class matrix factorization for implicit-feedback recommendation problem.
    /// </remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Loss function minimized for finding factor matrices.")]
    [TGUI(SuggestedSweeps = "0,12")]
    [TlcModule.SweepableDiscreteParam("LossFunction", new object[] { LossFunctionType.SquareLossRegression, LossFunctionType.SquareLossOneClass })]
    public LossFunctionType LossFunction = Defaults.LossFunction;

    /// <summary>
    /// Regularization parameter.
    /// </summary>
    /// <remarks>
    /// It's the weight of factor matrices Frobenius norms in the objective function minimized by matrix factorization's algorithm. A small value could cause over-fitting.
    /// </remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Regularization parameter. " +
        "It's the weight of factor matrices Frobenius norms in the objective function minimized by matrix factorization's algorithm. " +
        "A small value could cause over-fitting.")]
    [TGUI(SuggestedSweeps = "0.01,0.05,0.1,0.5,1")]
    [TlcModule.SweepableDiscreteParam("Lambda", new object[] { 0.01f, 0.05f, 0.1f, 0.5f, 1f })]
    public double Lambda = Defaults.Lambda;

    /// <summary>
    /// Rank of approximation matrices.
    /// </summary>
    /// <remarks>
    /// If input data has size of m-by-n we would build two approximation matrices m-by-k and k-by-n where k is approximation rank.
    /// </remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Latent space dimension (denoted by k). If the factorized matrix is m-by-n, " +
        "two factor matrices found by matrix factorization are m-by-k and k-by-n, respectively. " +
        "This value is also known as the rank of matrix factorization because k is generally much smaller than m and n.", ShortName = "K")]
    [TGUI(SuggestedSweeps = "8,16,64,128")]
    [TlcModule.SweepableDiscreteParam("K", new object[] { 8, 16, 64, 128 })]
    public int ApproximationRank = Defaults.ApproximationRank;

    /// <summary>
    /// Number of training iterations.
    /// </summary>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Training iterations; that is, the times that the training algorithm iterates through the whole training data once.", ShortName = "iter,numiterations")]
    [TGUI(SuggestedSweeps = "10,20,40")]
    [TlcModule.SweepableDiscreteParam("NumIterations", new object[] { 10, 20, 40 })]
    public int NumberOfIterations = Defaults.NumIterations;

    ///<summary>
    /// Initial learning rate. It specifies the speed of the training algorithm.
    ///</summary>
    ///<remarks>
    /// Small value may increase the number of iterations needed to achieve a reasonable result.
    /// Large value may lead to numerical difficulty such as a infinity value.
    ///</remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Initial learning rate. It specifies the speed of the training algorithm. " +
        "Small value may increase the number of iterations needed to achieve a reasonable result. Large value may lead to numerical difficulty such as a infinity value.", ShortName = "Eta")]
    [TGUI(SuggestedSweeps = "0.001,0.01,0.1")]
    [TlcModule.SweepableDiscreteParam("Eta", new object[] { 0.001f, 0.01f, 0.1f })]
    public double LearningRate = Defaults.LearningRate;

    /// <summary>
    /// Importance of unobserved entries' loss in one-class matrix factorization. Applicable if <see cref="LossFunction"/> set to <see cref="LossFunctionType.SquareLossOneClass"/>
    /// </summary>
    /// <remarks>
    /// Importance of unobserved (i.e., negative) entries' loss in one-class matrix factorization.
    /// In general, only a few of matrix entries (e.g., less than 1%) in the training are observed (i.e., positive).
    /// To balance the contributions from unobserved and observed in the overall loss function, this parameter is
    /// usually a small value so that the solver is able to find a factorization equally good to unobserved and observed
    /// entries. If only 10000 observed entries present in a 200000-by-300000 training matrix, one can try Alpha = 10000 / (200000*300000 - 10000).
    /// When most entries in the training matrix are observed, one can use Alpha >> 1; for example, if only 10000 in previous
    /// matrix is not observed, one can try Alpha = (200000 * 300000 - 10000) / 10000. Consequently,
    /// Alpha = (# of observed entries) / (# of unobserved entries) can make observed and unobserved entries equally important
    /// in the minimized loss function. However, the best setting in machine learning is always data-dependent so user still needs to
    /// try multiple values.
    /// </remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Importance of unobserved entries' loss in one-class matrix factorization.")]
    [TGUI(SuggestedSweeps = "1,0.01,0.0001,0.000001")]
    [TlcModule.SweepableDiscreteParam("Alpha", new object[] { 1f, 0.01f, 0.0001f, 0.000001f })]
    public double Alpha = Defaults.Alpha;

    /// <summary>
    /// Desired negative entries value in one-class matrix factorization. Applicable if <see cref="LossFunction"/> set to <see cref="LossFunctionType.SquareLossOneClass"/>
    /// </summary>
    /// <remarks>
    /// In one-class matrix factorization, all matrix values observed are one (which can be viewed as positive cases in binary classification)
    /// while unobserved values (which can be viewed as negative cases in binary classification) need to be specified manually using this option.
    /// </remarks>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Desired negative entries' value in one-class matrix factorization")]
    // Fixed sweep string: previously "0.000001,0,0001,0.01", where "0,0001" was a typo for "0.0001"
    // (see the matching SweepableDiscreteParam values below).
    [TGUI(SuggestedSweeps = "0.000001,0.0001,0.01")]
    [TlcModule.SweepableDiscreteParam("C", new object[] { 0.000001f, 0.0001f, 0.01f })]
    public double C = Defaults.C;

    /// <summary>
    /// Number of threads will be used during training. If unspecified all available threads will be used.
    /// </summary>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Number of threads can be used in the training procedure.", ShortName = "t,numthreads")]
    public int? NumberOfThreads;

    /// <summary>
    /// Suppress writing additional information to output.
    /// </summary>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Suppress writing additional information to output.")]
    public bool Quiet = Defaults.Quiet;

    /// <summary>
    /// Force the factor matrices to be non-negative.
    /// </summary>
    [Argument(ArgumentType.AtMostOnce, HelpText = "Force the factor matrices to be non-negative.", ShortName = "nn")]
    public bool NonNegative = Defaults.NonNegative;
};
[BestFriend]
// Default hyperparameter values, shared between the Options class and the convenience constructor
// so both construction paths produce identically-configured trainers.
internal static class Defaults
{
    public const bool Quiet = false;
    public const bool NonNegative = false;
    public const double C = 0.000001f;
    public const double Alpha = 0.0001f;
    public const double LearningRate = 0.1;
    public const int NumIterations = 20;
    public const int ApproximationRank = 8;
    public const double Lambda = 0.1;
    public const LossFunctionType LossFunction = LossFunctionType.SquareLossRegression;
}
// Summary string surfaced via the LoadableClass assembly attribute.
internal const string Summary = "From pairs of row/column indices and a value of a matrix, this trains a predictor capable of filling in unknown entries of the matrix, "
    + "using a low-rank matrix factorization. This technique is often used in recommender system, where the row and column indices indicate users and items, "
    + "and the values of the matrix are ratings. ";

private readonly IHost _host;

// LIBMF's parameters, copied from Options (or the convenience constructor's arguments) at construction time.
private readonly int _fun;       // Loss function code, (int)LossFunctionType.
private readonly double _lambda; // Regularization coefficient.
private readonly int _k;         // Approximation rank.
private readonly int _iter;      // Number of training iterations.
private readonly double _eta;    // Initial learning rate.
private readonly double _alpha;  // Weight of unobserved entries' loss (one-class MF only).
private readonly double _c;      // Desired value of unobserved entries (one-class MF only).
private readonly int _threads;   // Number of worker threads.
private readonly bool _quiet;    // Suppress additional training output.
private readonly bool _doNmf;    // Force non-negative factor matrices.

PredictionKind ITrainer.PredictionKind => PredictionKind.Recommendation;

internal const string LoadNameValue = "MatrixFactorization";

// The row-index, column-index, and label columns below specify the training matrix: this trainer
// consumes tuples of (row index, column index, label value). For example, a 2-by-2 matrix
//   [9, 4]
//   [8, 7]
// can be encoded as tuples (0, 0, 9), (0, 1, 4), (1, 0, 8), and (1, 1, 7); that is, the
// row/column/label columns contain [0, 0, 1, 1] / [0, 1, 0, 1] / [9, 4, 8, 7].

/// <summary>
/// The name of variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's column index.
/// </summary>
internal readonly string MatrixColumnIndexName;

/// <summary>
/// The name of variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's row index.
/// </summary>
internal readonly string MatrixRowIndexName;

/// <summary>
/// The name variable (i.e., column in a <see cref="IDataView"/> type system) used as matrix's element value.
/// </summary>
internal readonly string LabelName;

/// <summary>
/// The <see cref="TrainerInfo"/> contains general parameters for this trainer.
/// </summary>
TrainerInfo ITrainer.Info => _info;

private readonly TrainerInfo _info;
/// <summary>
/// Initializes a new instance of <see cref="MatrixFactorizationTrainer"/> through the <see cref="Options"/> class.
/// </summary>
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="options">An instance of <see cref="Options"/> to apply advanced parameters to the algorithm.</param>
[BestFriend]
internal MatrixFactorizationTrainer(IHostEnvironment env, Options options)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(LoadNameValue);

    // Fail fast on non-positive hyperparameters before anything reaches the native trainer.
    const string posError = "Parameter must be positive";
    _host.CheckValue(options, nameof(options));
    _host.CheckUserArg(options.ApproximationRank > 0, nameof(options.ApproximationRank), posError);
    _host.CheckUserArg(!options.NumberOfThreads.HasValue || options.NumberOfThreads > 0, nameof(options.NumberOfThreads), posError);
    _host.CheckUserArg(options.NumberOfIterations > 0, nameof(options.NumberOfIterations), posError);
    _host.CheckUserArg(options.Lambda > 0, nameof(options.Lambda), posError);
    _host.CheckUserArg(options.LearningRate > 0, nameof(options.LearningRate), posError);
    _host.CheckUserArg(options.Alpha > 0, nameof(options.Alpha), posError);
    // NOTE(review): options.C is intentionally(?) not range-checked here, unlike Alpha —
    // confirm whether a zero/negative desired value for unobserved entries is legitimate.

    // Copy hyperparameters into readonly fields consumed by PrepareBuffer.
    _fun = (int)options.LossFunction;
    _lambda = options.Lambda;
    _k = options.ApproximationRank;
    _iter = options.NumberOfIterations;
    _eta = options.LearningRate;
    _alpha = options.Alpha;
    _c = options.C;
    _threads = options.NumberOfThreads ?? Environment.ProcessorCount;
    _quiet = options.Quiet;
    _doNmf = options.NonNegative;

    // This trainer handles its own data access; it requests neither normalization nor caching.
    _info = new TrainerInfo(normalization: false, caching: false);

    LabelName = options.LabelColumnName;
    MatrixColumnIndexName = options.MatrixColumnIndexColumnName;
    MatrixRowIndexName = options.MatrixRowIndexColumnName;
}
/// <summary>
/// Initializes a new instance of <see cref="MatrixFactorizationTrainer"/>.
/// </summary>
/// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
/// <param name="matrixColumnIndexColumnName">The name of the column hosting the matrix's column IDs.</param>
/// <param name="matrixRowIndexColumnName">The name of the column hosting the matrix's row IDs.</param>
/// <param name="approximationRank">Rank of approximation matrices. Must be positive.</param>
/// <param name="learningRate">Initial learning rate. It specifies the speed of the training algorithm. Must be positive.</param>
/// <param name="numIterations">Number of training iterations. Must be positive.</param>
[BestFriend]
internal MatrixFactorizationTrainer(IHostEnvironment env,
    string labelColumnName,
    string matrixColumnIndexColumnName,
    string matrixRowIndexColumnName,
    int approximationRank = Defaults.ApproximationRank,
    double learningRate = Defaults.LearningRate,
    int numIterations = Defaults.NumIterations)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(LoadNameValue);

    // Validate the user-supplied hyperparameters, mirroring the checks performed by the
    // Options-based constructor so both construction paths fail fast on invalid values.
    const string posError = "Parameter must be positive";
    _host.CheckUserArg(approximationRank > 0, nameof(approximationRank), posError);
    _host.CheckUserArg(learningRate > 0, nameof(learningRate), posError);
    _host.CheckUserArg(numIterations > 0, nameof(numIterations), posError);

    // All remaining hyperparameters take their default values from a fresh Options instance.
    var args = new Options();
    _fun = (int)args.LossFunction;
    _k = approximationRank;
    _iter = numIterations;
    _eta = learningRate;
    _alpha = args.Alpha;
    _lambda = args.Lambda;
    _c = args.C;
    _threads = args.NumberOfThreads ?? Environment.ProcessorCount;
    _quiet = args.Quiet;
    _doNmf = args.NonNegative;

    // This trainer handles its own data access; it requests neither normalization nor caching.
    _info = new TrainerInfo(normalization: false, caching: false);

    LabelName = labelColumnName;
    MatrixColumnIndexName = matrixColumnIndexColumnName;
    MatrixRowIndexName = matrixRowIndexColumnName;
}
/// <summary>
/// Train a matrix factorization model based on training data, validation data, and so on in the given context.
/// </summary>
/// <param name="context">The information collection needed for training. <see cref="TrainContext"/> for details.</param>
private MatrixFactorizationModelParameters Train(TrainContext context)
{
    _host.CheckValue(context, nameof(context));

    using (var ch = _host.Start("Training"))
    {
        return TrainCore(ch, context.TrainingSet, context.ValidationSet);
    }
}

// Explicit ITrainer implementations; both simply forward to the private Train method above.
IPredictor ITrainer.Train(TrainContext context) => Train(context);

MatrixFactorizationModelParameters ITrainer<MatrixFactorizationModelParameters>.Train(TrainContext context) => Train(context);
// Core training routine: validates the (train, optional validation) schemas, opens cursors over
// the COO-encoded matrix columns, and drives the native LIBMF buffer to produce model parameters.
private MatrixFactorizationModelParameters TrainCore(IChannel ch, RoleMappedData data, RoleMappedData validData = null)
{
    _host.AssertValue(ch);
    ch.AssertValue(data);
    ch.AssertValueOrNull(validData);

    // The training matrix is described by three columns: a unique floating-point label (the matrix
    // value) plus key-typed row-index and column-index columns.
    ch.CheckParam(data.Schema.Label.HasValue, nameof(data), "Input data did not have a unique label");
    RecommenderUtils.CheckAndGetMatrixIndexColumns(data, out var matrixColumnIndexColInfo, out var matrixRowIndexColInfo, isDecode: false);
    var labelCol = data.Schema.Label.Value;
    if (labelCol.Type != NumberDataViewType.Single && labelCol.Type != NumberDataViewType.Double)
        throw ch.Except("Column '{0}' for label should be floating point, but is instead {1}", labelCol.Name, labelCol.Type);
    MatrixFactorizationModelParameters predictor;
    if (validData != null)
    {
        // The validation set must mirror the training set: same label requirements and index
        // columns whose key types match the training columns exactly.
        ch.CheckValue(validData, nameof(validData));
        ch.CheckParam(validData.Schema.Label.HasValue, nameof(validData), "Input validation data did not have a unique label");
        RecommenderUtils.CheckAndGetMatrixIndexColumns(validData, out var validMatrixColumnIndexColInfo, out var validMatrixRowIndexColInfo, isDecode: false);
        var validLabelCol = validData.Schema.Label.Value;
        if (validLabelCol.Type != NumberDataViewType.Single && validLabelCol.Type != NumberDataViewType.Double)
            throw ch.Except("Column '{0}' for validation label should be floating point, but is instead {1}", validLabelCol.Name, validLabelCol.Type);

        if (!matrixColumnIndexColInfo.Type.Equals(validMatrixColumnIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-column types differed, {0} vs. {1}",
                matrixColumnIndexColInfo.Type, validMatrixColumnIndexColInfo.Type);
        }
        if (!matrixRowIndexColInfo.Type.Equals(validMatrixRowIndexColInfo.Type))
        {
            throw ch.ExceptParam(nameof(validData), "Train and validation sets' matrix-row types differed, {0} vs. {1}",
                matrixRowIndexColInfo.Type, validMatrixRowIndexColInfo.Type);
        }
    }

    // The matrix dimensions come from the key counts of the index columns.
    int colCount = matrixColumnIndexColInfo.Type.GetKeyCountAsInt32(_host);
    int rowCount = matrixRowIndexColInfo.Type.GetKeyCountAsInt32(_host);
    ch.Assert(rowCount > 0);
    ch.Assert(colCount > 0);

    // Checks for equality on the validation set ensure it is correct here.
    using (var cursor = data.Data.GetRowCursor(matrixColumnIndexColInfo, matrixRowIndexColInfo, data.Schema.Label.Value))
    {
        // LibMF works only over single precision floats, but we want to be able to consume either.
        var labGetter = RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, cursor, data.Schema.Label.Value.Index);
        var matrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, cursor, matrixColumnIndexColInfo.Index);
        var matrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, cursor, matrixRowIndexColInfo.Index);

        if (validData == null)
        {
            // Have the trainer do its work.
            using (var buffer = PrepareBuffer())
            {
                buffer.Train(ch, rowCount, colCount, cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter);
                predictor = new MatrixFactorizationModelParameters(_host, buffer, (KeyDataViewType)matrixColumnIndexColInfo.Type, (KeyDataViewType)matrixRowIndexColInfo.Type);
            }
        }
        else
        {
            // NOTE(review): this re-resolves the validation index columns even though the validation
            // branch above already did so — the earlier locals went out of scope, hence the repeat.
            RecommenderUtils.CheckAndGetMatrixIndexColumns(validData, out var validMatrixColumnIndexColInfo, out var validMatrixRowIndexColInfo, isDecode: false);
            // NOTE(review): the cursor below is opened with the *training* column infos and the
            // training label column, while the getters use the validation columns' indices. The type
            // checks above guarantee matching types, not matching column positions — confirm both
            // schemas resolve to the same positions, else the wrong columns could be activated.
            using (var validCursor = validData.Data.GetRowCursor(matrixColumnIndexColInfo, matrixRowIndexColInfo, data.Schema.Label.Value))
            {
                ValueGetter<float> validLabelGetter = RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, validCursor, validData.Schema.Label.Value.Index);
                var validMatrixColumnIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, validCursor, validMatrixColumnIndexColInfo.Index);
                var validMatrixRowIndexGetter = RowCursorUtils.GetGetterAs<uint>(NumberDataViewType.UInt32, validCursor, validMatrixRowIndexColInfo.Index);

                // Have the trainer do its work.
                using (var buffer = PrepareBuffer())
                {
                    buffer.TrainWithValidation(ch, rowCount, colCount,
                        cursor, labGetter, matrixRowIndexGetter, matrixColumnIndexGetter,
                        validCursor, validLabelGetter, validMatrixRowIndexGetter, validMatrixColumnIndexGetter);
                    predictor = new MatrixFactorizationModelParameters(_host, buffer, (KeyDataViewType)matrixColumnIndexColInfo.Type, (KeyDataViewType)matrixRowIndexColInfo.Type);
                }
            }
        }
    }
    return predictor;
}
// Creates the native training/model buffer configured with this trainer's hyperparameters.
// The fifth argument, Math.Max(20, 2 * _threads), is forwarded to the native layer as-is;
// presumably it controls how the data is partitioned for parallel training — TODO confirm
// against SafeTrainingAndModelBuffer's parameter documentation.
private SafeTrainingAndModelBuffer PrepareBuffer()
{
    return new SafeTrainingAndModelBuffer(_host, _fun, _k, _threads, Math.Max(20, 2 * _threads),
        _iter, _lambda, _eta, _alpha, _c, _doNmf, _quiet, copyData: false);
}
/// <summary>
/// Trains a <see cref="MatrixFactorizationTrainer"/> using both training and validation data, returns a <see cref="MatrixFactorizationPredictionTransformer"/>.
/// </summary>
/// <param name="trainData">The training data set.</param>
/// <param name="validationData">The validation data set.</param>
public MatrixFactorizationPredictionTransformer Fit(IDataView trainData, IDataView validationData)
{
    // Bind the user-specified column names to the roles this trainer consumes.
    var columnRoles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>
    {
        new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, LabelName),
        new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RecommenderUtils.MatrixColumnIndexKind.Value, MatrixColumnIndexName),
        new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RecommenderUtils.MatrixRowIndexKind.Value, MatrixRowIndexName),
    };

    var roleMappedTrainData = new RoleMappedData(trainData, columnRoles);
    var roleMappedValidData = validationData == null ? null : new RoleMappedData(validationData, columnRoles);

    MatrixFactorizationModelParameters model;
    using (var ch = _host.Start("Training"))
    {
        model = TrainCore(ch, roleMappedTrainData, roleMappedValidData);
    }

    return new MatrixFactorizationPredictionTransformer(_host, model, trainData.Schema, MatrixColumnIndexName, MatrixRowIndexName);
}
/// <summary>
/// Trains and returns a <see cref="MatrixFactorizationPredictionTransformer"/>.
/// </summary>
/// <param name="input">The training data set.</param>
public MatrixFactorizationPredictionTransformer Fit(IDataView input) => Fit(input, null);
/// <summary>
/// Schema propagation for transformers. Returns the output schema of the data, if
/// the input schema is like the one provided.
/// </summary>
/// <param name="inputSchema">The shape of the input data's schema.</param>
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    _host.CheckValue(inputSchema, nameof(inputSchema));

    // Local helper: verify a required input column exists and has a compatible type.
    void CheckColumnsCompatible(SchemaShape.Column cachedColumn, string columnRole)
    {
        // Report nameof(inputSchema) as the offending parameter (previously this reported
        // nameof(col), a local variable, which produced a misleading schema-mismatch message
        // and was inconsistent with the incompatible-type throw below).
        if (!inputSchema.TryFindColumn(cachedColumn.Name, out var col))
            throw _host.ExceptSchemaMismatch(nameof(inputSchema), columnRole, cachedColumn.Name);

        if (!cachedColumn.IsCompatibleWith(col))
            throw _host.ExceptSchemaMismatch(nameof(inputSchema), columnRole, cachedColumn.Name,
                cachedColumn.GetTypeString(), col.GetTypeString());
    }

    // Check if label column is good.
    var labelColumn = new SchemaShape.Column(LabelName, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.Single, false);
    CheckColumnsCompatible(labelColumn, "label");

    // Check if columns of matrix's row and column indexes are good. Note that column of IDataView and column of matrix are two different things.
    var matrixColumnIndexColumn = new SchemaShape.Column(MatrixColumnIndexName, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.UInt32, true);
    var matrixRowIndexColumn = new SchemaShape.Column(MatrixRowIndexName, SchemaShape.Column.VectorKind.Scalar, NumberDataViewType.UInt32, true);
    CheckColumnsCompatible(matrixColumnIndexColumn, "matrixColumnIndex");
    CheckColumnsCompatible(matrixRowIndexColumn, "matrixRowIndex");

    // Input columns just pass through so that output column dictionary contains all input columns.
    var outColumns = inputSchema.ToDictionary(x => x.Name);

    // Add columns produced by this estimator.
    foreach (var col in GetOutputColumnsCore(inputSchema))
        outColumns[col.Name] = col;

    return new SchemaShape(outColumns.Values);
}
// Produces the single column this estimator adds: a scalar Single "Score" carrying the
// standard trainer-output annotations. The label column is expected to be present;
// GetOutputSchema validates it before calling here, hence an assert rather than a throw.
private SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema)
{
    bool labelFound = inputSchema.TryFindColumn(LabelName, out _);
    Contracts.Assert(labelFound);

    var scoreColumn = new SchemaShape.Column(
        DefaultColumnNames.Score,
        SchemaShape.Column.VectorKind.Scalar,
        NumberDataViewType.Single,
        false,
        new SchemaShape(AnnotationUtils.GetTrainerOutputAnnotation()));

    return new[] { scoreColumn };
}
}
}