Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #4292 about using PFI with BPT and CMPB #4306

Merged
merged 25 commits into from
Nov 12, 2019
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
a016aa6
Added working sample to use PFI with binary class loaded from disk
antoniovs1029 Oct 2, 2019
bb97226
Remove comments from sample example of PFI with BPT loaded from disk.
antoniovs1029 Oct 4, 2019
30ed977
Added static class ParameterMixingCalibratedModelParameters to fix pr…
antoniovs1029 Oct 4, 2019
0b4bcd6
Load the calibrator and submodel first to generate a generic type at …
antoniovs1029 Oct 4, 2019
4eff967
Fixed the problem by means of adding an extra member to the Parameter…
antoniovs1029 Oct 4, 2019
cd5c612
Fixed problem in previous commit
antoniovs1029 Oct 4, 2019
c48347b
Cleaned up the sample of PFI with BPT loaded from disk
antoniovs1029 Oct 7, 2019
ec617e4
Fixed tests that were using a cast that now returns null. Notice that…
antoniovs1029 Oct 7, 2019
e085f77
Clean up some comments
antoniovs1029 Oct 7, 2019
c93802f
Used class attributes to solve the problem
antoniovs1029 Oct 10, 2019
3a9285f
Fixed empty spaces in code
antoniovs1029 Oct 10, 2019
013fb58
Remove hardcode of type CalibratedModelParametersBase
antoniovs1029 Oct 10, 2019
7a76cb7
Removed unused 'using Calibrators'
antoniovs1029 Oct 11, 2019
dd40190
Added Attribute suffix
antoniovs1029 Oct 11, 2019
37d97aa
Removed duplicated strings
antoniovs1029 Oct 11, 2019
6ab644c
Updated PredictionTransformerLoadTypeAttribute to use property instea…
antoniovs1029 Oct 11, 2019
8f687cb
Added tests for using PFI with Binary Classification loaded from disk
antoniovs1029 Oct 11, 2019
c1d11a1
Removed unused 'using System.IO'
antoniovs1029 Oct 11, 2019
4cf02dc
Fixed other CalibratedModelParameters classes, and added tests. Still…
antoniovs1029 Oct 17, 2019
7714f2f
Removed unused 'using' statements
antoniovs1029 Oct 17, 2019
bcd112f
Change in the FeatureWeightCalibratedModelParameters constructor to b…
antoniovs1029 Oct 17, 2019
89534b6
Removed comments and non-generic PMCMP class
antoniovs1029 Oct 18, 2019
c02507e
Corrected 2 access modifiers
antoniovs1029 Oct 23, 2019
7e7505d
Merge remote-tracking branch 'upstream/master' into myissue05
antoniovs1029 Nov 1, 2019
8263941
Merge remote-tracking branch 'upstream/master' into myissue05
antoniovs1029 Nov 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Samples.Dynamic.Trainers.BinaryClassification
{
/// <summary>
/// Sample that fits a binary classification pipeline, saves it to disk, loads
/// it back, and runs Permutation Feature Importance (PFI) on the loaded model.
/// </summary>
public static class PermutationFeatureImportanceLoadFromDisk
{
    /// <summary>
    /// Runs the sample end to end and writes one line per feature to the
    /// console: its model weight, the mean change in AUC reported by PFI, and
    /// 1.96 times the standard error of that change.
    /// </summary>
    public static void Example()
    {
        var mlContext = new MLContext(seed: 1);
        var samples = GenerateData();
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // Create pipeline
        var featureColumns =
            new string[] { nameof(Data.Feature1), nameof(Data.Feature2) };
        var pipeline = mlContext.Transforms
            .Concatenate("Features", featureColumns)
            .Append(mlContext.Transforms.NormalizeMinMax("Features"))
            .Append(mlContext.BinaryClassification.Trainers
                .SdcaLogisticRegression());

        // Create and save model
        var model0 = pipeline.Fit(data);
        var modelPath = "./model.zip";
        mlContext.Model.Save(model0, data.Schema, modelPath);

        // Load model; the schema stored with it is not needed by this sample.
        var model = mlContext.Model.Load(modelPath, out _);

        // Transform the dataset.
        var transformedData = model.Transform(data);

        // Direct casts (rather than 'as') make an unexpected loaded type fail
        // right here with an InvalidCastException instead of handing a null
        // predictor to the PFI call below.
        var loadedChain = (TransformerChain<ITransformer>)model;
        var linearPredictor =
            (BinaryPredictionTransformer<CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrator>>)
            loadedChain.LastTransformer;

        // Execute PFI with the linearPredictor
        var permutationMetrics = mlContext.BinaryClassification
            .PermutationFeatureImportance(linearPredictor, transformedData,
                permutationCount: 30);

        // Sort indices according to PFI results
        var sortedIndices = permutationMetrics
            .Select((metrics, index) => new { index, metrics.AreaUnderRocCurve })
            .OrderByDescending(
                feature => Math.Abs(feature.AreaUnderRocCurve.Mean))
            .Select(feature => feature.index);

        Console.WriteLine("Feature\tModel Weight\tChange in AUC"
            + "\t95% Confidence in the Mean Change in AUC");
        var auc = permutationMetrics.Select(x => x.AreaUnderRocCurve).ToArray();
        foreach (int i in sortedIndices)
        {
            Console.WriteLine("{0}\t{1:0.00}\t{2:G4}\t{3:G4}",
                featureColumns[i],
                // The weights are reachable through the calibrated model's submodel.
                linearPredictor.Model.SubModel.Weights[i],
                auc[i].Mean,
                1.96 * auc[i].StandardError);
        }

        // Expected output:
        //  Feature     Model Weight Change in AUC  95% Confidence in the Mean Change in AUC
        //  Feature2        35.15     -0.387        0.002015
        //  Feature1        17.94     -0.1514       0.0008963
    }

    /// <summary>
    /// One example: a boolean label and two float features.
    /// </summary>
    private class Data
    {
        public bool Label { get; set; }

        public float Feature1 { get; set; }

        public float Feature2 { get; set; }
    }

    /// <summary>
    /// Lazily generates <paramref name="nExamples"/> random examples whose
    /// label is derived from a noisy linear combination of the two features.
    /// </summary>
    /// <param name="nExamples">Number of examples to yield.</param>
    /// <param name="bias">Constant term of the linear combination.</param>
    /// <param name="weight1">Coefficient applied to Feature1.</param>
    /// <param name="weight2">Coefficient applied to Feature2.</param>
    /// <param name="seed">Seed for the random number generator.</param>
    /// <returns>A deferred sequence of generated examples.</returns>
    private static IEnumerable<Data> GenerateData(int nExamples = 10000,
        double bias = 0, double weight1 = 1, double weight2 = 2, int seed = 1)
    {
        var rng = new Random(seed);
        for (int i = 0; i < nExamples; i++)
        {
            var data = new Data
            {
                Feature1 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
                Feature2 = (float)(rng.Next(10) * (rng.NextDouble() - 0.5)),
            };

            // Create a noisy label.
            var value = (float)(bias + weight1 * data.Feature1 + weight2 *
                data.Feature2 + rng.NextDouble() - 0.5);

            data.Label = Sigmoid(value) > 0.5;
            yield return data;
        }
    }

    /// <summary>
    /// Logistic function: 1 / (1 + e^(-x)).
    /// </summary>
    private static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-1 * x));
}
}
72 changes: 63 additions & 9 deletions src/Microsoft.ML.Data/Prediction/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Reflection;
using Microsoft.ML;
using Microsoft.ML.Calibrators;
using Microsoft.ML.CommandLine;
Expand Down Expand Up @@ -396,6 +397,7 @@ bool ISingleCanSaveOnnx.SaveAsOnnx(OnnxContext ctx, string[] outputNames, string
}

[BestFriend]
[PredictionTransformerLoadType(typeof(CalibratedModelParametersBase<,>))]
internal sealed class ValueMapperCalibratedModelParameters<TSubModel, TCalibrator> :
ValueMapperCalibratedModelParametersBase<TSubModel, TCalibrator>, ICanSaveModel
where TSubModel : class
Expand Down Expand Up @@ -430,8 +432,8 @@ private static VersionInfo GetVersionInfoBulk()
loaderAssemblyName: typeof(ValueMapperCalibratedModelParameters<TSubModel, TCalibrator>).Assembly.FullName);
}

private ValueMapperCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx)
: base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx))
private ValueMapperCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx, TSubModel predictor, TCalibrator calibrator)
: base(env, RegistrationName, predictor, calibrator)
{
}

Expand All @@ -443,7 +445,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL
var ver2 = GetVersionInfoBulk();
var ver = ctx.Header.ModelSignature == ver2.ModelSignature ? ver2 : ver1;
ctx.CheckAtModel(ver);
return new ValueMapperCalibratedModelParameters<TSubModel, TCalibrator>(env, ctx);

antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
// Load first the predictor and calibrator
var predictor = GetPredictor(env, ctx);
var calibrator = GetCalibrator(env, ctx);

// Create a generic type using the correct parameter types of predictor and calibrator
Type genericType = typeof(ValueMapperCalibratedModelParameters<,>);
var genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType);

return (CalibratedModelParametersBase)genericInstance;
}

void ICanSaveModel.Save(ModelSaveContext ctx)
Expand All @@ -456,6 +467,7 @@ void ICanSaveModel.Save(ModelSaveContext ctx)
}

[BestFriend]
[PredictionTransformerLoadType(typeof(CalibratedModelParametersBase<,>))]
internal sealed class FeatureWeightsCalibratedModelParameters<TSubModel, TCalibrator> :
ValueMapperCalibratedModelParametersBase<TSubModel, TCalibrator>,
IPredictorWithFeatureWeights<float>,
Expand Down Expand Up @@ -487,8 +499,9 @@ private static VersionInfo GetVersionInfo()
loaderAssemblyName: typeof(FeatureWeightsCalibratedModelParameters<TSubModel, TCalibrator>).Assembly.FullName);
}

private FeatureWeightsCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx)
: base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx))
private FeatureWeightsCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx,
TSubModel predictor, TCalibrator calibrator)
: base(env, RegistrationName, predictor, calibrator)
{
Host.Check(SubModel is IPredictorWithFeatureWeights<float>, "Predictor does not implement " + nameof(IPredictorWithFeatureWeights<float>));
_featureWeights = (IPredictorWithFeatureWeights<float>)SubModel;
Expand All @@ -499,7 +512,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
ctx.CheckAtModel(GetVersionInfo());
return new FeatureWeightsCalibratedModelParameters<TSubModel, TCalibrator>(env, ctx);

// Load first the predictor and calibrator
var predictor = GetPredictor(env, ctx);
var calibrator = GetCalibrator(env, ctx);

// Create a generic type using the correct parameter types of predictor and calibrator
Type genericType = typeof(FeatureWeightsCalibratedModelParameters<,>);
var genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType);

return (CalibratedModelParametersBase) genericInstance;
}

void ICanSaveModel.Save(ModelSaveContext ctx)
Expand All @@ -520,6 +542,7 @@ public void GetFeatureWeights(ref VBuffer<float> weights)
/// Encapsulates a predictor and a calibrator that implement <see cref="IParameterMixer"/>.
/// Its implementation of <see cref="IParameterMixer.CombineParameters"/> combines both the predictors and the calibrators.
/// </summary>
[PredictionTransformerLoadType(typeof(CalibratedModelParametersBase <,>))]
internal sealed class ParameterMixingCalibratedModelParameters<TSubModel, TCalibrator> :
ValueMapperCalibratedModelParametersBase<TSubModel, TCalibrator>,
IParameterMixer<float>,
Expand Down Expand Up @@ -553,8 +576,8 @@ private static VersionInfo GetVersionInfo()
loaderAssemblyName: typeof(ParameterMixingCalibratedModelParameters<TSubModel, TCalibrator>).Assembly.FullName);
}

private ParameterMixingCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx)
: base(env, RegistrationName, GetPredictor(env, ctx), GetCalibrator(env, ctx))
private ParameterMixingCalibratedModelParameters(IHostEnvironment env, ModelLoadContext ctx, TSubModel predictor, TCalibrator calibrator)
: base(env, RegistrationName, predictor, calibrator)
{
Host.Check(SubModel is IParameterMixer<float>, "Predictor does not implement " + nameof(IParameterMixer));
Host.Check(SubModel is IPredictorWithFeatureWeights<float>, "Predictor does not implement " + nameof(IPredictorWithFeatureWeights<float>));
Expand All @@ -566,7 +589,16 @@ private static CalibratedModelParametersBase Create(IHostEnvironment env, ModelL
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
ctx.CheckAtModel(GetVersionInfo());
return new ParameterMixingCalibratedModelParameters<TSubModel, TCalibrator>(env, ctx);

// Load first the predictor and calibrator
var predictor = GetPredictor(env, ctx);
var calibrator = GetCalibrator(env, ctx);

// Create a generic type using the correct parameter types of predictor and calibrator
Type genericType = typeof(ParameterMixingCalibratedModelParameters<,>);
object genericInstance = CreateCalibratedModelParameters.Create(env, ctx, predictor, calibrator, genericType);

return (CalibratedModelParametersBase) genericInstance;
}

void ICanSaveModel.Save(ModelSaveContext ctx)
Expand Down Expand Up @@ -777,6 +809,28 @@ ValueMapper<TSrc, VBuffer<float>> IFeatureContributionMapper.GetFeatureContribut
}
}

/// <summary>
/// Reflection helper that closes an open generic calibrated-model-parameters type
/// over the runtime types of an already loaded predictor and calibrator, and
/// instantiates it.
/// </summary>
internal static class CreateCalibratedModelParameters
{
    /// <summary>
    /// Builds <paramref name="calibratedModelParametersType"/> closed over the runtime
    /// types of <paramref name="predictor"/> and <paramref name="calibrator"/>, then
    /// invokes its non-public instance constructor taking
    /// (IHostEnvironment, ModelLoadContext, predictor type, calibrator type).
    /// </summary>
    /// <param name="env">Host environment forwarded to the constructor.</param>
    /// <param name="ctx">Model load context forwarded to the constructor.</param>
    /// <param name="predictor">Previously loaded predictor; its runtime type becomes the first generic argument.</param>
    /// <param name="calibrator">Previously loaded calibrator; its runtime type becomes the second generic argument.</param>
    /// <param name="calibratedModelParametersType">Open generic type definition with two type parameters.</param>
    /// <returns>The newly constructed instance, typed as <see cref="object"/>.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="predictor"/>, <paramref name="calibrator"/> or
    /// <paramref name="calibratedModelParametersType"/> is null.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The constructed type has no matching non-public instance constructor.
    /// </exception>
    internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object predictor, ICalibrator calibrator, Type calibratedModelParametersType)
    {
        if (predictor == null)
        {
            throw new ArgumentNullException(nameof(predictor));
        }

        if (calibrator == null)
        {
            throw new ArgumentNullException(nameof(calibrator));
        }

        if (calibratedModelParametersType == null)
        {
            throw new ArgumentNullException(nameof(calibratedModelParametersType));
        }

        Type predictorType = predictor.GetType();
        Type calibratorType = calibrator.GetType();
        Type constructed = calibratedModelParametersType.MakeGenericType(predictorType, calibratorType);

        Type[] constructorArgs =
        {
            typeof(IHostEnvironment),
            typeof(ModelLoadContext),
            predictorType,
            calibratorType
        };

        // Call the appropriate constructor of the created generic type, passing on the previously loaded predictor and calibrator.
        var genericCtor = constructed.GetConstructor(BindingFlags.NonPublic | BindingFlags.Instance, null, constructorArgs, null);
        if (genericCtor == null)
        {
            // GetConstructor returns null when nothing matches; fail with a message that names the type.
            throw new InvalidOperationException(
                "No non-public instance constructor taking (IHostEnvironment, ModelLoadContext, predictor, calibrator) was found on type "
                + constructed.FullName);
        }

        return genericCtor.Invoke(new object[] { env, ctx, predictor, calibrator });
    }
}

[BestFriend]
internal static class CalibratorUtils
{
Expand Down
Loading