Skip to content

Commit

Permalink
AutoML Add Recommendation Task (#4246)
Browse files Browse the repository at this point in the history
Trains Recommendation models able to predict rating for existing users
  • Loading branch information
LittleLittleCloud authored and maryamariyan committed Oct 17, 2019
1 parent d531ea8 commit ee8418a
Show file tree
Hide file tree
Showing 36 changed files with 687 additions and 53 deletions.
20 changes: 20 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/Movie.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples.DataStructures
{
/// <summary>
/// Input row type for the AutoML recommendation sample. Each field is bound to a
/// column of the loaded text file by position through <see cref="LoadColumnAttribute"/>.
/// </summary>
public class Movie
{
/// <summary>User identifier, read from column 0. The sample passes this column as the user ID column.</summary>
[LoadColumn(0)]
public string UserId;

/// <summary>Movie identifier, read from column 1. The sample passes this column as the item ID column.</summary>
[LoadColumn(1)]
public string MovieId;

/// <summary>Rating value, read from column 2. The sample passes this column as the label column.</summary>
[LoadColumn(2)]
public float Rating;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Output row type used by the recommendation sample's prediction engine.
/// </summary>
public class MovieRatingPrediction
{
/// <summary>Predicted rating, bound to the "Score" column of the model output.</summary>
[ColumnName("Score")]
public float Rating;
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
Expand All @@ -7,6 +7,8 @@

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.AutoML\Microsoft.ML.AutoML.csproj" />
<NativeAssemblyReference Include="MatrixFactorizationNative" />
<NativeAssemblyReference Include="FastTreeNative" />
</ItemGroup>

</Project>
3 changes: 3 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ public static void Main(string[] args)
{
try
{
RecommendationExperiment.Run();
Console.Clear();

RegressionExperiment.Run();
Console.Clear();

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using System.Linq;
using Microsoft.ML.AutoML.Samples.DataStructures;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample showing how to run an AutoML recommendation experiment end to end:
/// load data, run the experiment, inspect and evaluate the best model, save it,
/// and use it for single predictions.
/// </summary>
public static class RecommendationExperiment
{
    // Sample configuration. Replace the placeholder paths before running.
    // These values are never reassigned, so they are declared readonly.
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";
    private static readonly string ModelPath = @"<Desired model output directory goes here>\Model.zip";
    private static readonly string LabelColumnName = "Rating";
    private static readonly string UserColumnName = "UserId";
    private static readonly string ItemColumnName = "MovieId";

    // Maximum experiment time, in seconds, passed to the experiment settings.
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the sample: trains with AutoML, prints validation and test metrics,
    /// saves the best model to <c>ModelPath</c>, and prints two example predictions.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<Movie>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<Movie>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML recommendation experiment for {ExperimentTime} seconds...");
        ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
            .CreateRecommendationExperiment(new RecommendationExperimentSettings() { MaxExperimentTimeInSeconds = ExperimentTime })
            .Execute(trainDataView, testDataView,
                new ColumnInformation()
                {
                    LabelColumnName = LabelColumnName,
                    UserIdColumnName = UserColumnName,
                    ItemIdColumnName = ItemColumnName
                });

        // STEP 3: Print metric from best model
        RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RegressionMetrics testMetrics = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, ModelPath);

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<Movie, MovieRatingPrediction>(bestRun.Model);

        // STEP 7: Initialize a new test, and get the prediction
        var testMovie = new Movie
        {
            UserId = "1",
            MovieId = "1097",
        };
        var prediction = predictionEngine.Predict(testMovie);
        Console.WriteLine($"Predicted rating for: {prediction.Rating}");

        // The sample expects ratings only for existing users: for a user ID treated
        // as unknown, the message below states that NaN is the expected prediction.
        testMovie = new Movie
        {
            UserId = "612", // new user
            MovieId = "2940"
        };
        prediction = predictionEngine.Predict(testMovie);
        Console.WriteLine($"Expected Rating NaN for unknown user, Predicted: {prediction.Rating}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes the regression metrics used to score the recommendation model to the console.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}");
        Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
}
1 change: 1 addition & 0 deletions pkg/Microsoft.ML.AutoML/Microsoft.ML.AutoML.nupkgproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<ProjectReference Include="../Microsoft.ML/Microsoft.ML.nupkgproj" />
<ProjectReference Include="../Microsoft.ML.LightGBM/Microsoft.ML.LightGBM.nupkgproj" />
<ProjectReference Include="../Microsoft.ML.Mkl.Components/Microsoft.ML.Mkl.Components.nupkgproj" />
<ProjectReference Include="../Microsoft.ML.Recommender/Microsoft.ML.Recommender.nupkgproj" />
</ItemGroup>

</Project>
35 changes: 35 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,41 @@ public MulticlassClassificationExperiment CreateMulticlassClassificationExperime
return new MulticlassClassificationExperiment(_context, experimentSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a recommendation dataset.
/// </summary>
/// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
/// <returns>A new AutoML recommendation experiment.</returns>
/// <remarks>
/// <para>See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML recommendation experiment.</para>
/// <para>An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
/// This is because once AutoML starts training an ML.NET model, AutoML lets the
/// model train to completion. For instance, if the first model
/// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
/// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
/// the experiment will run for 4 + 5 = 9 hours (not 6 hours).</para>
/// </remarks>
public RecommendationExperiment CreateRecommendationExperiment(uint maxExperimentTimeInSeconds)
{
    // Default settings, with only the time budget overridden by the caller.
    var experimentSettings = new RecommendationExperimentSettings()
    {
        MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
    };

    return new RecommendationExperiment(_context, experimentSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a recommendation dataset, configured by the supplied settings.
/// </summary>
/// <param name="experimentSettings">Settings for the AutoML experiment.</param>
/// <returns>A new AutoML recommendation experiment.</returns>
/// <remarks>
/// See <see cref="RecommendationExperiment"/> for a more detailed code example of an AutoML recommendation experiment.
/// </remarks>
public RecommendationExperiment CreateRecommendationExperiment(RecommendationExperimentSettings experimentSettings)
    => new RecommendationExperiment(_context, experimentSettings);

/// <summary>
/// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
/// </summary>
Expand Down
10 changes: 10 additions & 0 deletions src/Microsoft.ML.AutoML/API/ColumnInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ public sealed class ColumnInformation
/// <value>The default value is "Label".</value>
public string LabelColumnName { get; set; }

/// <summary>
/// The dataset column to use as a user ID for computation.
/// </summary>
/// <remarks>
/// Intended for recommendation experiments, where it is paired with <see cref="ItemIdColumnName"/>.
/// </remarks>
public string UserIdColumnName { get; set; }

/// <summary>
/// The dataset column to use as an item ID for computation.
/// </summary>
/// <remarks>
/// Intended for recommendation experiments, where it is paired with <see cref="UserIdColumnName"/>.
/// </remarks>
public string ItemIdColumnName { get; set; }

/// <summary>
/// The dataset column to use for example weight.
/// </summary>
Expand Down
78 changes: 78 additions & 0 deletions src/Microsoft.ML.AutoML/API/RecommendationExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML
{
/// <summary>
/// Settings that configure an AutoML experiment on a recommendation dataset.
/// </summary>
public sealed class RecommendationExperimentSettings : ExperimentSettings
{
    /// <summary>
    /// The metric the experiment tries to optimize while it searches for a model.
    /// </summary>
    /// <value>The default value is <see cref="RegressionMetric.RSquared"/>.</value>
    public RegressionMetric OptimizingMetric { get; set; }

    /// <summary>
    /// The trainers the AutoML experiment is allowed to use.
    /// </summary>
    /// <value>The default value is a collection auto-populated with all possible trainers (all values of <see cref="RecommendationTrainer" />).</value>
    public ICollection<RecommendationTrainer> Trainers { get; }

    /// <summary>
    /// Creates a <see cref="RecommendationExperimentSettings"/> populated with the default
    /// optimizing metric and every available trainer.
    /// </summary>
    public RecommendationExperimentSettings()
    {
        // Start from every value declared on the trainer enum.
        IEnumerable<RecommendationTrainer> everyTrainer =
            Enum.GetValues(typeof(RecommendationTrainer)).Cast<RecommendationTrainer>();

        Trainers = everyTrainer.ToList();
        OptimizingMetric = RegressionMetric.RSquared;
    }
}

/// <summary>
/// Enumeration of ML.NET recommendation trainers used by AutoML.
/// </summary>
public enum RecommendationTrainer
{
/// <summary>
/// Matrix factorization trainer. Currently the only recommendation trainer listed here.
/// </summary>
MatrixFactorization
}

/// <summary>
/// AutoML experiment on recommendation datasets. Runs are scored with <see cref="RegressionMetrics"/>.
/// </summary>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[RecommendationExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/RecommendationExperiment.cs)]
/// ]]></format>
/// </example>
public sealed class RecommendationExperiment : ExperimentBase<RegressionMetrics, RecommendationExperimentSettings>
{
    // Forwards the context, a metrics agent and metric info built from the settings'
    // optimizing metric, the settings, the task kind and the trainer names to the base experiment.
    internal RecommendationExperiment(MLContext context, RecommendationExperimentSettings settings)
        : base(context,
              new RegressionMetricsAgent(context, settings.OptimizingMetric),
              new OptimizingMetricInfo(settings.OptimizingMetric),
              settings,
              TaskKind.Recommendation,
              TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
    {
    }

    // Picks the best cross-validation run using the experiment's metrics agent and optimization direction.
    private protected override CrossValidationRunDetail<RegressionMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);

    // Picks the best train/validate run using the experiment's metrics agent and optimization direction.
    private protected override RunDetail<RegressionMetrics> GetBestRun(IEnumerable<RunDetail<RegressionMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
19 changes: 18 additions & 1 deletion src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML
{
Expand Down Expand Up @@ -47,6 +46,16 @@ internal static class ColumnInformationUtil
return ColumnPurpose.Ignore;
}

if (columnName == columnInfo.UserIdColumnName)
{
return ColumnPurpose.UserId;
}

if (columnName == columnInfo.ItemIdColumnName)
{
return ColumnPurpose.ItemId;
}

return null;
}

Expand Down Expand Up @@ -76,6 +85,12 @@ internal static ColumnInformation BuildColumnInfo(IEnumerable<(string name, Colu
case ColumnPurpose.NumericFeature:
columnInfo.NumericColumnNames.Add(column.name);
break;
case ColumnPurpose.UserId:
columnInfo.UserIdColumnName = column.name;
break;
case ColumnPurpose.ItemId:
columnInfo.ItemIdColumnName = column.name;
break;
case ColumnPurpose.TextFeature:
columnInfo.TextColumnNames.Add(column.name);
break;
Expand All @@ -98,6 +113,8 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
{
var columnNames = new List<string>();
AddStringToListIfNotNull(columnNames, columnInformation.LabelColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.UserIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ItemIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ExampleWeightColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.SamplingKeyColumnName);
AddStringsToListIfNotNull(columnNames, columnInformation.CategoricalColumnNames);
Expand Down
4 changes: 3 additions & 1 deletion src/Microsoft.ML.AutoML/ColumnInference/ColumnPurpose.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ internal enum ColumnPurpose
TextFeature = 4,
Weight = 5,
ImagePath = 6,
SamplingKey = 7
SamplingKey = 7,
UserId = 8,
ItemId = 9
}
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Experiment/SuggestedTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public SuggestedTrainer Clone()
return new SuggestedTrainer(_mlContext, _trainerExtension, _columnInfo, HyperParamSet?.Clone());
}

public ITrainerEstimator<ISingleFeaturePredictionTransformer<object>, object> BuildTrainer()
public ITrainerEstimator<IPredictionTransformer<object>, object> BuildTrainer()
{
IEnumerable<SweepableParam> sweepParams = null;
if (HyperParamSet != null)
Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
<ProjectReference Include="..\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj" />
<ProjectReference Include="..\Microsoft.ML.Recommender\Microsoft.ML.Recommender.csproj" />
<ProjectReference Include="..\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
</ItemGroup>

Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.AutoML/TaskKind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ internal enum TaskKind
BinaryClassification,
MulticlassClassification,
Regression,
Recommendation
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace Microsoft.ML.AutoML
{
using ITrainerEstimator = ITrainerEstimator<ISingleFeaturePredictionTransformer<object>, object>;
using ITrainerEstimator = ITrainerEstimator<IPredictionTransformer<object>, object>;

internal class AveragedPerceptronBinaryExtension : ITrainerExtension
{
Expand Down
Loading

0 comments on commit ee8418a

Please sign in to comment.