Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Input row type for the ranking sample. Each field is bound to a column of the
/// loaded text file by its zero-based index via <see cref="LoadColumnAttribute"/>.
/// </summary>
public class SearchData
{
/// <summary>
/// Value loaded from column 0. The ranking sample passes the name of this field
/// ("GroupId") to AutoML as the group ID column.
/// </summary>
[LoadColumn(0)]
public string GroupId;

/// <summary>
/// Value loaded from column 1.
/// </summary>
[LoadColumn(1)]
public float Features;

/// <summary>
/// Value loaded from column 2. The ranking sample passes the name of this field
/// ("Label") to AutoML as the label column.
/// </summary>
[LoadColumn(2)]
public float Label;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Output row type used by the ranking sample when creating a prediction engine.
/// </summary>
public class SearchDataPrediction
{
/// <summary>
/// Value read from the model output column named "PredictedLabel".
/// </summary>
/// <remarks>
/// NOTE(review): confirm that the ranking pipeline actually produces a "PredictedLabel"
/// column; ranking trainers are presumably expected to emit only "Score" - verify.
/// </remarks>
[ColumnName("PredictedLabel")]
public float Prediction;

/// <summary>
/// Value of the model output column matching this property's name ("Score").
/// </summary>
public float Score { get; set; }
}
}
3 changes: 3 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ public static void Main(string[] args)
MulticlassClassificationExperiment.Run();
Console.Clear();

RankingExperiment.Run();
Console.Clear();

Console.WriteLine("Done");
}
catch (Exception ex)
Expand Down
89 changes: 89 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/RankingExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML.Samples.DataStructures;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample that runs an AutoML ranking experiment end to end: loads train and test data,
/// runs the experiment, prints the best run's metrics, evaluates the best model on the
/// test data, saves the model and makes two single-row predictions.
/// </summary>
public static class RankingExperiment
{
    private static readonly string TrainDataPath = "<Path to your train dataset goes here>";
    private static readonly string TestDataPath = "<Path to your test dataset goes here>";
    private static readonly string ModelPath = @"<Desired model output directory goes here>\Model.zip";
    private static readonly string LabelColumnName = "Label";
    private static readonly string GroupColumnName = "GroupId";
    private static readonly uint ExperimentTime = 60;

    /// <summary>
    /// Runs the sample. Writes progress and results to the console and blocks on a key press at the end.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<SearchData>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<SearchData>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML ranking experiment for {ExperimentTime} seconds...");
        ExperimentResult<RankingMetrics> experimentResult = mlContext.Auto()
            .CreateRankingExperiment(new RankingExperimentSettings() { MaxExperimentTimeInSeconds = ExperimentTime })
            .Execute(trainDataView, testDataView,
                new ColumnInformation()
                {
                    LabelColumnName = LabelColumnName,
                    GroupIdColumnName = GroupColumnName
                });

        // STEP 3: Print metric from best model
        RunDetail<RankingMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine($"Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        // The group column is passed explicitly so that evaluation keeps working
        // if GroupColumnName is changed away from the evaluator's default name.
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RankingMetrics testMetrics = mlContext.Ranking.Evaluate(
            testDataViewWithBestScore,
            labelColumnName: LabelColumnName,
            rowGroupColumnName: GroupColumnName);
        Console.WriteLine($"Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        mlContext.Model.Save(bestRun.Model, trainDataView.Schema, ModelPath);

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<SearchData, SearchDataPrediction>(bestRun.Model);

        // STEP 7: Initialize a new test, and get the prediction
        var testPage = new SearchData
        {
            GroupId = "1",
            Features = 9,
            Label = 1
        };
        var prediction = predictionEngine.Predict(testPage);
        Console.WriteLine($"Predicted: {prediction.Prediction}");

        // New Page
        testPage = new SearchData
        {
            GroupId = "2",
            Features = 2,
            Label = 9
        };
        prediction = predictionEngine.Predict(testPage);
        Console.WriteLine($"Predicted: {prediction.Prediction}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes the NDCG and DCG values of <paramref name="metrics"/> to the console.
    /// </summary>
    /// <param name="metrics">Ranking metrics to print.</param>
    private static void PrintMetrics(RankingMetrics metrics)
    {
        // Both metrics are lists of values; join them so the values are printed
        // instead of the collection's type name.
        Console.WriteLine($"NormalizedDiscountedCumulativeGains: {string.Join(", ", metrics.NormalizedDiscountedCumulativeGains)}");
        Console.WriteLine($"DiscountedCumulativeGains: {string.Join(", ", metrics.DiscountedCumulativeGains)}");
    }
}
}
35 changes: 35 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,41 @@ public RecommendationExperiment CreateRecommendationExperiment(RecommendationExp
return new RecommendationExperiment(_context, experimentSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a ranking dataset, limited by a time budget.
/// </summary>
/// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
/// <returns>A new AutoML ranking experiment.</returns>
/// <remarks>
/// <para>See <see cref="RankingExperiment"/> for a more detailed code example of an AutoML ranking experiment.</para>
/// <para>The experiment can outlast <paramref name="maxExperimentTimeInSeconds"/>: once AutoML
/// starts training an ML.NET model, it lets that model train to completion. For example, if the
/// first model takes 4 hours to train and the second takes 5 hours, while
/// <paramref name="maxExperimentTimeInSeconds"/> corresponds to 6 hours, the experiment runs
/// for 4 + 5 = 9 hours (not 6 hours).</para>
/// </remarks>
public RankingExperiment CreateRankingExperiment(uint maxExperimentTimeInSeconds)
{
    // Start from the default settings and override only the time budget.
    var rankingSettings = new RankingExperimentSettings();
    rankingSettings.MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds;

    return new RankingExperiment(_context, rankingSettings);
}

/// <summary>
/// Creates a new AutoML experiment to run on a ranking dataset, using caller-supplied settings.
/// </summary>
/// <param name="experimentSettings">Settings for the AutoML experiment.</param>
/// <returns>A new AutoML ranking experiment.</returns>
/// <remarks>
/// See <see cref="RankingExperiment"/> for a more detailed code example of an AutoML ranking experiment.
/// </remarks>
public RankingExperiment CreateRankingExperiment(RankingExperimentSettings experimentSettings)
    => new RankingExperiment(_context, experimentSettings);

/// <summary>
/// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
/// </summary>
Expand Down
5 changes: 5 additions & 0 deletions src/Microsoft.ML.AutoML/API/ColumnInference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public sealed class ColumnInformation
/// </summary>
public string UserIdColumnName { get; set; }

/// <summary>
/// The dataset column to use as a group ID for computation.
/// </summary>
/// <remarks>
/// A column whose name matches this value is assigned the group ID column purpose.
/// </remarks>
public string GroupIdColumnName { get; set; }

/// <summary>
/// The dataset column to use as an item ID for computation.
/// </summary>
Expand Down
124 changes: 124 additions & 0 deletions src/Microsoft.ML.AutoML/API/RankingExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Trainers.LightGbm;

namespace Microsoft.ML.AutoML
{
/// <summary>
/// Settings for AutoML experiments on ranking datasets.
/// </summary>
public sealed class RankingExperimentSettings : ExperimentSettings
{
/// <summary>
/// Metric that AutoML will try to optimize over the course of the experiment.
/// </summary>
/// <value>The default value is <see cref="RankingMetric.Ndcg" />.</value>
public RankingMetric OptimizingMetric { get; set; }

/// <summary>
/// Collection of trainers the AutoML experiment can leverage.
/// </summary>
/// <value>
/// The default value is a collection auto-populated with all possible trainers (all values of <see cref="RankingTrainer" />).
/// </value>
public ICollection<RankingTrainer> Trainers { get; }

/// <summary>
/// Initializes a new instance of <see cref="RankingExperimentSettings"/> with
/// <see cref="OptimizingMetric"/> set to <see cref="RankingMetric.Ndcg"/> and
/// <see cref="Trainers"/> populated with every value of <see cref="RankingTrainer"/>.
/// </summary>
public RankingExperimentSettings()
{
OptimizingMetric = RankingMetric.Ndcg;
Trainers = Enum.GetValues(typeof(RankingTrainer)).OfType<RankingTrainer>().ToList();
}
}
/// <summary>
/// Ranking metric that AutoML will try to optimize over the course of an experiment.
/// </summary>
public enum RankingMetric
{
/// <summary>
/// See <see cref="RankingMetrics.NormalizedDiscountedCumulativeGains"/>.
/// </summary>
Ndcg,
/// <summary>
/// See <see cref="RankingMetrics.DiscountedCumulativeGains"/>.
/// </summary>
Dcg
}
/// <summary>
/// Enumeration of ML.NET ranking trainers used by AutoML.
/// </summary>
/// <remarks>
/// Every value of this enumeration is included in
/// <see cref="RankingExperimentSettings.Trainers"/> by default.
/// </remarks>
public enum RankingTrainer
{
/// <summary>
/// See <see cref="LightGbmRankingTrainer"/>.
/// </summary>
LightGbmRanking,
/// <summary>
/// See <see cref="FastTreeRankingTrainer"/>.
/// </summary>
FastTreeRanking
}

/// <summary>
/// Extension methods for choosing the best run out of a set of ranking experiment results.
/// </summary>
public static class RankingExperimentResultExtensions
{
    /// <summary>
    /// Picks the best run from a sequence of experiment runs.
    /// </summary>
    /// <param name="results">Enumeration of AutoML experiment run results.</param>
    /// <param name="metric">Metric to consider when selecting the best run.</param>
    /// <returns>The best experiment run.</returns>
    public static RunDetail<RankingMetrics> Best(this IEnumerable<RunDetail<RankingMetrics>> results, RankingMetric metric = RankingMetric.Ndcg)
    {
        // The metrics agent is created without an MLContext (null is passed).
        return BestResultUtil.GetBestRun(
            results,
            new RankingMetricsAgent(null, metric),
            new OptimizingMetricInfo(metric).IsMaximizing);
    }

    /// <summary>
    /// Picks the best run from a sequence of experiment cross validation runs.
    /// </summary>
    /// <param name="results">Enumeration of AutoML experiment cross validation run results.</param>
    /// <param name="metric">Metric to consider when selecting the best run.</param>
    /// <returns>The best experiment run.</returns>
    public static CrossValidationRunDetail<RankingMetrics> Best(this IEnumerable<CrossValidationRunDetail<RankingMetrics>> results, RankingMetric metric = RankingMetric.Ndcg)
    {
        // The metrics agent is created without an MLContext (null is passed).
        return BestResultUtil.GetBestRun(
            results,
            new RankingMetricsAgent(null, metric),
            new OptimizingMetricInfo(metric).IsMaximizing);
    }
}

/// <summary>
/// AutoML experiment that operates on ranking datasets.
/// </summary>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[RankingExperiment](~/../docs/samples/docs/samples/Microsoft.ML.AutoML.Samples/RankingExperiment.cs)]
/// ]]></format>
/// </example>
public sealed class RankingExperiment : ExperimentBase<RankingMetrics, RankingExperimentSettings>
{
    // Wires the ranking-specific metrics agent, optimizing-metric info, task kind and
    // trainer names (derived from the settings) into the shared experiment base.
    internal RankingExperiment(MLContext context, RankingExperimentSettings settings)
        : base(
            context,
            new RankingMetricsAgent(context, settings.OptimizingMetric),
            new OptimizingMetricInfo(settings.OptimizingMetric),
            settings,
            TaskKind.Ranking,
            TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
    {
    }

    // Selects the best cross validation run using this experiment's metrics agent and optimization direction.
    private protected override CrossValidationRunDetail<RankingMetrics> GetBestCrossValRun(IEnumerable<CrossValidationRunDetail<RankingMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);

    // Selects the best run using this experiment's metrics agent and optimization direction.
    private protected override RunDetail<RankingMetrics> GetBestRun(IEnumerable<RunDetail<RankingMetrics>> results)
        => BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ internal static class ColumnInformationUtil
return ColumnPurpose.UserId;
}

if (columnName == columnInfo.GroupIdColumnName)
{
return ColumnPurpose.GroupId;
}

if (columnName == columnInfo.ItemIdColumnName)
{
return ColumnPurpose.ItemId;
Expand Down Expand Up @@ -96,6 +101,9 @@ internal static ColumnInformation BuildColumnInfo(IEnumerable<(string name, Colu
case ColumnPurpose.ItemId:
columnInfo.ItemIdColumnName = column.name;
break;
case ColumnPurpose.GroupId:
columnInfo.GroupIdColumnName = column.name;
break;
case ColumnPurpose.TextFeature:
columnInfo.TextColumnNames.Add(column.name);
break;
Expand Down Expand Up @@ -123,6 +131,7 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
AddStringToListIfNotNull(columnNames, columnInformation.LabelColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.UserIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ItemIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.GroupIdColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.ExampleWeightColumnName);
AddStringToListIfNotNull(columnNames, columnInformation.SamplingKeyColumnName);
AddStringsToListIfNotNull(columnNames, columnInformation.CategoricalColumnNames);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.AutoML/ColumnInference/ColumnPurpose.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ internal enum ColumnPurpose
ImagePath = 6,
SamplingKey = 7,
UserId = 8,
ItemId = 9
ItemId = 9,
GroupId = 10
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ internal enum EstimatorName
{
ColumnConcatenating,
ColumnCopying,
Hashing,
KeyToValueMapping,
MissingValueIndicating,
MissingValueReplacing,
Expand All @@ -32,6 +33,7 @@ internal class EstimatorExtensionCatalog
{ EstimatorName.ColumnConcatenating, typeof(ColumnConcatenatingExtension) },
{ EstimatorName.ColumnCopying, typeof(ColumnCopyingExtension) },
{ EstimatorName.KeyToValueMapping, typeof(KeyToValueMappingExtension) },
{ EstimatorName.Hashing, typeof(HashingExtension) },
{ EstimatorName.MissingValueIndicating, typeof(MissingValueIndicatingExtension) },
{ EstimatorName.MissingValueReplacing, typeof(MissingValueReplacingExtension) },
{ EstimatorName.Normalizing, typeof(NormalizingExtension) },
Expand Down
21 changes: 21 additions & 0 deletions src/Microsoft.ML.AutoML/EstimatorExtensions/EstimatorExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,27 @@ private static IEstimator<ITransformer> CreateInstance(MLContext context, string
}
}

/// <summary>
/// Estimator extension that creates a hashing conversion from one input column to one output column.
/// </summary>
internal class HashingExtension : IEstimatorExtension
{
    /// <summary>
    /// Builds the hashing estimator for <paramref name="pipelineNode"/>, using the node's
    /// first input column and first output column.
    /// </summary>
    public IEstimator<ITransformer> CreateInstance(MLContext context, PipelineNode pipelineNode)
    {
        var sourceColumn = pipelineNode.InColumns[0];
        var destinationColumn = pipelineNode.OutColumns[0];
        return CreateInstance(context, sourceColumn, destinationColumn);
    }

    /// <summary>
    /// Creates a suggested transform that pairs a hashing pipeline node with its estimator.
    /// </summary>
    /// <param name="context">ML context used to create the estimator.</param>
    /// <param name="inColumn">Name of the column to hash.</param>
    /// <param name="outColumn">Name of the column receiving the hashed value.</param>
    public static SuggestedTransform CreateSuggestedTransform(MLContext context, string inColumn, string outColumn)
    {
        var node = new PipelineNode(
            EstimatorName.Hashing.ToString(),
            PipelineNodeType.Transform,
            inColumn,
            outColumn);

        return new SuggestedTransform(node, CreateInstance(context, inColumn, outColumn));
    }

    // Note the argument order: the Hash conversion takes the output column name first.
    private static IEstimator<ITransformer> CreateInstance(MLContext context, string inColumn, string outColumn)
        => context.Transforms.Conversion.Hash(outColumn, inColumn);
}

internal class MissingValueIndicatingExtension : IEstimatorExtension
{
public IEstimator<ITransformer> CreateInstance(MLContext context, PipelineNode pipelineNode)
Expand Down
Loading