Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changed default value of RowGroupColumnName from null to GroupId #5290

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/Microsoft.ML.FastTree/FastTreeArguments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using System;
using Microsoft.ML.CommandLine;
using Microsoft.ML.Data;
using Microsoft.ML.EntryPoints;
using Microsoft.ML.Internal.Internallearn;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -301,6 +302,7 @@ public EarlyStoppingRankingMetric EarlyStoppingMetric
/// <summary>
/// Initializes the ranking options with their default early-stopping metric and row-group column name.
/// </summary>
public Options()
{
EarlyStoppingMetric = EarlyStoppingRankingMetric.NdcgAt1; // Use NDCG@1 by default.
RowGroupColumnName = DefaultColumnNames.GroupId; // Use GroupId as default for ranking options.
}

/// <summary>
/// Explicit component-factory implementation: constructs a <see cref="FastTreeRankingTrainer"/>
/// from <paramref name="env"/> and this options instance.
/// </summary>
ITrainer IComponentFactory<ITrainer>.CreateComponent(IHostEnvironment env) => new FastTreeRankingTrainer(env, this);
Expand Down
5 changes: 5 additions & 0 deletions src/Microsoft.ML.LightGbm/LightGbmRankingTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ static Options()
NameMapping.Add(nameof(EvaluateMetricType.NormalizedDiscountedCumulativeGain), "ndcg");
}

/// <summary>
/// Initializes the ranking options so that the row-group column name defaults to
/// <see cref="DefaultColumnNames.GroupId"/>.
/// </summary>
public Options()
{
RowGroupColumnName = DefaultColumnNames.GroupId; // Use GroupId as default for ranking options.
}

internal override Dictionary<string, object> ToDictionary(IHost host)
{
var res = base.ToDictionary(host);
Expand Down
27 changes: 27 additions & 0 deletions src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using Microsoft.ML.Data;

Expand Down Expand Up @@ -262,6 +263,17 @@ public static IEnumerable<BinaryLabelFloatFeatureVectorFloatWeightSample> Genera
return data;
}

/// <summary>
/// Sample row carrying a float label, a fixed-length float feature vector and an unsigned 64-bit group id.
/// </summary>
public class FloatLabelFloatFeatureVectorUlongGroupIdSample
{
/// <summary>Label value of the sample.</summary>
public float Label;

/// <summary>Feature values; the vector length is declared as <c>_simpleBinaryClassSampleFeatureLength</c>.</summary>
[VectorType(_simpleBinaryClassSampleFeatureLength)]
public float[] Features;

/// <summary>Group identifier of the sample, annotated as a key type.</summary>
// NOTE(review): the attribute argument (ulong.MaxValue - 1) is presumably the key count — confirm against KeyTypeAttribute.
[KeyType(ulong.MaxValue - 1)]
public ulong GroupId;
}

public class FloatLabelFloatFeatureVectorSample
{
public float Label;
Expand All @@ -270,6 +282,21 @@ public class FloatLabelFloatFeatureVectorSample
public float[] Features;
}

/// <summary>
/// Generates samples that pair the labels and features produced by
/// <c>GenerateFloatLabelFloatFeatureVectorSamples</c> with a pseudo-random group id.
/// </summary>
/// <param name="exampleCount">Number of samples to generate.</param>
/// <param name="naRate">Passed through unchanged to <c>GenerateFloatLabelFloatFeatureVectorSamples</c>.</param>
/// <param name="minGroupId">Inclusive lower bound of the generated group ids. Must not exceed <see cref="int.MaxValue"/>.</param>
/// <param name="maxGroupId">
/// Exclusive upper bound of the generated group ids (the bounds are handed to <see cref="Random.Next(int, int)"/>).
/// Must not exceed <see cref="int.MaxValue"/> and must not be smaller than <paramref name="minGroupId"/>.
/// </param>
/// <returns>A list of <paramref name="exampleCount"/> samples; the sequence is deterministic because the generator is seeded with 0.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// A bound is larger than <see cref="int.MaxValue"/>, or <paramref name="minGroupId"/> is greater than <paramref name="maxGroupId"/>.
/// </exception>
public static IEnumerable<FloatLabelFloatFeatureVectorUlongGroupIdSample> GenerateFloatLabelFloatFeatureVectorUlongGroupIdSamples(int exampleCount, double naRate = 0, ulong minGroupId = 1, ulong maxGroupId = 5)
{
    // The bounds are narrowed to int for Random.Next; reject values that would silently wrap in the cast.
    if (minGroupId > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(minGroupId), minGroupId, "Group id bounds must not exceed int.MaxValue.");
    }

    if (maxGroupId > int.MaxValue)
    {
        throw new ArgumentOutOfRangeException(nameof(maxGroupId), maxGroupId, "Group id bounds must not exceed int.MaxValue.");
    }

    if (minGroupId > maxGroupId)
    {
        throw new ArgumentOutOfRangeException(nameof(minGroupId), minGroupId, "minGroupId must not be greater than maxGroupId.");
    }

    int lowerInclusive = (int)minGroupId;
    int upperExclusive = (int)maxGroupId;

    var rnd = new Random(0);
    var intermediate = GenerateFloatLabelFloatFeatureVectorSamples(exampleCount, naRate).ToList();
    var data = new List<FloatLabelFloatFeatureVectorUlongGroupIdSample>(intermediate.Count);

    for (int i = 0; i < exampleCount; ++i)
    {
        var source = intermediate[i];
        data.Add(new FloatLabelFloatFeatureVectorUlongGroupIdSample
        {
            Label = source.Label,
            Features = source.Features,
            // One draw per sample, in sample order, so the id sequence is reproducible.
            GroupId = (ulong)rnd.Next(lowerInclusive, upperExclusive)
        });
    }

    return data;
}

public static IEnumerable<FloatLabelFloatFeatureVectorSample> GenerateFloatLabelFloatFeatureVectorSamples(int exampleCount, double naRate = 0)
{
var rnd = new Random(0);
Expand Down
6 changes: 3 additions & 3 deletions test/BaselineOutput/Common/EntryPoints/core_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -7675,7 +7675,7 @@
"Required": false,
"SortOrder": 5.0,
"IsNullable": false,
"Default": null
Copy link
Member

@antoniovs1029 antoniovs1029 Jul 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: Just curious, was the manifest automatically updated simply by adding the constructor logic you added?

"Default": "GroupId"
},
{
"Name": "NormalizeFeatures",
Expand Down Expand Up @@ -12532,7 +12532,7 @@
"Required": false,
"SortOrder": 5.0,
"IsNullable": false,
"Default": null
"Default": "GroupId"
},
{
"Name": "NormalizeFeatures",
Expand Down Expand Up @@ -27371,7 +27371,7 @@
"Required": false,
"SortOrder": 5.0,
"IsNullable": false,
"Default": null
"Default": "GroupId"
},
{
"Name": "NormalizeFeatures",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ public void TestFastTreeTweedieFeaturizationInPipeline()
public void TestFastTreeRankingFeaturizationInPipeline()
{
int dataPointCount = 200;
var data = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(dataPointCount).ToList();
var data = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorUlongGroupIdSamples(dataPointCount).ToList();
var dataView = ML.Data.LoadFromEnumerable(data);
dataView = ML.Data.Cache(dataView);

Expand Down