Skip to content

Commit e285889

Browse files
authored
Conversion catalog samples (#3167)
* adding a sample for convert MultiColumns. Moving files around. * Adjust the samples about ValueMapping * Addressing PR comments
1 parent ac53748 commit e285889

File tree

11 files changed

+339
-336
lines changed

11 files changed

+339
-336
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic
55
{
66
public static class ConvertType
77
{
8-
private sealed class InputData
9-
{
10-
public bool Survived;
11-
}
12-
13-
private sealed class TransformedData
14-
{
15-
public bool Survived { get; set; }
16-
17-
public Int32 SurvivedInt32 { get; set; }
18-
}
19-
208
public static void Example()
219
{
2210
var mlContext = new MLContext(seed: 1);
@@ -51,5 +39,13 @@ public static void Example()
5139
// A: False Aconv:0
5240
// A: False Aconv:0
5341
}
42+
private class InputData
43+
{
44+
public bool Survived;
45+
}
46+
private sealed class TransformedData : InputData
47+
{
48+
public Int32 SurvivedInt32 { get; set; }
49+
}
5450
}
5551
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
using System;
2+
using Microsoft.ML.Data;
3+
4+
namespace Microsoft.ML.Samples.Dynamic
5+
{
6+
// This example illustrates how to convert multiple columns of different types to one type, in this case System.Single.
7+
// This is often a useful data transformation before concatenating the features together and passing them to a particular estimator.
8+
public static class ConvertTypeMultiColumn
9+
{
10+
public static void Example()
11+
{
12+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13+
// as well as the source of randomness.
14+
var mlContext = new MLContext(seed: 1);
15+
16+
var rawData = new[] {
17+
new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145},
18+
new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14},
19+
new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046},
20+
new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206},
21+
new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09},
22+
};
23+
24+
// Convert the data to an IDataView.
25+
var data = mlContext.Data.LoadFromEnumerable(rawData);
26+
27+
// Construct the pipeline.
28+
var pipeline = mlContext.Transforms.Conversion.ConvertType(new[]
29+
{
30+
new InputOutputColumnPair("Converted1", "Feature1"),
31+
new InputOutputColumnPair("Converted2", "Feature2"),
32+
new InputOutputColumnPair("Converted3", "Feature3"),
33+
new InputOutputColumnPair("Converted4", "Feature4"),
34+
},
35+
DataKind.Single);
36+
37+
// Let's fit our pipeline to the data.
38+
var transformer = pipeline.Fit(data);
39+
// Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted
40+
// values of the initial columns.
41+
var transformedData = transformer.Transform(data);
42+
43+
// Shape the transformed data as a strongly typed IEnumerable.
44+
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
45+
46+
// Printing the results.
47+
Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
48+
foreach (var item in convertedData)
49+
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}");
50+
51+
// Transformed data.
52+
//
53+
// Converted1 Converted2 Converted3 Converted4
54+
// 1 0.4 6.368921E+17 0.145
55+
// 0 0.5 6.368916E+17 3.14
56+
// 0 14 6.368916E+17 0.2046
57+
// 0 23 6.368921E+17 0.1206
58+
// 1 8904 6.368924E+17 8.09
59+
60+
}
61+
// The initial data type
62+
private class InputData
63+
{
64+
public bool Feature1;
65+
public string Feature2;
66+
public DateTime Feature3;
67+
public double Feature4;
68+
}
69+
// The resulting data type after the transformation
70+
private class TransformedData : InputData
71+
{
72+
public float Converted1 { get; set; }
73+
public float Converted2 { get; set; }
74+
public float Converted3 { get; set; }
75+
public float Converted4 { get; set; }
76+
}
77+
}
78+
}
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML.Data;
4+
5+
6+
namespace Microsoft.ML.Samples.Dynamic
7+
{
8+
public static class MapValue
9+
{
10+
/// This example demonstrates the use of the ValueMappingEstimator by mapping strings to other string values, strings to keys, and ints to strings.
11+
/// This is useful to map types to a category.
12+
public static void Example()
13+
{
14+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
15+
// as well as the source of randomness.
16+
var mlContext = new MLContext();
17+
18+
// Get a small dataset as an IEnumerable.
19+
var rawData = new[] {
20+
new DataPoint() { Timeframe = "0-4yrs" , Score = 1 },
21+
new DataPoint() { Timeframe = "6-11yrs" , Score = 2 },
22+
new DataPoint() { Timeframe = "12-25yrs" , Score = 3 },
23+
new DataPoint() { Timeframe = "0-5yrs" , Score = 4 },
24+
new DataPoint() { Timeframe = "12-25yrs" , Score = 5 },
25+
new DataPoint() { Timeframe = "25+yrs" , Score = 5 },
26+
};
27+
28+
var data = mlContext.Data.LoadFromEnumerable(rawData);
29+
30+
// Construct the mapping to other strings for the Timeframe column.
31+
var timeframeMap = new Dictionary<string, string>();
32+
timeframeMap["0-4yrs"] = "Short";
33+
timeframeMap["0-5yrs"] = "Short";
34+
timeframeMap["6-11yrs"] = "Medium";
35+
timeframeMap["12-25yrs"] = "Long";
36+
timeframeMap["25+yrs"] = "Long";
37+
38+
// Construct the mapping of strings to keys (uints) for the Timeframe column.
39+
var timeframeKeyMap = new Dictionary<string, uint>();
40+
timeframeKeyMap["0-4yrs"] = 1;
41+
timeframeKeyMap["0-5yrs"] = 1;
42+
timeframeKeyMap["6-11yrs"] = 2;
43+
timeframeKeyMap["12-25yrs"] = 3;
44+
timeframeKeyMap["25+yrs"] = 3;
45+
46+
// Construct the mapping of ints to strings for the Score column.
47+
var scoreMap = new Dictionary<int, string>();
48+
scoreMap[1] = "Low";
49+
scoreMap[2] = "Low";
50+
scoreMap[3] = "Average";
51+
scoreMap[4] = "High";
52+
scoreMap[5] = "High";
53+
54+
// Construct the ML.NET pipeline.
55+
var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe")
56+
.Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score"))
57+
// In the MapValue call below, treatValuesAsKeyType is set to true, so the Label column will be of type KeyDataViewType,
58+
// and it can be used as input for trainers performing multiclass classification.
59+
.Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true));
60+
61+
// Fits the pipeline to the data.
62+
IDataView transformedData = pipeline.Fit(data).Transform(data);
63+
64+
// Getting the resulting data as an IEnumerable.
65+
// This will contain the newly created columns.
66+
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
67+
68+
Console.WriteLine($" Timeframe TimeframeCategory Label Score ScoreCategory");
69+
foreach (var featureRow in features)
70+
Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}");
71+
72+
// TransformedData obtained post-transformation.
73+
//
74+
// Timeframe TimeframeCategory Label Score ScoreCategory
75+
// 0-4yrs Short 1 1 Low
76+
// 6-11yrs Medium 2 2 Low
77+
// 12-25yrs Long 3 3 Average
78+
// 0-5yrs Short 1 4 High
79+
// 12-25yrs Long 3 5 High
80+
// 25+yrs Long 3 5 High
81+
}
82+
private class DataPoint
83+
{
84+
public string Timeframe { get; set; }
85+
public int Score { get; set; }
86+
}
87+
private class TransformedData : DataPoint
88+
{
89+
public string TimeframeCategory { get; set; }
90+
public string ScoreCategory { get; set; }
91+
public uint Label { get; set; }
92+
}
93+
}
94+
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
using System;
2+
using System.Collections.Generic;
3+
4+
namespace Microsoft.ML.Samples.Dynamic
5+
{
6+
public static class MapValueIdvLookup
7+
{
8+
/// This example demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView.
9+
/// This is useful to map types to a grouping.
10+
public static void Example()
11+
{
12+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13+
// as well as the source of randomness.
14+
var mlContext = new MLContext();
15+
16+
// Get a small dataset as an IEnumerable.
17+
var rawData = new[] {
18+
new DataPoint() { Price = 3.14f },
19+
new DataPoint() { Price = 2000f },
20+
new DataPoint() { Price = 1.19f },
21+
new DataPoint() { Price = 2.17f },
22+
new DataPoint() { Price = 33.784f },
23+
24+
};
25+
26+
// Convert to IDataView
27+
var data = mlContext.Data.LoadFromEnumerable(rawData);
28+
29+
// Create the lookup map data IEnumerable.
30+
var lookupData = new[] {
31+
new LookupMap { Value = 3.14f, Category = "Low" },
32+
new LookupMap { Value = 1.19f , Category = "Low" },
33+
new LookupMap { Value = 2.17f , Category = "Low" },
34+
new LookupMap { Value = 33.784f, Category = "Medium" },
35+
new LookupMap { Value = 2000f, Category = "High"}
36+
37+
};
38+
39+
// Convert to IDataView
40+
var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData);
41+
42+
// Constructs the ValueMappingEstimator making the ML.NET pipeline
43+
var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema["Category"], "Price");
44+
45+
// Fits the ValueMappingEstimator and transforms the data converting the Price to PriceCategory.
46+
IDataView transformedData = pipeline.Fit(data).Transform(data);
47+
48+
// Getting the resulting data as an IEnumerable.
49+
IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
50+
51+
Console.WriteLine($" Price PriceCategory");
52+
foreach (var featureRow in features)
53+
Console.WriteLine($"{featureRow.Price}\t\t{featureRow.PriceCategory}");
54+
55+
// TransformedData obtained post-transformation.
56+
//
57+
// Price PriceCategory
58+
// 3.14 Low
59+
// 2000 High
60+
// 1.19 Low
61+
// 2.17 Low
62+
// 33.784 Medium
63+
}
64+
65+
// Type for the IDataView that will be serving as the map
66+
private class LookupMap
67+
{
68+
public float Value { get; set; }
69+
public string Category { get; set; }
70+
}
71+
private class DataPoint
72+
{
73+
public float Price { get; set; }
74+
}
75+
private class TransformedData : DataPoint
76+
{
77+
public string PriceCategory { get; set; }
78+
}
79+
}
80+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
using System;
2+
using System.Collections.Generic;
3+
namespace Microsoft.ML.Samples.Dynamic
4+
{
5+
public static class MapValueToArray
6+
{
7+
/// This example demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays.
8+
/// This functionality is useful when the generated column will serve as the Features column for a trainer, since most trainers take a numeric vector as the Features column.
9+
/// In this example, we are mapping the Timeframe data to arbitrary integer arrays.
10+
public static void Example()
11+
{
12+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
13+
// as well as the source of randomness.
14+
var mlContext = new MLContext();
15+
16+
// Get a small dataset as an IEnumerable.
17+
var rawData = new[] {
18+
new DataPoint() { Timeframe = "0-4yrs" },
19+
new DataPoint() { Timeframe = "6-11yrs" },
20+
new DataPoint() { Timeframe = "12-25yrs" },
21+
new DataPoint() { Timeframe = "0-5yrs" },
22+
new DataPoint() { Timeframe = "12-25yrs" },
23+
new DataPoint() { Timeframe = "25+yrs" },
24+
};
25+
26+
var data = mlContext.Data.LoadFromEnumerable(rawData);
27+
28+
// Creating a list of key-value pairs to indicate the mapping between the
29+
// DataPoint values, and the arrays they should map to.
30+
var timeframeMap = new Dictionary<string, int[]>();
31+
timeframeMap["0-4yrs"] = new int[] { 0, 5, 300 };
32+
timeframeMap["0-5yrs"] = new int[] { 0, 5, 300 };
33+
timeframeMap["6-11yrs"] = new int[] { 6, 11, 300 };
34+
timeframeMap["12-25yrs"] = new int[] { 12, 50, 300 };
35+
timeframeMap["25+yrs"] = new int[] { 12, 50, 300 };
36+
37+
// Constructs the ValueMappingEstimator making the ML.NET pipeline.
38+
var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe");
39+
40+
// Fits the ValueMappingEstimator and transforms the data adding the Features column.
41+
IDataView transformedData = pipeline.Fit(data).Transform(data);
42+
43+
// Getting the resulting data as an IEnumerable.
44+
IEnumerable<TransformedData> featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);
45+
46+
Console.WriteLine($"Timeframe Features");
47+
foreach (var featureRow in featuresColumn)
48+
Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Features)}");
49+
50+
// Timeframe Features
51+
// 0-4yrs 0, 5, 300
52+
// 6-11yrs 6, 11, 300
53+
// 12-25yrs 12, 50, 300
54+
// 0-5yrs 0, 5, 300
55+
// 12-25yrs 12, 50, 300
56+
// 25+yrs 12, 50, 300
57+
}
58+
public class DataPoint
59+
{
60+
public string Timeframe { get; set; }
61+
}
62+
public class TransformedData : DataPoint
63+
{
64+
public int[] Features { get; set; }
65+
}
66+
}
67+
}

0 commit comments

Comments
 (0)