-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Creation of components through MLContext, internalization, and renaming #2510
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
a54fa85
general cleanup
artidoro 9499363
added catalog extensions and moved tensorflow arguments
artidoro 40ba094
fix doc for dnnimagefeaturizerestimator
artidoro a2ec77f
resolving review comments and adding samples
artidoro 49a3114
adding references to samples and regenerating catalog
artidoro 55ecb11
fixing samples
artidoro 205958c
resolving review comments
artidoro File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
67 changes: 67 additions & 0 deletions
67
docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
using System; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Trainers; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample that trains the Prior binary classification trainer on the sentiment dataset | ||
// and prints the accuracy measured on the test split. | ||
public class PriorTrainerSample | ||
{ | ||
// Downloads the data, builds and fits the pipeline, evaluates it, and writes the accuracy to the console. | ||
public static void Example() | ||
{ | ||
// Downloading the dataset from github.com/dotnet/machinelearning. | ||
// This will create a sentiment.tsv file in the filesystem. | ||
// You can open this file, if you want to see the data. | ||
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); | ||
|
||
// A preview of the data. | ||
// Sentiment SentimentText | ||
// 0 " :Erm, thank you. " | ||
// 1 ==You're cool== | ||
|
||
// Create a new context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Step 1: Read the data as an IDataView. | ||
// First, we define the reader: specify the data columns and where to find them in the text file. | ||
// Column 0 is read as the float "Sentiment" column and column 1 as the "SentimentText" text column. | ||
var reader = mlContext.Data.CreateTextLoader( | ||
columns: new[] | ||
{ | ||
new TextLoader.Column("Sentiment", DataKind.R4, 0), | ||
new TextLoader.Column("SentimentText", DataKind.Text, 1) | ||
}, | ||
hasHeader: true | ||
); | ||
|
||
// Read the data | ||
var data = reader.Read(dataFile); | ||
|
||
// Split it between training and test data | ||
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); | ||
|
||
// Step 2: Pipeline | ||
// Featurize the text column through the FeaturizeText API. | ||
// Then append the Prior binary classifier, setting the "Sentiment" column as the label of the dataset. | ||
// The featurized text is written to the "Features" column. | ||
var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") | ||
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. | ||
.Append(mlContext.BinaryClassification.Trainers.Prior(labelColumn: "Sentiment")); | ||
|
||
// Step 3: Train the pipeline | ||
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); | ||
|
||
// Step 4: Evaluate on the test set | ||
// The label column name must be passed explicitly because it is "Sentiment" in this dataset. | ||
var transformedData = trainedPipeline.Transform(trainTestData.TestSet); | ||
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); | ||
|
||
// Step 5: Inspect the output | ||
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); | ||
|
||
// The Prior trainer outputs the proportion of a label in the dataset as the probability of that label. | ||
// In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset. | ||
// Expected output: | ||
// Accuracy: 0.647058823529412 | ||
} | ||
} | ||
} |
67 changes: 67 additions & 0 deletions
67
docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
using System; | ||
using System.Linq; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Trainers; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample that trains the Random binary classification trainer on the sentiment dataset | ||
// and prints the accuracy measured on the test split. | ||
public class RandomTrainerSample | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Zeeshan A, Shahab and me have PRs where we create BinaryClassification folder. |
||
{ | ||
// Downloads the data, builds and fits the pipeline, evaluates it, and writes the accuracy to the console. | ||
public static void Example() | ||
{ | ||
// Downloading the dataset from github.com/dotnet/machinelearning. | ||
// This will create a sentiment.tsv file in the filesystem. | ||
// You can open this file, if you want to see the data. | ||
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); | ||
|
||
// A preview of the data. | ||
// Sentiment SentimentText | ||
// 0 " :Erm, thank you. " | ||
// 1 ==You're cool== | ||
|
||
// Create a new context for ML.NET operations. It can be used for exception tracking and logging, | ||
// as a catalog of available operations and as the source of randomness. | ||
// A fixed seed is passed here; presumably this keeps the random predictions reproducible (confirm). | ||
var mlContext = new MLContext(seed: 1); | ||
|
||
// Step 1: Read the data as an IDataView. | ||
// First, we define the reader: specify the data columns and where to find them in the text file. | ||
// Column 0 is read as the float "Sentiment" column and column 1 as the "SentimentText" text column. | ||
var reader = mlContext.Data.CreateTextLoader( | ||
columns: new[] | ||
{ | ||
new TextLoader.Column("Sentiment", DataKind.R4, 0), | ||
new TextLoader.Column("SentimentText", DataKind.Text, 1) | ||
}, | ||
hasHeader: true | ||
); | ||
|
||
// Read the data | ||
var data = reader.Read(dataFile); | ||
|
||
// Split it between training and test data | ||
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); | ||
|
||
// Step 2: Pipeline | ||
// Featurize the text column through the FeaturizeText API, writing the result to the "Features" column. | ||
// Then append the Random binary classifier. No label column is passed to the trainer here; | ||
// the "Sentiment" label is only supplied to Evaluate in step 4. | ||
var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") | ||
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. | ||
.Append(mlContext.BinaryClassification.Trainers.Random()); | ||
|
||
// Step 3: Train the pipeline | ||
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); | ||
|
||
// Step 4: Evaluate on the test set | ||
// The label column name must be passed explicitly because it is "Sentiment" in this dataset. | ||
var transformedData = trainedPipeline.Transform(trainTestData.TestSet); | ||
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); | ||
|
||
// Step 5: Inspect the output | ||
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); | ||
|
||
// We expect an accuracy close to 0.5, as the Random trainer outputs a random prediction. | ||
// Regardless of the input features, the trainer will predict either positive or negative label with equal probability. | ||
// Expected output (close to 0.5): | ||
artidoro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Accuracy: 0.588235294117647 | ||
} | ||
} | ||
} |
70 changes: 70 additions & 0 deletions
70
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
using System; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// Sample showing a custom row-to-row mapping used both as a standalone transformer | ||
// and as the first step of an estimator pipeline that ends in a FastTree binary classifier. | ||
public class CustomMappingSample | ||
{ | ||
// Loads the infert data, applies the custom mapping directly, then fits and applies a pipeline that uses it. | ||
// Nothing is printed or returned; the transformed data is only held in a local variable. | ||
public static void Example() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Get a small dataset as an IEnumerable and convert it to an IDataView. | ||
var data = SamplesUtils.DatasetUtils.GetInfertData(); | ||
var trainData = mlContext.Data.ReadFromEnumerable(data); | ||
|
||
// Preview of the data. | ||
// | ||
// Age Case Education Induced Parity PooledStratum RowNum ... | ||
// 26 1 0-5yrs 1 6 3 1 ... | ||
// 42 1 0-5yrs 1 1 1 2 ... | ||
// 39 1 0-5yrs 2 6 4 3 ... | ||
// 34 1 0-5yrs 2 4 2 4 ... | ||
// 35 1 6-11yrs 1 3 32 5 ... | ||
|
||
// We define the custom mapping between input and output rows that will be applied by the transformation. | ||
// The mapping only assigns IsUnderThirty (true when Age is below 30); no other output property is written. | ||
Action<SamplesUtils.DatasetUtils.SampleInfertData, SampleInfertDataTransformed> mapping = | ||
(input, output) => output.IsUnderThirty = input.Age < 30; | ||
|
||
// Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly. | ||
// NOTE(review): the second argument is passed as null; presumably it is the contract name used when saving the model - confirm. | ||
var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null); | ||
var transformedData = transformer.Transform(trainData); | ||
|
||
// Preview of the data. | ||
// | ||
// IsUnderThirty Age Case Education Induced Parity PooledStratum RowNum ... | ||
// true 26 1 0-5yrs 1 6 3 1 ... | ||
// false 42 1 0-5yrs 1 1 1 2 ... | ||
// false 39 1 0-5yrs 2 6 4 3 ... | ||
// false 34 1 0-5yrs 2 4 2 4 ... | ||
// false 35 1 6-11yrs 1 3 32 5 ... | ||
|
||
// Here instead we use it as part of a pipeline of estimators. | ||
var pipeline = mlContext.Transforms.CustomMapping(mapping, null) | ||
.Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", inputColumnNames: new[] { "Parity", "Induced" })) | ||
// It is useful to add a caching checkpoint before a trainer that does several passes over the data. | ||
.AppendCacheCheckpoint(mlContext) | ||
// We use binary FastTree to predict the label column that was generated by the custom mapping at the first step of the pipeline. | ||
.Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "IsUnderThirty")); | ||
|
||
// We can train the pipeline and use it to transform data. | ||
// The same trainData is used for both fitting and transforming; the result is not inspected further in this sample. | ||
transformedData = pipeline.Fit(trainData).Transform(trainData); | ||
} | ||
|
||
// Represents the transformed infertility dataset. | ||
// Only IsUnderThirty is assigned by the mapping in Example(); the remaining properties are never set there. | ||
// NOTE(review): confirm whether these unassigned properties override the same-named input columns | ||
// (e.g. "Parity" and "Induced", which the pipeline concatenates into "Features"). | ||
public class SampleInfertDataTransformed | ||
{ | ||
public int RowNum { get; set; } | ||
public string Education { get; set; } | ||
public bool IsUnderThirty { get; set; } | ||
public float Parity { get; set; } | ||
public float Induced { get; set; } | ||
public float Case { get; set; } | ||
public float Spontaneous { get; set; } | ||
public float Stratum { get; set; } | ||
public float PooledStratum { get; set; } | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.