diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index 1fe2f70325..9f607a9c9e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -9,15 +9,21 @@ public static partial class TransformSamples { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert to IDataView. var data = new List() { - new SampleSentimentData { Sentiment = true, SentimentText = "Best game I've ever played." }, - new SampleSentimentData { Sentiment = false, SentimentText = "==RUDE== Dude, 2" }, - new SampleSentimentData { Sentiment = true, SentimentText = "Until the next game, this is the best Xbox game!" } }; + new SampleSentimentData { Sentiment = true, + SentimentText = "Best game I've ever played." }, + + new SampleSentimentData { Sentiment = false, + SentimentText = "==RUDE== Dude, 2" }, + + new SampleSentimentData { Sentiment = true, + SentimentText = "Until the next game, " + + "this is the best Xbox game!" } }; // Convert IEnumerable to IDataView. var trainData = ml.Data.LoadFromEnumerable(data); @@ -29,23 +35,42 @@ public static void Example() // false ==RUDE== Dude, 2. // true Until the next game, this is the best Xbox game! - // A pipeline to tokenize text as characters and then combine them together into n-grams - // The pipeline uses the default settings to featurize. + // A pipeline to tokenize text as characters and then combine them + // together into n-grams. The pipeline uses the default settings to + // featurize. 
+ + var charsPipeline = ml.Transforms.Text + .TokenizeIntoCharactersAsKeys("Chars", "SentimentText", + useMarkerCharacters: false); + + var ngramOnePipeline = ml.Transforms.Text + .ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1); - var charsPipeline = ml.Transforms.Text.TokenizeIntoCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false); - var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1); - var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars"); - var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline); - var twoCharsPipeline = charsPipeline.Append(ngramTwpPipeline); + var ngramTwpPipeline = ml.Transforms.Text + .ProduceNgrams("CharsTwograms", "Chars"); + + var oneCharsPipeline = charsPipeline + .Append(ngramOnePipeline); + + var twoCharsPipeline = charsPipeline + .Append(ngramTwpPipeline); // The transformed data for pipelines. - var transformedData_onechars = oneCharsPipeline.Fit(trainData).Transform(trainData); - var transformedData_twochars = twoCharsPipeline.Fit(trainData).Transform(trainData); + var transformedData_onechars = oneCharsPipeline.Fit(trainData) + .Transform(trainData); + + var transformedData_twochars = twoCharsPipeline.Fit(trainData) + .Transform(trainData); // Small helper to print the text inside the columns, in the console. 
- Action>, VBuffer>> printHelper = (columnName, column, names) => + Action>, + VBuffer>> + printHelper = (columnName, column, names) => + { - Console.WriteLine($"{columnName} column obtained post-transformation."); + Console.WriteLine( + $"{columnName} column obtained post-transformation."); + var slots = names.GetValues(); foreach (var featureRow in column) { @@ -54,12 +79,19 @@ public static void Example() Console.WriteLine(""); } - Console.WriteLine("==================================================="); + Console.WriteLine( + "==================================================="); }; - // Preview of the CharsUnigrams column obtained after processing the input. + // Preview of the CharsUnigrams column obtained after processing the + // input. VBuffer> slotNames = default; - transformedData_onechars.Schema["CharsUnigrams"].GetSlotNames(ref slotNames); - var charsOneGramColumn = transformedData_onechars.GetColumn>(transformedData_onechars.Schema["CharsUnigrams"]); + transformedData_onechars.Schema["CharsUnigrams"] + .GetSlotNames(ref slotNames); + + var charsOneGramColumn = transformedData_onechars + .GetColumn>(transformedData_onechars + .Schema["CharsUnigrams"]); + printHelper("CharsUnigrams", charsOneGramColumn, slotNames); // CharsUnigrams column obtained post-transformation. @@ -67,8 +99,13 @@ public static void Example() // 'e' - 1 '' - 2 'd' - 1 '=' - 4 'R' - 1 'U' - 1 'D' - 2 'E' - 1 'u' - 1 ',' - 1 '2' - 1 // 'B' - 0 'e' - 6 's' - 3 't' - 6 '' - 9 'g' - 2 'a' - 2 'm' - 2 'I' - 0 ''' - 0 'v' - 0 ... // Preview of the CharsTwoGrams column obtained after processing the input. 
- var charsTwoGramColumn = transformedData_twochars.GetColumn>(transformedData_twochars.Schema["CharsTwograms"]); - transformedData_twochars.Schema["CharsTwograms"].GetSlotNames(ref slotNames); + var charsTwoGramColumn = transformedData_twochars + .GetColumn>(transformedData_twochars + .Schema["CharsTwograms"]); + + transformedData_twochars.Schema["CharsTwograms"] + .GetSlotNames(ref slotNames); + printHelper("CharsTwograms", charsTwoGramColumn, slotNames); // CharsTwograms column obtained post-transformation. @@ -78,7 +115,8 @@ public static void Example() } /// - /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment. + /// A dataset that contains a tweet and the sentiment assigned to that + /// tweet: 0 - negative and 1 - positive sentiment. /// public class SampleSentimentData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs index b1a134b192..f19f42348d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/SimpleDataViewImplementation.cs @@ -7,17 +7,21 @@ namespace Samples.Dynamic { /// - /// The interface is the central concept of "data" in ML.NET. While many conveniences exist - /// to create pre-baked implementations, it is also useful to know how to create one completely from scratch. We also - /// take this opportunity to illustrate and motivate the basic principles of how the IDataView system is architected, - /// since people interested in implementing need at least some knowledge of those principles. + /// The interface is the central concept of "data" in + /// ML.NET. While many conveniences exist to create pre-baked implementations, + /// it is also useful to know how to create one completely from scratch. 
We also + /// take this opportunity to illustrate and motivate the basic principles of how + /// the IDataView system is architected, since people interested in + /// implementing need at least some knowledge of those + /// principles. /// public static class SimpleDataViewImplementation { public static void Example() { - // First we create an array of these objects, which we "present" as this IDataView implementation so that it - // can be used in a simple ML.NET pipeline. + // First we create an array of these objects, which we "present" as this + // IDataView implementation so that it can be used in a simple ML.NET + // pipeline. var inputArray = new[] { new InputObject(false, "Hello my friend."), @@ -26,8 +30,8 @@ public static void Example() }; var dataView = new InputObjectDataView(inputArray); - // So, this is a very simple pipeline: a transformer that tokenizes Text, does nothing with the Label column - // at all. + // So, this is a very simple pipeline: a transformer that tokenizes + // Text, does nothing with the Label column at all. var mlContext = new MLContext(); var transformedDataView = mlContext.Transforms.Text.TokenizeIntoWords( "TokenizedText", "Text").Fit(dataView).Transform(dataView); @@ -35,22 +39,31 @@ public static void Example() var textColumn = transformedDataView.Schema["Text"]; var tokensColumn = transformedDataView.Schema["TokenizedText"]; - using (var cursor = transformedDataView.GetRowCursor(new[] { textColumn, tokensColumn })) + using (var cursor = transformedDataView.GetRowCursor( + new[] { textColumn, tokensColumn })) + { - // Note that it is best to get the getters and values *before* iteration, so as to faciliate buffer - // sharing (if applicable), and column-type validation once, rather than many times. + // Note that it is best to get the getters and values *before* + // iteration, so as to facilitate buffer sharing (if applicable), + // and column-type validation once, rather than many times. 
ReadOnlyMemory textValue = default; VBuffer> tokensValue = default; - var textGetter = cursor.GetGetter>(textColumn); - var tokensGetter = cursor.GetGetter>>(tokensColumn); + var textGetter = cursor + .GetGetter>(textColumn); + + var tokensGetter = cursor + .GetGetter>>(tokensColumn); while (cursor.MoveNext()) { textGetter(ref textValue); tokensGetter(ref tokensValue); - Console.WriteLine($"{textValue} => {string.Join(", ", tokensValue.DenseValues())}"); + Console.WriteLine( + $"{textValue} => " + + $"{string.Join(", ", tokensValue.DenseValues())}"); + } // The output to console is this: @@ -59,12 +72,15 @@ public static void Example() // Stay awhile and listen. => Stay, awhile, and, listen. // Masterfully done hero! => Masterfully, done, hero! - // Note that it may be interesting to set a breakpoint on the Console.WriteLine, and explore - // what is going on with the cursor, and the buffers. In particular, on the third iteration, - // while `tokensValue` is logically presented as a three element array, internally you will - // see that the arrays internal to that structure have (at least) four items, specifically: - // `Masterfully`, `done`, `hero!`, `listen.`. In this way we see a simple example of the details - // of how buffer sharing from one iteration to the next actually works. + // Note that it may be interesting to set a breakpoint on the + // Console.WriteLine, and explore what is going on with the cursor, + // and the buffers. In particular, on the third iteration, while + // `tokensValue` is logically presented as a three element array, + // internally you will see that the arrays internal to that + // structure have (at least) four items, specifically: + // `Masterfully`, `done`, `hero!`, `listen.`. In this way we see a + // simple example of the details of how buffer sharing from one + // iteration to the next actually works. 
} } @@ -81,37 +97,51 @@ public InputObject(bool label, string text) } /// - /// This is an implementation of that wraps an - /// of the above . Note that normally under these circumstances, the first - /// recommendation would be to use a convenience like - /// - /// or something like that, rather than implementing outright. However, sometimes when - /// code generation is impossible on some situations, like Unity or other similar platforms, implementing + /// This is an implementation of that wraps an + /// of the above . + /// Note that normally under these circumstances, the first recommendation + /// would be to use a convenience like + /// + /// or something like that, rather than implementing + /// outright. However, sometimes when code generation is impossible on some + /// situations, like Unity or other similar platforms, implementing /// something even closely resembling this may become necessary. /// - /// This implementation of , being didactic, is much simpler than practically - /// anything one would find in the ML.NET codebase. In this case we have a completely fixed schema (the two - /// fields of ), with fixed types. + /// This implementation of , being didactic, is much + /// simpler than practically anything one would find in the ML.NET codebase. + /// In this case we have a completely fixed schema (the two fields of + /// ), with fixed types. /// - /// For , note that we keep a very simple schema based off the members of the object. You - /// may in fact note that it is possible in this specific case, this implementation of - /// could share the same object across all instances of this - /// object, but since this is almost never the case, I do not take advantage of that. + /// For , note that we keep a very simple schema based + /// off the members of the object. 
You may in fact note that it is possible + /// in this specific case, this implementation of + /// could share the same object across all + /// instances of this object, but since this is almost never the case, I do + /// not take advantage of that. /// - /// We have chosen to wrap an , so in fact only a very simple implementation is - /// possible. Specifically: we cannot meaningfully shuffle (so is - /// , and even if a parameter were passed to - /// , we could not make use of it), we do - /// not know the count of the item right away without counting (so, it is most correct for - /// to return , even after we might hypothetically know after - /// the first pass, given the immutability principle of ), and the - /// method returns a single item. + /// We have chosen to wrap an , so in fact only + /// a very simple implementation is possible. Specifically: we cannot + /// meaningfully shuffle (so is + /// , and even if a + /// parameter were passed to + /// , + /// we could not make use of it), we do not know the count of the item right + /// away without counting (so, it is most correct for + /// to return , even after + /// we might hypothetically know after the first pass, given the + /// immutability principle of ), and the + /// method returns a + /// single item. /// - /// The derived class has more documentation specific to its behavior. + /// The derived class has more documentation + /// specific to its behavior. /// - /// Note that this implementation, as well as the nested derived class, does - /// almost no validation of parameters or guard against misuse than we would like from, say, implementations of - /// the same classes within the ML.NET codebase. + /// Note that this implementation, as well as the nested + /// derived class, does almost no validation + /// of parameters or guard against misuse than we would like from, say, + /// implementations of the same classes within the ML.NET codebase. 
/// private sealed class InputObjectDataView : IDataView { @@ -131,52 +161,76 @@ public InputObjectDataView(IEnumerable data) public long? GetRowCount() => null; - public DataViewRowCursor GetRowCursor(IEnumerable columnsNeeded, Random rand = null) - => new Cursor(this, columnsNeeded.Any(c => c.Index == 0), columnsNeeded.Any(c => c.Index == 1)); + public DataViewRowCursor GetRowCursor( + IEnumerable columnsNeeded, + Random rand = null) + + => new Cursor(this, columnsNeeded.Any(c => c.Index == 0), + columnsNeeded.Any(c => c.Index == 1)); + + public DataViewRowCursor[] GetRowCursorSet( + IEnumerable columnsNeeded, int n, + Random rand = null) - public DataViewRowCursor[] GetRowCursorSet(IEnumerable columnsNeeded, int n, Random rand = null) => new[] { GetRowCursor(columnsNeeded, rand) }; /// - /// Having this be a private sealed nested class follows the typical pattern: in most - /// implementations, the cursor instance is almost always that. The only "common" - /// exceptions to this tendency are those implementations that are such thin wrappings of existing - /// without even bothering to change the schema. + /// Having this be a private sealed nested class follows the typical + /// pattern: in most implementations, the cursor + /// instance is almost always that. The only "common" exceptions to this + /// tendency are those implementations that are such thin wrappings of + /// existing without even bothering to change + /// the schema. /// - /// On the subject of schema, note that there is an expectation that the object is - /// reference equal to the object that created this cursor, as we see here. + /// On the subject of schema, note that there is an expectation that + /// the object is reference equal to the + /// object that created this cursor, as + /// we see here. /// - /// Note that returns 0. 
As described in the documentation of that property, that - /// is meant to facilitate the reconciliation of the partitioning of the data in the case where multiple - /// cursors are returned from - /// , but since only one is - /// ever returned from the implementation, this behavior is appropriate. + /// Note that returns 0. As described in the + /// documentation of that property, that is meant to facilitate the + /// reconciliation of the partitioning of the data in the case where + /// multiple cursors are returned from + /// , + /// but since only one is ever returned from the implementation, this + /// behavior is appropriate. /// - /// Similarly, since it is impossible to have a shuffled cursor or a cursor set, it is sufficient for the - /// implementation to return a simple ID based on the position. If, however, this - /// had been something built on, hypothetically, an or some other such structure, and - /// shuffling and partitioning was available, an ID based on the index of whatever item was being returned - /// would be appropriate. + /// Similarly, since it is impossible to have a shuffled cursor or a + /// cursor set, it is sufficient for the + /// implementation to return a simple ID based on the position. If, + /// however, this had been something built on, hypothetically, an + /// or some other such structure, and shuffling + /// and partitioning was available, an ID based on the index of whatever + /// item was being returned would be appropriate. /// - /// Note the usage of the parameters on the - /// implementations. This is most valuable in the case of buffer sharing for , but - /// we still of course have to deal with it here. + /// Note the usage of the parameters on the + /// implementations. This is most + /// valuable in the case of buffer sharing for , + /// but we still of course have to deal with it here. 
/// - /// Note also that we spend a considerable amount of effort to not make the - /// and - /// methods correctly reflect what was asked for from - /// the - /// method that was used to create this method. In this particular case, the point is somewhat moot: this - /// mechanism exists to enable lazy evaluation, but since this cursor is implemented to wrap an - /// which has no concept of lazy evaluation, there is no real practical benefit - /// to doing this. However, it is best of course to illustrate the general principle for the sake of the - /// example. + /// Note also that we spend a considerable amount of effort to make + /// the and + /// methods + /// correctly reflect what was asked for from the + /// method that was used + /// to create this cursor. In this particular case, the point is + /// somewhat moot: this mechanism exists to enable lazy evaluation, + /// but since this cursor is implemented to wrap an + /// which has no concept of lazy + /// evaluation, there is no real practical benefit to doing this. + /// However, it is best of course to illustrate the general principle + /// for the sake of the example. /// - /// Even in this simple form, we see the reason why - /// is beneficial: the implementations themselves are simple to the point - /// where their operation is dwarfed by the simple acts of casting and validation checking one sees in - /// . In this way we only pay the cost of validation - /// and casting once, not every time we get a value. + /// Even in this simple form, we see the reason why + /// is + /// beneficial: the implementations + /// themselves are simple to the point where their operation is dwarfed + /// by the simple acts of casting and validation checking one sees in + /// . In this way + /// we only pay the cost of validation and casting once, not every time + /// we get a value. 
/// private sealed class Cursor : DataViewRowCursor { @@ -189,15 +243,22 @@ private sealed class Cursor : DataViewRowCursor public override long Batch => 0; public override DataViewSchema Schema { get; } - public Cursor(InputObjectDataView parent, bool wantsLabel, bool wantsText) + public Cursor(InputObjectDataView parent, bool wantsLabel, + bool wantsText) + { Schema = parent.Schema; _position = -1; _enumerator = parent._data.GetEnumerator(); _getters = new Delegate[] { - wantsLabel ? (ValueGetter)LabelGetterImplementation : null, - wantsText ? (ValueGetter>)TextGetterImplementation : null + wantsLabel ? + (ValueGetter)LabelGetterImplementation : null, + + wantsText ? + (ValueGetter>) + TextGetterImplementation : null + }; } @@ -217,13 +278,17 @@ protected override void Dispose(bool disposing) private void LabelGetterImplementation(ref bool value) => value = _enumerator.Current.Label; - private void TextGetterImplementation(ref ReadOnlyMemory value) + private void TextGetterImplementation( + ref ReadOnlyMemory value) + => value = _enumerator.Current.Text.AsMemory(); private void IdGetterImplementation(ref DataViewRowId id) => id = new DataViewRowId((ulong)_position, 0); - public override ValueGetter GetGetter(DataViewSchema.Column column) + public override ValueGetter GetGetter( + DataViewSchema.Column column) + { if (!IsColumnActive(column)) throw new ArgumentOutOfRangeException(nameof(column)); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs index 5c49ac6bbd..db8f7c4961 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs @@ -10,16 +10,22 @@ public static class TextTransform { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. 
+ // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert to IDataView. // Get a small dataset as an IEnumerable and convert to IDataView. var data = new List() { - new SampleSentimentData { Sentiment = true, SentimentText = "Best game I've ever played." }, - new SampleSentimentData { Sentiment = false, SentimentText = "==RUDE== Dude, 2" }, - new SampleSentimentData { Sentiment = true, SentimentText = "Until the next game, this is the best Xbox game!" } }; + new SampleSentimentData { Sentiment = true, + SentimentText = "Best game I've ever played." }, + + new SampleSentimentData { Sentiment = false, + SentimentText = "==RUDE== Dude, 2" }, + + new SampleSentimentData { Sentiment = true, + SentimentText = "Until the next game, " + + "this is the best Xbox game!" } }; // Convert IEnumerable to IDataView. var trainData = ml.Data.LoadFromEnumerable(data); @@ -31,29 +37,47 @@ public static void Example() // false ==RUDE== Dude, 2. // true Until the next game, this is the best Xbox game! - // A pipeline for featurization of the "SentimentText" column, and placing the output in a new column named "DefaultTextFeatures" - // The pipeline uses the default settings to featurize. + // A pipeline for featurization of the "SentimentText" column, and + // placing the output in a new column named "DefaultTextFeatures". The + // pipeline uses the default settings to featurize. string defaultColumnName = "DefaultTextFeatures"; - var default_pipeline = ml.Transforms.Text.FeaturizeText(defaultColumnName , "SentimentText"); + var default_pipeline = ml.Transforms.Text + .FeaturizeText(defaultColumnName , "SentimentText"); - // Another pipeline, that customizes the advanced settings of the FeaturizeText transformer. + // Another pipeline, that customizes the advanced settings of the + // FeaturizeText transformer. 
string customizedColumnName = "CustomizedTextFeatures"; - var customized_pipeline = ml.Transforms.Text.FeaturizeText(customizedColumnName, new TextFeaturizingEstimator.Options + var customized_pipeline = ml.Transforms.Text + .FeaturizeText(customizedColumnName, + new TextFeaturizingEstimator.Options + { KeepPunctuations = false, KeepNumbers = false, OutputTokensColumnName = "OutputTokens", - StopWordsRemoverOptions = new StopWordsRemovingEstimator.Options() { Language = TextFeaturizingEstimator.Language.English }, // supports English, French, German, Dutch, Italian, Spanish, Japanese + StopWordsRemoverOptions = + new StopWordsRemovingEstimator.Options() { + Language = TextFeaturizingEstimator.Language.English }, + // supports English, French, German, Dutch, Italian, Spanish, + // Japanese + }, "SentimentText"); // The transformed data for both pipelines. - var transformedData_default = default_pipeline.Fit(trainData).Transform(trainData); - var transformedData_customized = customized_pipeline.Fit(trainData).Transform(trainData); + var transformedData_default = default_pipeline.Fit(trainData) + .Transform(trainData); + + var transformedData_customized = customized_pipeline.Fit(trainData) + .Transform(trainData); // Small helper to print the text inside the columns, in the console. - Action>> printHelper = (columnName, column) => + Action>> printHelper = (columnName, + column) => + { - Console.WriteLine($"{columnName} column obtained post-transformation."); + Console.WriteLine( + $"{columnName} column obtained post-transformation."); + foreach (var featureRow in column) { foreach (var value in featureRow.GetValues()) @@ -61,11 +85,17 @@ public static void Example() Console.WriteLine(""); } - Console.WriteLine("==================================================="); + Console.WriteLine( + "==================================================="); + }; - // Preview of the DefaultTextFeatures column obtained after processing the input. 
- var defaultColumn = transformedData_default.GetColumn>(transformedData_default.Schema[defaultColumnName]); + // Preview of the DefaultTextFeatures column obtained after processing + // the input. + var defaultColumn = transformedData_default + .GetColumn>(transformedData_default + .Schema[defaultColumnName]); + printHelper(defaultColumnName, defaultColumn); // DefaultTextFeatures column obtained post-transformation. @@ -74,8 +104,12 @@ public static void Example() // 0.2357023 0.2357023 0.2357023 0.2357023 0.4714046 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.2357023 0.5773503 0.5773503 0.5773503 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.1924501 0.4472136 0.4472136 0.4472136 0.4472136 0.4472136 // 0 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.246183 0.246183 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.1230915 0 0 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.3692745 0.246183 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.246183 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.1230915 0.2886751 0 0 0 0 0 0 0 0.2886751 0.5773503 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751 0.2886751 - // Preview of the CustomizedTextFeatures column obtained after processing the input. - var customizedColumn = transformedData_customized.GetColumn>(transformedData_customized.Schema[customizedColumnName]); + // Preview of the CustomizedTextFeatures column obtained after + // processing the input. + var customizedColumn = transformedData_customized + .GetColumn>(transformedData_customized + .Schema[customizedColumnName]); + printHelper(customizedColumnName, customizedColumn); // CustomizedTextFeatures column obtained post-transformation. 
@@ -86,7 +120,8 @@ public static void Example() } /// - /// A dataset that contains a tweet and the sentiment assigned to that tweet: 0 - negative and 1 - positive sentiment. + /// A dataset that contains a tweet and the sentiment assigned to that + /// tweet: 0 - negative and 1 - positive sentiment. /// public class SampleSentimentData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/WithOnFitDelegate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/WithOnFitDelegate.cs index fcb56653c5..72c541b8b3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/WithOnFitDelegate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/WithOnFitDelegate.cs @@ -13,27 +13,40 @@ public class WithOnFitDelegate { public static void Example() { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); var samples = new List() { - new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, Label = true }, - new DataPoint(){ Features = new float[4] { 6, 2, 2, 0}, Label = true }, - new DataPoint(){ Features = new float[4] { 4, 0, 1, 0}, Label = false }, - new DataPoint(){ Features = new float[4] { 2,-1,-1, 1}, Label = false } + new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, + Label = true }, + + new DataPoint(){ Features = new float[4] { 6, 2, 2, 0}, + Label = true }, + + new DataPoint(){ Features = new float[4] { 4, 0, 1, 0}, + Label = false }, + + new DataPoint(){ Features = new float[4] { 2,-1,-1, 1}, + Label = false } + }; - // Convert training data to IDataView, the general data type used in ML.NET. + // Convert training data to IDataView, the general data type used in + // ML.NET. 
var data = mlContext.Data.LoadFromEnumerable(samples); - // Create a pipeline to normalize the features and train a binary classifier. - // We use WithOnFitDelegate for the intermediate binning normalization step, - // so that we can inspect the properties of the normalizer after fitting. + // Create a pipeline to normalize the features and train a binary + // classifier. We use WithOnFitDelegate for the intermediate binning + // normalization step, so that we can inspect the properties of the + // normalizer after fitting. NormalizingTransformer binningTransformer = null; var pipeline = - mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 3) - .WithOnFitDelegate(fittedTransformer => binningTransformer = fittedTransformer) - .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression()); + mlContext.Transforms + .NormalizeBinning("Features", maximumBinCount: 3) + .WithOnFitDelegate( + fittedTransformer => binningTransformer = fittedTransformer) + .Append(mlContext.BinaryClassification.Trainers + .LbfgsLogisticRegression()); Console.WriteLine(binningTransformer == null); // Expected Output: @@ -56,6 +69,7 @@ public static void Example() Console.WriteLine( $"Bin {i}: Density = {binningParam.Density[i]}, " + $"Upper-bounds = {upperBounds}"); + } // Expected output: // Bin 0: Density = 2, Upper-bounds = 3, 7, Infinity