-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sample of using LoadFromEnumerable with a SchemaDefinition #3696
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Samples.Dynamic | ||
{ | ||
public static class LoadFromEnumerable | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Add a comment explaining that you are not only loading from an enumerable but also setting vector metadata properties. Why would this be useful IRL? #Resolved |
||
{ | ||
// Creates an IDataView from an IEnumerable and shows two ways of giving the Features vector a known size.
// When the data model is defined through types, the size of the vector is set with the VectorType
// annotation. When the size is not known at compile time, a SchemaDefinition can be modified at runtime
// and the size of the vector set there.
// This is important, because most of the ML.NET trainers require the Features vector to be of known size.
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext();

    // Get a small dataset as an IEnumerable. Every vector holds exactly 5 values, matching the
    // [VectorType(5)] annotation on DataPointVector.Features.
    IEnumerable<DataPointVector> enumerableKnownSize = new DataPointVector[]
    {
        new DataPointVector{ Features = new float[]{ 1.2f, 3.4f, 4.5f, 3.2f, 7.5f } },
        new DataPointVector{ Features = new float[]{ 4.2f, 3.4f, 14.65f, 3.2f, 3.5f } },
        new DataPointVector{ Features = new float[]{ 1.6f, 3.5f, 4.5f, 6.2f, 3.5f } },
    };

    // Load dataset into an IDataView and inspect the schema.
    // A direct cast is used instead of 'as' so that an unexpected column type fails here with a
    // clear InvalidCastException rather than a NullReferenceException further down.
    IDataView data = mlContext.Data.LoadFromEnumerable(enumerableKnownSize);
    PrintFeatureColumnInfo((VectorDataViewType)data.Schema["Features"].Type);

    // Expected output:
    //
    // Is the size of the Features column known: True.
    // Size: 5

    // If the size of the vector is unknown at compile time, it can be set at runtime.
    IEnumerable<DataPoint> enumerableUnknownSize = new DataPoint[]
    {
        new DataPoint{ Features = new float[]{ 1.2f, 3.4f, 4.5f } },
        new DataPoint{ Features = new float[]{ 4.2f, 3.4f, 1.6f } },
        new DataPoint{ Features = new float[]{ 1.6f, 3.5f, 4.5f } },
    };

    // The feature dimension. It is a constant here to keep the sample short; in real code it would
    // typically be the length of a features array that is only known at runtime.
    int featureDimension = 3;

    // Build the schema from the type. Without a VectorType annotation the vector size is unknown.
    var definedSchema = SchemaDefinition.Create(typeof(DataPoint));
    var unsizedFeatures = (VectorDataViewType)definedSchema["Features"].ColumnType;
    PrintFeatureColumnInfo(unsizedFeatures);

    // Expected output:
    //
    // Is the size of the Features column known: False.
    // Size: 0

    // Set the column type to be a known-size vector, keeping the original item type.
    definedSchema["Features"].ColumnType = new VectorDataViewType(unsizedFeatures.ItemType, featureDimension);

    // Read the data into an IDataView, supplying the modified schema.
    IDataView data2 = mlContext.Data.LoadFromEnumerable(enumerableUnknownSize, definedSchema);

    // Inspect the schema again: the size set on the SchemaDefinition is now reported.
    PrintFeatureColumnInfo((VectorDataViewType)data2.Schema["Features"].Type);

    // Expected output:
    //
    // Is the size of the Features column known: True.
    // Size: 3
}

// Writes to the console whether the given vector column type has a known size, and that size.
private static void PrintFeatureColumnInfo(VectorDataViewType featureColumn)
{
    Console.WriteLine($"Is the size of the Features column known: {featureColumn.IsKnownSize}.\nSize: {featureColumn.Size}");
}
} | ||
|
||
/// <summary>
/// Data model whose <see cref="Features"/> array carries no size annotation,
/// so the vector size is not declared on the type.
/// </summary>
public class DataPoint
{
    /// <summary>The feature values of this data point.</summary>
    public float[] Features { get; set; }
}
|
||
/// <summary>
/// Data model whose <see cref="Features"/> array is annotated with
/// <c>[VectorType(5)]</c>, declaring a vector size of 5 on the type itself.
/// </summary>
public class DataPointVector
{
    /// <summary>The feature values of this data point; annotated as a vector of size 5.</summary>
    [VectorType(5)]
    public float[] Features { get; set; }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add headers #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need headers for samples? Whatever we put here will show up in the docs verbatim, so we didn't include the license header in any of the samples. .NET samples don't have headers either (example below). So I suggest not including headers for sample files.
https://docs.microsoft.com/en-us/dotnet/api/system.io.file?view=netframework-4.8
In reply to: 282706294 [](ancestors = 282706294)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i forgot, i'll remove them. #Resolved