Skip to content

Commit

Permalink
Adding samples for data save and load from text and binary files (dotnet#3745)
Browse files Browse the repository at this point in the history

* Adding samples for data save and load from text and binary files

* PR comments

* nits
  • Loading branch information
najeeb-kazmi authored May 21, 2019
1 parent 706299b commit ce46faa
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML;

namespace Samples.Dynamic
{
/// <summary>
/// Sample that writes an <see cref="IDataView"/> to a binary IDV file and reads it back.
/// </summary>
public static class SaveAndLoadFromBinary
{
    /// <summary>
    /// Builds a small in-memory data set, saves it to "data.idv", loads that file again
    /// and prints every loaded row to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for ML.NET operations: it is used for exception
        // tracking and logging, as a catalog of available operations and as the source of randomness.
        // A fixed seed is passed so that the output of this example is deterministic.
        MLContext context = new MLContext(seed: 0);

        // The training data points that will be round-tripped through the file.
        var samples = new List<DataPoint>
        {
            new DataPoint { Label = 0, Features = 4 },
            new DataPoint { Label = 0, Features = 5 },
            new DataPoint { Label = 0, Features = 6 },
            new DataPoint { Label = 1, Features = 8 },
            new DataPoint { Label = 1, Features = 9 },
        };

        // Wrap the list in an IDataView, the data representation the ML.NET API consumes.
        IDataView original = context.Data.LoadFromEnumerable(samples);

        // Write the IDataView to a binary IDV file. The stream is disposed at the end of the
        // using block, before the file is opened again for loading.
        using (var output = new FileStream("data.idv", FileMode.Create))
        {
            context.Data.SaveAsBinary(original, output);
        }

        // Load the binary IDV file back into a new IDataView.
        IDataView reloaded = context.Data.LoadFromBinary("data.idv");

        // Enumerate the reloaded data and print each row for inspection.
        IEnumerable<DataPoint> rows = context.Data.CreateEnumerable<DataPoint>(reloaded, reuseRowObject: false);
        foreach (var point in rows)
        {
            Console.WriteLine($"{point.Label}, {point.Features}");
        }

        // Expected console output:
        // 0, 4
        // 0, 5
        // 0, 6
        // 1, 8
        // 1, 9
    }

    /// <summary>
    /// A single example holding a label value and a feature value.
    /// A data set is a collection of such examples.
    /// </summary>
    private class DataPoint
    {
        /// <summary>The label value of the example.</summary>
        public float Label { get; set; }

        /// <summary>The feature value of the example.</summary>
        public float Features { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
using System;
using System.Collections.Generic;
using System.IO;
using Microsoft.ML;

namespace Samples.Dynamic
{
/// <summary>
/// Sample that writes an <see cref="IDataView"/> to a text file and reads it back.
/// </summary>
public static class SaveAndLoadFromText
{
    /// <summary>
    /// Builds a small in-memory data set, saves it to "data.tsv", loads that file again
    /// and prints every loaded row to the console.
    /// </summary>
    public static void Example()
    {
        // The MLContext is the starting point for ML.NET operations: it is used for exception
        // tracking and logging, as a catalog of available operations and as the source of randomness.
        // A fixed seed is passed so that the output of this example is deterministic.
        MLContext context = new MLContext(seed: 0);

        // The training data points that will be round-tripped through the file.
        var samples = new List<DataPoint>
        {
            new DataPoint { Label = 0, Features = 4 },
            new DataPoint { Label = 0, Features = 5 },
            new DataPoint { Label = 0, Features = 6 },
            new DataPoint { Label = 1, Features = 8 },
            new DataPoint { Label = 1, Features = 9 },
        };

        // Wrap the list in an IDataView, the data representation the ML.NET API consumes.
        IDataView original = context.Data.LoadFromEnumerable(samples);

        // Write the IDataView to a text file. The stream is disposed at the end of the
        // using block, before the file is opened again for loading.
        using (var output = new FileStream("data.tsv", FileMode.Create))
        {
            context.Data.SaveAsText(original, output);
        }

        // Load the text file back into a new IDataView.
        IDataView reloaded = context.Data.LoadFromTextFile("data.tsv");

        // Enumerate the reloaded data and print each row for inspection.
        IEnumerable<DataPoint> rows = context.Data.CreateEnumerable<DataPoint>(reloaded, reuseRowObject: false);
        foreach (var point in rows)
        {
            Console.WriteLine($"{point.Label}, {point.Features}");
        }

        // Expected console output:
        // 0, 4
        // 0, 5
        // 0, 6
        // 1, 8
        // 1, 9
    }

    /// <summary>
    /// A single example holding a label value and a feature value.
    /// A data set is a collection of such examples.
    /// </summary>
    private class DataPoint
    {
        /// <summary>The label value of the example.</summary>
        public float Label { get; set; }

        /// <summary>The feature value of the example.</summary>
        public float Features { get; set; }
    }
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, IMult
/// </summary>
/// <param name="catalog">The catalog.</param>
/// <param name="path">The path to the file to load from.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromBinary](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path)
{
Contracts.CheckNonEmpty(path, nameof(path));
Expand All @@ -54,6 +61,13 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, strin
/// <param name="data">The data view to save.</param>
/// <param name="stream">The stream to write to.</param>
/// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SaveAsBinary](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromBinary.cs)]
/// ]]>
/// </format>
/// </example>
public static void SaveAsBinary(this DataOperationsCatalog catalog, IDataView data, Stream stream,
bool keepHidden = false)
{
Expand Down
14 changes: 14 additions & 0 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog cata
/// <param name="catalog">The <see cref="DataOperationsCatalog"/> catalog.</param>
/// <param name="path">Specifies a file from which to load.</param>
/// <param name="options">Defines the settings of the load operation.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LoadFromTextFile](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
/// ]]>
/// </format>
/// </example>
public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, string path,
TextLoader.Options options = null)
{
Expand All @@ -186,6 +193,13 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, str
/// <param name="schema">Whether to write the header comment with the schema.</param>
/// <param name="keepHidden">Whether to keep hidden columns in the dataset.</param>
/// <param name="forceDense">Whether to save columns in dense format even if they are sparse vectors.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SaveAsText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SaveAndLoadFromText.cs)]
/// ]]>
/// </format>
/// </example>
public static void SaveAsText(this DataOperationsCatalog catalog,
IDataView data,
Stream stream,
Expand Down

0 comments on commit ce46faa

Please sign in to comment.