-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Svmlight loader and saver #4190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
722e71e
SvmLightLoader
yaeldMS 8d106cf
SvmLightLoader, SvmLightSaver and tests
yaeldMS cb2e2e1
Add comments to public API methods
yaeldMS b4cffed
Address code review comments and add a sample
yaeldMS 1fdb289
address code review comments and add more tests
yaeldMS ee1b17e
Code review comments
yaeldMS d7ae208
SvmLightLoader
yaeldMS d2619ed
SvmLightLoader, SvmLightSaver and tests
yaeldMS File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
116 changes: 116 additions & 0 deletions
116
docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadingSvmLight.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.IO; | ||
using System.Text; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Transforms; | ||
using Microsoft.VisualBasic.CompilerServices; | ||
using Tensorflow; | ||
|
||
namespace Samples.Dynamic.DataOperations | ||
{ | ||
/// <summary>
/// Sample that writes a small random file in SVM light format and loads it
/// back with an SvmLightLoader, printing the resulting schema and rows.
/// </summary>
public static class LoadingSvmLight
{
    /// <summary>
    /// Shows the two ways of creating an SvmLightLoader used in this sample:
    /// from a data sample, and from an explicitly specified number of features.
    /// </summary>
    public static void Example()
    {
        // Create a random SVM light format file.
        var random = new Random(42);
        var dataDirectoryName = "DataDir";
        Directory.CreateDirectory(dataDirectoryName);
        var fileName = Path.Combine(dataDirectoryName, "SVM_Data.csv");
        WriteRandomSvmLightFile(fileName, random);

        // Create an SvmLightLoader, passing the file itself as the data sample.
        var mlContext = new MLContext();
        var file = new MultiFileSource(fileName);
        var loader = mlContext.Data.CreateSvmLightLoader(dataSample: file);

        // Load a single file from path.
        var svmData = loader.Load(file);

        PrintSchema(svmData);

        // Expected Output:
        // Column Label type Single
        // Column Weight type Single
        // Column GroupId type Key<UInt64, 0 - 18446744073709551613>
        // Column Comment type String
        // Column Features type Vector<Single, 10>

        PrintData(svmData);

        // NOTE(review): re-verify the rows below (in particular the Weight
        // column, now that the "cost:" token is properly separated) against
        // an actual run - TODO confirm.
        // Expected Output:
        // 1 1 0 0 0.2625927 0 0 0.7612506 0.2573214 0 0.3809696 0.5174511
        // -1 1 0 0 0 0.7051522 0 0 0.7111546 0.9062127 0 0
        // -1 1 0 0 0 0.535722 0 0 0.1491191 0.05100901 0 0
        // -1 1 0 0.6481459 0.04449836 0 0 0.4203662 0 0 0.01325378 0.2674384
        // -1 1 0 0 0.7978093 0.5134962 0.008952909 0 0.003074009 0.6541431 0.9135142 0
        // -1 1 0 0.3727672 0.4369507 0 0 0.2973725 0 0 0 0.8816807
        // 1 1 0 0.1031429 0.3332489 0 0.1346936 0.5916625 0 0 0 0
        // 1 1 0 0 0 0.3454075 0 0.2197472 0.03848049 0.5923384 0.09373277 0
        // -1 1 0 0.7511514 0 0.0420841 0 0 0.9262196 0 0.545344 0
        // 1 1 0 0.02958358 0.9334617 0 0 0.8833956 0.2947684 0 0 0

        // If the loader is created without a data sample we need to specify the number of features expected in the file.
        loader = mlContext.Data.CreateSvmLightLoader(inputSize: 10);
        svmData = loader.Load(file);

        PrintSchema(svmData);
        PrintData(svmData);
    }

    /// <summary>
    /// Writes ten random lines to <paramref name="fileName"/>, each made of a
    /// label (1 or -1), a cost token and a random subset of features 1..10.
    /// </summary>
    /// <param name="fileName">Path of the file to create (overwritten if it exists).</param>
    /// <param name="random">Source of randomness for labels and feature values.</param>
    private static void WriteRandomSvmLightFile(string fileName, Random random)
    {
        using (var fs = File.CreateText(fileName))
        {
            // Write random lines in SVM light format.
            for (int line = 0; line < 10; line++)
            {
                var sb = new StringBuilder();
                sb.Append(random.NextDouble() > 0.5 ? "1 " : "-1 ");

                // The trailing space keeps the cost token separate from the
                // first feature token; without it they fuse ("cost:13:0.26").
                sb.Append(line % 2 == 0 ? "cost:1 " : "cost:2 ");

                for (int i = 1; i <= 10; i++)
                {
                    // Skip roughly half of the features so each line is sparse.
                    if (random.NextDouble() > 0.5)
                    {
                        continue;
                    }

                    // Invariant culture guarantees '.' as the decimal separator
                    // regardless of the machine's regional settings.
                    sb.Append(FormattableString.Invariant($"{i}:{random.NextDouble()} "));
                }

                fs.WriteLine(sb.ToString());
            }
        }
    }

    /// <summary>
    /// Prints the name and type of every column in the schema of <paramref name="svmData"/>.
    /// </summary>
    private static void PrintSchema(IDataView svmData)
    {
        foreach (var col in svmData.Schema)
        {
            Console.WriteLine($"Column {col.Name} type {col.Type}");
        }
    }

    /// <summary>
    /// Prints one line per row of <paramref name="svmData"/>: the Label, the
    /// Weight and the dense values of the Features column, separated by spaces.
    /// </summary>
    private static void PrintData(IDataView svmData)
    {
        using (var cursor = svmData.GetRowCursor(svmData.Schema))
        {
            var labelGetter = cursor.GetGetter<float>(svmData.Schema["Label"]);
            var weightGetter = cursor.GetGetter<float>(svmData.Schema["Weight"]);
            var featuresGetter = cursor.GetGetter<VBuffer<float>>(svmData.Schema["Features"]);

            // Declared outside the loop so the same buffer is passed to the getter for every row.
            VBuffer<float> features = default;
            while (cursor.MoveNext())
            {
                float label = default;
                labelGetter(ref label);

                float weight = default;
                weightGetter(ref weight);

                featuresGetter(ref features);

                Console.WriteLine($"{label} {weight} {string.Join(" ", features.DenseValues())}");
            }
        }
    }
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Text loader?