Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into singlis/scrub-lightgbm
Browse files Browse the repository at this point in the history
  • Loading branch information
singlis committed Mar 2, 2019
2 parents ce21121 + 1942c8f commit b958c37
Show file tree
Hide file tree
Showing 40 changed files with 216 additions and 212 deletions.
18 changes: 18 additions & 0 deletions .vsts-dotnet-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ phases:
parameters:
name: Centos
buildScript: ./build.sh
customMatrixes:
Build_Debug_Intrinsics:
_configuration: Debug-Intrinsics
_config_short: DI
_includeBenchmarkData: false
Build_Release:
_configuration: Release
_config_short: R
_includeBenchmarkData: true
queue:
name: Hosted Ubuntu 1604
container: CentosContainer
Expand All @@ -39,6 +48,15 @@ phases:
parameters:
name: Ubuntu
buildScript: ./build.sh
customMatrixes:
Build_Debug:
_configuration: Debug
_config_short: D
_includeBenchmarkData: false
Build_Release_Intrinsics:
_configuration: Release-Intrinsics
_config_short: RI
_includeBenchmarkData: true
queue:
name: Hosted Ubuntu 1604
container: UbuntuContainer
Expand Down
2 changes: 1 addition & 1 deletion BuildToolsVersion.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.0-preview1-03129-01
3.0.0-preview1-03721-01
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ public static void Example()
var outData = featureContributionCalculator.Fit(scoredData).Transform(scoredData);

// Let's extract the weights from the linear model to use as a comparison
var weights = new VBuffer<float>();
model.Model.GetFeatureWeights(ref weights);
var weights = model.Model.Weights;

// Let's now walk through the first ten records and see which feature drove the values the most
// Get prediction scores and contributions
Expand All @@ -63,7 +62,7 @@ public static void Example()
var value = row.Features[featureOfInterest];
var contribution = row.FeatureContributions[featureOfInterest];
var name = data.Schema[featureOfInterest + 1].Name;
var weight = weights.GetValues()[featureOfInterest];
var weight = weights[featureOfInterest];

Console.WriteLine("{0:0.00}\t{1:0.00}\t{2}\t{3:0.00}\t{4:0.00}\t{5:0.00}",
row.MedianHomeValue,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@ public class PriorTrainer
{
public static void Example()
{
// Downloading the dataset from github.com/dotnet/machinelearning.
// This will create a sentiment.tsv file in the filesystem.
// You can open this file, if you want to see the data.
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Download the dataset; the first returned file is used for training and the second for testing.
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
var trainFile = dataFiles[0];
var testFile = dataFiles[1];

// A preview of the data.
// Sentiment SentimentText
// 0 " :Erm, thank you. "
// 1 ==You're cool==

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Step 1: Load the data as an IDataView.
// First, we define the loader: specify the data columns and where to find them in the text file.
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var loader = mlContext.Data.CreateTextLoader(
columns: new[]
{
Expand All @@ -31,12 +31,9 @@ public static void Example()
},
hasHeader: true
);

// Load the data
var data = loader.Load(dataFile);

// Split it between training and test data
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
// Load the data
var trainData = loader.Load(trainFile);

// Step 2: Pipeline
// Featurize the text column through the FeaturizeText API.
Expand All @@ -47,19 +44,27 @@ public static void Example()
.Append(mlContext.BinaryClassification.Trainers.Prior(labelColumnName: "Sentiment"));

// Step 3: Train the pipeline
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
var trainedPipeline = pipeline.Fit(trainData);

// Step 4: Evaluate on the test set
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
var transformedData = trainedPipeline.Transform(loader.Load(testFile));
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");

// Step 5: Inspect the output
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);

// The Prior trainer outputs the proportion of a label in the dataset as the probability of that label.
// In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset.
// In this case 'Accuracy: 0.50' means that there is a split of around 50%-50% of positive and negative labels in the test dataset.
// Expected output:
// Accuracy: 0.647058823529412

// Accuracy: 0.50
// AUC: 0.50
// F1 Score: 0.67
// Negative Precision: 0.00
// Negative Recall: 0.00
// Positive Precision: 0.50
// Positive Recall: 1.00
// LogLoss: 1.05
// LogLossReduction: -4.89
// Entropy: 1.00
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,64 @@ public static class RandomTrainer
{
public static void Example()
{
// Downloading the dataset from github.com/dotnet/machinelearning.
// This will create a sentiment.tsv file in the filesystem.
// You can open this file, if you want to see the data.
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(seed: 1);

// Download the dataset; the first returned file is used for training and the second for testing.
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
var trainFile = dataFiles[0];
var testFile = dataFiles[1];

// A preview of the data.
// Sentiment SentimentText
// 0 " :Erm, thank you. "
// 1 ==You're cool==

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(seed: 1);

// Step 1: Load the data as an IDataView.
// First, we define the loader: specify the data columns and where to find them in the text file.
var loader = mlContext.Data.CreateTextLoader(
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.Single, 0),
new TextLoader.Column("SentimentText", DataKind.String, 1)
},
hasHeader: true
);

// Load the data
var data = loader.Load(dataFile);

// Split it between training and test data
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
// Read the data
var trainData = reader.Load(trainFile);

// Step 2: Pipeline
// Featurize the text column through the FeaturizeText API.
// Then append a binary classifier, setting the "Label" column as the label of the dataset, and
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.BinaryClassification.Trainers.Random());

// Step 3: Train the pipeline
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
var trainedPipeline = pipeline.Fit(trainData);

// Step 4: Evaluate on the test set
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
var transformedData = trainedPipeline.Transform(reader.Load(testFile));
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");

// Step 5: Inspect the output
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);

// We expect an output probability close to 0.5 as the Random trainer outputs a random prediction.
// Regardless of the input features, the trainer will predict either positive or negative label with equal probability.
// Expected output (close to 0.5):
// Accuracy: 0.588235294117647
// Expected output (close to 0.5):

// Accuracy: 0.56
// AUC: 0.57
// F1 Score: 0.60
// Negative Precision: 0.57
// Negative Recall: 0.44
// Positive Precision: 0.55
// Positive Recall: 0.67
// LogLoss: 1.53
// LogLossReduction: -53.37
// Entropy: 1.00
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static void Example()
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
new ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer.Options()
{
LearningRate = 0.2f,
NumberOfIterations = 10,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void Example()

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options()
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OrdinaryLeastSquaresRegressionTrainer.Options()
{
L2Weight = 0.1f,
PerParameterSignificance = false
Expand Down
8 changes: 3 additions & 5 deletions docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,10 @@ public static void SdcaRegression()
var model = learningPipeline.Fit(trainData);

// Check the weights that the model learned
VBuffer<float> weights = default;
pred.GetFeatureWeights(ref weights);
var weights = pred.Weights;

var weightsValues = weights.GetValues();
Console.WriteLine($"weight 0 - {weightsValues[0]}");
Console.WriteLine($"weight 1 - {weightsValues[1]}");
Console.WriteLine($"weight 0 - {weights[0]}");
Console.WriteLine($"weight 1 - {weights[1]}");

// Evaluate how the model is doing on the test data
var dataWithPredictions = model.Transform(testData);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Dirty/PredictorInterfaces.cs
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,8 @@ internal interface ICanSaveInSourceCode
/// <summary>
/// Interface implemented by components that can assign weights to features.
/// </summary>
public interface IHaveFeatureWeights
[BestFriend]
internal interface IHaveFeatureWeights
{
/// <summary>
/// Returns the weights for the features.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using Microsoft.ML.Data;

Expand All @@ -21,7 +22,7 @@ public static class AlexNetExtension
/// </summary>
public static EstimatorChain<ColumnCopyingTransformer> AlexNet(this DnnImageModelSelector dnnModelContext, IHostEnvironment env, string outputColumnName, string inputColumnName)
{
return AlexNet(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AssemblyPathHelpers.GetExecutingAssemblyLocation(), "DnnImageModels"));
return AlexNet(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DnnImageModels"));
}

/// <summary>
Expand Down
19 changes: 0 additions & 19 deletions src/Microsoft.ML.DnnImageFeaturizer.AlexNet/AssemblyPathHelpers.cs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
<IncludeInPackage>Microsoft.ML.DnnImageFeaturizer.ResNet101</IncludeInPackage>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\Microsoft.ML.DnnImageFeaturizer.AlexNet\AssemblyPathHelpers.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.OnnxTransformer\Microsoft.ML.OnnxTransformer.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using Microsoft.ML.Data;

Expand All @@ -21,7 +22,7 @@ public static class ResNet101Extension
/// </summary>
public static EstimatorChain<ColumnCopyingTransformer> ResNet101(this DnnImageModelSelector dnnModelContext, IHostEnvironment env, string outputColumnName, string inputColumnName)
{
return ResNet101(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AssemblyPathHelpers.GetExecutingAssemblyLocation(), "DnnImageModels"));
return ResNet101(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DnnImageModels"));
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
<IncludeInPackage>Microsoft.ML.DnnImageFeaturizer.ResNet18</IncludeInPackage>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\Microsoft.ML.DnnImageFeaturizer.AlexNet\AssemblyPathHelpers.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.OnnxTransformer\Microsoft.ML.OnnxTransformer.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using Microsoft.ML.Data;

Expand All @@ -21,7 +22,7 @@ public static class ResNet18Extension
/// </summary>
public static EstimatorChain<ColumnCopyingTransformer> ResNet18(this DnnImageModelSelector dnnModelContext, IHostEnvironment env, string outputColumnName, string inputColumnName)
{
return ResNet18(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AssemblyPathHelpers.GetExecutingAssemblyLocation(), "DnnImageModels"));
return ResNet18(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DnnImageModels"));
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@
<IncludeInPackage>Microsoft.ML.DnnImageFeaturizer.ResNet50</IncludeInPackage>
</PropertyGroup>

<ItemGroup>
<Compile Include="..\Microsoft.ML.DnnImageFeaturizer.AlexNet\AssemblyPathHelpers.cs" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.OnnxTransformer\Microsoft.ML.OnnxTransformer.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.IO;
using Microsoft.ML.Data;

Expand All @@ -21,7 +22,7 @@ public static class ResNet50Extension
/// </summary>
public static EstimatorChain<ColumnCopyingTransformer> ResNet50(this DnnImageModelSelector dnnModelContext, IHostEnvironment env, string outputColumnName, string inputColumnName)
{
return ResNet50(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AssemblyPathHelpers.GetExecutingAssemblyLocation(), "DnnImageModels"));
return ResNet50(dnnModelContext, env, outputColumnName, inputColumnName, Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "DnnImageModels"));
}

/// <summary>
Expand Down
Loading

0 comments on commit b958c37

Please sign in to comment.