Skip to content

switch housing dataset to wine #170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,5 @@ ASALocalRun/

# MSBuild Binary and Structured Log
*.binlog
# Ignore external test datasets.
/test/data/external/
18 changes: 16 additions & 2 deletions build.proj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Directory.Build.props))\Directory.Build.props" />

<Import Project="$(ToolsDir)VersionTools.targets" Condition="Exists('$(ToolsDir)VersionTools.targets')" />

<UsingTask TaskName="DownloadFilesFromUrl" AssemblyFile="$(ToolsDir)Microsoft.DotNet.Build.Tasks.dll"/>
<PropertyGroup>
<!-- To disable the restoration of packages, set RestoreDuringBuild=false or pass /p:RestoreDuringBuild=false.-->
<RestoreDuringBuild Condition="'$(RestoreDuringBuild)'==''">true</RestoreDuringBuild>
Expand All @@ -33,6 +33,7 @@
RestoreProjects;
BuildNative;
$(TraversalBuildDependsOn);
DownloadExternalTestFiles;
RunTests;
</TraversalBuildDependsOn>
</PropertyGroup>
Expand All @@ -56,13 +57,26 @@
<ItemGroup>
<PkgProject Include="pkg\**\*.nupkgproj" />
</ItemGroup>

<MSBuild Projects="@(PkgProject)"
Targets="Restore" />
<MSBuild Projects="@(PkgProject)"
Targets="Pack" />
</Target>

<!-- External datasets fetched by the DownloadExternalTestFiles target.
     $(MSBuildThisFileDirectory) already ends with a directory separator, so the
     relative path is appended without a leading slash; Include and DestinationFile
     therefore resolve to the same path. -->
<ItemGroup>
  <TestFile Include="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv"
            Url="https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
            DestinationFile="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv" />
</ItemGroup>

<!-- Downloads each TestFile item (its Url metadata) into test/data/external.
     Inputs/Outputs are declared so MSBuild can skip the target when the
     destination file is already up to date.
     NOTE(review): DestinationDir is a relative path while the items use
     $(MSBuildThisFileDirectory); presumably both resolve to the same folder
     when building from the repository root. TODO confirm.
     NOTE(review): TreatErrorsAsWarnings="true" presumably lets the build continue
     when a download fails, in which case tests reading the file would fail later.
     Confirm against the DownloadFilesFromUrl task. -->
<Target Name="DownloadExternalTestFiles" Inputs="@(TestFile)" Outputs="%(TestFile.DestinationFile)">
<Message Importance="High" Text="Downloading external test files... %(TestFile.DestinationFile)" />
<DownloadFilesFromUrl Items="@(TestFile)"
DestinationDir="test/data/external"
TreatErrorsAsWarnings="true"/>
</Target>

<Target Name="RunTests" Condition="'$(RunTests)'=='true'">
<MSBuild Projects="test\run-tests.proj"
Targets="RunTests" />
Expand Down
32 changes: 28 additions & 4 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using ML = Microsoft.ML;
using Microsoft.ML.Runtime;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.EntryPoints;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -269,10 +270,10 @@ public void TestCrossValidationBinaryMacro()
}
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact]
public void TestCrossValidationMacro()
{
var dataPath = GetDataPath(@"housing.txt");
var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
using (var env = new TlcEnvironment())
{
var subGraph = env.CreateExperiment();
Expand All @@ -295,7 +296,30 @@ public void TestCrossValidationMacro()
var modelCombineOutput = subGraph.Add(modelCombine);

var experiment = env.CreateExperiment();
var importInput = new ML.Data.TextLoader(dataPath);
var importInput = new ML.Data.TextLoader(dataPath)
{
Arguments = new TextLoaderArguments
{
Separator = new[] { ';' },
HasHeader = true,
Column = new[]
{
new TextLoaderColumn()
{
Name = "Label",
Source = new [] { new TextLoaderRange(11) },
Type = DataKind.Num
},

new TextLoaderColumn()
{
Name = "Features",
Source = new [] { new TextLoaderRange(0,10) },
Type = DataKind.Num
}
}
}
};
var importOutput = experiment.Add(importInput);

var crossValidate = new ML.Models.CrossValidator
Expand Down Expand Up @@ -324,7 +348,7 @@ public void TestCrossValidationMacro()
Assert.True(b);
double val = 0;
getter(ref val);
Assert.Equal(3.32, val, 1);
Assert.Equal(0.58, val, 1);
b = cursor.MoveNext();
Assert.False(b);
}
Expand Down
23 changes: 13 additions & 10 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
Original file line number Diff line number Diff line change
Expand Up @@ -910,7 +910,7 @@ public void EntryPointTextToKeyToText()
}

private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath,
string instanceMetricsPath, string confusionMatrixPath = null)
string instanceMetricsPath, string confusionMatrixPath = null, string loader = null)
{
string inputGraph = string.Format(@"
{{
Expand All @@ -919,6 +919,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
'Name': 'Data.CustomTextLoader',
'Inputs': {{
'InputFile': '$file'
{8}
}},
'Outputs': {{
'Data': '$AllData'
Expand Down Expand Up @@ -978,7 +979,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
}}
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath),
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "",
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "");
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "",
string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader));

var jsonPath = DeleteOutputPath("graph.json");
File.WriteAllLines(jsonPath, new[] { inputGraph });
Expand Down Expand Up @@ -1036,15 +1038,16 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact]
public void EntryPointEvaluateRegression()
{
var dataPath = GetDataPath("housing.txt");
var dataPath = GetDataPath(TestDatasets.winequality.trainFilename);
var warningsPath = DeleteOutputPath("warnings.idv");
var overallMetricsPath = DeleteOutputPath("overall.idv");
var instanceMetricsPath = DeleteOutputPath("instance.idv");

RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath);
RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator",
dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: TestDatasets.winequality.loaderSettings);

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath))
Assert.Equal(0, CountRows(loader));
Expand All @@ -1053,7 +1056,7 @@ public void EntryPointEvaluateRegression()
Assert.Equal(1, CountRows(loader));

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath))
Assert.Equal(104, CountRows(loader));
Assert.Equal(975, CountRows(loader));
}

[Fact]
Expand All @@ -1068,10 +1071,10 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointSDCARegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.StochasticDualCoordinateAscentRegressor", loader: TestDatasets.winequality.loaderSettings);
}

[Fact]
Expand Down Expand Up @@ -1142,10 +1145,10 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointPoissonRegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
TestEntryPointRoutine(TestDatasets.winequality.trainFilename, "Trainers.PoissonRegressor", loader: TestDatasets.winequality.loaderSettings);
}

[Fact]
Expand Down
8 changes: 8 additions & 0 deletions test/Microsoft.ML.TestFramework/Datasets.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,14 @@ public static class TestDatasets
testFilename = "housing.txt"
};

// White-wine quality dataset, located under the "external" data sub-folder.
// The same CSV file is used as both the train and the test file.
// loaderSettings describes the file layout: column 11 is the Label (R4),
// columns 0-10 are the Features (R4), fields are separated by ';' and the
// file has a header row.
// NOTE(review): the file is presumably fetched at build time rather than
// checked in; confirm against the build scripts.
public static TestDataset winequality = new TestDataset
{
name = "wine",
trainFilename = "external/winequality-white.csv",
testFilename = "external/winequality-white.csv",
loaderSettings = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+"
};

public static TestDataset msm = new TestDataset
{
// REVIEW: Why is the MSM train set smaller than the test set? Reverse these!
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@

<ItemGroup>
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="FastTreeNative" />
</ItemGroup>
</Project>