Skip to content

switch housing dataset to wine #170

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,5 @@ ASALocalRun/

# MSBuild Binary and Structured Log
*.binlog
# Ignore external test datasets.
/test/data/external/
19 changes: 17 additions & 2 deletions build.proj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), Directory.Build.props))\Directory.Build.props" />

<Import Project="$(ToolsDir)VersionTools.targets" Condition="Exists('$(ToolsDir)VersionTools.targets')" />

<UsingTask TaskName="DownloadFilesFromUrl" AssemblyFile="$(ToolsDir)Microsoft.DotNet.Build.Tasks.dll"/>
<PropertyGroup>
<!-- To disable the restoration of packages, set RestoreDuringBuild=false or pass /p:RestoreDuringBuild=false.-->
<RestoreDuringBuild Condition="'$(RestoreDuringBuild)'==''">true</RestoreDuringBuild>
Expand All @@ -33,6 +33,7 @@
RestoreProjects;
BuildNative;
$(TraversalBuildDependsOn);
DownloadExternalTestFiles;
RunTests;
</TraversalBuildDependsOn>
</PropertyGroup>
Expand All @@ -56,13 +57,27 @@
<ItemGroup>
<PkgProject Include="pkg\**\*.nupkgproj" />
</ItemGroup>

<MSBuild Projects="@(PkgProject)"
Targets="Restore" />
<MSBuild Projects="@(PkgProject)"
Targets="Pack" />
</Target>

<!--
  External datasets that are downloaded at build time instead of being checked in.
  Each TestFile item carries the source Url and the DestinationFile it is saved to.
  $(MSBuildThisFileDirectory) already ends with a directory separator, so paths are
  appended to it directly; Include and DestinationFile intentionally name the same file.
-->
<ItemGroup>
<TestFile Include="$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv">
<Url>https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv</Url>
<DestinationFile>$(MSBuildThisFileDirectory)test/data/external/winequality-white.csv</DestinationFile>
</TestFile>
</ItemGroup>

<!--
  Downloads every @(TestFile) item into test/data/external.
  Inputs/Outputs are declared so MSBuild can skip the target when the destination
  file is already up to date; referencing %(TestFile.DestinationFile) batches the
  target per destination file.
-->
<Target Name="DownloadExternalTestFiles" Inputs="@(TestFile)" Outputs="%(TestFile.DestinationFile)">
<Message Importance="High" Text="Downloading external test files... %(TestFile.DestinationFile)" />
<!-- NOTE(review): TreatErrorsAsWarnings presumably keeps a failed download from failing the build;
     confirm against the DownloadFilesFromUrl task. DestinationDir is a relative path, assumed to be
     resolved against this project's directory. -->
<DownloadFilesFromUrl Items="@(TestFile)"
DestinationDir="test/data/external"
TreatErrorsAsWarnings="true"/>
</Target>

<Target Name="RunTests" Condition="'$(RunTests)'=='true'">
<MSBuild Projects="test\run-tests.proj"
Targets="RunTests" />
Expand Down
8 changes: 4 additions & 4 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,10 @@ public void TestCrossValidationBinaryMacro()
}
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void TestCrossValidationMacro()
{
var dataPath = GetDataPath(@"housing.txt");
var dataPath = GetDataPath(@"external/winequality-white.csv");
using (var env = new TlcEnvironment())
{
var subGraph = env.CreateExperiment();
Expand All @@ -295,7 +295,7 @@ public void TestCrossValidationMacro()
var modelCombineOutput = subGraph.Add(modelCombine);

var experiment = env.CreateExperiment();
var importInput = new ML.Data.TextLoader();
var importInput = new ML.Data.TextLoader() { CustomSchema = "col=Label:R4:11 col=Features:R4:0-10 sep=; header+" };
var importOutput = experiment.Add(importInput);

var crossValidate = new ML.Models.CrossValidator
Expand Down Expand Up @@ -324,7 +324,7 @@ public void TestCrossValidationMacro()
Assert.True(b);
double val = 0;
getter(ref val);
Assert.Equal(3.32, val, 1);
Assert.Equal(0.58, val, 1);
b = cursor.MoveNext();
Assert.False(b);
}
Expand Down
23 changes: 13 additions & 10 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ public void EntryPointTextToKeyToText()
}

private void RunTrainScoreEvaluate(string learner, string evaluator, string dataPath, string warningsPath, string overallMetricsPath,
string instanceMetricsPath, string confusionMatrixPath = null)
string instanceMetricsPath, string confusionMatrixPath = null, string loader = null)
{
string inputGraph = string.Format(@"
{{
Expand All @@ -738,6 +738,7 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
'Name': 'Data.TextLoader',
'Inputs': {{
'InputFile': '$file'
{8}
}},
'Outputs': {{
'Data': '$AllData'
Expand Down Expand Up @@ -797,7 +798,8 @@ private void RunTrainScoreEvaluate(string learner, string evaluator, string data
}}
}}", learner, evaluator, EscapePath(dataPath), EscapePath(warningsPath), EscapePath(overallMetricsPath), EscapePath(instanceMetricsPath),
confusionMatrixPath != null ? ", 'ConfusionMatrix': '$ConfusionMatrix'" : "",
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "");
confusionMatrixPath != null ? string.Format(", 'ConfusionMatrix' : '{0}'", EscapePath(confusionMatrixPath)) : "",
string.IsNullOrWhiteSpace(loader) ? "" : string.Format(",'CustomSchema': '{0}'", loader));

var jsonPath = DeleteOutputPath("graph.json");
File.WriteAllLines(jsonPath, new[] { inputGraph });
Expand Down Expand Up @@ -855,15 +857,16 @@ public void EntryPointEvaluateMultiClass()
Assert.Equal(3, CountRows(loader));
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact]
public void EntryPointEvaluateRegression()
{
var dataPath = GetDataPath("housing.txt");
var dataPath = GetDataPath(@"external/winequality-white.csv");
Copy link
Contributor

@glebuk glebuk May 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@"external/winequality-white.csv [](start = 39, length = 32)

Extract dataset info into static dataset classes so that there is no need to repeat paths and schemas. Ideally, you could even just return a new loader for each dataset. #Closed

var warningsPath = DeleteOutputPath("warnings.idv");
var overallMetricsPath = DeleteOutputPath("overall.idv");
var instanceMetricsPath = DeleteOutputPath("instance.idv");

RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator", dataPath, warningsPath, overallMetricsPath, instanceMetricsPath);
RunTrainScoreEvaluate("Trainers.StochasticDualCoordinateAscentRegressor", "Models.RegressionEvaluator",
dataPath, warningsPath, overallMetricsPath, instanceMetricsPath, loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+");

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), warningsPath))
Assert.Equal(0, CountRows(loader));
Expand All @@ -872,7 +875,7 @@ public void EntryPointEvaluateRegression()
Assert.Equal(1, CountRows(loader));

using (var loader = new BinaryLoader(Env, new BinaryLoader.Arguments(), instanceMetricsPath))
Assert.Equal(104, CountRows(loader));
Assert.Equal(975, CountRows(loader));
}

[Fact]
Expand All @@ -887,10 +890,10 @@ public void EntryPointSDCAMultiClass()
TestEntryPointRoutine("iris.txt", "Trainers.StochasticDualCoordinateAscentClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointSDCARegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.StochasticDualCoordinateAscentRegressor");
TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.StochasticDualCoordinateAscentRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+");
}

[Fact]
Expand Down Expand Up @@ -961,10 +964,10 @@ public void EntryPointHogwildSGD()
TestEntryPointRoutine("breast-cancer.txt", "Trainers.StochasticGradientDescentBinaryClassifier");
}

[Fact(Skip = "Missing data set. See https://github.com/dotnet/machinelearning/issues/3")]
[Fact()]
public void EntryPointPoissonRegression()
{
TestEntryPointRoutine("housing.txt", "Trainers.PoissonRegressor");
TestEntryPointRoutine(@"external/winequality-white.csv", "Trainers.PoissonRegressor", loader: "col=Label:R4:11 col=Features:R4:0-10 sep=; header+");
}

[Fact]
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.Tests/Microsoft.ML.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@

<ItemGroup>
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="FastTreeNative" />
</ItemGroup>
</Project>