Skip to content

Added working version of checking whether file is available for access #4938

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public AutoFitTests(ITestOutputHelper output) : base(output)
public void AutoFitBinaryTest()
{
var context = new MLContext(1);
var dataPath = DatasetUtil.DownloadUciAdultDataset();
var dataPath = DatasetUtil.GetUciAdultDataset();
var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel);
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
var trainData = textLoader.Load(dataPath);
Expand Down Expand Up @@ -107,7 +107,7 @@ private void Context_Log(object sender, LoggingEventArgs e)
public void AutoFitRegressionTest()
{
var context = new MLContext(1);
var dataPath = DatasetUtil.DownloadMlNetGeneratedRegressionDataset();
var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
var trainData = textLoader.Load(dataPath);
Expand Down
12 changes: 6 additions & 6 deletions test/Microsoft.ML.AutoML.Tests/ColumnInferenceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public ColumnInferenceTests(ITestOutputHelper output) : base(output)
[Fact]
public void UnGroupReturnsMoreColumnsThanGroup()
{
var dataPath = DatasetUtil.DownloadUciAdultDataset();
var dataPath = DatasetUtil.GetUciAdultDataset();
var context = new MLContext(1);
var columnInferenceWithoutGrouping = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel, groupColumns: false);
foreach (var col in columnInferenceWithoutGrouping.TextLoaderOptions.Columns)
Expand All @@ -34,21 +34,21 @@ public void UnGroupReturnsMoreColumnsThanGroup()
/// <summary>
/// Checks that <c>InferColumns</c> throws <see cref="ArgumentException"/> when it is asked to use
/// a label column named "Junk" with <c>groupColumns: false</c> on the UCI Adult dataset file.
/// </summary>
// NOTE(review): dataPath is declared on two consecutive lines below, differing only in the
// DatasetUtil method called. This looks like a replaced line followed by its replacement
// from a rendered diff — confirm which one belongs in the file.
[Fact]
public void IncorrectLabelColumnThrows()
{
var dataPath = DatasetUtil.DownloadUciAdultDataset();
var dataPath = DatasetUtil.GetUciAdultDataset();
var context = new MLContext(1);
// The call is wrapped in a delegate so that Assert.Throws can invoke it and observe the exception.
Assert.Throws<ArgumentException>(new System.Action(() => context.Auto().InferColumns(dataPath, "Junk", groupColumns: false)));
}

/// <summary>
/// Checks that <c>InferColumns</c> throws <see cref="ArgumentOutOfRangeException"/> when the
/// label column index passed for the UCI Adult dataset file is 100.
/// </summary>
// NOTE(review): the two Assert.Throws lines below differ only in the DatasetUtil method used to
// obtain the file path. This looks like a replaced line followed by its replacement from a
// rendered diff — confirm which one belongs in the file.
[Fact]
public void LabelIndexOutOfBoundsThrows()
{
Assert.Throws<ArgumentOutOfRangeException>(() => new MLContext(1).Auto().InferColumns(DatasetUtil.DownloadUciAdultDataset(), 100));
Assert.Throws<ArgumentOutOfRangeException>(() => new MLContext(1).Auto().InferColumns(DatasetUtil.GetUciAdultDataset(), 100));
}

[Fact]
public void IdentifyLabelColumnThroughIndexWithHeader()
{
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.DownloadUciAdultDataset(), 14, hasHeader: true);
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.GetUciAdultDataset(), 14, hasHeader: true);
Assert.True(result.TextLoaderOptions.HasHeader);
var labelCol = result.TextLoaderOptions.Columns.First(c => c.Source[0].Min == 14 && c.Source[0].Max == 14);
Assert.Equal("hours-per-week", labelCol.Name);
Expand All @@ -58,7 +58,7 @@ public void IdentifyLabelColumnThroughIndexWithHeader()
[Fact]
public void IdentifyLabelColumnThroughIndexWithoutHeader()
{
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.DownloadIrisDataset(), DatasetUtil.IrisDatasetLabelColIndex);
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.GetIrisDataset(), DatasetUtil.IrisDatasetLabelColIndex);
Assert.False(result.TextLoaderOptions.HasHeader);
var labelCol = result.TextLoaderOptions.Columns.First(c => c.Source[0].Min == DatasetUtil.IrisDatasetLabelColIndex &&
c.Source[0].Max == DatasetUtil.IrisDatasetLabelColIndex);
Expand Down Expand Up @@ -177,7 +177,7 @@ public void DefaultColumnNamesNoGrouping()
public void InferColumnsColumnInfoParam()
{
var columnInfo = new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel };
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.DownloadMlNetGeneratedRegressionDataset(),
var result = new MLContext(1).Auto().InferColumns(DatasetUtil.GetMlNetGeneratedRegressionDataset(),
columnInfo);
var labelCol = result.TextLoaderOptions.Columns.First(c => c.Name == DatasetUtil.MlNetGeneratedRegressionLabel);
Assert.Equal(DataKind.Single, labelCol.DataKind);
Expand Down
63 changes: 13 additions & 50 deletions test/Microsoft.ML.AutoML.Tests/DatasetUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Threading;
using Microsoft.ML.Data;
using Microsoft.ML.TestFrameworkCommon;

namespace Microsoft.ML.AutoML.Test
{
Expand All @@ -24,67 +24,30 @@ internal static class DatasetUtil

private static IDataView _uciAdultDataView;

/// <summary>
/// Gets the path that <see cref="GetDataPath"/> produces for the file "adult.tiny.with-schema.txt".
/// </summary>
/// <returns>The path returned by <see cref="GetDataPath"/> for that file name.</returns>
public static string GetUciAdultDataset()
{
    const string uciAdultFileName = "adult.tiny.with-schema.txt";
    return GetDataPath(uciAdultFileName);
}

/// <summary>
/// Gets the path that <see cref="GetDataPath"/> produces for the file "generated_regression_dataset.csv".
/// </summary>
/// <returns>The path returned by <see cref="GetDataPath"/> for that file name.</returns>
public static string GetMlNetGeneratedRegressionDataset()
{
    const string regressionFileName = "generated_regression_dataset.csv";
    return GetDataPath(regressionFileName);
}

/// <summary>
/// Gets the path that <see cref="GetDataPath"/> produces for the file "iris.txt".
/// </summary>
/// <returns>The path returned by <see cref="GetDataPath"/> for that file name.</returns>
public static string GetIrisDataset()
{
    const string irisFileName = "iris.txt";
    return GetDataPath(irisFileName);
}

/// <summary>
/// Builds the path of a data file: the value returned by <c>TestCommon.GetRepoRoot()</c>
/// combined with "test", "data" and <paramref name="fileName"/>.
/// </summary>
/// <param name="fileName">Last segment of the resulting path.</param>
/// <returns>The combined path. This method does not check that the file exists.</returns>
public static string GetDataPath(string fileName)
{
    var repoRoot = TestCommon.GetRepoRoot();
    var combined = Path.Combine(repoRoot, "test", "data", fileName);
    return combined;
}

/// <summary>
/// Returns the UCI Adult data as an <see cref="IDataView"/>. The view is built on the first
/// call and stored in the static field <c>_uciAdultDataView</c>; later calls return that
/// stored instance.
/// </summary>
/// <returns>The cached data view.</returns>
// NOTE(review): the null check and assignment below are not guarded by any lock in this block;
// confirm whether callers can reach this concurrently.
public static IDataView GetUciAdultDataView()
{
if (_uciAdultDataView == null)
{
var context = new MLContext(1);
// NOTE(review): uciAdultDataFile is declared on two consecutive lines, differing only in the
// method called. This looks like a replaced line followed by its replacement from a rendered
// diff — confirm which one belongs in the file.
var uciAdultDataFile = DownloadUciAdultDataset();
var uciAdultDataFile = GetUciAdultDataset();
// Infer the column layout from the file, using UciAdultLabel as the label column.
var columnInferenceResult = context.Auto().InferColumns(uciAdultDataFile, UciAdultLabel);
// Create a text loader from the inferred options and load the same file with it.
var textLoader = context.Data.CreateTextLoader(columnInferenceResult.TextLoaderOptions);
_uciAdultDataView = textLoader.Load(uciAdultDataFile);
}
return _uciAdultDataView;
}

/// <summary>
/// Downloads the UCI Adult dataset from the ML.Net repo via <see cref="DownloadIfNotExists"/>,
/// using "uciadult.dataset" as the local file name.
/// </summary>
/// <returns>The value returned by <see cref="DownloadIfNotExists"/>.</returns>
public static string DownloadUciAdultDataset()
{
    const string sourceUrl = "https://raw.githubusercontent.com/dotnet/machinelearning/f0e639af5ffdc839aae8e65d19b5a9a1f0db634a/test/data/adult.tiny.with-schema.txt";
    const string localFile = "uciadult.dataset";
    return DownloadIfNotExists(sourceUrl, localFile);
}

/// <summary>
/// Downloads the generated regression dataset via <see cref="DownloadIfNotExists"/>,
/// using "mlnet_generated_regression.dataset" as the local file name.
/// </summary>
/// <returns>The value returned by <see cref="DownloadIfNotExists"/>.</returns>
public static string DownloadMlNetGeneratedRegressionDataset()
{
    const string sourceUrl = "https://raw.githubusercontent.com/dotnet/machinelearning/e78971ea6fd736038b4c355b840e5cbabae8cb55/test/data/generated_regression_dataset.csv";
    const string localFile = "mlnet_generated_regression.dataset";
    return DownloadIfNotExists(sourceUrl, localFile);
}

/// <summary>
/// Downloads the iris dataset via <see cref="DownloadIfNotExists"/>,
/// using "iris.dataset" as the local file name.
/// </summary>
/// <returns>The value returned by <see cref="DownloadIfNotExists"/>.</returns>
public static string DownloadIrisDataset()
{
    const string sourceUrl = "https://raw.githubusercontent.com/dotnet/machinelearning/54596ac/test/data/iris.txt";
    const string localFile = "iris.dataset";
    return DownloadIfNotExists(sourceUrl, localFile);
}

/// <summary>
/// Returns <paramref name="dataFile"/> once it exists with a non-zero length, downloading it
/// from <paramref name="baseGitPath"/> first when it is missing. Up to ten attempts are made,
/// with a 300 ms pause after each unsuccessful one.
/// </summary>
/// <param name="baseGitPath">Absolute URL the file is downloaded from.</param>
/// <param name="dataFile">Local path the downloaded content is copied to; also the return value.</param>
/// <returns><paramref name="dataFile"/>.</returns>
/// <exception cref="Exception">
/// The file is still missing or empty after all attempts. The most recent download or copy
/// failure, if there was one, is attached as the inner exception.
/// </exception>
private static string DownloadIfNotExists(string baseGitPath, string dataFile)
{
    const int maxAttempts = 10;
    const int retryDelayMilliseconds = 300;

    // Kept so the final exception can report why the download never succeeded.
    Exception lastError = null;

    for (var attempt = 0; attempt < maxAttempts; attempt++)
    {
        // if file doesn't already exist, download it
        if (!File.Exists(dataFile))
        {
            var tempFile = Path.GetTempFileName();

            try
            {
                using (var client = new WebClient())
                {
                    client.DownloadFile(new Uri($"{baseGitPath}"), tempFile);
                }

                // Re-check: the file may have been created by someone else while the download ran.
                if (!File.Exists(dataFile))
                {
                    File.Copy(tempFile, dataFile);
                }
            }
            catch (Exception ex)
            {
                // Deliberate best effort: any failure just leads to another attempt.
                lastError = ex;
            }
            finally
            {
                // Path.GetTempFileName creates the file on disk, so remove it on every path,
                // including failed downloads and the case where dataFile already appeared.
                try
                {
                    File.Delete(tempFile);
                }
                catch (IOException)
                {
                    // A leftover temp file must not turn a successful download into a failure.
                }
                catch (UnauthorizedAccessException)
                {
                    // Same as above: cleanup is best effort only.
                }
            }
        }

        if (File.Exists(dataFile) && (new FileInfo(dataFile).Length > 0))
        {
            return dataFile;
        }

        Thread.Sleep(retryDelayMilliseconds);
    }

    throw new Exception($"Failed to download test file {dataFile}.", lastError);
}

public static string GetFlowersDataset()
{
const string datasetName = @"flowers";
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.AutoML.Tests/UserInputValidationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ public void ValidateInferColumnsArgsEmptyFile()
/// <summary>
/// Calls <c>UserInputValidationUtil.ValidateInferColumnsArgs</c> with the UCI Adult dataset
/// path. The test contains no explicit assertion; presumably it passes when the call does not
/// throw — confirm.
/// </summary>
// NOTE(review): the two calls below differ only in the DatasetUtil method used to obtain the
// path. This looks like a replaced line followed by its replacement from a rendered diff —
// confirm which one belongs in the file.
[Fact]
public void ValidateInferColsPath()
{
UserInputValidationUtil.ValidateInferColumnsArgs(DatasetUtil.DownloadUciAdultDataset());
UserInputValidationUtil.ValidateInferColumnsArgs(DatasetUtil.GetUciAdultDataset());
}

[Fact]
Expand Down