Skip to content

Commit

Permalink
TextLoader now checks for presence of source file with clear error me…
Browse files Browse the repository at this point in the history
…ssage (#4665)

* Update TextLoaderSaverCatalog.cs

* Update TextLoaderTests.cs

* Update TextLoaderSaverCatalog.cs

* Update TextLoaderSaverCatalog.cs

* Update TextLoaderTests.cs

* Update TextLoaderTests.cs

The constructor should throw an exception when trying to load from a fake/missing file.

* Added path checks to AssemlyLoading, Binary and SVMLight Loader APIs
  • Loading branch information
mstfbl committed Jan 29, 2020
1 parent 202642b commit 3ee581b
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 17 deletions.
4 changes: 4 additions & 0 deletions src/Microsoft.ML.Core/ComponentModel/AssemblyLoadingUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ public static void LoadAndRegister(IHostEnvironment env, string[] assemblies)
{
// REVIEW: Will LoadFrom ever return null?
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}
var assem = LoadAssembly(env, path);
if (assem != null)
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, IMult
public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

var env = catalog.GetEnvironment();

Expand Down
12 changes: 12 additions & 0 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog,
bool allowSparse = TextLoader.Defaults.AllowSparse)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

var options = new TextLoader.Options
{
Expand Down Expand Up @@ -154,6 +158,10 @@ public static IDataView LoadFromTextFile<TInput>(this DataOperationsCatalog cata
bool allowSparse = TextLoader.Defaults.AllowSparse)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
// Therefore, we are going to disallow data sample.
Expand All @@ -179,6 +187,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, str
TextLoader.Options options = null)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

var env = catalog.GetEnvironment();
var source = new MultiFileSource(path);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog,
bool zeroBased = false)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

var file = new MultiFileSource(path);
var loader = catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, file);
Expand All @@ -83,6 +87,10 @@ public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperations
long? numberOfRows = null)
{
Contracts.CheckNonEmpty(path, nameof(path));
if (!File.Exists(path))
{
throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path);
}

var file = new MultiFileSource(path);
var loader = catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, file);
Expand Down
20 changes: 3 additions & 17 deletions test/Microsoft.ML.Tests/TextLoaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,20 +142,6 @@ public TextLoaderTests(ITestOutputHelper output)
env = new ConsoleEnvironment(42).AddStandardComponents();
}

[Fact]
public void ConstructorDoesntThrow()
{
var mlContext = new MLContext(seed: 1);

Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt"));
Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt", hasHeader: true));
Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt", hasHeader: false));
Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt", hasHeader: false, trimWhitespace: false, allowSparse: false));
Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt", hasHeader: false, allowSparse: false));
Assert.NotNull(mlContext.Data.LoadFromTextFile<Input>("fakeFile.txt", hasHeader: false, allowQuoting: false));
Assert.NotNull(mlContext.Data.LoadFromTextFile<InputWithUnderscore>("fakeFile.txt"));
}

[Fact]
public void CanSuccessfullyApplyATransform()
{
Expand Down Expand Up @@ -588,11 +574,11 @@ public void CanSuccessfullyTrimSpaces()
}

[Fact]
public void ThrowsExceptionWithPropertyName()
public void ThrowsExceptionWithMissingFile()
{
var mlContext = new MLContext(seed: 1);
var ex = Assert.Throws<InvalidOperationException>(() => mlContext.Data.LoadFromTextFile<ModelWithoutColumnAttribute>("fakefile.txt"));
Assert.StartsWith($"Field 'String1' is missing the {nameof(LoadColumnAttribute)} attribute", ex.Message);
var ex = Assert.Throws<ArgumentOutOfRangeException>(() => mlContext.Data.LoadFromTextFile<ModelWithoutColumnAttribute>("fakefile.txt"));
Assert.StartsWith("File does not exist at path: fakefile.txt", ex.Message);
}

[Fact]
Expand Down

0 comments on commit 3ee581b

Please sign in to comment.