From 3ee581b6f4e714a1cd7cf2241e5a112dd8bd9dbd Mon Sep 17 00:00:00 2001 From: Mustafa Bal Date: Wed, 29 Jan 2020 14:06:43 +0300 Subject: [PATCH] TextLoader now checks for presence of source file with clear error message (#4665) * Update TextLoaderSaverCatalog.cs * Update TextLoaderTests.cs * Update TextLoaderSaverCatalog.cs * Update TextLoaderSaverCatalog.cs * Update TextLoaderTests.cs * Update TextLoaderTests.cs The constructor should throw an exception when trying to load from a fake/missing file. * Added path checks to AssemlyLoading, Binary and SVMLight Loader APIs --- .../ComponentModel/AssemblyLoadingUtils.cs | 4 ++++ .../Binary/BinaryLoaderSaverCatalog.cs | 4 ++++ .../Text/TextLoaderSaverCatalog.cs | 12 +++++++++++ .../SvmLight/SvmLightLoaderSaverCatalog.cs | 8 ++++++++ test/Microsoft.ML.Tests/TextLoaderTests.cs | 20 +++---------------- 5 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/Microsoft.ML.Core/ComponentModel/AssemblyLoadingUtils.cs b/src/Microsoft.ML.Core/ComponentModel/AssemblyLoadingUtils.cs index 493c0d2998..249ee23f73 100644 --- a/src/Microsoft.ML.Core/ComponentModel/AssemblyLoadingUtils.cs +++ b/src/Microsoft.ML.Core/ComponentModel/AssemblyLoadingUtils.cs @@ -31,6 +31,10 @@ public static void LoadAndRegister(IHostEnvironment env, string[] assemblies) { // REVIEW: Will LoadFrom ever return null? Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var assem = LoadAssembly(env, path); if (assem != null) continue; diff --git a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs index b4a48bd45a..fd15ab42d3 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Binary/BinaryLoaderSaverCatalog.cs @@ -47,6 +47,10 @@ public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, IMult public static IDataView LoadFromBinary(this DataOperationsCatalog catalog, string path) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var env = catalog.GetEnvironment(); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 8962b550bf..abadc8309a 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -112,6 +112,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, bool allowSparse = TextLoader.Defaults.AllowSparse) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var options = new TextLoader.Options { @@ -154,6 +158,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog cata bool allowSparse = TextLoader.Defaults.AllowSparse) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } // REVIEW: it is almost always a mistake to have a 'trainable' text loader here. // Therefore, we are going to disallow data sample. @@ -179,6 +187,10 @@ public static IDataView LoadFromTextFile(this DataOperationsCatalog catalog, str TextLoader.Options options = null) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var env = catalog.GetEnvironment(); var source = new MultiFileSource(path); diff --git a/src/Microsoft.ML.Transforms/SvmLight/SvmLightLoaderSaverCatalog.cs b/src/Microsoft.ML.Transforms/SvmLight/SvmLightLoaderSaverCatalog.cs index e07665b43c..1a1afe0128 100644 --- a/src/Microsoft.ML.Transforms/SvmLight/SvmLightLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Transforms/SvmLight/SvmLightLoaderSaverCatalog.cs @@ -65,6 +65,10 @@ public static IDataView LoadFromSvmLightFile(this DataOperationsCatalog catalog, bool zeroBased = false) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var file = new MultiFileSource(path); var loader = catalog.CreateSvmLightLoader(numberOfRows, inputSize, zeroBased, file); @@ -83,6 +87,10 @@ public static IDataView LoadFromSvmLightFileWithFeatureNames(this DataOperations long? numberOfRows = null) { Contracts.CheckNonEmpty(path, nameof(path)); + if (!File.Exists(path)) + { + throw Contracts.ExceptParam(nameof(path), "File does not exist at path: {0}", path); + } var file = new MultiFileSource(path); var loader = catalog.CreateSvmLightLoaderWithFeatureNames(numberOfRows, file); diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index c439ae1a03..95578214e7 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -142,20 +142,6 @@ public TextLoaderTests(ITestOutputHelper output) env = new ConsoleEnvironment(42).AddStandardComponents(); } - [Fact] - public void ConstructorDoesntThrow() - { - var mlContext = new MLContext(seed: 1); - - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt")); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt", hasHeader: true)); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt", hasHeader: false)); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt", hasHeader: false, trimWhitespace: false, allowSparse: false)); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt", hasHeader: false, allowSparse: false)); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt", hasHeader: false, allowQuoting: false)); - Assert.NotNull(mlContext.Data.LoadFromTextFile("fakeFile.txt")); - } - [Fact] public void CanSuccessfullyApplyATransform() { @@ -588,11 +574,11 @@ public void CanSuccessfullyTrimSpaces() } [Fact] - public void ThrowsExceptionWithPropertyName() + public void ThrowsExceptionWithMissingFile() { var mlContext = new MLContext(seed: 1); - var ex = Assert.Throws(() => mlContext.Data.LoadFromTextFile("fakefile.txt")); - Assert.StartsWith($"Field 'String1' is missing the {nameof(LoadColumnAttribute)} attribute", ex.Message); + var ex = Assert.Throws(() => mlContext.Data.LoadFromTextFile("fakefile.txt")); + Assert.StartsWith("File does not exist at path: fakefile.txt", ex.Message); } [Fact]