Skip to content

Fixed and Added unit tests for EnsureResourceAsync hanging issue #4943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fedd23c
Update ResourceManagerUtils.cs
mstfbl Mar 13, 2020
85f10af
Added TestDownloadFromLocal
mstfbl Mar 16, 2020
63b3f33
Added TestDownloadError
mstfbl Mar 16, 2020
e16b7d6
Revert "Added TestDownloadError"
mstfbl Mar 16, 2020
2caf810
Edit EnsureResourceAsync and its dependencies
mstfbl Mar 16, 2020
6e05a87
Edited TestDownloadFromLocal and re-added TestDownloadError()
mstfbl Mar 16, 2020
69b9827
Disabling TestDownloadFromLocal and TestDownloadError
mstfbl Mar 16, 2020
6e5b246
Edits
mstfbl Mar 16, 2020
cd56549
Re-activated TestDownloadError and TestDownloadFromLocal
mstfbl Mar 16, 2020
2c4d22e
Edits, added 5 min timeout, and debugging requested url
mstfbl Mar 16, 2020
2f67666
Removed timeouts, and re-added Resource download tests in separate un…
mstfbl Mar 17, 2020
8bf03c8
Edits
mstfbl Mar 17, 2020
fd3c7e6
Removed hardcode "microsoft.com" check for HTTP Status Code
mstfbl Mar 18, 2020
bc8b065
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
95514c4
Edits for reviews, removing hardcodings of status codes
mstfbl Mar 18, 2020
93b5454
Removing parentheses from one-liner if statement
mstfbl Mar 18, 2020
a54c7e0
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
b8d5094
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
38fc48f
Nit fix + test case fixes
mstfbl Mar 18, 2020
666d328
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
a9e1b5d
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
d460db7
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
ed3c6fc
Update ResourceManagerUtils.cs
mstfbl Mar 19, 2020
d7b43ed
Added checking for the host of the download absoluteURL equaling "aka…
mstfbl Mar 24, 2020
d9cdc07
Edit TestResourceDownload
mstfbl Mar 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 39 additions & 19 deletions src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ public static string GetUrl(string suffix)
/// Returns a <see cref="Task"/> that tries to download a resource from a specified url, and returns the path to which it was
/// downloaded, and an exception if one was thrown.
/// </summary>
/// <remarks>
/// The function <see cref="ResourceManagerUtils.DownloadResource"/> checks whether or not the absolute URL with the
/// default host "aka.ms" formed from <paramref name="relativeUrl"/> redirects to the default Microsoft homepage.
/// As such, only absolute URLs with the host "aka.ms" are supported with <see cref="ResourceManagerUtils.EnsureResourceAsync"/>.
/// </remarks>
/// <param name="env">The host environment.</param>
/// <param name="ch">A channel to provide information about the download.</param>
/// <param name="relativeUrl">The relative url from which to download.
Expand All @@ -109,6 +114,8 @@ public async Task<ResourceDownloadResults> EnsureResourceAsync(IHostEnvironment
return new ResourceDownloadResults(filePath,
$"Could not create a valid URI from the base URI '{MlNetResourcesUrl}' and the relative URI '{relativeUrl}'");
}
if (absoluteUrl.Host != "aka.ms")
throw new NotSupportedException("The function ResourceManagerUtils.EnsureResourceAsync only supports downloading from URLs of the host \"aka.ms\"");
return new ResourceDownloadResults(filePath,
await DownloadFromUrlWithRetryAsync(env, ch, absoluteUrl.AbsoluteUri, fileName, timeout, filePath), absoluteUrl.AbsoluteUri);
}
Expand Down Expand Up @@ -160,27 +167,8 @@ private async Task<string> DownloadFromUrlAsync(IHostEnvironment env, IChannel c
deleteNeeded = true;
return (await t).Message;
}

return CheckValidDownload(ch, filePath, url, ref deleteNeeded);
}
}

/// <summary>
/// Inspects a freshly downloaded file and reports whether it looks like the www.microsoft.com page that
/// aka.ms serves when the requested relative url does not exist.
/// </summary>
/// <param name="ch">The channel associated with the download; not used by this check.</param>
/// <param name="filePath">Path of the downloaded file to inspect.</param>
/// <param name="url">The url the file was downloaded from; only used to build the error message.</param>
/// <param name="deleteNeeded">Set to <c>true</c> when the file is judged to be the redirect page and to
/// <c>false</c> when a small file passes the check. Left untouched for files larger than 4096 bytes.</param>
/// <returns><c>null</c> when the download looks valid, otherwise a message describing the redirect.</returns>
private static string CheckValidDownload(IChannel ch, string filePath, string url, ref bool deleteNeeded)
{
    // A file above 4096 bytes is taken to be real content rather than the redirect page, so it is accepted
    // without being read.
    if (new FileInfo(filePath).Length > 4096)
        return null;

    string contents;
    using (var reader = new StreamReader(filePath))
        contents = reader.ReadToEnd();

    // The redirect page is recognised by being an html document that mentions microsoft.com.
    bool looksLikeRedirectPage =
        contents.Contains("<head>") && contents.Contains("<body>") && contents.Contains("microsoft.com");

    string message = looksLikeRedirectPage
        ? $"The url '{url}' does not exist. Url was redirected to www.microsoft.com."
        : null;

    deleteNeeded = message != null;
    return message;
}

private static void TryDelete(IChannel ch, string filePath, bool warn = true)
Expand Down Expand Up @@ -274,6 +262,8 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
using (var ws = fh.CreateWriteStream())
{
var headers = webClient.ResponseHeaders.GetValues("Content-Length");
if (uri.Host == "aka.ms" && IsRedirectToDefaultPage(uri.AbsoluteUri))
throw new NotSupportedException($"The provided url ({uri}) redirects to the default url ({DefaultUrl})");
if (Utils.Size(headers) == 0 || !long.TryParse(headers[0], out var size))
size = 10000000;

Expand Down Expand Up @@ -311,6 +301,36 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
}
}

/// <summary>This method checks whether or not the provided aka.ms url redirects to
/// Microsoft's homepage, as the default faulty aka.ms URLs redirect to https://www.microsoft.com/?ref=aka .</summary>
/// <remarks>
/// The request is issued with automatic redirects disabled so that the redirect answer itself can be inspected.
/// The answer is examined both when it is returned normally and when it is attached to a
/// <see cref="WebException"/>, so the result does not depend on which of the two ways the redirect is surfaced.
/// A failure that carries no HTTP response (for example a connection error) is reported as "not a redirect to
/// the default page" instead of raising a <see cref="NullReferenceException"/>.
/// </remarks>
/// <param name="url"> The provided url to check </param>
/// <returns><c>true</c> only when the answer has status <see cref="HttpStatusCode.Redirect"/> and its
/// Location header is exactly https://www.microsoft.com/?ref=aka; <c>false</c> in every other case,
/// including requests that are not HTTP requests.</returns>
public bool IsRedirectToDefaultPage(string url)
{
    try
    {
        // Only HTTP requests can be redirected to the default aka.ms webpage; file (and any other
        // non-HTTP) requests are reported as not redirected.
        if (!(WebRequest.Create(url) is HttpWebRequest httpWebRequest))
            return false;

        httpWebRequest.AllowAutoRedirect = false;
        // Dispose the response so the underlying connection is released.
        using (var httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            return IsDefaultPageRedirect(httpWebResponse);
    }
    catch (WebException e)
    {
        // e.Response is null when the failure happened before any answer was received.
        using (WebResponse failedResponse = e.Response)
            return failedResponse is HttpWebResponse httpFailedResponse && IsDefaultPageRedirect(httpFailedResponse);
    }
}

/// <summary>Tells whether <paramref name="response"/> is the redirect that aka.ms issues for unknown urls.</summary>
/// <param name="response">The HTTP answer to inspect.</param>
private static bool IsDefaultPageRedirect(HttpWebResponse response)
{
    return response.StatusCode == HttpStatusCode.Redirect
        && string.Equals(response.Headers["Location"], "https://www.microsoft.com/?ref=aka", StringComparison.Ordinal);
}

public static ResourceDownloadResults GetErrorMessage(out string errorMessage, params ResourceDownloadResults[] result)
{
var errorResult = result.FirstOrDefault(res => !string.IsNullOrEmpty(res.ErrorMessage));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ private string EnsureModelFile(IHostEnvironment env, out int linesToSkip, WordEm
{
string dir = kind == WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding ? Path.Combine("Text", "Sswe") : "WordVectors";
var url = $"{dir}/{modelFileName}";
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(Host, ch, url, modelFileName, dir, Timeout);
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, url, modelFileName, dir, Timeout);
ensureModel.Wait();
var errorResult = ResourceManagerUtils.GetErrorMessage(out var errorMessage, ensureModel.Result);
if (errorResult != null)
Expand Down
142 changes: 142 additions & 0 deletions test/Microsoft.ML.Core.Tests/UnitTests/TestResourceDownload.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.RunTests;
using Microsoft.ML.Runtime;
using Xunit;
using Xunit.Abstractions;

[assembly: CollectionBehavior(DisableTestParallelization = true)]

namespace Microsoft.ML.Core.Tests.UnitTests
{
/// <summary>
/// Tests for <c>ResourceManagerUtils.Instance.EnsureResourceAsync</c>: two custom resource base urls for which
/// a <see cref="NotSupportedException"/> is expected, followed by a download with the originally configured
/// base url that is expected to succeed.
/// </summary>
public class TestResourceDownload : BaseTestBaseline
{
    public TestResourceDownload(ITestOutputHelper helper)
        : base(helper)
    {
    }

    /// <summary>
    /// Runs the three download scenarios in sequence. The environment variables touched by the test are
    /// captured up front and restored in the <c>finally</c> block regardless of the outcome.
    /// </summary>
    [Fact]
    [TestCategory("ResourceDownload")]
    public async Task TestDownloadError()
    {
        var envVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
        var timeoutVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable);
        var resourcePathVarOld = Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable);
        Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, null);

        try
        {
            var saveToDir = GetOutputPath("copyto");
            // NOTE(review): breast-cancer.txt is not referenced anywhere else in this test; kept as in the
            // original clean-up, confirm whether it is still needed.
            DeleteOutputPath("copyto", "breast-cancer.txt");

            // Bad url.
            await ExpectNotSupportedAsync("https://fake-website/fake-model.model/", "test_bad_url.model", "Bad url", saveToDir);

            // Good url, bad page.
            await ExpectNotSupportedAsync("https://cnn.com/", "test_cnn_page_does_not_exist.model", "Good url, bad page", saveToDir);

            // Good url, good page: go back to the base url that was configured before the test started.
            Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
            if (Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable) != envVarOld)
                Fail("Environment variable not set properly");

            DeleteOutputPath("copyto", "sentiment.emd");
            var sbOut = new StringBuilder();
            var sbErr = new StringBuilder();
            using (var outWriter = new StringWriter(sbOut))
            using (var errWriter = new StringWriter(sbErr))
            {
                var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
                using (var ch = env.Start("Downloading"))
                {
                    var fileName = "sentiment.emd";
                    // Awaiting surfaces any fault or cancellation as an exception, so no separate check of the
                    // task status is needed afterwards.
                    var results = await ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "text/Sswe/sentiment.emd", fileName, saveToDir, 1 * 60 * 1000);

                    if (results.ErrorMessage != null)
                        Fail($"Expected zero length error string. Received error: {results.ErrorMessage}");

                    var expectedPath = GetOutputPath("copyto", "sentiment.emd");
                    if (!File.Exists(expectedPath))
                    {
                        Fail($"File '{expectedPath}' does not exist. " +
                            $"File was downloaded to '{results.FileName}' instead. " +
                            $"MICROSOFTML_RESOURCE_PATH is set to {Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable)}");
                    }
                }
            }
            Done();
        }
        finally
        {
            // Set environment variable back to its old value.
            Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
            Environment.SetEnvironmentVariable(ResourceManagerUtils.TimeoutEnvVariable, timeoutVarOld);
            Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, resourcePathVarOld);
        }
    }

    /// <summary>
    /// Points the custom resources url environment variable at <paramref name="baseUrl"/> and verifies that
    /// requesting "Image/ResNet_18_Updated.model" throws <see cref="NotSupportedException"/> and leaves no
    /// file named <paramref name="fileName"/> in <paramref name="saveToDir"/>.
    /// </summary>
    /// <param name="baseUrl">Absolute url to store in the custom resources url environment variable.</param>
    /// <param name="fileName">File name handed to the download call and checked for absence afterwards.</param>
    /// <param name="scenario">Label written to the test log for this scenario.</param>
    /// <param name="saveToDir">Directory handed to the download call.</param>
    private async Task ExpectNotSupportedAsync(string baseUrl, string fileName, string scenario, string saveToDir)
    {
        if (!Uri.TryCreate(baseUrl, UriKind.Absolute, out var baseUri))
            Fail("Uri could not be created");

        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, baseUri.AbsoluteUri);
        if (Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable) != baseUri.AbsoluteUri)
            Fail("Environment variable not set properly");

        DeleteOutputPath("copyto", "ResNet_18_Updated.model");
        var sbOut = new StringBuilder();
        var sbErr = new StringBuilder();
        using (var outWriter = new StringWriter(sbOut))
        using (var errWriter = new StringWriter(sbErr))
        {
            var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
            using (var ch = env.Start("Downloading"))
            {
                await Assert.ThrowsAsync<NotSupportedException>(() => ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000));

                Log(scenario);
                Log($"out: {sbOut.ToString()}");
                Log($"error: {sbErr.ToString()}");

                var savedPath = Path.Combine(saveToDir, fileName);
                if (File.Exists(savedPath))
                    Fail($"File '{savedPath}' should have been deleted.");
            }
        }
    }
}
}