Skip to content

Fixed and Added unit tests for EnsureResourceAsync hanging issue #4943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fedd23c
Update ResourceManagerUtils.cs
mstfbl Mar 13, 2020
85f10af
Added TestDownloadFromLocal
mstfbl Mar 16, 2020
63b3f33
Added TestDownloadError
mstfbl Mar 16, 2020
e16b7d6
Revert "Added TestDownloadError"
mstfbl Mar 16, 2020
2caf810
Edit EnsureResourceAsync and its dependencies
mstfbl Mar 16, 2020
6e05a87
Edited TestDownloadFromLocal and re-added TestDownloadError()
mstfbl Mar 16, 2020
69b9827
Disabling TestDownloadFromLocal and TestDownloadError
mstfbl Mar 16, 2020
6e5b246
Edits
mstfbl Mar 16, 2020
cd56549
Re-activated TestDownloadError and TestDownloadFromLocal
mstfbl Mar 16, 2020
2c4d22e
Edits, added 5 min timeout, and debugging requested url
mstfbl Mar 16, 2020
2f67666
Removed timeouts, and re-added Resource download tests in separate un…
mstfbl Mar 17, 2020
8bf03c8
Edits
mstfbl Mar 17, 2020
fd3c7e6
Removed hardcode "microsoft.com" check for HTTP Status Code
mstfbl Mar 18, 2020
bc8b065
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
95514c4
Edits for reviews, removing hardcodings of status codes
mstfbl Mar 18, 2020
93b5454
Removing paranthesis from one-liner if statement
mstfbl Mar 18, 2020
a54c7e0
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
b8d5094
Update TestResourceDownload.cs
mstfbl Mar 18, 2020
38fc48f
Nit fix + test case fixes
mstfbl Mar 18, 2020
666d328
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
a9e1b5d
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
d460db7
Update ResourceManagerUtils.cs
mstfbl Mar 18, 2020
ed3c6fc
Update ResourceManagerUtils.cs
mstfbl Mar 19, 2020
d7b43ed
Added checking for the host of the download absoluteURL euqaling "aka…
mstfbl Mar 24, 2020
d9cdc07
Edit TestResourceDownload
mstfbl Mar 24, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 43 additions & 24 deletions src/Microsoft.ML.Core/Utilities/ResourceManagerUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ public async Task<ResourceDownloadResults> EnsureResourceAsync(IHostEnvironment
await DownloadFromUrlWithRetryAsync(env, ch, absoluteUrl.AbsoluteUri, fileName, timeout, filePath), absoluteUrl.AbsoluteUri);
}

private async Task<string> DownloadFromUrlWithRetryAsync(IHostEnvironment env, IChannel ch, string url, string fileName,
int timeout, string filePath, int retryTimes = 5)
private async Task<string> DownloadFromUrlWithRetryAsync(IHostEnvironment env, IChannel ch, string url, string fileName, int timeout, string filePath, int retryTimes = 5)
{
var downloadResult = "";

Expand All @@ -125,8 +124,12 @@ private async Task<string> DownloadFromUrlWithRetryAsync(IHostEnvironment env, I
if (string.IsNullOrEmpty(thisDownloadResult))
return thisDownloadResult;
else
{
downloadResult += thisDownloadResult + @"\n";

// Do not retry if the URL does not exist
if (thisDownloadResult.Contains("does not exist"))
return downloadResult;
}
await Task.Delay(10 * 1000);
}

Expand All @@ -136,7 +139,7 @@ private async Task<string> DownloadFromUrlWithRetryAsync(IHostEnvironment env, I
/// <returns>Returns the error message if an error occurred, null if download was successful.</returns>
private async Task<string> DownloadFromUrlAsync(IHostEnvironment env, IChannel ch, string url, string fileName, int timeout, string filePath)
{
using (var webClient = new WebClient())
using (var webClient = new WebClientResponse())
using (var downloadCancel = new CancellationTokenSource())
{
bool deleteNeeded = false;
Expand All @@ -160,27 +163,8 @@ private async Task<string> DownloadFromUrlAsync(IHostEnvironment env, IChannel c
deleteNeeded = true;
return (await t).Message;
}

return CheckValidDownload(ch, filePath, url, ref deleteNeeded);
}
}

private static string CheckValidDownload(IChannel ch, string filePath, string url, ref bool deleteNeeded)
{
// If the relative url does not exist, aka.ms redirects to www.microsoft.com. Make sure this did not happen.
// If the file is big then it is definitely not the redirect.
var info = new FileInfo(filePath);
if (info.Length > 4096)
return null;
string error = null;
using (var r = new StreamReader(filePath))
{
var text = r.ReadToEnd();
if (text.Contains("<head>") && text.Contains("<body>") && text.Contains("microsoft.com"))
error = $"The url '{url}' does not exist. Url was redirected to www.microsoft.com.";
}
deleteNeeded = error != null;
return error;
}

private static void TryDelete(IChannel ch, string filePath, bool warn = true)
Expand Down Expand Up @@ -252,7 +236,7 @@ private static string GetFilePath(IChannel ch, string fileName, string dir, out
return filePath;
}

private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient webClient, Uri uri, string path, string fileName, CancellationToken ct)
private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClientResponse webClient, Uri uri, string path, string fileName, CancellationToken ct)
{
if (File.Exists(path))
return null;
Expand All @@ -274,6 +258,8 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
using (var ws = fh.CreateWriteStream())
{
var headers = webClient.ResponseHeaders.GetValues("Content-Length");
if (IsRedirectToDefaultPage(uri))
return ch.Except($"Invalid aka.ms url: {uri}");
if (Utils.Size(headers) == 0 || !long.TryParse(headers[0], out var size))
size = 10000000;

Expand Down Expand Up @@ -311,6 +297,28 @@ private Exception DownloadResource(IHostEnvironment env, IChannel ch, WebClient
}
}

public bool IsRedirectToDefaultPage(Uri uri)
{
try
{
HttpWebRequest myHttpWebRequest = (HttpWebRequest)WebRequest.Create(uri.AbsoluteUri);
myHttpWebRequest.AllowAutoRedirect = false;
HttpWebResponse myHttpWebResponse = (HttpWebResponse)myHttpWebRequest.GetResponse();
}
catch (WebException e)
{
// Redirects to https://www.microsoft.com/en-us/?ref=aka
if (e.Message == "The remote server returned an error: (302) Moved Temporarily.")
return true;
// Redirects to another url
else if (e.Message == "The remote server returned an error: (301) Moved Permanently.")
return false;
else
return false;
}
return false;
}

public static ResourceDownloadResults GetErrorMessage(out string errorMessage, params ResourceDownloadResults[] result)
{
var errorResult = result.FirstOrDefault(res => !string.IsNullOrEmpty(res.ErrorMessage));
Expand All @@ -328,4 +336,15 @@ public static ResourceDownloadResults GetErrorMessage(out string errorMessage, p
private static extern int chmod(string pathname, int mode);
#pragma warning restore IDE1006
}

public class WebClientResponse : WebClient
{
public WebResponse Response { get; private set; }

protected override WebResponse GetWebResponse(WebRequest request)
{
Response = base.GetWebResponse(request);
return Response;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ private string EnsureModelFile(IHostEnvironment env, out int linesToSkip, WordEm
{
string dir = kind == WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding ? Path.Combine("Text", "Sswe") : "WordVectors";
var url = $"{dir}/{modelFileName}";
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(Host, ch, url, modelFileName, dir, Timeout);
var ensureModel = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, url, modelFileName, dir, Timeout);
ensureModel.Wait();
var errorResult = ResourceManagerUtils.GetErrorMessage(out var errorMessage, ensureModel.Result);
if (errorResult != null)
Expand Down
Loading