Skip to content

Commit 44d6489

Browse files
committed
Implemented downloading into temporary file, added additional logic for existing file checks, code refactoring for download directory stuff, mega downloader: max retries now loaded from IUniversalDownloaderPlatformSettings, updated copyright year, switched settings classes to using record instead of class
1 parent 4acb116 commit 44d6489

File tree

27 files changed

+575
-318
lines changed

27 files changed

+575
-318
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace UniversalDownloaderPlatform.Common.Enums
8+
{
9+
/// <summary>
/// Selects how a download is handled when a file with the same name is already present on the disk.
/// </summary>
public enum FileExistsAction
{
    /// <summary>
    /// If the remote file size check is enabled and the size is available, compare it with the existing file first.
    /// When the size differs, or the check is disabled or the size is not available, download the remote file and
    /// compare it with the existing file; if they differ, the old file is preserved as a backup copy.
    /// NOTE(review): FileExistsActionHelper.DoFileExistsActionBeforeDownload treats "check enabled but size not
    /// available" as identical and skips the download - confirm which behavior is intended.
    /// </summary>
    BackupIfDifferent = 0,

    /// <summary>
    /// Behaves like <see cref="BackupIfDifferent"/>, except that no backup copy of the old file is created.
    /// </summary>
    ReplaceIfDifferent = 1,

    /// <summary>
    /// The existing file is always replaced.
    /// </summary>
    AlwaysReplace = 2,

    /// <summary>
    /// The existing file is always kept.
    /// </summary>
    KeepExisting = 3
}
31+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace UniversalDownloaderPlatform.Common.Enums
8+
{
9+
/// <summary>
/// Level attached to a message passed to a logging callback.
/// The numeric values follow declaration order, which is NOT severity order
/// (Fatal and Error come before Warning and Information), so do not compare
/// members numerically to filter by severity.
/// </summary>
public enum LogMessageLevel
{
    /// <summary>Trace level message.</summary>
    Trace = 0,

    /// <summary>Debug level message.</summary>
    Debug = 1,

    /// <summary>Fatal level message.</summary>
    Fatal = 2,

    /// <summary>Error level message.</summary>
    Error = 3,

    /// <summary>Warning level message.</summary>
    Warning = 4,

    /// <summary>Information level message.</summary>
    Information = 5
}
18+
}

UniversalDownloaderPlatform.Common/Enums/RemoteFileSizeNotAvailableAction.cs

Lines changed: 0 additions & 27 deletions
This file was deleted.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace UniversalDownloaderPlatform.Common.Helpers
8+
{
9+
internal static class ByteArrayExtensions
{
    /// <summary>
    /// Converts a byte array into a hexadecimal string, two characters per byte, with no separators.
    /// </summary>
    /// <param name="bytes">Bytes to convert</param>
    /// <param name="upperCase">True to emit the digits A-F in upper case, false for lower case</param>
    /// <returns>Hexadecimal representation of <paramref name="bytes"/> (empty string for an empty array)</returns>
    public static string ToHex(this byte[] bytes, bool upperCase)
    {
        // The casing never changes between bytes, so pick the format specifier once.
        string byteFormat = upperCase ? "X2" : "x2";
        StringBuilder hex = new StringBuilder(bytes.Length * 2);

        foreach (byte value in bytes)
        {
            hex.Append(value.ToString(byteFormat));
        }

        return hex.ToString();
    }
}
22+
}
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Text;
6+
using System.Threading.Tasks;
7+
using UniversalDownloaderPlatform.Common.Enums;
8+
using UniversalDownloaderPlatform.Common.Exceptions;
9+
10+
namespace UniversalDownloaderPlatform.Common.Helpers
11+
{
12+
public static class FileExistsActionHelper
{
    /// <summary>
    /// Decides, based on the FileExistsAction value, whether a file that already exists on the disk should be downloaded again.
    /// Should be called before downloading the file when the file already exists on the disk.
    /// </summary>
    /// <remarks>
    /// <see cref="FileExistsAction.AlwaysReplace"/> always continues and <see cref="FileExistsAction.KeepExisting"/> always stops,
    /// in both cases without inspecting the local file. For the remaining actions, when the size check is enabled:
    /// a matching size stops the download, a non-positive <paramref name="remoteFileSize"/> is treated as "identical" and
    /// stops the download, and a failure to read the local file size is logged and also treated as "identical".
    /// When the size check is disabled the download continues.
    /// </remarks>
    /// <param name="path">The path to the file already existing on the disk</param>
    /// <param name="remoteFileSize">The size of the remote file (supply -1 if not available)</param>
    /// <param name="isCheckRemoteFileSize">Should the remote file size check be performed at all</param>
    /// <param name="fileExistsAction">Action to perform</param>
    /// <param name="loggingFunction">Logging function</param>
    /// <returns>True if should continue the download, false if should stop download process for the file</returns>
    public static bool DoFileExistsActionBeforeDownload(string path,
        long remoteFileSize,
        bool isCheckRemoteFileSize,
        FileExistsAction fileExistsAction,
        Action<LogMessageLevel, string, Exception> loggingFunction)
    {
        if (fileExistsAction == FileExistsAction.AlwaysReplace)
        {
            return true;
        }

        bool isKeepExisting = fileExistsAction == FileExistsAction.KeepExisting;
        bool isFilesIdentical = false;

        // KeepExisting skips the file no matter what the sizes are, so the size check is bypassed for it:
        // running it would only touch the disk and could log "will be redownloaded" right before skipping.
        if (!isKeepExisting && isCheckRemoteFileSize)
        {
            if (remoteFileSize > 0)
            {
                loggingFunction(LogMessageLevel.Debug, $"File {path} exists, size will be checked", null);
                try
                {
                    if (new FileInfo(path).Length != remoteFileSize)
                    {
                        loggingFunction(LogMessageLevel.Warning, $"Local and remote file sizes does not match, file {path} will be redownloaded.", null);
                    }
                    else
                    {
                        loggingFunction(LogMessageLevel.Debug, $"File size for {path} matches", null);
                        isFilesIdentical = true;
                    }
                }
                catch (Exception ex)
                {
                    loggingFunction(LogMessageLevel.Error, $"Error during file comparison: {ex}", ex);
                    // The local file size could not be read; assume the file is identical so the existing file is left alone.
                    isFilesIdentical = true;
                }
            }
            else
            {
                // Assume that 0kb files and unavailable remote sizes are always identical.
                isFilesIdentical = true;
            }
        }

        if (isFilesIdentical || isKeepExisting)
        {
            loggingFunction(LogMessageLevel.Warning, $"File {path} already exists, will be skipped because of identical size to the remote file or because of file exists setting being set to keep existing file even on different remote size.", null);
            return false;
        }

        return true;
    }

    /// <summary>
    /// Performs all required actions based on the FileExistsAction value. Should be called after the temporary file has been downloaded.
    /// Moves the temporary file to <paramref name="path"/>, unless an existing file with identical content is kept,
    /// in which case the temporary file is deleted instead.
    /// </summary>
    /// <remarks>
    /// If no file exists at <paramref name="path"/> the temporary file is simply moved there.
    /// Otherwise: for <see cref="FileExistsAction.BackupIfDifferent"/> and <see cref="FileExistsAction.ReplaceIfDifferent"/>
    /// the hashes of both files are compared; identical files keep the original, different files either rename the original to
    /// "name_old_{unix time in seconds}.ext" (backup) or delete it (replace). <see cref="FileExistsAction.AlwaysReplace"/>
    /// deletes the original without comparing.
    /// </remarks>
    /// <param name="path">The final path of the file; a file may already exist there</param>
    /// <param name="temporaryFilePath">The path to the temporary file on the disk</param>
    /// <param name="fileExistsAction">Action to perform</param>
    /// <param name="loggingFunction">Logging function</param>
    /// <exception cref="Exception">
    /// Thrown when a file cannot be deleted, backed up or moved, or when a file exists at <paramref name="path"/>
    /// and <paramref name="fileExistsAction"/> is not one of the actions handled here (e.g. KeepExisting).
    /// </exception>
    public static void DoFileExistsActionAfterDownload(
        string path,
        string temporaryFilePath,
        FileExistsAction fileExistsAction,
        Action<LogMessageLevel, string, Exception> loggingFunction)
    {
        if (File.Exists(path))
        {
            bool isShouldRemoveExistingFile = false;
            if (fileExistsAction == FileExistsAction.ReplaceIfDifferent ||
                fileExistsAction == FileExistsAction.BackupIfDifferent)
            {
                string existingFileHash = FileHashHelper.CalculateFileHash(path).ToHex(true);
                string downloadedFileHash = FileHashHelper.CalculateFileHash(temporaryFilePath).ToHex(true);

                if (existingFileHash == downloadedFileHash)
                {
                    loggingFunction(LogMessageLevel.Information, $"Existing file {Path.GetFileName(path)} is identical to downloaded file, original file will be kept.", null);
                    try
                    {
                        File.Delete(temporaryFilePath);
                    }
                    catch (Exception ex)
                    {
                        throw new Exception($"Unable to remove the temporary file {Path.GetFileName(temporaryFilePath)} because of it being identical to existing file, error: {ex}", ex);
                    }

                    // The original stays in place, nothing to move.
                    return;
                }

                if (fileExistsAction == FileExistsAction.BackupIfDifferent)
                {
                    string backupFilename =
                        $"{Path.GetFileNameWithoutExtension(path)}_old_{DateTimeOffset.UtcNow.ToUnixTimeSeconds()}{Path.GetExtension(path)}";
                    loggingFunction(LogMessageLevel.Warning, $"Local and remote files are different, file {Path.GetFileName(path)} will replaced. Old file will be backed up as {Path.GetFileName(backupFilename)}. Remote file hash: {downloadedFileHash}, local file hash: {existingFileHash}", null);
                    try
                    {
                        // Renaming the original frees up "path" for the final move below.
                        File.Move(path, Path.Combine(Path.GetDirectoryName(path), backupFilename));
                    }
                    catch (Exception ex)
                    {
                        // Wrapped like the other file operations so the caller learns which step failed.
                        throw new Exception($"Unable to back up the original file {Path.GetFileName(path)} as {backupFilename}, error: {ex}", ex);
                    }
                }
                else
                {
                    isShouldRemoveExistingFile = true;
                }
            }
            else if (fileExistsAction == FileExistsAction.AlwaysReplace)
            {
                isShouldRemoveExistingFile = true;
            }
            else //safeguard
            {
                throw new Exception($"Invalid state for {Path.GetFileName(path)}, managed to get past all FileExistActions check. Contact developer. Leftover files might be present in the download directory.");
            }

            if (isShouldRemoveExistingFile)
            {
                try
                {
                    File.Delete(path);
                }
                catch (Exception ex)
                {
                    throw new Exception($"Unable to remove the original file {Path.GetFileName(path)} in order to replace with temporary file {Path.GetFileName(temporaryFilePath)}, error: {ex}", ex);
                }
            }
        }

        try
        {
            File.Move(temporaryFilePath, path);
        }
        catch (Exception ex)
        {
            throw new Exception($"Unable to move {Path.GetFileName(temporaryFilePath)} to {Path.GetFileName(path)}, error: {ex}", ex);
        }
    }
}
153+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Security.Cryptography;
6+
using System.Text;
7+
using System.Threading.Tasks;
8+
9+
namespace UniversalDownloaderPlatform.Common.Helpers
10+
{
11+
internal static class FileHashHelper
{
    /// <summary>
    /// Calculates the MD5 hash of the file's contents.
    /// </summary>
    /// <param name="path">Path to the file</param>
    /// <returns>Byte array containing the hash</returns>
    public static byte[] CalculateFileHash(string path)
    {
        // Hashing only reads the file, so open it read-only with shared reads. File.Open(path, FileMode.Open)
        // would request read/write access with no sharing and fail on read-only files or files open elsewhere.
        using (FileStream inputStream = File.OpenRead(path))
        using (MD5 md5 = MD5.Create())
        {
            return md5.ComputeHash(inputStream);
        }
    }
}
27+
}

UniversalDownloaderPlatform.Common/Interfaces/ICrawledUrlProcessor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,12 @@ public interface ICrawledUrlProcessor
1010
/// </summary>
1111
/// <returns></returns>
1212
Task BeforeStart(IUniversalDownloaderPlatformSettings settings);
13+
1314
/// <summary>
1415
/// Do any additional processing on the crawled url before download process starts. By returning false the function can skip downloading of this url.
1516
/// </summary>
1617
/// <param name="crawledUrl">Crawled url</param>
17-
/// <param name="downloadDirectory">Download directory</param>
1818
/// <returns></returns>
19-
Task<bool> ProcessCrawledUrl(ICrawledUrl crawledUrl, string downloadDirectory);
19+
Task<bool> ProcessCrawledUrl(ICrawledUrl crawledUrl);
2020
}
2121
}

UniversalDownloaderPlatform.Common/Interfaces/IPageCrawler.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,6 @@ public interface IPageCrawler
1717
/// </summary>
1818
/// <returns></returns>
1919
Task BeforeStart(IUniversalDownloaderPlatformSettings settings);
20-
Task<List<ICrawledUrl>> Crawl(ICrawlTargetInfo crawlTargetInfo, string downloadDirectory);
20+
Task<List<ICrawledUrl>> Crawl(ICrawlTargetInfo crawlTargetInfo);
2121
}
2222
}

UniversalDownloaderPlatform.Common/Interfaces/IPluginManager.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,8 @@ public interface IPluginManager
1616
/// Download file using one of the registered plugins (or default if none are found)
1717
/// </summary>
1818
/// <param name="crawledUrl"></param>
19-
/// <param name="downloadDirectory"></param>
2019
/// <returns></returns>
21-
Task DownloadCrawledUrl(ICrawledUrl crawledUrl, string downloadDirectory);
20+
Task DownloadCrawledUrl(ICrawledUrl crawledUrl);
2221

2322
/// <summary>
2423
/// Run entry contents through every plugin to extract supported urls.

UniversalDownloaderPlatform.Common/Interfaces/IWebDownloader.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ public interface IWebDownloader
2323
/// <param name="url">File url</param>
2424
/// <param name="path">Path where the file should be saved</param>
2525
/// <param name="refererUrl">Url to be placed into the referer header, can be null</param>
26-
/// <param name="overwrite">Should it be allowed to overwrite file?</param>
27-
Task DownloadFile(string url, string path, string refererUrl = null, bool overwrite = false);
26+
Task DownloadFile(string url, string path, string refererUrl = null);
2827

2928
/// <summary>
3029
/// Download url as string data

0 commit comments

Comments
 (0)