forked from tangyoha/telegram_media_downloader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_management.py
78 lines (69 loc) · 2.09 KB
/
file_management.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
"""Utility functions to handle downloaded files."""
import glob
import os
import pathlib
from hashlib import md5
def get_next_name(file_path: str) -> str:
    """
    Get next available name to download file.

    Candidate names have the form ``<stem>-copy<N><suffix>`` and live in the
    same directory as ``file_path``.  ``N`` starts at 1 and is increased
    until a name is found that does not refer to an existing regular file.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which next available name to
        be generated.

    Returns
    -------
    str
        Absolute path of the next available name for the file.
    """
    posix_path = pathlib.Path(file_path)
    parent_dir: str = str(posix_path.parent)
    # ``stem`` only strips the *last* suffix, so it must be paired with
    # ``suffix`` (not all of ``suffixes``); otherwise a name such as
    # ``a.tar.gz`` would become ``a.tar-copy1.tar.gz``.
    stem: str = posix_path.stem
    suffix: str = posix_path.suffix

    counter: int = 1
    while True:
        candidate: str = os.path.join(
            parent_dir, f"{stem}-copy{counter}{suffix}"
        )
        if not os.path.isfile(candidate):
            return candidate
        counter += 1
def _md5_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Return the hex MD5 digest of the file at ``path``, read in chunks."""
    digest = md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def manage_duplicate_file(file_path: str) -> str:
    """
    Check if a file is duplicate.

    Compare the md5 of files with copy name pattern
    and remove if the md5 hash is same.

    The part of the file stem before the first ``-copy`` is used as a
    prefix; every regular file in the same directory whose name starts
    with that prefix is hashed and compared with ``file_path``.  On the
    first match ``file_path`` is deleted and the matching file's path is
    returned.

    Parameters
    ----------
    file_path: str
        Absolute path of the file for which duplicates needs to
        be managed.

    Returns
    -------
    str
        Absolute path of the duplicate managed file.

    Raises
    ------
    OSError
        If ``file_path`` (or a candidate file) cannot be read, or
        ``file_path`` cannot be removed.
    """
    posix_path = pathlib.Path(file_path)
    file_base_name: str = posix_path.stem.split("-copy")[0]
    # Escape every glob metacharacter in the literal part of the pattern so
    # names containing ``[``, ``]``, ``*`` or ``?`` are matched verbatim.
    name_pattern: str = (
        glob.escape(os.path.join(str(posix_path.parent), file_base_name)) + "*"
    )

    current_file_md5: str = _md5_of_file(file_path)
    for old_file_path in glob.glob(name_pattern):
        # A directory may share the prefix; it cannot be a duplicate.
        if not os.path.isfile(old_file_path):
            continue
        # Compare by file identity rather than by string so a relative or
        # non-normalised ``file_path`` is never matched against (and then
        # deleted in favour of) itself.
        if os.path.samefile(old_file_path, file_path):
            continue
        if _md5_of_file(old_file_path) == current_file_md5:
            os.remove(file_path)
            return old_file_path
    return file_path