forked from mvdctop/Movie_Data_Capture
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnumber_parser.py
48 lines (43 loc) · 1.67 KB
/
number_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import re
def get_number(filepath: str) -> str:
    """Extract the movie ID (e.g. ``snis-829``) from a video file path.

    Only the final path component (``os.path.basename``) is inspected.

    * If the name contains ``-`` or ``_``: underscores become dashes, the
      literal tag ``22-sht.me`` and any ``[YYYY-M-D] - `` date stamp are
      removed, ``PPV``/``ppv`` is dropped from FC2 names, double dashes are
      replaced by single ones, and the first ASCII ``word-word`` pair is
      returned.
    * Otherwise (IDs without a dash, such as a FANZA CID) the text before
      the first dot of the file name is returned; a name without any dot is
      returned unchanged.

    Args:
        filepath: Path or bare file name of the video file.

    Returns:
        The extracted ID string.

    Raises:
        AttributeError: If the name contains a dash or underscore but no
            ``word-word`` pair remains after cleanup.

    >>> from number_parser import get_number
    >>> get_number("/Users/Guest/AV_Data_Capture/snis-829.mp4")
    'snis-829'
    >>> get_number("/Users/Guest/AV_Data_Capture/snis-829-C.mp4")
    'snis-829'
    >>> get_number("C:¥Users¥Guest¥snis-829.mp4")
    'snis-829'
    >>> get_number("C:¥Users¥Guest¥snis-829-C.mp4")
    'snis-829'
    >>> get_number("./snis-829.mp4")
    'snis-829'
    >>> get_number("./snis-829-C.mp4")
    'snis-829'
    >>> get_number(".¥snis-829.mp4")
    'snis-829'
    >>> get_number(".¥snis-829-C.mp4")
    'snis-829'
    >>> get_number("snis-829.mp4")
    'snis-829'
    >>> get_number("snis-829-C.mp4")
    'snis-829'
    >>> get_number("FC2-PPV-1234567.mp4")
    'FC2-1234567'
    >>> get_number("22-sht.me-snis-829.mp4")
    'snis-829'
    >>> get_number("ssni00829.mp4")
    'ssni00829'
    """
    name = os.path.basename(filepath)

    if '-' in name or '_' in name:
        # Regular IDs: mainly names that contain '-' or '_'.
        name = name.replace('_', '-')
        # Remove the '22-sht.me' tag as a substring.  str.strip() would treat
        # its argument as a character set and eat leading/trailing letters of
        # the ID itself, so replace() is used instead.
        name = name.replace('22-sht.me', '')
        # Remove a date stamp such as "[2020-1-31] - " from the file name.
        name = re.sub(r'\[\d{4}-\d{1,2}-\d{1,2}\] - ', '', name)
        # Only FC2 names carry the PPV marker; removing it from other names
        # could destroy part of a real ID.
        if 'fc2' in name.lower():
            name = name.replace('PPV', '').replace('ppv', '')
        # Kept unconditional so names like "snis--829" still resolve.
        name = name.replace('--', '-')
        # re.A restricts \w to ASCII so non-ASCII path residue is skipped.
        return re.search(r'\w+-\w+', name, re.A).group()

    # IDs without a dash, e.g. a FANZA CID: use the bare file stem.
    # First isolate the trailing "<name>.<ext>" segment that is free of
    # characters that are illegal in file names.
    tail = re.search(r'([^<>/\\|:"*?]+)\.\w+$', name)
    candidate = tail.group() if tail else name
    # Then keep what precedes the first dot (without the dot itself).
    stem = re.search(r'(.+?)\.', candidate)
    return stem.group(1) if stem else candidate
if __name__ == "__main__":
    # When run as a script, execute the doctests embedded in this module and
    # raise on the first failing example instead of printing a report.
    from doctest import testmod

    testmod(raise_on_error=True)