Skip to content

Commit

Permalink
Update 2.3
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdctop authored Jan 26, 2020
1 parent 70f1b16 commit fe72afc
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 42 deletions.
59 changes: 24 additions & 35 deletions AV_Data_Capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

# ============global var===========

version='2.2'
version='2.3'

config = ConfigParser()
config.read(config_file, encoding='UTF-8')
Expand All @@ -23,20 +23,6 @@

# ==========global var end=========

def moveMovies():
    """Walk the configured source directory and move every recognised
    video file into the current working directory."""
    source_dir = config['movie_location']['path']
    # Windows matches file names case-insensitively, so lowercase patterns
    # are enough there; on other platforms both cases must be listed.
    extensions = ["avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
    if Platform != 'win32':
        extensions = [ext.upper() for ext in extensions] + extensions
    found = []
    for ext in extensions:
        for dirpath, dirnames, files in os.walk(source_dir):
            for name in fnmatch.filter(files, '*.' + ext):
                found.append(os.path.join(dirpath, name))
    for movie in found:
        print("Move file " + movie)
        shutil.move(movie, os.path.curdir)
def UpdateCheck():
if UpdateCheckSwitch() == '1':
html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
Expand All @@ -53,11 +39,17 @@ def movie_lists():
global exclude_directory_1
global exclude_directory_2
total=[]
file_type = ['mp4','avi','rmvb','wmv','mov','mkv','flv','ts']
file_type = ['.mp4','.avi','.rmvb','.wmv','.mov','.mkv','.flv','.ts','.MP4', '.AVI', '.RMVB', '.WMV', '.MOV', '.MKV', '.FLV', '.TS',]
exclude_directory_1 = config['common']['failed_output_folder']
exclude_directory_2 = config['common']['success_output_folder']
for a in file_type:
total += glob.glob(r"./*." + a)
file_root=os.getcwd()
for root,dirs,files in os.walk(file_root):
if exclude_directory_1 not in root and exclude_directory_2 not in root:
for f in files:
if os.path.splitext(f)[1] in file_type:
path = os.path.join(root,f)
path = path.replace(file_root,'.')
total.append(path)
return total
def CreatFailedFolder():
if not os.path.exists('failed/'): # 新建failed文件夹
Expand Down Expand Up @@ -86,7 +78,7 @@ def rreplace(self, old, new, *max):
return new.join(self.rsplit(old, count))
def getNumber(filepath):
filepath = filepath.replace('.\\','')
try: # 普通提取番号 主要处理包含减号-的番号
if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号
filepath = filepath.replace("_", "-")
filepath.strip('22-sht.me').strip('-HD').strip('-hd')
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
Expand All @@ -97,18 +89,11 @@ def getNumber(filepath):
except: # 提取类似mkbd-s120番号
file_number = re.search('\w+-\w+\d+', filename).group()
return file_number
except: # 提取不含减号-的番号
else: # 提取不含减号-的番号,FANZA CID
try:
filename = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
filename = str(re.sub(".*?\.com-\d+", "", filename)).replace('_', '')
file_number = str(re.search('\w+\d{4}', filename).group(0))
return file_number
except: # 提取无减号番号
filename = str(re.sub("ts6\d", "", filepath)) # 去除ts64/265
filename = str(re.sub(".*?\.com-\d+", "", filename))
file_number = str(re.match('\w+', filename).group())
file_number = str(file_number.replace(re.match("^[A-Za-z]+", file_number).group(),re.match("^[A-Za-z]+", file_number).group() + '-'))
return file_number
return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
except:
return re.search(r'(.+?)\.',filepath)[0]

def RunCore():
if Platform == 'win32':
Expand All @@ -120,7 +105,10 @@ def RunCore():
os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
else:
if os.path.exists('core.py'):
os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
try:
os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
except:
os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
elif os.path.exists('core.exe'):
os.system('core.exe' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从exe启动(用于EXE版程序)
elif os.path.exists('core.py') and os.path.exists('core.exe'):
Expand All @@ -130,17 +118,18 @@ def RunCore():
print('[*]================== AV Data Capture ===================')
print('[*] Version '+version)
print('[*]======================================================')

CreatFailedFolder()
UpdateCheck()
moveMovies()
os.chdir(os.getcwd())
movie_list=movie_lists()

count = 0
count_all = str(len(movie_lists()))
print('[+]Find',str(len(movie_lists())),'movies')
count_all = str(len(movie_list))
print('[+]Find',count_all,'movies')
if config['common']['soft_link'] == '1':
print('[!] --- Soft link mode is ENABLE! ----')
for i in movie_lists(): #遍历电影列表 交给core处理
for i in movie_list: #遍历电影列表 交给core处理
count = count + 1
percentage = str(count/int(count_all)*100)[:4]+'%'
print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
Expand Down
4 changes: 2 additions & 2 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ media_warehouse=emby
[escape]
literals=\()

[movie_location]
path=
[debug_mode]
switch=0
57 changes: 54 additions & 3 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import avsox
import javbus
import javdb
import fanza
# =========website========


Expand Down Expand Up @@ -135,6 +136,14 @@ def getDataFromJSON(file_number): # 从JSON返回元数据
# ==
elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
json_data = json.loads(siro.main(file_number))
elif not '-' in file_number or '_' in file_number:
json_data = json.loads(fanza.main(file_number))
if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
json_data = json.loads(javbus.main(file_number))
if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
json_data = json.loads(avsox.main(file_number))
if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
json_data = json.loads(javdb.main(file_number))
# ==
else:
json_data = json.loads(javbus.main(file_number))
Expand All @@ -145,7 +154,7 @@ def getDataFromJSON(file_number): # 从JSON返回元数据

# ================================================网站规则添加结束================================================

title = str(json_data['title']).replace(' ', '')
title = json_data['title']
studio = json_data['studio']
year = json_data['year']
outline = json_data['outline']
Expand Down Expand Up @@ -305,6 +314,18 @@ def imageDownload(): # 封面是否下载成功,否则移动到failed
if DownloadFileWithFilename(cover, number + c_word + '.jpg', path) == 'failed':
moveFailedFolder()
DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
return
i = 1
while i <= int(config['proxy']['retry']):
if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
i = i + 1
continue
else:
break
if multi_part == 1:
old_name = os.path.join(path, number + c_word + '.jpg')
new_name = os.path.join(path, number + c_word + '.jpg')
Expand All @@ -316,11 +337,38 @@ def imageDownload(): # 封面是否下载成功,否则移动到failed
if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
moveFailedFolder()
DownloadFileWithFilename(cover, 'fanart.jpg', path)
if not os.path.getsize(path + '/fanart.jpg') == 0:
print('[+]Image Downloaded!', path + '/fanart.jpg')
return
i = 1
while i <= int(config['proxy']['retry']):
if os.path.getsize(path + '/fanart.jpg') == 0:
print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
DownloadFileWithFilename(cover, 'fanart.jpg', path)
i = i + 1
continue
else:
break
if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
print('[!]Image Download Failed! Trying again.')
DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
print('[+]Image Downloaded!', path + '/fanart.jpg')
elif option == 'kodi':
if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path) == 'failed':
moveFailedFolder()
DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
return
i = 1
while i <= int(config['proxy']['retry']):
if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
i = i + 1
continue
else:
break
print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')


Expand All @@ -330,6 +378,7 @@ def PrintFiles():
os.makedirs(path)
if option == 'plex':
with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
print("<movie>", file=code)
print(" <title>" + naming_rule + part + "</title>", file=code)
print(" <set>", file=code)
Expand Down Expand Up @@ -377,6 +426,7 @@ def PrintFiles():
print("[+]Writeed! " + path + "/" + number + ".nfo")
elif option == 'emby':
with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
print("<movie>", file=code)
print(" <title>" + naming_rule + part + "</title>", file=code)
print(" <set>", file=code)
Expand Down Expand Up @@ -424,6 +474,7 @@ def PrintFiles():
print("[+]Writeed! " + path + "/" + number + c_word + ".nfo")
elif option == 'kodi':
with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
print("<movie>", file=code)
print(" <title>" + naming_rule + part + "</title>", file=code)
print(" <set>", file=code)
Expand Down Expand Up @@ -618,11 +669,11 @@ def debug_mode():
print('[+] ---Debug info---')
for i, v in json_data.items():
if i == 'outline':
print('[+] -', i, ':', len(v), 'characters')
print('[+] -', i, ' :', len(v), 'characters')
continue
if i == 'actor_photo' or i == 'year':
continue
print('[+] -', i+str(9-len(i)*'-'), ':', v)
print('[+] -',"%-11s" % i, ':', v)
print('[+] ---Debug info---')
except:
aaa = ''
Expand Down
108 changes: 108 additions & 0 deletions fanza.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from lxml import etree
import json
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(a):
    """Return the text of the page element with id="title"."""
    tree = etree.fromstring(a, etree.HTMLParser())
    return tree.xpath('//*[@id="title"]/text()')[0]
def getActor(a):
    """Return actor names from the td following the '出演者' label,
    rendered as a comma-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    names = tree.xpath("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()")
    # Collapse the Python list repr into "name1,name2" form.
    return str(names).strip(" ['']").replace("', '", ',')
def getStudio(a):
    """Return the maker (メーカー) name.

    The value is usually a link; fall back to the bare cell text when
    no <a> element is present.
    """
    html = etree.fromstring(a, etree.HTMLParser())
    try:
        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/a/text()")[0]
    except IndexError:
        # [0] on an empty xpath result is the only expected failure here;
        # the original bare `except:` also hid unrelated errors.
        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/text()")[0]
    return result1
def getRuntime(a):
    """Return the runtime in minutes as a digit string.

    The '収録時間' cell contains text such as '120分'; only the leading
    run of digits is kept.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    raw = tree.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    return re.search(r'\d+', str(raw)).group()
def getLabel(a):
    """Return the series (シリーズ) name, preferring the linked value."""
    html = etree.fromstring(a, etree.HTMLParser())
    try:
        result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()")[0]
    except IndexError:
        # Empty xpath result -> no <a>; read the plain cell text instead.
        # Narrowed from a bare `except:` that swallowed all errors.
        result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/text()")[0]
    return result1
def getNum(a):
    """Return the product number (品番), preferring the linked value."""
    html = etree.fromstring(a, etree.HTMLParser())
    try:
        result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/a/text()")[0]
    except IndexError:
        # Empty xpath result -> no <a>; read the plain cell text instead.
        # Narrowed from a bare `except:` that swallowed all errors.
        result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/text()")[0]
    return result1
def getYear(getRelease):
    """Extract a 4-digit year from a release-date string.

    Returns the input unchanged when no year can be extracted.
    NOTE(review): the parameter name shadows the module-level
    getRelease() function; kept for interface compatibility.
    """
    try:
        return str(re.search(r'\d{4}', getRelease).group())
    except (AttributeError, TypeError):
        # AttributeError: no match, re.search returned None.
        # TypeError: input is not a string (e.g. None).
        # Narrowed from a bare `except:` that swallowed all errors.
        return getRelease
def getRelease(a):
    """Return the release date (商品発売日), preferring the linked value."""
    html = etree.fromstring(a, etree.HTMLParser())
    try:
        result1 = html.xpath("//td[contains(text(),'商品発売日:')]/following-sibling::td/a/text()")[0].lstrip('\n')
    except IndexError:
        # Empty xpath result -> no <a>; read the plain cell text instead.
        # Narrowed from a bare `except:` that swallowed all errors.
        result1 = html.xpath("//td[contains(text(),'商品発売日:')]/following-sibling::td/text()")[0].lstrip('\n')
    return result1
def getTag(a):
    """Return the genres (ジャンル) as a comma-separated string.

    Bug fixed: the original wrapped `str(html.xpath(...))` in try/except,
    but str() of a list never raises, so the plain-text fallback branch
    was unreachable. Use an explicit empty-result check instead.
    """
    html = etree.fromstring(a, etree.HTMLParser())
    tags = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")
    if not tags:
        # Genres given as plain text rather than links.
        tags = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")
    return str(tags).strip(" ['']").replace("', '", ",")
def getCover(htmlcode, number):
    """Return the cover image URL: the href of the anchor whose id is
    the content id itself."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return tree.xpath('//*[@id="' + number + '"]/@href')[0]
def getDirector(a):
    """Return the director (監督) name, preferring the linked value."""
    html = etree.fromstring(a, etree.HTMLParser())
    try:
        result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/a/text()")[0]
    except IndexError:
        # Empty xpath result -> no <a>; read the plain cell text instead.
        # Narrowed from a bare `except:` that swallowed all errors.
        result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/text()")[0]
    return result1
def getOutline(htmlcode):
    """Return the description paragraph with embedded newlines removed."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    text = tree.xpath("//div[@class='mg-b20 lh4']/text()")[0]
    return str(text).replace('\n', '')
def main(number):
    """Scrape FANZA (DMM) metadata for the given content id (cid).

    Tries the digital-video catalogue first and falls back to the
    physical-DVD catalogue on a 404. Returns the metadata as a
    pretty-printed JSON string.
    """
    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number
    htmlcode = get_html(url)
    if '404 Not Found' in htmlcode:
        url = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' + number
        htmlcode = get_html(url)
    dic = {
        # NOTE(review): str.strip removes *characters* from the ends, not a
        # substring — this can over-trim titles; confirm intended behavior.
        'title': getTitle(htmlcode).strip(getActor(htmlcode)),
        'studio': getStudio(htmlcode),
        'outline': getOutline(htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'release': getRelease(htmlcode),
        'number': getNum(htmlcode),
        'cover': getCover(htmlcode, number),
        'imagecut': 1,
        'tag': getTag(htmlcode),
        'label': getLabel(htmlcode),
        'year': getYear(getRelease(htmlcode)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
        'website': url,
        'source': 'fanza.py',  # fixed: was mislabeled 'siro.py' (copy-paste)
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js

# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
#print(main('ssni00384'))
4 changes: 2 additions & 2 deletions update_check.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "2.2",
"version_show":"2.2",
"version": "2.3",
"version_show":"2.3",
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
}

0 comments on commit fe72afc

Please sign in to comment.