Skip to content

Commit

Permalink
Update 1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdctop authored Aug 18, 2019
1 parent 449e900 commit c4fc220
Show file tree
Hide file tree
Showing 8 changed files with 236 additions and 114 deletions.
1 change: 1 addition & 0 deletions ADC_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
print('[+]config.ini: not found, creating...')
with open("config.ini", "wt", encoding='UTF-8') as code:
print("[common]", file=code)
print("main_mode=1", file=code)
print("failed_output_folder=failed", file=code)
print("success_output_folder=JAV_output", file=code)
print("", file=code)
Expand Down
95 changes: 47 additions & 48 deletions AV_Data_Capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

# ============global var===========

version='0.11.9'
version='1.1'

config = ConfigParser()
config.read(config_file, encoding='UTF-8')
Expand All @@ -25,47 +25,47 @@

def UpdateCheck():
if UpdateCheckSwitch() == '1':
html2 = get_html('https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json')
html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
html = json.loads(str(html2))

if not version == html['version']:
print('[*] * New update ' + html['version'] + ' *')
print('[*] * New update ' + html['version'] + ' *')
print('[*] * Download *')
print('[*] ' + html['download'])
print('[*]=====================================')
else:
print('[+]Update Check disabled!')
def movie_lists():
directory = config['directory_capture']['directory']
a2=[]
b2=[]
c2=[]
d2=[]
e2=[]
f2=[]
g2=[]
h2=[]
mp4=[]
avi=[]
rmvb=[]
wmv=[]
mov=[]
mkv=[]
flv=[]
ts=[]
if directory=='*':
for i in os.listdir(os.getcwd()):
a2 += glob.glob(r"./" + i + "/*.mp4")
b2 += glob.glob(r"./" + i + "/*.avi")
c2 += glob.glob(r"./" + i + "/*.rmvb")
d2 += glob.glob(r"./" + i + "/*.wmv")
e2 += glob.glob(r"./" + i + "/*.mov")
f2 += glob.glob(r"./" + i + "/*.mkv")
g2 += glob.glob(r"./" + i + "/*.flv")
h2 += glob.glob(r"./" + i + "/*.ts")
total = a2 + b2 + c2 + d2 + e2 + f2 + g2 + h2
mp4 += glob.glob(r"./" + i + "/*.mp4")
avi += glob.glob(r"./" + i + "/*.avi")
rmvb += glob.glob(r"./" + i + "/*.rmvb")
wmv += glob.glob(r"./" + i + "/*.wmv")
mov += glob.glob(r"./" + i + "/*.mov")
mkv += glob.glob(r"./" + i + "/*.mkv")
flv += glob.glob(r"./" + i + "/*.flv")
ts += glob.glob(r"./" + i + "/*.ts")
total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
return total
a2 = glob.glob(r"./" + directory + "/*.mp4")
b2 = glob.glob(r"./" + directory + "/*.avi")
c2 = glob.glob(r"./" + directory + "/*.rmvb")
d2 = glob.glob(r"./" + directory + "/*.wmv")
e2 = glob.glob(r"./" + directory + "/*.mov")
f2 = glob.glob(r"./" + directory + "/*.mkv")
g2 = glob.glob(r"./" + directory + "/*.flv")
h2 = glob.glob(r"./" + directory + "/*.ts")
total = a2 + b2 + c2 + d2 + e2 + f2 + g2 + h2
mp4 = glob.glob(r"./" + directory + "/*.mp4")
avi = glob.glob(r"./" + directory + "/*.avi")
rmvb = glob.glob(r"./" + directory + "/*.rmvb")
wmv = glob.glob(r"./" + directory + "/*.wmv")
mov = glob.glob(r"./" + directory + "/*.mov")
mkv = glob.glob(r"./" + directory + "/*.mkv")
flv = glob.glob(r"./" + directory + "/*.flv")
ts = glob.glob(r"./" + directory + "/*.ts")
total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
return total
def CreatFailedFolder():
if not os.path.exists('failed/'): # 新建failed文件夹
Expand Down Expand Up @@ -94,31 +94,30 @@ def rreplace(self, old, new, *max):
return new.join(self.rsplit(old, count))
def getNumber(filepath):
try: # 普通提取番号 主要处理包含减号-的番号
filepath1 = filepath.replace("_", "-")
filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1)) # 去除文件名中时间
file_number = re.search('\w+-\d+', filename).group()
return file_number
try:
filepath1 = filepath.replace("_", "-")
filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1)) # 去除文件名中时间
file_number = re.search('\w+-\d+', filename).group()
return file_number
except:
filepath1 = filepath.replace("_", "-")
filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1)) # 去除文件名中时间
file_number = re.search('\w+-\w+', filename).group()
return file_number
except: # 提取不含减号-的番号
try: # 提取东京热番号格式 n1087
filename1 = str(re.sub("h26\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
try:
filename1 = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
filename0 = str(re.sub(".*?\.com-\d+", "", filename1)).strip('_')
if '-C.' in filepath or '-c.' in filepath:
cn_sub = '1'
file_number = str(re.search('n\d{4}', filename0).group(0))
file_number = str(re.search('\w+\d{4}', filename0).group(0))
return file_number
except: # 提取无减号番号
filename1 = str(re.sub("h26\d", "", filepath)) # 去除h264/265
filename1 = str(re.sub("ts6\d", "", filepath)) # 去除ts64/265
filename0 = str(re.sub(".*?\.com-\d+", "", filename1))
file_number2 = str(re.match('\w+', filename0).group())
if '-C.' in filepath or '-c.' in filepath:
cn_sub = '1'
file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),
re.match("^[A-Za-z]+", file_number2).group() + '-'))
file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),re.match("^[A-Za-z]+", file_number2).group() + '-'))
return file_number
# if not re.search('\w-', file_number).group() == 'None':
# file_number = re.search('\w+-\w+', filename).group()
#

def RunCore():
if Platform == 'win32':
Expand All @@ -138,7 +137,7 @@ def RunCore():

if __name__ =='__main__':
print('[*]===========AV Data Capture===========')
print('[*] Version '+version)
print('[*] Version '+version)
print('[*]=====================================')
CreatFailedFolder()
UpdateCheck()
Expand Down
112 changes: 112 additions & 0 deletions avsox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *

def getActorPhoto(htmlcode):
    """Map each actor name on a page to the URL of that actor's avatar.

    Args:
        htmlcode: HTML source of the page to scan.

    Returns:
        dict: for every element with class ``avatar-box``, the text of its
        ``<span>`` mapped to the ``src`` attribute of its ``<img>``. When
        the same name occurs more than once the last occurrence wins.
    """
    photos = {}
    boxes = BeautifulSoup(htmlcode, 'lxml').find_all(attrs={'class': 'avatar-box'})
    for box in boxes:
        photo_url = box.img['src']
        photos[box.span.get_text()] = photo_url
    return photos
def getTitle(a):
    """Extract the title heading from a detail page.

    Args:
        a: HTML source of the page.

    Returns:
        str: text found at ``/html/body/div[2]/h3`` with every ``/``
        removed, or ``''`` when the page cannot be parsed or queried.
    """
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        # xpath() returns a list; str() + strip() peels the list/quote
        # punctuation off both ends of its repr.
        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")
    except Exception:
        # Deliberately best-effort, but not a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        return ''
    return result.replace('/', '')
def getActor(a):
    """List the actor names shown on a page.

    Args:
        a: HTML source of the page.

    Returns:
        list: the ``<span>`` text of every element with class
        ``avatar-box``, in document order.
    """
    boxes = BeautifulSoup(a, 'lxml').find_all(attrs={'class': 'avatar-box'})
    return [box.span.get_text() for box in boxes]
def getStudio(a):
    """Extract the studio field from an HTML fragment.

    Finds the ``<p>`` whose text contains "制作商: " and reads the link
    text inside the ``<p>`` that immediately follows it.

    Args:
        a: HTML source to search.

    Returns:
        str: the matched text with list/quote punctuation stripped from
        the ends and the ``', '`` separators between multiple matches
        turned into single spaces; ``''`` when nothing matches.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    matches = tree.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')
    cleaned = str(matches).strip(" ['']")
    return cleaned.replace("', '", ' ')
def getRuntime(a):
    """Extract the runtime field from an HTML fragment.

    Reads the text nodes of the parent of the ``<span>`` whose text
    contains "长度:".

    Args:
        a: HTML source to search.

    Returns:
        str: the matched text with spaces, list/quote punctuation and the
        characters ``分`` / ``钟`` stripped from both ends.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    matches = tree.xpath('//span[contains(text(),"长度:")]/../text()')
    return str(matches).strip(" ['分钟']")
def getLabel(a):
    """Extract the "系列:" field from an HTML fragment.

    Finds the ``<p>`` whose text contains "系列:" and reads the link text
    inside the ``<p>`` that immediately follows it.

    Args:
        a: HTML source to search.

    Returns:
        str: the matched text with spaces and list/quote punctuation
        stripped from both ends; ``''`` when nothing matches.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    matches = tree.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')
    return str(matches).strip(" ['']")
def getNum(a):
    """Extract the "识别码:" (identification code) field from HTML.

    Locates the ``<span>`` whose text contains "识别码:" and reads the
    text of the second ``<span>`` under the same parent.

    Args:
        a: HTML source to search.

    Returns:
        str: the matched text with spaces and list/quote punctuation
        stripped from both ends; ``''`` when nothing matches.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    matches = tree.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')
    return str(matches).strip(" ['']")
def getYear(release):
    """Return the first run of four digits found in *release*.

    Args:
        release: release-date text, e.g. ``'2019-08-18'``.

    Returns:
        The four-digit substring as a ``str`` when one is present.
        Otherwise *release* is handed back unchanged; this includes
        non-string input such as ``None``.
    """
    try:
        found = re.search(r'\d{4}', release)
    except TypeError:
        # Non-string input: keep the historical "return it untouched" contract.
        return release
    if found is None:
        return release
    return found.group()
def getRelease(a):
    """Extract the "发行时间:" (release time) field from an HTML fragment.

    Reads the text nodes of the parent of the ``<span>`` whose text
    contains "发行时间:".

    Args:
        a: HTML source to search.

    Returns:
        str: the matched text with spaces and list/quote punctuation
        stripped from both ends; ``''`` when nothing matches.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    matches = tree.xpath('//span[contains(text(),"发行时间:")]/../text()')
    return str(matches).strip(" ['']")
def getCover(htmlcode):
    """Extract the cover image URL from a detail page.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        str: the ``src`` found at
        ``/html/body/div[2]/div[1]/div[1]/a/img`` with spaces and
        list/quote punctuation stripped from both ends; ``''`` when
        nothing matches.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    sources = tree.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')
    return str(sources).strip(" ['']")
def getCover_small(htmlcode):
    """Extract the thumbnail image URL from a page with a ``waterfall`` list.

    Args:
        htmlcode: HTML source of the page.

    Returns:
        str: the ``src`` of the image(s) under the element with id
        ``waterfall``, with spaces and list/quote punctuation stripped
        from both ends; ``''`` when nothing matches.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    sources = tree.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')
    return str(sources).strip(" ['']")
def getTag(a):
    """List the genre tags shown on a page.

    Args:
        a: HTML source of the page.

    Returns:
        list: the text of every element with class ``genre``, in
        document order.
    """
    genres = BeautifulSoup(a, 'lxml').find_all(attrs={'class': 'genre'})
    return [genre.get_text() for genre in genres]

def _search_detail_url(keyword):
    """Run one avsox search and return ``(search_page_html, href_text)``.

    ``href_text`` is the stringified XPath result for the links under the
    ``waterfall`` element with list/quote punctuation stripped; it is
    ``''`` when the search produced no links.
    """
    page = get_html('https://avsox.asia/cn/search/' + keyword)
    tree = etree.fromstring(page, etree.HTMLParser())
    href = str(tree.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    return page, href


def main(number):
    """Look up *number* on avsox and return its metadata as a JSON string.

    The search is tried with up to three spellings of *number*, stopping
    at the first one that yields a result link. The linked detail page is
    then fetched and scraped with the ``get*`` helpers of this module.

    Args:
        number: the identifier to search for (str).

    Returns:
        str: pretty-printed JSON object with the keys ``actor``,
        ``title``, ``studio``, ``outline``, ``runtime``, ``director``,
        ``release``, ``number``, ``cover``, ``cover_small``,
        ``imagecut``, ``tag``, ``label``, ``year``, ``actor_photo``,
        ``website`` and ``source``.

    Note:
        When every search attempt comes back empty, ``get_html`` is still
        called with the empty result; whatever it raises propagates.
    """
    # NOTE(review): the third spelling only differs from the first when
    # *number* itself contains '_' -- confirm this is the intended fallback.
    spellings = (
        lambda: number,
        lambda: number.replace('-', '_'),
        lambda: number.replace('_', ''),
    )
    for spell in spellings:
        a, result1 = _search_detail_url(spell())
        if result1 not in ('', 'null', 'None'):
            break

    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    info = str(soup.find(attrs={'class': 'row movie'}))

    # Drop the leading identifier from the title. The previous
    # ``title.strip(num)`` treated the identifier as a *set of characters*
    # and could also eat legitimate characters off the end of the title.
    title = getTitle(web)
    page_number = getNum(web)
    if page_number and title.startswith(page_number):
        title = title[len(page_number):]

    dic = {
        'actor': getActor(web),
        'title': title,
        'studio': getStudio(info),
        'outline': '',
        'runtime': getRuntime(info),
        'director': '',
        'release': getRelease(info),
        'number': getNum(info),
        'cover': getCover(web),
        # The thumbnail comes from the search-result page, not the detail page.
        'cover_small': getCover_small(a),
        'imagecut': 3,
        'tag': getTag(web),
        'label': getLabel(info),
        'year': getYear(getRelease(info)),
        'actor_photo': getActorPhoto(web),
        'website': result1,
        'source': 'avsox.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
    return js

#print(main('041516_541'))
Loading

0 comments on commit c4fc220

Please sign in to comment.