diff --git a/ADC_function.py b/ADC_function.py
index c287654f0..348be2b8e 100755
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -19,6 +19,7 @@
     print('[+]config.ini: not found, creating...')
     with open("config.ini", "wt", encoding='UTF-8') as code:
         print("[common]", file=code)
+        print("main_mode=1", file=code)
         print("failed_output_folder=failed", file=code)
         print("success_output_folder=JAV_output", file=code)
         print("", file=code)
diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 59d1eddda..885a5ea48 100755
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -14,7 +14,7 @@
 
 # ============global var===========
-version='0.11.9'
+version='1.1'
 
 config = ConfigParser()
 config.read(config_file, encoding='UTF-8')
@@ -25,11 +25,11 @@ def UpdateCheck():
     if UpdateCheckSwitch() == '1':
-        html2 = get_html('https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json')
+        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
         html = json.loads(str(html2))
         if not version == html['version']:
-            print('[*] * New update ' + html['version'] + ' *')
+            print('[*] * New update ' + html['version'] + ' *')
            print('[*] * Download *')
            print('[*] ' + html['download'])
            print('[*]=====================================')
@@ -37,35 +37,35 @@ def UpdateCheck():
         print('[+]Update Check disabled!')
 def movie_lists():
     directory = config['directory_capture']['directory']
-    a2=[]
-    b2=[]
-    c2=[]
-    d2=[]
-    e2=[]
-    f2=[]
-    g2=[]
-    h2=[]
+    mp4=[]
+    avi=[]
+    rmvb=[]
+    wmv=[]
+    mov=[]
+    mkv=[]
+    flv=[]
+    ts=[]
     if directory=='*':
         for i in os.listdir(os.getcwd()):
-            a2 += glob.glob(r"./" + i + "/*.mp4")
-            b2 += glob.glob(r"./" + i + "/*.avi")
-            c2 += glob.glob(r"./" + i + "/*.rmvb")
-            d2 += glob.glob(r"./" + i + "/*.wmv")
-            e2 += glob.glob(r"./" + i + "/*.mov")
-            f2 += glob.glob(r"./" + i + "/*.mkv")
-            g2 += glob.glob(r"./" + i + "/*.flv")
-            h2 += glob.glob(r"./" + i + "/*.ts")
-            total = a2 + b2 + c2 + d2 + e2 + f2 + g2 + h2
+            mp4 += glob.glob(r"./" + i + "/*.mp4")
+            avi += glob.glob(r"./" + i + "/*.avi")
+            rmvb += glob.glob(r"./" + i + "/*.rmvb")
+            wmv += glob.glob(r"./" + i + "/*.wmv")
+            mov += glob.glob(r"./" + i + "/*.mov")
+            mkv += glob.glob(r"./" + i + "/*.mkv")
+            flv += glob.glob(r"./" + i + "/*.flv")
+            ts += glob.glob(r"./" + i + "/*.ts")
+            total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
         return total
-    a2 = glob.glob(r"./" + directory + "/*.mp4")
-    b2 = glob.glob(r"./" + directory + "/*.avi")
-    c2 = glob.glob(r"./" + directory + "/*.rmvb")
-    d2 = glob.glob(r"./" + directory + "/*.wmv")
-    e2 = glob.glob(r"./" + directory + "/*.mov")
-    f2 = glob.glob(r"./" + directory + "/*.mkv")
-    g2 = glob.glob(r"./" + directory + "/*.flv")
-    h2 = glob.glob(r"./" + directory + "/*.ts")
-    total = a2 + b2 + c2 + d2 + e2 + f2 + g2 + h2
+    mp4 = glob.glob(r"./" + directory + "/*.mp4")
+    avi = glob.glob(r"./" + directory + "/*.avi")
+    rmvb = glob.glob(r"./" + directory + "/*.rmvb")
+    wmv = glob.glob(r"./" + directory + "/*.wmv")
+    mov = glob.glob(r"./" + directory + "/*.mov")
+    mkv = glob.glob(r"./" + directory + "/*.mkv")
+    flv = glob.glob(r"./" + directory + "/*.flv")
+    ts = glob.glob(r"./" + directory + "/*.ts")
+    total = mp4 + avi + rmvb + wmv + mov + mkv + flv + ts
     return total
 def CreatFailedFolder():
     if not os.path.exists('failed/'):  # create the failed folder
@@ -94,31 +94,30 @@ def rreplace(self, old, new, *max):
     return new.join(self.rsplit(old, count))
 
 def getNumber(filepath):
     try:  # normal number extraction, mainly for numbers that contain a hyphen (-)
-        filepath1 = filepath.replace("_", "-")
-        filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
-        filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1))  # strip the date from the filename
-        file_number = re.search('\w+-\d+', filename).group()
-        return file_number
+        try:
+            filepath1 = filepath.replace("_", "-")
+            filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
+            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1))  # strip the date from the filename
+            file_number = re.search('\w+-\d+', filename).group()
+            return file_number
+        except:
+            filepath1 = filepath.replace("_", "-")
+            filepath1.strip('22-sht.me').strip('-HD').strip('-hd')
+            filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1))  # strip the date from the filename
+            file_number = re.search('\w+-\w+', filename).group()
+            return file_number
     except:  # extract numbers that do not contain a hyphen (-)
-        try:  # extract Tokyo-Hot style numbers such as n1087
-            filename1 = str(re.sub("h26\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
+        try:
+            filename1 = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
             filename0 = str(re.sub(".*?\.com-\d+", "", filename1)).strip('_')
-            if '-C.' in filepath or '-c.' in filepath:
-                cn_sub = '1'
-            file_number = str(re.search('n\d{4}', filename0).group(0))
+            file_number = str(re.search('\w+\d{4}', filename0).group(0))
             return file_number
         except:  # extract numbers without a hyphen
-            filename1 = str(re.sub("h26\d", "", filepath))  # strip h264/265
+            filename1 = str(re.sub("ts6\d", "", filepath))  # strip ts64/265
             filename0 = str(re.sub(".*?\.com-\d+", "", filename1))
             file_number2 = str(re.match('\w+', filename0).group())
-            if '-C.' in filepath or '-c.' in filepath:
-                cn_sub = '1'
-            file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),
-                                                   re.match("^[A-Za-z]+", file_number2).group() + '-'))
+            file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),re.match("^[A-Za-z]+", file_number2).group() + '-'))
             return file_number
-    # if not re.search('\w-', file_number).group() == 'None':
-    #     file_number = re.search('\w+-\w+', filename).group()
-    #
 def RunCore():
     if Platform == 'win32':
@@ -138,7 +137,7 @@
 
 if __name__ =='__main__':
     print('[*]===========AV Data Capture===========')
-    print('[*] Version '+version)
+    print('[*] Version '+version)
     print('[*]=====================================')
     CreatFailedFolder()
     UpdateCheck()
diff --git a/avsox.py b/avsox.py
new file mode 100644
index 000000000..ee4c79cad
--- /dev/null
+++ b/avsox.py
@@ -0,0 +1,112 @@
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+
+def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find_all(attrs={'class': 'avatar-box'})
+    d = {}
+    for i in a:
+        l = i.img['src']
+        t = i.span.get_text()
+        p2 = {t: l}
+        d.update(p2)
+    return d
+def getTitle(a):
+    try:
+        html = etree.fromstring(a, etree.HTMLParser())
+        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
+        return result.replace('/', '')
+    except:
+        return ''
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+    soup = BeautifulSoup(a, 'lxml')
+    a = soup.find_all(attrs={'class': 'avatar-box'})
+    d = []
+    for i in a:
+        d.append(i.span.get_text())
+    return d
+def getStudio(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
+    return result1
+def getRuntime(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
+    return result1
+def getLabel(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
+    return result1
+def getNum(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
+    return result1
+def getYear(release):
+    try:
+        result = str(re.search('\d{4}',release).group())
+        return result
+    except:
+        return release
+def getRelease(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
+    return result1
+def getCover(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
+    return result
+def getCover_small(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
+    return result
+def getTag(a):  # get actors
+    soup = BeautifulSoup(a, 'lxml')
+    a = soup.find_all(attrs={'class': 'genre'})
+    d = []
+    for i in a:
+        d.append(i.get_text())
+    return d
+
+def main(number):
+    a = get_html('https://avsox.asia/cn/search/' + number)
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+    if result1 == '' or result1 == 'null' or result1 == 'None':
+        a = get_html('https://avsox.asia/cn/search/' + number.replace('-', '_'))
+        print(a)
+        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+        if result1 == '' or result1 == 'null' or result1 == 'None':
+            a = get_html('https://avsox.asia/cn/search/' + number.replace('_', ''))
+            print(a)
+            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+            result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+    web = get_html(result1)
+    soup = BeautifulSoup(web, 'lxml')
+    info = str(soup.find(attrs={'class': 'row movie'}))
+    dic = {
+        'actor': getActor(web),
+        'title': getTitle(web).strip(getNum(web)),
+        'studio': getStudio(info),
+        'outline': '',#
+        'runtime': getRuntime(info),
+        'director': '', #
+        'release': getRelease(info),
+        'number': getNum(info),
+        'cover': getCover(web),
+        'cover_small': getCover_small(a),
+        'imagecut': 3,
+        'tag': getTag(web),
+        'label': getLabel(info),
+        'year': getYear(getRelease(info)),  # str(re.search('\d{4}',getRelease(a)).group()),
+        'actor_photo': getActorPhoto(web),
+        'website': result1,
+        'source': 'avsox.py',
+    }
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    return js
+
+#print(main('041516_541'))
\ No newline at end of file
diff --git a/core.py b/core.py
index cb07b80e5..7fd5f6b70 100755
--- a/core.py
+++ b/core.py
@@ -6,14 +6,17 @@
 import shutil
 from PIL import Image
 import time
-import javbus
 import json
-import fc2fans_club
-import siro
 from ADC_function import *
 from configparser import ConfigParser
 import argparse
+#=========website========
+import fc2fans_club
+import siro
+import avsox
+import javbus
 import javdb
+#=========website========
 
 Config = ConfigParser()
 Config.read(config_file, encoding='UTF-8')
@@ -42,6 +45,7 @@
 website=''
 json_data={}
 actor_photo={}
+cover_small=''
 naming_rule =''#eval(config['Name_Rule']['naming_rule'])
 location_rule=''#eval(config['Name_Rule']['location_rule'])
 program_mode = Config['common']['main_mode']
@@ -66,6 +70,11 @@ def CreatFailedFolder():
     except:
         print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
         os._exit(0)
+def getDataState(json_data):  # detect whether metadata scraping failed
+    if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
+        return 0
+    else:
+        return 1
 def getDataFromJSON(file_number):  # return metadata from JSON
     global title
     global studio
@@ -84,6 +93,7 @@ def getDataFromJSON(file_number):  # return metadata from JSON
     global cn_sub
     global website
     global actor_photo
+    global cover_small
     global naming_rule
     global location_rule
 
@@ -92,21 +102,33 @@ def getDataFromJSON(file_number):  # return metadata from JSON
 
     # ================================================ website rules: start ================================================
     try:  # add rules that need a regular expression
-        # =======================javdb.py=======================
         if re.search('^\d{5,}', file_number).group() in file_number:
-            json_data = json.loads(javbus.main_uncensored(file_number))
+            json_data = json.loads(avsox.main(file_number))
+            if getDataState(json_data) == 0:  # if metadata scraping failed, send the number to another site
+                json_data = json.loads(javdb.main(file_number))
+
+        elif re.search('\d+\D+', file_number).group() in file_number:
+            json_data = json.loads(siro.main(file_number))
+            if getDataState(json_data) == 0:  # if metadata scraping failed, send the number to another site
+                json_data = json.loads(javbus.main(file_number))
+            elif getDataState(json_data) == 0:  # if metadata scraping failed, send the number to another site
+                json_data = json.loads(javdb.main(file_number))
+
     except:  # add rules that do not need a regular expression
-        # ====================fc2fans_club.py====================
         if 'fc2' in file_number:
            json_data = json.loads(fc2fans_club.main(file_number.strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-')))
         elif 'FC2' in file_number:
            json_data = json.loads(fc2fans_club.main(file_number.strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-')))
-        # =======================siro.py=========================
+        elif 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
+            json_data = json.loads(avsox.main(file_number))
         elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
            json_data = json.loads(siro.main(file_number))
-        # =======================javbus.py=======================
         else:
            json_data = json.loads(javbus.main(file_number))
+           if getDataState(json_data) == 0:  # if metadata scraping failed, send the number to another site
+               json_data = json.loads(avsox.main(file_number))
+           elif getDataState(json_data) == 0:  # if metadata scraping failed, send the number to another site
+               json_data = json.loads(javdb.main(file_number))
 
     # ================================================ website rules: end ================================================
 
@@ -120,16 +142,25 @@ def getDataFromJSON(file_number):  # return metadata from JSON
     release = json_data['release']
     number = json_data['number']
     cover = json_data['cover']
+    try:
+        cover_small = json_data['cover_small']
+    except:
+        aaaaaaa=''
     imagecut = json_data['imagecut']
     tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # convert string to list
     actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
     actor_photo = json_data['actor_photo']
     website = json_data['website']
+    source = json_data['source']
 
     if title == '' or number == '':
        print('[-]Movie Data not found!')
        moveFailedFolder()
+    if imagecut == '3':
+        DownloadFileWithFilename()
+
+
     # ==================== handle illegal characters ======================  #\/:*?"<>|
     if '\\' in title:
        title=title.replace('\\', ' ')
@@ -153,6 +184,23 @@ def getDataFromJSON(file_number):  # return metadata from JSON
     naming_rule = eval(config['Name_Rule']['naming_rule'])
     location_rule = eval(config['Name_Rule']['location_rule'])
 
+def smallCoverCheck():
+    if imagecut == 3:
+        if option == 'emby':
+            DownloadFileWithFilename(cover_small, '1.jpg', path)
+            img = Image.open(path + '/1.jpg')
+            w = img.width
+            h = img.height
+            img.save(path + '/' + number + '.png')
+            time.sleep(1)
+            os.remove(path + '/1.jpg')
+        if option == 'plex':
+            DownloadFileWithFilename(cover_small, '1.jpg', path)
+            img = Image.open(path + '/1.jpg')
+            w = img.width
+            h = img.height
+            img.save(path + '/poster.png')
+            os.remove(path + '/1.jpg')
 def creatFolder():  # create the folder
     global actor
     global path
@@ -352,7 +400,7 @@
                 img2.save(path + '/poster.png')
             except:
                 print('[-]Cover cut failed!')
-        else:
+        elif imagecut == 0:
             img = Image.open(path + '/fanart.jpg')
             w = img.width
             h = img.height
@@ -368,7 +416,7 @@
                 img2.save(path + '/' + number + '.png')
             except:
                 print('[-]Cover cut failed!')
-        else:
+        elif imagecut == 0:
             img = Image.open(path + '/' + number + '.jpg')
             w = img.width
             h = img.height
@@ -420,6 +468,7 @@
     if program_mode == '1':
         imageDownload(filepath)  # creatFolder returns the path for this number
         PrintFiles(filepath)  # print the files
+        smallCoverCheck()
         cutImage()  # crop the cover image
         pasteFileToFolder(filepath, path)  # move the file
         renameJpgToBackdrop_copy()
diff --git a/fc2fans_club.py b/fc2fans_club.py
index 9915a8704..e5d62df88 100755
--- a/fc2fans_club.py
+++ b/fc2fans_club.py
@@ -75,6 +75,7 @@ def main(number2):
         'tag': getTag(htmlcode),
         'actor_photo':'',
         'website': 'http://fc2fans.club/html/FC2-' + number + '.html',
+        'source': 'fc2fans_club.py',
     }
     #print(getTitle(htmlcode))
     #print(getNum(htmlcode))
diff --git a/javbus.py b/javbus.py
index 92c4fb4ec..457d3804b 100755
--- a/javbus.py
+++ b/javbus.py
@@ -1,17 +1,9 @@
 import re
-import requests #need install
 from pyquery import PyQuery as pq#need install
 from lxml import etree#need install
-import os
-import os.path
-import shutil
 from bs4 import BeautifulSoup#need install
-from PIL import Image#need install
-import time
 import json
 from ADC_function import *
-import javdb
-import siro
 
 def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
     soup = BeautifulSoup(htmlcode, 'lxml')
@@ -88,16 +80,12 @@ def getTag(htmlcode): # get actors
 
 def main(number):
-    try:
-        if re.search('\d+\D+', number).group() in number:
-            js = siro.main(number)
-            return js
-    except:
-        aaaa=''
-
     try:
         htmlcode = get_html('https://www.javbus.com/' + number)
-        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+        try:
+            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+        except:
+            dww_htmlcode = ''
         dic = {
             'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
             'studio': getStudio(htmlcode),
@@ -114,35 +102,12 @@
             'label': getSerise(htmlcode),
             'actor_photo': getActorPhoto(htmlcode),
             'website': 'https://www.javbus.com/' + number,
+            'source' : 'javbus.py',
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-        if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
-            htmlcode = get_html('https://www.javbus.com/' + number)
-            #dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
-            dic = {
-                'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
-                'studio': getStudio(htmlcode),
-                'year': getYear(htmlcode),
-                'outline': '',
-                'runtime': getRuntime(htmlcode),
-                'director': getDirector(htmlcode),
-                'actor': getActor(htmlcode),
-                'release': getRelease(htmlcode),
-                'number': getNum(htmlcode),
-                'cover': getCover(htmlcode),
-                'imagecut': 1,
-                'tag': getTag(htmlcode),
-                'label': getSerise(htmlcode),
-                'actor_photo': getActorPhoto(htmlcode),
-                'website': 'https://www.javbus.com/' + number,
-            }
-            js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
-                             separators=(',', ':'), )  # .encode('UTF-8')
-            return js2
         return js
     except:
-        a=javdb.main(number)
-        return a
+        return main_uncensored(number)
 
 def main_uncensored(number):
     htmlcode = get_html('https://www.javbus.com/' + number)
@@ -166,11 +131,7 @@
         'imagecut': 0,
         'actor_photo': '',
         'website': 'https://www.javbus.com/' + number,
+        'source': 'javbus.py',
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-
-    if getYear(htmlcode) == '' or getYear(htmlcode) == 'null':
-        js2 = javdb.main(number)
-        return js2
-
     return js
\ No newline at end of file
diff --git a/javdb.py b/javdb.py
index 372311710..9deae67d0 100755
--- a/javdb.py
+++ b/javdb.py
@@ -1,7 +1,6 @@
 import re
 from lxml import etree
 import json
-import requests
 from bs4 import BeautifulSoup
 from ADC_function import *
 
@@ -79,7 +78,6 @@
     result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
     b = get_html('https://javdb1.com' + result1)
     soup = BeautifulSoup(b, 'lxml')
-
     a = str(soup.find(attrs={'class': 'panel'}))
     dic = {
         'actor': getActor(a),
@@ -99,6 +97,7 @@
         'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
         'actor_photo': '',
         'website': 'https://javdb1.com' + result1,
+        'source': 'javdb.py',
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
     return js
@@ -106,19 +105,18 @@
     a = get_html('https://javdb.com/search?q=' + number + '&f=all')
     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
     result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
-    if result1 == '':
+    if result1 == '' or result1 == 'null':
         a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
         html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
         result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
-
     b = get_html('https://javdb.com' + result1)
     soup = BeautifulSoup(b, 'lxml')
-
     a = str(soup.find(attrs={'class': 'panel'}))
     dic = {
         'actor': getActor(a),
-        'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a),
-                                                                                                  '').replace(
+        'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(
+            getNum(a),
+            '').replace(
             '无码', '').replace('有码', '').lstrip(' '),
         'studio': getStudio(a),
         'outline': getOutline(a),
@@ -132,9 +130,10 @@
         'label': getLabel(a),
         'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
         'actor_photo': '',
-        'website':'https://javdb.com' + result1,
+        'website': 'https://javdb.com' + result1,
+        'source': 'javdb.py',
     }
-    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,separators=(',', ':'), )  # .encode('UTF-8')
     return js
 
 #print(main('061519-861'))
\ No newline at end of file
diff --git a/siro.py b/siro.py
index 0cd8b09be..9b9d69a24 100755
--- a/siro.py
+++ b/siro.py
@@ -1,7 +1,6 @@
 import re
 from lxml import etree
 import json
-import requests
 from bs4 import BeautifulSoup
 from ADC_function import *
 
@@ -97,6 +96,7 @@
         'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
         'actor_photo': '',
         'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
+        'source': 'siro.py',
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
     return js
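
A sketch of the fallback pattern this patch introduces in core.py (the helper name get_data_with_fallback is hypothetical and not part of the patch): getDataFromJSON() now re-queries another scraper whenever getDataState() reports an empty title. The same chain can be written as a single loop over the scraper modules the patch wires together -- avsox, javbus and javdb each expose main(number) returning a JSON string, which is the only behaviour the sketch relies on.

    # Sketch only, assuming the scraper modules from this repo; not present in the patch.
    import json
    import avsox, javbus, javdb  # each module's main(number) returns metadata as a JSON string

    def get_data_with_fallback(file_number, scrapers=(avsox, javbus, javdb)):  # hypothetical helper
        for scraper in scrapers:
            try:
                data = json.loads(scraper.main(file_number))
            except Exception:
                continue                                         # network or parse error: try the next site
            if data.get('title') not in ('', 'None', 'null'):    # same check as core.py's getDataState()
                return data                                      # first site with a usable title wins
        return None                                              # every site failed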