Beta 11.4 Update

Dao258 · Jul 3, 2019 · 4b35113 · 4b35113
1 parent d672d4d
commit 4b35113
Show file tree

Hide file tree

Showing 8 changed files with 115 additions and 55 deletions.
diff --git a/ADC_function.py b/ADC_function.py
@@ -5,17 +5,17 @@
 from retrying import retry
 import sys
 
-# content = open('proxy.ini').read()
+# content = open('config.ini').read()
 # content = re.sub(r"\xfe\xff","", content)
 # content = re.sub(r"\xff\xfe","", content)
 # content = re.sub(r"\xef\xbb\xbf","", content)
 # open('BaseConfig.cfg', 'w').write(content)
 
 config = RawConfigParser()
-if os.path.exists('proxy.ini'):
-    config.read('proxy.ini', encoding='UTF-8')
+if os.path.exists('config.ini'):
+    config.read('config.ini', encoding='UTF-8')
 else:
-    with open("proxy.ini", "wt", encoding='UTF-8') as code:
+    with open("config.ini", "wt", encoding='UTF-8') as code:
         print("[proxy]",file=code)
         print("proxy=127.0.0.1:1080",file=code)
         print("timeout=10", file=code)

diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
@@ -7,7 +7,7 @@
 import json
 import shutil
 
-version='0.11.2'
+version='0.11.4'
 os.chdir(os.getcwd())
 
 def UpdateCheck():
@@ -102,12 +102,20 @@ def getNumber(filepath):
         print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
         print('[-]' + str(os.path.basename(filepath)) + ' :', e)
         print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
+
         shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
     except IOError as e2:
         print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
         print('[-]' + str(os.path.basename(filepath)) + ' :', e2)
         print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
         shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
+def RunCore():  # Run core on the current file; `i` is not a parameter, so it is looked up in module globals (presumably the __main__ loop variable - confirm)
+    if os.path.exists('core.py'):
+        os.system('python core.py' + '   "' + i + '" --number "'+getNumber(i)+'"') # launch from the .py script (when running from source)
+    elif os.path.exists('core.exe'):
+        os.system('core.exe' + '   "' + i + '" --number "'+getNumber(i)+'"')      # launch from the .exe (packaged EXE build)
+    elif os.path.exists('core.py') and os.path.exists('core.exe'):  # NOTE(review): unreachable - the first branch already matches whenever core.py exists
+        os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # launch from the .py script (when running from source)
 
 if __name__ =='__main__':
     print('[*]===========AV Data Capture===========')
@@ -124,11 +132,9 @@ def getNumber(filepath):
         percentage = str(count/int(count_all)*100)[:4]+'%'
         print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
         print("[!]Making Data for   [" + i + "],the number is [" + getNumber(i) + "]")
-        os.system('python core.py' + '   "' + i + '" --number "'+getNumber(i)+'"') #选择从py文件启动  （用于源码py）
-        #os.system('core.exe' + '   "' + i + '" --number "'+getNumber(i)+'"')      #选择从exe文件启动（用于EXE版程序
-        #print()
+        RunCore()
         print("[*]=====================================")
 
     CEF('JAV_output')
     print("[+]All finished!!!")
-    input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看和错误信息。")
+    input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看和错误信息。")
diff --git a/config.ini b/config.ini
@@ -0,0 +1,12 @@
+[proxy]
+proxy=127.0.0.1:1080
+timeout=10
+retry=3
+
+[Name_Rule]
+location_rule='JAV_output/'+actor+'/'+number
+naming_rule=number+'-'+title
+
+[update]
+update_check=0
+# update_check: 1 = on, 0 = off
diff --git a/core.py b/core.py
@@ -30,6 +30,9 @@
 cn_sub=''
 path=''
 houzhui=''
+website=''
+json_data={}
+actor_photo={}
 naming_rule  =''#eval(config['Name_Rule']['naming_rule'])
 location_rule=''#eval(config['Name_Rule']['location_rule'])
 
@@ -63,14 +66,16 @@ def getDataFromJSON(file_number): #从JSON返回元数据
     global tag
     global image_main
     global cn_sub
+    global website
+    global actor_photo
 
     global naming_rule
     global location_rule
 
     try:    # 添加 需要 正则表达式的规则
         # =======================javdb.py=======================
         if re.search('^\d{5,}', file_number).group() in file_number:
-            json_data = json.loads(javdb.main(file_number))
+            json_data = json.loads(javbus.main_uncensored(file_number))
     except:  # 添加 无需 正则表达式的规则
         # ====================fc2fans_club.py====================
         if 'fc2' in file_number:
@@ -99,6 +104,8 @@ def getDataFromJSON(file_number): #从JSON返回元数据
     imagecut =       json_data['imagecut']
     tag =        str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # 字符串转列表
     actor =      str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+    actor_photo =    json_data['actor_photo']
+    website =        json_data['website']
 
     # ====================处理异常字符====================== #\/:*?"<>|
     if '\\' in title:
@@ -142,9 +149,9 @@ def creatFolder(): #创建文件夹
 #=====================资源下载部分===========================
 def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
     config = ConfigParser()
-    config.read('proxy.ini', encoding='UTF-8')
-    proxy = str(config['proxy']['proxy'])
-    timeout = int(config['proxy']['timeout'])
+    config.read('config.ini', encoding='UTF-8')
+    proxy       = str(config['proxy']['proxy'])
+    timeout     = int(config['proxy']['timeout'])
     retry_count = int(config['proxy']['retry'])
     i = 0
 
@@ -184,19 +191,20 @@ def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in
             print('[-]Image Download :  Connect retry '+str(i)+'/'+str(retry_count))
 def imageDownload(filepath): #封面是否下载成功，否则移动到failed
     global path
-    if DownloadFileWithFilename(cover,naming_rule+'.jpg', path) == 'failed':
+    if DownloadFileWithFilename(cover,'fanart.jpg', path) == 'failed':
         shutil.move(filepath, 'failed/')
         os._exit(0)
-    DownloadFileWithFilename(cover, naming_rule+'.jpg', path)
-    print('[+]Image Downloaded!', path +'/'+naming_rule+'.jpg')
+    DownloadFileWithFilename(cover, 'fanart.jpg', path)
+    print('[+]Image Downloaded!', path +'/fanart.jpg')
 def PrintFiles(filepath):
     #global path
     global title
     global cn_sub
+    global actor_photo
     try:
         if not os.path.exists(path):
             os.makedirs(path)
-        with open(path + "/" + naming_rule + ".nfo", "wt", encoding='UTF-8') as code:
+        with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
             print("<movie>", file=code)
             print(" <title>" + naming_rule + "</title>", file=code)
             print("  <set>", file=code)
@@ -207,13 +215,15 @@ def PrintFiles(filepath):
             print("  <plot>"+outline+"</plot>", file=code)
             print("  <runtime>"+str(runtime).replace(" ","")+"</runtime>", file=code)
             print("  <director>" + director + "</director>", file=code)
-            print("  <poster>" + naming_rule + ".png</poster>", file=code)
-            print("  <thumb>" + naming_rule + ".png</thumb>", file=code)
-            print("  <fanart>"+naming_rule + '.jpg'+"</fanart>", file=code)
+            print("  <poster>poster.png</poster>", file=code)
+            print("  <thumb>thumb.png</thumb>", file=code)
+            print("  <fanart>fanart.jpg</fanart>", file=code)
             try:
-                for u in actor_list:
+                for key, value in actor_photo.items():
                     print("  <actor>", file=code)
-                    print("   <name>" + u + "</name>", file=code)
+                    print("   <name>" + key + "</name>", file=code)
+                    if not actor_photo == '':  # or actor_photo == []:
+                        print("   <thumb>" + value + "</thumb>", file=code)
                     print("  </actor>", file=code)
             except:
                 aaaa=''
@@ -237,9 +247,9 @@ def PrintFiles(filepath):
             print("  <num>" + number + "</num>", file=code)
             print("  <release>" + release + "</release>", file=code)
             print("  <cover>"+cover+"</cover>", file=code)
-            print("  <website>" + "https://www.javbus.com/"+number + "</website>", file=code)
+            print("  <website>" + website + "</website>", file=code)
             print("</movie>", file=code)
-            print("[+]Writeed!          "+path + "/" + naming_rule + ".nfo")
+            print("[+]Writeed!          "+path + "/" + number + ".nfo")
     except IOError as e:
         print("[-]Write Failed!")
         print(e)
@@ -253,31 +263,32 @@ def PrintFiles(filepath):
 def cutImage():
     if imagecut == 1:
         try:
-            img = Image.open(path + '/' + naming_rule + '.jpg')
+            img = Image.open(path + '/fanart.jpg')
             imgSize = img.size
             w = img.width
             h = img.height
             img2 = img.crop((w / 1.9, 0, w, h))
-            img2.save(path + '/' + naming_rule + '.png')
+            img2.save(path + '/poster.png')
         except:
             print('[-]Cover cut failed!')
     else:
-        img = Image.open(path + '/' + naming_rule + '.jpg')
+        img = Image.open(path + '/fanart.jpg')
         w = img.width
         h = img.height
-        img.save(path + '/' + naming_rule + '.png')
+        img.save(path + '/poster.png')
 def pasteFileToFolder(filepath, path): #文件路径，番号，后缀，要移动至的位置
     global houzhui
     houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group())
-    os.rename(filepath, naming_rule + houzhui)
-    shutil.move(naming_rule + houzhui, path)
+    os.rename(filepath, number + houzhui)
+    shutil.move(number + houzhui, path)
 def renameJpgToBackdrop_copy():
-    shutil.copy(path+'/'+naming_rule + '.jpg', path+'/Backdrop.jpg')
+    shutil.copy(path+'/fanart.jpg', path+'/Backdrop.jpg')
+    shutil.copy(path + '/poster.png', path + '/thumb.png')
 
 if __name__ == '__main__':
     filepath=argparse_get_file()[0] #影片的路径
 
-    if '-c.' in filepath or '-C.' in filepath:
+    if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
         cn_sub='1'
 
     if argparse_get_file()[1] == '':    #获取手动拉去影片获取的番号

diff --git a/fc2fans_club.py b/fc2fans_club.py
@@ -61,18 +61,20 @@ def main(number2):
     htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
     htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
     dic = {
-        'title': getTitle(htmlcode),
-        'studio': getStudio(htmlcode),
+        'title':    getTitle(htmlcode),
+        'studio':   getStudio(htmlcode),
         'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
-        'outline': getOutline(htmlcode,number),
-        'runtime': getYear(getRelease(htmlcode)),
+        'outline':  getOutline(htmlcode,number),
+        'runtime':  getYear(getRelease(htmlcode)),
         'director': getStudio(htmlcode),
-        'actor': getActor(htmlcode),
-        'release': getRelease(number),
-        'number': 'FC2-'+number,
-        'cover': getCover(htmlcode,number,htmlcode2),
+        'actor':    getActor(htmlcode),
+        'release':  getRelease(number),
+        'number':  'FC2-'+number,
+        'cover':    getCover(htmlcode,number,htmlcode2),
         'imagecut': 0,
-        'tag':getTag(htmlcode),
+        'tag':      getTag(htmlcode),
+        'actor_photo':'',
+        'website':  'http://fc2fans.club/html/FC2-' + number + '.html',
     }
     #print(getTitle(htmlcode))
     #print(getNum(htmlcode))

diff --git a/javbus.py b/javbus.py
@@ -13,6 +13,18 @@
 import javdb
 import siro
 
+def getActorPhoto(htmlcode): # Build a dict of {name text: img src} from the page's 'star-name' elements; xpath note: //*[@id="star_qdt"]/li/a/img
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find_all(attrs={'class': 'star-name'})  # every element whose class is 'star-name'
+    d={}  # result: element text -> stringified @src value
+    for i in a:
+        l=i.a['href']  # href of the first <a> inside the element
+        t=i.get_text()  # used as the dict key
+        html = etree.fromstring(get_html(l), etree.HTMLParser())  # presumably get_html fetches the page at that href - confirm; parsed as HTML
+        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")  # stringify the xpath result list, then strip spaces, brackets and quotes from both ends
+        p2={t:p}
+        d.update(p2)  # a later element with the same text overwrites the earlier entry
+    return d
 def getTitle(htmlcode):  #获取标题
     doc = pq(htmlcode)
     title=str(doc('div.container h3').text()).replace(' ','-')
@@ -100,17 +112,18 @@ def main(number):
             'imagecut': 1,
             'tag': getTag(htmlcode),
             'label': getSerise(htmlcode),
+            'actor_photo': getActorPhoto(htmlcode),
+            'website': 'https://www.javbus.com/' + number,
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-
         if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
             htmlcode = get_html('https://www.javbus.com/' + number)
-            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+            #dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
             dic = {
                 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
                 'studio': getStudio(htmlcode),
                 'year': getYear(htmlcode),
-                'outline': getOutline(dww_htmlcode),
+                'outline': '',
                 'runtime': getRuntime(htmlcode),
                 'director': getDirector(htmlcode),
                 'actor': getActor(htmlcode),
@@ -120,6 +133,8 @@ def main(number):
                 'imagecut': 1,
                 'tag': getTag(htmlcode),
                 'label': getSerise(htmlcode),
+                'actor_photo': getActorPhoto(htmlcode),
+                'website': 'https://www.javbus.com/' + number,
             }
             js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                              separators=(',', ':'), )  # .encode('UTF-8')
@@ -132,6 +147,9 @@ def main(number):
 def main_uncensored(number):
     htmlcode = get_html('https://www.javbus.com/' + number)
     dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+    if getTitle(htmlcode) == '':
+        htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
+        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
     dic = {
         'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
         'studio': getStudio(htmlcode),
@@ -146,6 +164,8 @@ def main_uncensored(number):
         'tag': getTag(htmlcode),
         'label': getSerise(htmlcode),
         'imagecut': 0,
+        'actor_photo': '',
+        'website': 'https://www.javbus.com/' + number,
     }
     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
 

diff --git a/javdb.py b/javdb.py
@@ -56,6 +56,8 @@ def getTag(a):
 def getCover(htmlcode):
     html = etree.fromstring(htmlcode, etree.HTMLParser())
     result = str(html.xpath('/html/body/section/div/div[2]/div[1]/a/img/@src')).strip(" ['']")
+    if result == '':
+        result = str(html.xpath('/html/body/section/div/div[3]/div[1]/a/img/@src')).strip(" ['']")
     return result
 def getDirector(a):
     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
@@ -68,13 +70,13 @@ def getOutline(htmlcode):
     return result
 def main(number):
     try:
-        try:
-            a = get_html('https://javdb1.com/search?q=' + number + '&f=all')
-            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        except:
-            a = get_html('https://javdb1.com/search?q=' + number.replace('-', '_') + '&f=all')
-            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        a = get_html('https://javdb.com/search?q=' + number + '&f=all')
+        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
         result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+        if result1 == '':
+            a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
+            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+            result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
         b = get_html('https://javdb1.com' + result1)
         soup = BeautifulSoup(b, 'lxml')
 
@@ -95,17 +97,20 @@ def main(number):
             'tag': getTag(a),
             'label': getLabel(a),
             'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
+            'actor_photo': '',
+            'website': 'https://javdb1.com' + result1,
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
         return js
     except:
-        try:
-            a = get_html('https://javdb.com/search?q=' + number + '&f=all')
-            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        except:
+        a = get_html('https://javdb.com/search?q=' + number + '&f=all')
+        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+        if result1 == '':
             a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
             html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+            result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+
         b = get_html('https://javdb.com' + result1)
         soup = BeautifulSoup(b, 'lxml')
 
@@ -126,6 +131,8 @@ def main(number):
             'tag': getTag(a),
             'label': getLabel(a),
             'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
+            'actor_photo': '',
+            'website':'https://javdb.com' + result1,
         }
         js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
         return js