forked from mvdctop/Movie_Data_Capture
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
236 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import re | ||
from lxml import etree | ||
import json | ||
from bs4 import BeautifulSoup | ||
from ADC_function import * | ||
|
||
def getActorPhoto(htmlcode):  # //*[@id="star_qdt"]/li/a/img
    """Map each actor name to their avatar image URL.

    Scans every element with class 'avatar-box'; the <span> holds the
    name and the <img> src holds the photo URL.
    """
    soup = BeautifulSoup(htmlcode, 'lxml')
    photos = {}
    for box in soup.find_all(attrs={'class': 'avatar-box'}):
        photos[box.span.get_text()] = box.img['src']
    return photos
def getTitle(a):
    """Extract the page title from the detail-page HTML.

    Returns the title with '/' characters removed, or '' when the page
    cannot be parsed or the xpath yields nothing usable.
    """
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        # str() of the node list, trimmed of list punctuation; an empty
        # match collapses to ''.
        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  # [0]
        return result.replace('/', '')
    except Exception:
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed; any parse failure still yields ''.
        return ''
def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    """Return the list of actor names taken from the avatar boxes."""
    soup = BeautifulSoup(a, 'lxml')
    boxes = soup.find_all(attrs={'class': 'avatar-box'})
    return [box.span.get_text() for box in boxes]
def getStudio(a):
    """Extract the studio name from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    nodes = tree.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')
    # str() of the node list, trimmed, with multi-value separators joined by spaces.
    return str(nodes).strip(" ['']").replace("', '", ' ')
def getRuntime(a):
    """Extract the runtime (minutes) text from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    nodes = tree.xpath('//span[contains(text(),"长度:")]/../text()')
    # Strip both list punctuation and the trailing '分钟' (minutes) characters.
    return str(nodes).strip(" ['分钟']")
def getLabel(a):
    """Extract the series/label name from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    nodes = tree.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')
    return str(nodes).strip(" ['']")
def getNum(a):
    """Extract the identification number from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    nodes = tree.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')
    return str(nodes).strip(" ['']")
def getYear(release):
    """Extract the first 4-digit year from a release-date string.

    Returns the input unchanged when no 4-digit run is found or when the
    input is not a string (preserving the original fallback behavior of
    the bare except).
    """
    try:
        match = re.search(r'\d{4}', release)
    except TypeError:
        # Non-string input (e.g. None): original code returned it as-is.
        return release
    return match.group() if match else release
def getRelease(a):
    """Extract the release-date text from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    nodes = tree.xpath('//span[contains(text(),"发行时间:")]/../text()')
    return str(nodes).strip(" ['']")
def getCover(htmlcode):
    """Extract the full-size cover image URL from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')
    return str(nodes).strip(" ['']")
def getCover_small(htmlcode):
    """Extract the thumbnail cover URL from the search-results page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')
    return str(nodes).strip(" ['']")
def getTag(a):
    """Return genre tag strings from the page.

    NOTE: the original comment said "actors" (获取演员), but the selector
    targets elements of class 'genre', i.e. genre tags.
    """
    soup = BeautifulSoup(a, 'lxml')
    return [node.get_text() for node in soup.find_all(attrs={'class': 'genre'})]
|
||
def main(number):
    """Look up *number* on avsox and return its metadata as a JSON string.

    Tries up to three search variants in order: the number as given,
    then with '-' replaced by '_', then with '_' removed entirely.
    """
    a = get_html('https://avsox.asia/cn/search/' + number)
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    # First search hit's detail-page URL; str() of the xpath node list is
    # trimmed so an empty match collapses to ''.
    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 == '' or result1 == 'null' or result1 == 'None':
        # Fallback 1: some releases are indexed with '_' instead of '-'.
        a = get_html('https://avsox.asia/cn/search/' + number.replace('-', '_'))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 == '' or result1 == 'null' or result1 == 'None':
        # Fallback 2: last resort — drop underscores altogether.
        a = get_html('https://avsox.asia/cn/search/' + number.replace('_', ''))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    # Fetch the detail page found by the search.
    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    # Info panel markup fed to the per-field extractors below.
    info = str(soup.find(attrs={'class': 'row movie'}))
    dic = {
        'actor': getActor(web),
        # NOTE(review): str.strip() removes any characters in getNum(web)
        # from both ends (a character-set strip, not substring removal) —
        # presumably meant to trim the ID from the title; confirm intended.
        'title': getTitle(web).strip(getNum(web)),
        'studio': getStudio(info),
        'outline': '',#
        'runtime': getRuntime(info),
        'director': '', #
        'release': getRelease(info),
        'number': getNum(info),
        'cover': getCover(web),
        'cover_small': getCover_small(a),  # thumbnail comes from the search page, not the detail page
        'imagecut': 3,
        'tag': getTag(web),
        'label': getLabel(info),
        'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': getActorPhoto(web),
        'website': result1,
        'source': 'avsox.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
    return js
|
||
#print(main('041516_541')) |
Oops, something went wrong.