Skip to content

Commit

Permalink
Update 3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdctop authored Apr 15, 2020
1 parent 92e631f commit 1f4b7e6
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 73 deletions.
50 changes: 4 additions & 46 deletions ADC_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,59 +13,17 @@
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
# sys.setdefaultencoding('utf-8')

config_file='config.ini'
config_file = 'config.ini'
config = ConfigParser()

if os.path.exists(config_file):
try:
config.read(config_file, encoding='UTF-8')
except:
print('[-]Config.ini read failed! Please use the offical file!')
else:
print('[+]config.ini: not found, creating...',end='')
with open("config.ini", "wt", encoding='UTF-8') as code:
file_text = """[common]
main_mode=1
failed_output_folder=failed
success_output_folder=JAV_output
soft_link=0
[proxy]
proxy=192.168.2.2:1080
timeout=10
retry=3
[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title
[update]
update_check=1
[media]
media_warehouse=emby
#emby or plex or kodi ,emby=jellyfin
[escape]
literals=\()/
folders=failed,JAV_output
[debug_mode]
switch=0
"""
print(file_text, file=code)
time.sleep(2)
print('.')
print('[+]config.ini: created!')
print('[+]Please restart the program!')
time.sleep(4)
os._exit(0)
try:
config.read(config_file, encoding='UTF-8')
except:
except Exception as e:
# str() is required here: '+' between a str and an exception object raises TypeError.
print('[-]' + str(e))
print('[-]Config.ini read failed! Please use the offical file!')


def get_network_settings():
try:
proxy = config["proxy"]["proxy"]
Expand Down
14 changes: 7 additions & 7 deletions AV_Data_Capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ def argparse_function(switch):
parser = argparse.ArgumentParser()
parser.add_argument("file", default='',nargs='?', help="Single Movie file path.")
parser.add_argument("-c", "--config", default='config.ini', nargs='?', help="The config file Path.")
parser.add_argument("-e", "--exit", default='1', nargs='?', help="Exit Switch 1:Press enter key to exit. 2:Auto exit.")
parser.add_argument("-a", "--auto-exit", dest='autoexit', action="store_true", help="Auto exit after program complete")
args = parser.parse_args()
if switch == 1:
if args.file == '':
return ''
elif switch == 2:
return args.config
elif switch == 3:
return args.exit
return args.autoexit

def movie_lists(root, escape_folder):
for folder in escape_folder:
Expand Down Expand Up @@ -100,10 +100,10 @@ def getNumber(filepath,absolute_path = False):


if __name__ == '__main__':
version = '3.1.2'
version = '3.2'
config_file = argparse_function(2)
config = ConfigParser()
config.read(config_file, encoding='UTF-8')
config.read(argparse_function(2), encoding='UTF-8')
success_folder = config['common']['success_output_folder']
failed_folder = config['common']['failed_output_folder'] # 失败输出目录
escape_folder = config['escape']['folders'] # 多级目录刮削需要排除的目录
Expand Down Expand Up @@ -148,7 +148,7 @@ def getNumber(filepath,absolute_path = False):
core_main(i, getNumber(i), config_file=config_file)
print("[*]======================================================")
except Exception as e: # 番号提取异常
print('[-]' + i + ' ERRPR :')
print('[-]' + i + ' ERROR :')
print('[-]',e)
if config['common']['soft_link'] == '1':
print('[-]Link', i, 'to failed folder')
Expand All @@ -164,6 +164,6 @@ def getNumber(filepath,absolute_path = False):
CEF(success_folder)
CEF(failed_folder)
print("[+]All finished!!!")
if argparse_function(3) == '2':
if argparse_function(3) == True:
os._exit(0)
input("[+][+]Press enter key exit, you can check the error messge before you exit.")
input("[+][+]Press enter key exit, you can check the error messge before you exit.")
18 changes: 8 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,12 @@ update_check=1
0为关闭,1为开启,不建议关闭

---
### 媒体库选择
### 刮削网站优先级
```
[media]
media_warehouse=emby
#emby plex kodi
[priority]
website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321
```
可选择emby, plex, kodi
如果是PLEX,请安装插件:```XBMCnfoMoviesImporter```
```,```英文逗号分开网站,刮削顺序从左往右

---
### 排除指定字符和目录
Expand Down Expand Up @@ -267,12 +265,12 @@ AV_Data_Capture xxx-xxx-xxx.mp4
```
AV_Data_Capture -c config_other.ini
```
### 程序退出选择参数
默认值为```1```
### 程序自动退出

```
AV_Data_Capture -e 1
AV_Data_Capture -a
```
1为默认值,刮削结束后要按下回车键程序才会结束,如果是2,程序刮削完毕后会自动结束程序
输入参数即可在刮削结束后自动结束程序

## 多集影片处理
**建议使用视频合并合并为一个视频文件**
Expand Down
5 changes: 4 additions & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ naming_rule=number+'-'+title
[update]
update_check=1

[priority]
website=javbus,javdb,fanza,xcity,mgstage,fc2,avsox,jav321

[escape]
literals=\()/
folders=failed,JAV_output

[debug_mode]
switch=0
switch=0
12 changes: 7 additions & 5 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import javdb
import fanza
import jav321

import xcity

# =====================本地文件处理===========================

Expand All @@ -47,7 +47,7 @@ def CreatFailedFolder(failed_folder):
return


def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元数据
def getDataFromJSON(file_number, filepath, failed_folder, sources): # 从JSON返回元数据
"""
iterate through all services and fetch the data
"""
Expand All @@ -60,10 +60,11 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元
"javbus": javbus.main,
"mgstage": mgstage.main,
"jav321": jav321.main,
"xcity" : xcity.main,
}

# default fetch order list, from the begining to the end
sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox", "jav321"]
sources = sources.split(',')

# if the input file name matches centain rules,
# move some web service to the begining of the list
Expand Down Expand Up @@ -308,7 +309,7 @@ def PrintFiles(path, c_word, naming_rule, part, cn_sub, json_data, filepath, fai
print(" <cover>" + cover + "</cover>", file=code)
print(" <website>" + website + "</website>", file=code)
print("</movie>", file=code)
print("[+]Writeed! " + path + "/" + number + c_word + ".nfo")
print("[+]Wrote! " + path + "/" + number + c_word + ".nfo")
except IOError as e:
print("[-]Write Failed!")
print(e)
Expand Down Expand Up @@ -430,9 +431,10 @@ def core_main(file_path, number_th, config_file):
program_mode = Config['common']['main_mode'] # 运行模式
failed_folder = Config['common']['failed_output_folder'] # 失败输出目录
success_folder = Config['common']['success_output_folder'] # 成功输出目录
sources = Config['priority']['website'] # 网站优先级
filepath = file_path # 影片的路径
number = number_th
json_data = getDataFromJSON(number, filepath, failed_folder) # 定义番号
json_data = getDataFromJSON(number, filepath, failed_folder, sources) # 定义番号
if json_data["number"] != number:
# fix issue #119
# the root cause is we normalize the search id
Expand Down
5 changes: 3 additions & 2 deletions mgstage.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def getDirector(a):
return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
def getOutline(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath('//p/text()')).strip(" ['']")
result = str(html.xpath('//p/text()')).strip(" ['']").replace(u'\\n', '').replace("', '', '", '')
return result
def main(number2):
number=number2.upper()
Expand Down Expand Up @@ -108,4 +108,5 @@ def main(number2):
return js
#print(htmlcode)

#print(main('SIRO-3607'))
if __name__ == '__main__':
print(main('SIRO-4149'))
4 changes: 2 additions & 2 deletions update_check.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": "3.1.2",
"version_show": "3.1.2",
"version": "3.2",
"version_show": "3.2",
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
}
157 changes: 157 additions & 0 deletions xcity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *


# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(a):
    """Return the first text node of the element with id ``program_detail_title``.

    ``a`` is the HTML markup to parse. An IndexError propagates when the
    element is missing or has no text.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    title_nodes = tree.xpath('//*[@id="program_detail_title"]/text()')
    return title_nodes[0]


def getActor(a):
    """Return the first actor link text found in the ``avodDetails`` block.

    ``a`` is the HTML markup to parse. An IndexError propagates when the
    XPath matches nothing.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    actor_nodes = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[3]/a/text()')
    return actor_nodes[0]


def getActorPhoto(actor):
    """Map every comma-separated name in ``actor`` to an empty string.

    No photo lookup is performed; each name simply becomes a key whose
    value is ``''``.
    """
    return {name: '' for name in actor.split(',')}


def getStudio(a):
    """Return the studio name extracted from the HTML markup ``a``.

    Two XPath queries are evaluated; each result list is stringified and
    stripped of the list-repr brackets, quotes and spaces at both ends.
    The two pieces are then concatenated and cleaned of leftover
    separators and double quotes.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    by_position = str(
        tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[4]/a/span/text()')
    ).strip(" ['']")
    by_label = str(
        tree.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')
    ).strip(" ['']")
    combined = str(by_position + by_label)
    return combined.strip('+').replace("', '", '').replace('"', '')


def getRuntime(a):
    """Return the first run of digits from the runtime field, or ``''``.

    ``a`` is the HTML markup to parse. The XPath result list is stringified
    and stripped of its list-repr brackets, quotes and spaces before the
    digits are searched for.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    raw = str(
        tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[3]/text()')
    ).strip(" ['']")
    # Raw string avoids the invalid "\d" escape; an explicit match check
    # replaces the former bare ``except`` around ``findall(...)[0]``.
    match = re.search(r'\d+', raw)
    return match.group() if match else ''


def getLabel(a):
    """Return the label text from the HTML markup ``a``.

    The XPath result list is stringified and stripped of its list-repr
    brackets, quotes and spaces at both ends.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    label_nodes = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[5]/a/span/text()')
    return str(label_nodes).strip(" ['']")


def getNum(a):
    """Return the text of the element with id ``hinban`` from the markup ``a``.

    The XPath result list is stringified and stripped of its list-repr
    brackets, quotes and spaces at both ends.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    number_nodes = tree.xpath('//*[@id="hinban"]/text()')
    return str(number_nodes).strip(" ['']")


def getYear(getRelease):
    """Return the first four-digit run found in ``getRelease``.

    ``getRelease`` is the release value to search (the parameter name
    shadows the sibling function of the same name). When no four-digit run
    exists, or the value is not searchable text (e.g. ``None``), the input
    is returned unchanged.
    """
    try:
        # Raw string avoids the invalid "\d" escape sequence.
        match = re.search(r'\d{4}', getRelease)
    except TypeError:
        # Non-string input: hand the value back untouched, as before.
        return getRelease
    return match.group() if match else getRelease


def getRelease(a):
    """Return the first ``YYYY/MM/DD`` date from the release field, or ``''``.

    ``a`` is the HTML markup to parse. The XPath result list is stringified
    and stripped of its list-repr brackets, quotes and spaces before the
    date pattern is searched for.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    raw = str(
        tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[4]/text()')
    ).strip(" ['']")
    # Raw string avoids invalid escape sequences; an explicit match check
    # replaces the former bare ``except`` around ``findall(...)[0]``.
    match = re.search(r'\d{4}/\d{2}/\d{2}', raw)
    return match.group() if match else ''


def getTag(a):
    """Return the tag link texts from the markup ``a`` as a list.

    Newline and tab characters are removed from every tag.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    raw_tags = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[1]/li[6]/a/text()')
    return [tag.replace(u'\n', '').replace(u'\t', '') for tag in raw_tags]


def getCover_small(a, index=0):
    """Return the ``index``-th small cover image URL from the markup ``a``.

    The query can match several images, so ``index`` selects which one to
    use rather than always taking the first. A value that does not contain
    ``https`` gets ``https:`` prepended.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    src = tree.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
    return src if 'https' in src else 'https:' + src


def getCover(htmlcode):
    """Return the cover link from ``htmlcode`` with ``https:`` prepended.

    The XPath result list is stringified and stripped of its list-repr
    brackets, quotes and spaces at both ends before the prefix is added.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    href_nodes = tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[1]/p/a/@href')
    href = str(href_nodes).strip(" ['']")
    return 'https:' + href


def getDirector(a):
    """Return the text of the ``program_detail_director`` element in ``a``.

    The XPath result list is stringified, stripped of its list-repr
    brackets, quotes and spaces, and the escaped ``\\n`` / ``\\t`` sequences
    left by that stringification are removed.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    director_nodes = tree.xpath('//*[@id="program_detail_director"]/text()')
    text = str(director_nodes).strip(" ['']")
    text = text.replace(u'\\n', '')
    return text.replace(u'\\t', '')


def getOutline(htmlcode):
    """Return the outline paragraph text extracted from ``htmlcode``.

    The XPath result list is stringified and stripped of its list-repr
    brackets, quotes and spaces. Backslash escape sequences that end in
    digits (a literal backslash, optional word characters, then digits)
    are then removed from the text.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(
        tree.xpath('//*[@id="avodDetails"]/div/div[3]/div[2]/div/ul[2]/li[5]/p/text()')
    ).strip(" ['']")
    # Same pattern as before, written as a raw string so "\w" and "\d" are
    # no longer invalid string escapes. re.sub cannot fail on a str, so the
    # former try/except wrapper was dead code and is dropped.
    return re.sub(r'\\\w*\d+', '', raw)


def main(number):
    """Look up ``number`` on xcity.jp and return its metadata as a JSON string.

    The id is upper-cased, searched with its hyphens removed, and the detail
    page of the first result is parsed into a dict of fields. On any
    failure (bad input, network error, no result, unexpected page layout)
    the returned JSON is ``{"title": ""}``.
    """
    try:
        number = number.upper()
        query_result = get_html(
            'https://xcity.jp/result_published/?genre=%2Fresult_published%2F&q='
            + number.replace('-', '') + '&sg=main&num=30')
        html = etree.fromstring(query_result, etree.HTMLParser())
        # First hit in the result table; IndexError (no hit) falls through
        # to the empty-title fallback below.
        urls = html.xpath("//table[contains(@class, 'resultList')]/tr[2]/td[1]/a/@href")[0]
        detail_page = get_html('https://xcity.jp' + urls)
        # Parse actor and release once; both are reused for derived fields.
        actor = getActor(detail_page)
        release = getRelease(detail_page)
        dic = {
            'actor': actor,
            'title': getTitle(detail_page),
            'studio': getStudio(detail_page),
            'outline': getOutline(detail_page),
            'runtime': getRuntime(detail_page),
            'director': getDirector(detail_page),
            'release': release,
            'number': getNum(detail_page),
            'cover': getCover(detail_page),
            'cover_small': '',
            'imagecut': 1,
            'tag': getTag(detail_page),
            'label': getLabel(detail_page),
            'year': getYear(release),
            'actor_photo': getActorPhoto(actor),
            # Fixed: the detail page is on xcity.jp, not javdb.com.
            'website': 'https://xcity.jp' + urls,
            'source': 'xcity.py',
        }
    except Exception:
        # Best-effort scraper: an empty title is the failure result.
        dic = {"title": ""}

    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
    return js

if __name__ == '__main__':
    # When run as a script, look up one sample id and print the JSON from main().
    sample_json = main('VNDS-2624')
    print(sample_json)

0 comments on commit 1f4b7e6

Please sign in to comment.