forked from mvdctop/Movie_Data_Capture
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjavbus.py
140 lines (122 loc) · 5.17 KB
/
javbus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
import re
import os
import secrets
import inspect
from lxml import etree
from urllib.parse import urljoin
from .parser import Parser
class Javbus(Parser):
    """Scraper for JavBus detail pages.

    Supplies the XPath expressions and field post-processing used to turn a
    JavBus detail page into the metadata dict built by ``self.dictformat``.
    Fetching helpers (``getHtml``, ``getHtmlTree``, ``getTreeElement``,
    ``getTreeAll``) and ``dictformat`` are not defined in this class; they are
    presumably inherited from ``Parser``.
    """

    source = 'javbus'

    # XPath expressions consumed by the getters below and by the base-class
    # getters reached through super().
    # The "keywords" meta tag is a comma-separated list: getNum() keeps the
    # first entry and getTags() keeps the remaining ones.
    expr_number = '/html/head/meta[@name="keywords"]/@content'
    expr_title = '/html/head/title/text()'
    # Fields with two variants: the second (Japanese-labelled) expression is
    # selected when self.uncensored is true.
    expr_studio = '//span[contains(text(),"製作商:")]/../a/text()'
    expr_studio2 = '//span[contains(text(),"メーカー:")]/../a/text()'
    expr_director = '//span[contains(text(),"導演:")]/../a/text()'
    expr_directorJa = '//span[contains(text(),"監督:")]/../a/text()'
    expr_series = '//span[contains(text(),"系列:")]/../a/text()'
    expr_series2 = '//span[contains(text(),"シリーズ:")]/../a/text()'
    # NOTE(review): identical to expr_series; confirm whether the label should
    # use a different caption on the page.
    expr_label = '//span[contains(text(),"系列:")]/../a/text()'
    expr_cover = '//a[@class="bigImage"]/@href'
    expr_release = '/html/body/div[5]/div[1]/div[2]/p[2]/text()'
    expr_runtime = '/html/body/div[5]/div[1]/div[2]/p[3]/text()'
    expr_actor = '//div[@class="star-name"]/a'
    expr_actorphoto = '//div[@class="star-name"]/../a/img'
    expr_extrafanart = '//div[@id="sample-waterfall"]/a/@href'
    expr_tags = '/html/head/meta[@name="keywords"]/@content'
    expr_uncensored = '//*[@id="navbar"]/ul[1]/li[@class="active"]/a[contains(@href,"uncensored")]'

    def search(self, number):
        """Look up ``number`` and return the formatted metadata.

        When ``self.specifiedUrl`` is set, that URL is scraped directly.
        Otherwise a randomly chosen mirror domain is tried first, falling back
        to ``www.javbus.com`` if fetching from the mirror raises. If anything
        in that flow raises, the lookup is retried via ``searchUncensored``.

        Returns:
            The value produced by ``self.dictformat``, or ``404`` when the
            fetched page content equals ``404``.
        """
        self.number = number
        try:
            if self.specifiedUrl:
                self.detailurl = self.specifiedUrl
                htmltree = self.getHtmlTree(self.detailurl)
                return self.dictformat(htmltree)

            mirror = "https://www." + secrets.choice([
                'buscdn.fun', 'busdmm.fun', 'busfan.fun', 'busjav.fun',
                'cdnbus.fun',
                'dmmbus.fun', 'dmmsee.fun',
                'seedmm.fun',
            ]) + "/"
            try:
                self.detailurl = mirror + number
                self.htmlcode = self.getHtml(self.detailurl)
            except Exception:
                # The mirror could not be fetched; use the main domain.
                self.detailurl = 'https://www.javbus.com/' + number
                self.htmlcode = self.getHtml(self.detailurl)

            if self.htmlcode == 404:
                return 404
            htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
            return self.dictformat(htmltree)
        except Exception:
            # Propagate the fallback's result; without the return the caller
            # would receive None even when the second search succeeds.
            return self.searchUncensored(number)

    def searchUncensored(self, number):
        """Second-pass search against the uncensored site.

        Sets ``self.imagecut`` to 0 and ``self.uncensored`` to True, then
        scrapes ``self.specifiedUrl`` if set, or ``https://www.javbus.red/``
        plus ``number`` with dots replaced by hyphens.

        Returns:
            The value produced by ``self.dictformat``, or ``404`` when the
            fetched page content equals ``404``.
        """
        self.imagecut = 0
        self.uncensored = True

        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            self.detailurl = 'https://www.javbus.red/' + number.replace('.', '-')

        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Return the first comma-separated entry of the base-class number."""
        return super().getNum(htmltree).split(',')[0]

    def getTitle(self, htmltree):
        """Return the page title without its leading token and site suffix.

        The regex drops everything up to the first run of whitespace and the
        trailing `` - JavBus``. An ``IndexError`` is raised when the title
        does not match that shape.
        """
        raw_title = super().getTitle(htmltree)
        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        matches = re.findall(r'^.+?\s+(.*) - JavBus$', raw_title)
        return str(matches[0]).strip()

    def getStudio(self, htmltree):
        """Return the studio, using the expression matching ``self.uncensored``."""
        expr = self.expr_studio2 if self.uncensored else self.expr_studio
        return self.getTreeElement(htmltree, expr)

    def getCover(self, htmltree):
        """Return the cover URL resolved against ``https://www.javbus.com``."""
        return urljoin("https://www.javbus.com", super().getCover(htmltree))

    def getRuntime(self, htmltree):
        """Return the runtime with surrounding list/quote/unit characters stripped."""
        # str.strip takes a character set: spaces, brackets, quotes and the
        # two characters of the minutes suffix are removed from both ends.
        return super().getRuntime(htmltree).strip(" ['']分鐘")

    def getActors(self, htmltree):
        """Return the ``title`` attribute of every actor element."""
        return [node.attrib['title'] for node in super().getActors(htmltree)]

    def getActorPhoto(self, htmltree):
        """Return a dict mapping actor name to absolute photo URL.

        Images whose ``src`` contains ``nowprinting.gif`` are skipped.
        """
        photos = {}
        for img in self.getTreeAll(htmltree, self.expr_actorphoto):
            src = img.attrib['src']
            if "nowprinting.gif" in src:
                continue
            photos[img.attrib['title']] = urljoin("https://www.javbus.com", src)
        return photos

    def getDirector(self, htmltree):
        """Return the director, using the expression matching ``self.uncensored``."""
        expr = self.expr_directorJa if self.uncensored else self.expr_director
        return self.getTreeElement(htmltree, expr)

    def getSeries(self, htmltree):
        """Return the series, using the expression matching ``self.uncensored``."""
        expr = self.expr_series2 if self.uncensored else self.expr_series
        return self.getTreeElement(htmltree, expr)

    def getTags(self, htmltree):
        """Return the keywords meta entries after the first one."""
        keywords = self.getTreeElement(htmltree, self.expr_tags).split(',')
        return keywords[1:]

    def getOutline(self, htmltree):
        """Return the storyline, or ``''`` when it is disabled or skipped.

        The storyline is fetched only when ``self.morestoryline`` is truthy
        and no frame on the current call stack belongs to ``airav.py``.
        """
        if not self.morestoryline:
            return ''
        # Calls originating from airav.py skip the outline and return
        # immediately, avoiding duplicate fetches that slow processing down.
        called_from_airav = any(
            os.path.basename(frame.filename) == 'airav.py'
            for frame in inspect.stack()
        )
        if called_from_airav:
            return ''
        from .storyline import getStoryline
        return getStoryline(self.number, uncensored=self.uncensored,
                            proxies=self.proxies, verify=self.verify)