Skip to content

Commit

Permalink
Update 3.5.1
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdctop authored Jul 17, 2020
1 parent df3a959 commit 8231547
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 7 deletions.
6 changes: 5 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import os
import configparser
import codecs


class Config:
def __init__(self, path: str = "config.ini"):
if os.path.exists(path):
self.conf = configparser.ConfigParser()
self.conf.read(path, encoding="utf-8")
try:
self.conf.read(path, encoding="utf-8-sig")
except:
self.conf.read(path, encoding="utf-8")
else:
print("[-] Config file not found! Use the default settings")
self.conf = self._default_config()
Expand Down
19 changes: 14 additions & 5 deletions javdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,22 @@ def getCover_small(a, index=0):
# javdb sometimes returns multiple results
# DO NOT just get the first one, get the one with the correct index number
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
result = 'https:' + result
return result
try:
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
if not 'https' in result:
result = 'https:' + result
return result
except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@data-src")[index]
if not 'https' in result:
result = 'https:' + result
return result
def getCover(htmlcode):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = str(html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")).strip(" ['']")
try:
result = html.xpath("//div[contains(@class, 'column-video-cover')]/a/img/@src")[0]
except: # 2020.7.17 Repair Cover Url crawl
result = html.xpath("//div[contains(@class, 'column-video-cover')]/img/@src")[0]
return result
def getDirector(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
Expand Down
2 changes: 1 addition & 1 deletion number_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ def get_number(filepath: str) -> str:

if __name__ == "__main__":
import doctest
doctest.testmod(raise_on_error=True)
doctest.testmod(raise_on_error=True)
5 changes: 5 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
## 2020.6.22 更新
* 改进网站爬虫子程序参数混乱
* 修复命名规则release参数带```/```的问题
* 新增socks5本地代理连接
* 新增命名规则series参数

0 comments on commit 8231547

Please sign in to comment.