Skip to content

Commit

Permalink
Merge pull request mvdctop#941 from mark5231/dev-test
Browse files Browse the repository at this point in the history
add pissplay
  • Loading branch information
mvdctop authored Dec 6, 2022
2 parents 39b8809 + bb37d6a commit 376b724
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 7 deletions.
17 changes: 12 additions & 5 deletions scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,11 @@ def get_data_from_json(
# ================================================网站规则添加结束================================================

title = json_data.get('title')
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
actor_list = [actor.strip() for actor in actor_list] # 去除空白
if json_data['source'] =='pissplay': # pissplay actor为英文名,不用去除空格
actor_list = [json_data.get('actor')]
else:
actor_list = str(json_data.get('actor')).strip("[ ]").replace("'", '').split(',') # 字符串转列表
actor_list = [actor.strip() for actor in actor_list] # 去除空白
director = json_data.get('director')
release = json_data.get('release')
number = json_data.get('number')
Expand Down Expand Up @@ -134,11 +137,15 @@ def get_data_from_json(
tag.remove('XXXX')
while 'xxx' in tag:
tag.remove('xxx')
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
if json_data['source'] =='pissplay': # pissplay actor为英文名,不用去除空格
actor = str(actor_list).strip("[ ]").replace("'", '')
else:
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')

if title == '' or number == '':
print('[-]Movie Number or Title not found!')
return None
if json_data['source'] != 'pissplay': # pissplay 没有番号
print('[-]Movie Number or Title not found!')
return None

# if imagecut == '3':
# DownloadFileWithFilename()
Expand Down
6 changes: 4 additions & 2 deletions scrapinglib/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .avsox import Avsox
from .javlibrary import Javlibrary
from .javday import Javday
from .pissplay import Pissplay

from .tmdb import Tmdb
from .imdb import Imdb
Expand Down Expand Up @@ -52,7 +53,7 @@ class Scraping:
"""
adult_full_sources = ['javlibrary', 'javdb', 'javbus', 'airav', 'fanza', 'xcity', 'jav321',
'mgstage', 'fc2', 'avsox', 'dlsite', 'carib', 'madou',
'getchu', 'gcolle','javday'
'getchu', 'gcolle','javday','pissplay'
]
adult_func_mapping = {
'avsox': Avsox().scrape,
Expand All @@ -70,7 +71,8 @@ class Scraping:
'javdb': Javdb().scrape,
'getchu': Getchu().scrape,
'javlibrary': Javlibrary().scrape,
'javday': Javday().scrape
'javday': Javday().scrape,
'pissplay': Pissplay().scrape
}

general_full_sources = ['tmdb', 'imdb']
Expand Down
87 changes: 87 additions & 0 deletions scrapinglib/pissplay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-

import re
from lxml import etree
from .parser import Parser
from datetime import datetime

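# Scrapes videos from https://pissplay.com/
# Videos on pissplay have no ID number, so they are looked up by file name.
# A video can only be scraped when its file name exactly matches the video title on the site.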
class Pissplay(Parser):
    """Scraper for videos published on https://pissplay.com/.

    Videos on this site carry no ID number, so the (sanitised) video title is
    used in its place and the detail page URL is derived from the name that
    is passed to :meth:`search`. A video can therefore only be found when
    that name matches the title used on the site.
    """

    source = 'pissplay'

    # The site has no ID numbers, so the title XPath doubles as the number.
    expr_number = '//*[@id="video_title"]/text()'
    expr_title = '//*[@id="video_title"]/text()'
    expr_cover = '/html/head//meta[@property="og:image"]/@content'
    expr_tags = '//div[@id="video_tags"]/a/text()'
    expr_release = '//div[@class="video_date"]/text()'
    expr_outline = '//*[@id="video_description"]/p//text()'

    # Actor string reported when the tag list contains no 'Guests' tag.
    _DEFAULT_ACTORS = 'Bruce and Morgan'
    # When one of these is the first tag of a 'Guests' video, the guest's
    # name is taken from the second tag instead of the first.
    _PREFIX_TAGS = ('Collaboration', 'Toilet for a Day')

    def extraInit(self):
        """Set the per-source defaults for this scraper."""
        self.imagecut = 0  # do not crop the cover
        self.specifiedSource = None

    def search(self, number):
        """Fetch and parse the detail page for ``number``.

        ``self.specifiedUrl`` is used as the detail URL when it is set;
        otherwise the URL is built from ``number`` by dropping every
        character other than ASCII letters, digits and spaces, lower-casing
        the rest and replacing spaces with hyphens.

        Returns 404 when ``getHtml`` reports 404, otherwise the result of
        ``dictformat`` for the parsed page.
        """
        self.number = number.strip().upper()
        if self.specifiedUrl:
            self.detailurl = self.specifiedUrl
        else:
            # Strip after removing special characters so that neither edge
            # whitespace nor a removed edge symbol leaves a stray hyphen at
            # the start or end of the slug.
            slug = re.sub(r"[^a-zA-Z0-9 ]", "", number).strip()
            self.detailurl = "https://pissplay.com/videos/" + slug.lower().replace(" ", "-") + "/"
        self.htmlcode = self.getHtml(self.detailurl)
        if self.htmlcode == 404:
            return 404
        htmltree = etree.fromstring(self.htmlcode, etree.HTMLParser())
        return self.dictformat(htmltree)

    def getNum(self, htmltree):
        """Return the sanitised title, which stands in for the ID number."""
        return self.getTitle(htmltree)

    def getTitle(self, htmltree):
        """Return the page title with special characters removed."""
        title = super().getTitle(htmltree)
        return re.sub(r"[^a-zA-Z0-9 ]", "", title)

    def getCover(self, htmltree):
        """Return the cover URL, prefixing ``https:`` when no scheme is present.

        An empty value is returned unchanged instead of being turned into
        the meaningless string ``'https:'``.
        """
        url = super().getCover(htmltree)
        if url and not url.startswith('http'):
            url = 'https:' + url
        return url

    def getRelease(self, htmltree):
        """Return the release date converted from ``'%d %b %Y'`` to ``YYYY-MM-DD``.

        Raises:
            ValueError: if the scraped text is not in ``'%d %b %Y'`` form.
        """
        # strptime rejects surrounding whitespace, which is common in
        # text() nodes, so trim it first.
        release_text = super().getRelease(htmltree).strip()
        return datetime.strptime(release_text, '%d %b %Y').strftime('%Y-%m-%d')

    def getStudio(self, htmltree):
        """Return the fixed studio name for this source."""
        return 'PissPlay'

    @classmethod
    def _guest_index(cls, tags):
        """Return the index of the guest-name tag, or None without 'Guests'."""
        if 'Guests' not in tags:
            return None
        return 1 if tags[0] in cls._PREFIX_TAGS else 0

    def getTags(self, htmltree):
        """Return the page tags with the guest-name tag (if any) removed."""
        tags = self.getTreeAll(htmltree, self.expr_tags)
        guest_at = self._guest_index(tags)
        if guest_at is None:
            return tags
        return tags[:guest_at] + tags[guest_at + 1:]

    def getActors(self, htmltree) -> str:
        """Return the actor name as a single string.

        For videos tagged 'Guests' this is the guest-name tag; otherwise the
        default actor string is returned. The value is a plain string, not a
        list: the pissplay branch of ``get_data_from_json`` in scraper.py
        wraps it in a list itself.
        """
        tags = self.getTreeAll(htmltree, self.expr_tags)
        guest_at = self._guest_index(tags)
        if guest_at is None:
            return self._DEFAULT_ACTORS
        return tags[guest_at]

    def getOutline(self, htmltree):
        """Return the description text, cut at the '– Morgan xx' sign-off.

        Text fragments are concatenated and every '&' is replaced by 'and'.
        """
        parts = self.getTreeAll(htmltree, self.expr_outline)
        if '– Morgan xx' in parts:
            # Keep only the fragments before the sign-off.
            parts = parts[:parts.index('– Morgan xx')]
        return ''.join(parts).replace("&", "and")

0 comments on commit 376b724

Please sign in to comment.