"""Stash scraper that offers a performer's scene covers as performer images.

Stash runs this script with ``query`` (performerByName) or ``fetch``
(performerByFragment) on the command line:

* ``query`` prints a JSON list with one candidate per scene that has a
  cover, named ``"<performer> - Scene <n>. <scene title>"``.
* ``fetch`` prints a single JSON object; when the incoming name carries the
  ``" - Scene <n>. <title>"`` suffix, the cover of that scene is used,
  otherwise the first scene with a cover.

The input fragment is read from stdin, or from the argument following ``-d``
when debugging from a terminal.
"""
import json
import os
import re
import sys
from urllib.parse import urlparse

try:
    import requests
except ModuleNotFoundError:
    print(
        "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)",
        file=sys.stderr
    )
    print(
        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests",
        file=sys.stderr
    )
    sys.exit()

try:
    import py_common.log as log
    import py_common.graphql as graphql
except ModuleNotFoundError:
    print(
        "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)",
        file=sys.stderr
    )
    sys.exit()

# Splits "<performer> - Scene <n>. <title>" (the names produced in query
# mode) back into its parts.
NAME_WITH_SCENE_RE = re.compile(r"(.*?) - Scene (\d+)\. (.*)", re.IGNORECASE | re.MULTILINE)


def announce_result_to_stash(result):
    """Print ``result`` as JSON in the shape the current mode needs, then exit.

    In ``query`` mode the output is always a JSON list (a single dict is
    wrapped); otherwise it is a single JSON object (the first element of a
    list, or ``{}`` when the list is empty). ``None`` becomes the empty
    value of the current mode. This function never returns: it always ends
    with ``sys.exit(0)``.
    """
    query_mode = 'query' in sys.argv
    if result is None:
        result = [] if query_mode else {}

    if query_mode:
        payload = result if isinstance(result, list) else [result]
    elif isinstance(result, list):
        payload = result[0] if result else {}
    else:
        payload = result

    print(json.dumps(payload))
    sys.exit(0)


def _read_fragment():
    """Return the input fragment parsed from ``-d <json>`` or from stdin."""
    if '-d' in sys.argv:
        payload_pos = sys.argv.index('-d') + 1
        # A trailing "-d" without a value used to raise IndexError; fall
        # back to stdin instead.
        if payload_pos < len(sys.argv):
            return json.loads(sys.argv[payload_pos])
    return json.loads(sys.stdin.read())


def _split_scene_suffix(name):
    """Return ``(performer_name, scene_index)`` parsed from ``name``.

    ``scene_index`` is zero-based, or ``None`` when ``name`` has no
    ``" - Scene <n>. <title>"`` suffix.
    """
    parsed = NAME_WITH_SCENE_RE.search(name)
    if parsed is None:
        return name, None
    return parsed.group(1), int(parsed.group(2)) - 1


def _find_performer(performer_name):
    """Look the performer up in Stash and return the matching record or None.

    A single search hit is taken as-is. With several hits, an exact
    (case-insensitive, stripped) name match wins; failing that, the first
    hit whose aliases contain the name is used.
    """
    found = graphql.getPerformersByName(performer_name)
    if found is None or found['count'] < 1 or not found.get('performers'):
        return None

    candidates = found['performers']
    if found['count'] == 1:
        return candidates[0]

    wanted = performer_name.lower().strip()
    for candidate in candidates:
        if str(candidate['name']).lower().strip() == wanted:
            return candidate
    # No match found by looking into the names, so match against the aliases.
    for candidate in candidates:
        if wanted in str(candidate['aliases']).lower().strip():
            return candidate
    return None


def _build_entry(name, performer_data, screenshot):
    """Return one scraped-performer dict that uses ``screenshot`` as image."""
    return {
        'Name': name,
        'Image': screenshot,
        'Gender': performer_data['gender'],
        'Images': [screenshot],
        'Ethnicity': performer_data['ethnicity'],
        'Birthdate': performer_data['birthdate'],
        'Details': performer_data['details'],
    }


def _collect_covers(performer_name, performer_data, scenes, scene_index, query_mode):
    """Build the scraper result from the performer's scenes.

    Scenes without a screenshot path are skipped. When ``scene_index`` is
    not ``None`` only the scene at that position is considered. In query
    mode a list with one entry per usable scene is returned; otherwise the
    first usable entry (a dict) is returned, or an empty list when there is
    none.
    """
    entries = []
    for position, scene in enumerate(scenes):
        if scene_index is not None and position != scene_index:
            continue
        # "paths" or "screenshot" may be missing or None; both mean no cover.
        screenshot = (scene.get('paths') or {}).get('screenshot')
        if not screenshot:
            continue
        if not query_mode:
            return _build_entry(performer_name, performer_data, screenshot)
        entries.append(
            _build_entry(
                f'{performer_name} - Scene {position + 1}. {scene["title"]}',
                performer_data,
                screenshot,
            )
        )
    return entries


def main():
    """Read the fragment, resolve the performer and print the result."""
    frag = _read_fragment()
    raw_name = frag.get("name")
    if raw_name is None:
        announce_result_to_stash(None)

    performer_name, scene_index = _split_scene_suffix(str(raw_name))

    performer_data = _find_performer(performer_name)
    if performer_data is None or 'id' not in performer_data or int(performer_data['id']) < 0:
        announce_result_to_stash(None)

    performer_scenes = graphql.getSceneByPerformerId(performer_data['id'])
    # getSceneByPerformerId returns None when the GraphQL call yields nothing.
    scenes = (performer_scenes or {}).get('scenes') or []

    announce_result_to_stash(
        _collect_covers(performer_name, performer_data, scenes, scene_index, 'query' in sys.argv)
    )


if __name__ == "__main__":
    main()
def getSceneByPerformerId(performer_id, *, page=1, per_page=20, sort="title", direction="ASC"):
    """Return the scenes a performer appears in, via Stash's ``findScenes``.

    Args:
        performer_id: Stash performer id; it is sent as a string.
        page: 1-based result page to request.
        per_page: Maximum number of scenes on the page.
        sort: Field name the scenes are sorted by.
        direction: Sort direction, ``"ASC"`` or ``"DESC"``.

    Returns:
        The ``findScenes`` object of the response (``count``, ``filesize``,
        ``duration`` and the ``scenes`` list), or ``None`` when
        ``callGraphQL`` returns a falsy result.
    """
    query = """
    query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) {
      findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) {
        count
        filesize
        duration
        scenes {
          ...SceneData
          __typename
        }
        __typename
      }
    }

    fragment SceneData on Scene {
      id
      checksum
      oshash
      title
      details
      url
      date
      rating
      o_counter
      organized
      path
      phash
      interactive
      interactive_speed
      captions {
        language_code
        caption_type
        __typename
      }
      created_at
      updated_at
      file {
        size
        duration
        video_codec
        audio_codec
        width
        height
        framerate
        bitrate
        __typename
      }
      paths {
        screenshot
        preview
        stream
        webp
        vtt
        chapters_vtt
        sprite
        funscript
        interactive_heatmap
        caption
        __typename
      }
      scene_markers {
        ...SceneMarkerData
        __typename
      }
      galleries {
        ...SlimGalleryData
        __typename
      }
      studio {
        ...SlimStudioData
        __typename
      }
      movies {
        movie {
          ...MovieData
          __typename
        }
        scene_index
        __typename
      }
      tags {
        ...SlimTagData
        __typename
      }
      performers {
        ...PerformerData
        __typename
      }
      stash_ids {
        endpoint
        stash_id
        __typename
      }
      sceneStreams {
        url
        mime_type
        label
        __typename
      }
      __typename
    }

    fragment SceneMarkerData on SceneMarker {
      id
      title
      seconds
      stream
      preview
      screenshot
      scene {
        id
        __typename
      }
      primary_tag {
        id
        name
        aliases
        __typename
      }
      tags {
        id
        name
        aliases
        __typename
      }
      __typename
    }

    fragment SlimGalleryData on Gallery {
      id
      checksum
      path
      title
      date
      url
      details
      rating
      organized
      image_count
      cover {
        file {
          size
          width
          height
          __typename
        }
        paths {
          thumbnail
          __typename
        }
        __typename
      }
      studio {
        id
        name
        image_path
        __typename
      }
      tags {
        id
        name
        __typename
      }
      performers {
        id
        name
        gender
        favorite
        image_path
        __typename
      }
      scenes {
        id
        title
        path
        __typename
      }
      __typename
    }

    fragment SlimStudioData on Studio {
      id
      name
      image_path
      stash_ids {
        endpoint
        stash_id
        __typename
      }
      parent_studio {
        id
        __typename
      }
      details
      rating
      aliases
      __typename
    }

    fragment MovieData on Movie {
      id
      checksum
      name
      aliases
      duration
      date
      rating
      director
      studio {
        ...SlimStudioData
        __typename
      }
      synopsis
      url
      front_image_path
      back_image_path
      scene_count
      scenes {
        id
        title
        path
        __typename
      }
      __typename
    }

    fragment SlimTagData on Tag {
      id
      name
      aliases
      image_path
      __typename
    }

    fragment PerformerData on Performer {
      id
      checksum
      name
      url
      gender
      twitter
      instagram
      birthdate
      ethnicity
      country
      eye_color
      height
      measurements
      fake_tits
      career_length
      tattoos
      piercings
      aliases
      favorite
      ignore_auto_tag
      image_path
      scene_count
      image_count
      gallery_count
      movie_count
      tags {
        ...SlimTagData
        __typename
      }
      stash_ids {
        stash_id
        endpoint
        __typename
      }
      rating
      details
      death_date
      hair_color
      weight
      __typename
    }
    """
    # $scene_ids is declared by the query but deliberately left unset; only
    # the performer filter narrows the result.
    variables = {
        "filter": {
            "page": page,
            "per_page": per_page,
            "sort": sort,
            "direction": direction
        },
        "scene_filter": {
            "performers": {
                "value": [str(performer_id)],
                "modifier": "INCLUDES_ALL"
            }
        }
    }

    result = callGraphQL(query, variables)
    if result:
        return result.get('findScenes')
    return None
def getPerformersByName(performer_name):
    """Search Stash performers with ``performer_name`` as the query text.

    Requests the first page (up to 20 entries, sorted by name ascending) of
    ``findPerformers`` with an empty performer filter.

    Returns the ``findPerformers`` object of the response (``count`` and the
    ``performers`` list), or ``None`` when ``callGraphQL`` returns a falsy
    result.
    """
    find_filter = {
        "q": performer_name,
        "page": 1,
        "per_page": 20,
        "sort": "name",
        "direction": "ASC"
    }
    params = {"filter": find_filter, "performer_filter": {}}

    gql = """
    query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) {
      findPerformers(filter: $filter, performer_filter: $performer_filter) {
        count
        performers {
          ...PerformerData
          __typename
        }
        __typename
      }
    }

    fragment PerformerData on Performer {
      id
      checksum
      name
      url
      gender
      twitter
      instagram
      birthdate
      ethnicity
      country
      eye_color
      height
      measurements
      fake_tits
      career_length
      tattoos
      piercings
      aliases
      favorite
      ignore_auto_tag
      image_path
      scene_count
      image_count
      gallery_count
      movie_count
      tags {
        ...SlimTagData
        __typename
      }
      stash_ids {
        stash_id
        endpoint
        __typename
      }
      rating
      details
      death_date
      hair_color
      weight
      __typename
    }

    fragment SlimTagData on Tag {
      id
      name
      aliases
      image_path
      __typename
    }
    """

    response = callGraphQL(gql, params)
    # A falsy response means there is nothing to unwrap.
    return response.get('findPerformers') if response else None