From 245badbd7deffd007cfe57f4f1b3558044208621 Mon Sep 17 00:00:00 2001 From: TgSeed <92082995+TgSeed@users.noreply.github.com> Date: Tue, 27 Sep 2022 22:47:28 +0000 Subject: [PATCH] Add Performer Image by Scene Cover python scraper (#1039) --- SCRAPERS-LIST.md | 1 + scrapers/performer-image-by-scene.py | 113 ++++++ scrapers/performer-image-by-scene.yml | 17 + scrapers/py_common/graphql.py | 483 +++++++++++++++++++++++++- 4 files changed, 611 insertions(+), 3 deletions(-) create mode 100644 scrapers/performer-image-by-scene.py create mode 100644 scrapers/performer-image-by-scene.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 882cff8b2..7d7a03d25 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1388,6 +1388,7 @@ Filename.yml | Scrape a scenes (local) filename to set as scene title | Utility jellyfin.yml| A Jellyfin/Emby scraper | A python scraper that uses the Jellyfin/Emby API to look for Scenes, Performers and Movies via URL, Query or Fragments. Needs the URL, API-Key and User from Jellyfin set in jellyfin.py and the URLs in jellyfin.yml adopted to your local Jelly/Emby Instance | MindGeekAPI.yml| A sceneBy(Name\|Fragment) scraper for MindGeek network| A python scraper that queries directly the MindGeek API. For further **needed** instructions refer to the relevant PRs and have a look in the `MindGeekApi.py` file | [#711](https://github.com/stashapp/CommunityScrapers/pull/711) [#738](https://github.com/stashapp/CommunityScrapers/pull/738) [#411](https://github.com/stashapp/CommunityScrapers/pull/411) multiscrape.yml| A performer scraper that can utilize multiple stash Performer scrapers| A python scraper that can use multiple existing performer scrapers in order to get performer meta. To configure it edit the `multiscrape.py` file|[#594](https://github.com/stashapp/CommunityScrapers/pull/594) +performer-image-by-scene.yml| A performer image scraper that gets images from scene covers | A python scraper that searches for scenes with the performer and sets the scene cover image as the performer image|[#1039](https://github.com/stashapp/CommunityScrapers/pull/1039) performer-image-dir.yml| A performer image scraper compatible with the actress-pics repo | A python scraper that searches in a cloned actress-pics repo for performer images. Configuration and more info in `performer-image-dir.py`|[#453](https://github.com/stashapp/CommunityScrapers/pull/453) ScrapeWithURL.yml| A sceneByFragment scraper to perform a sceneByURL scape on scenes with URLs provided | This scraper allows users to perform sceneByURL scrapes in bulk.| [#900](https://github.com/stashapp/CommunityScrapers/issues/900) ShokoAPI.yml| A sceneByFragment scraper for [Shoko Server](https://shokoanime.com) | A sceneByFragment scraper that queries a local Shoko Server instance using the filename for scene meta. To configure it edit the `ShokoAPI.py` file| [#586](https://github.com/stashapp/CommunityScrapers/issues/586) [#628](https://github.com/stashapp/CommunityScrapers/pull/628) diff --git a/scrapers/performer-image-by-scene.py b/scrapers/performer-image-by-scene.py new file mode 100644 index 000000000..f4ab52c7d --- /dev/null +++ b/scrapers/performer-image-by-scene.py @@ -0,0 +1,113 @@ +import json +import re +import sys +from pathlib import Path + +try: + from py_common import log + from py_common import graphql +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr + ) + sys.exit() + +MAX_TITLE_LENGTH = 25 + + +def announce_result_to_stash(result): + if result is None: + result = [] if 'query' in sys.argv else {} + if 'query' in sys.argv: + if isinstance(result, list): + print(json.dumps(result)) + sys.exit(0) + else: + print(json.dumps([result])) + sys.exit(0) + else: + if isinstance(result, list): + if len(result) > 0: + print(json.dumps(result[0])) + sys.exit(0) + else: + print("{}") + sys.exit(0) + else: + print(json.dumps(result)) + sys.exit(0) + + +# Allows us to simply debug the script via CLI args +if len(sys.argv) > 2 and '-d' in sys.argv: + stdin = sys.argv[sys.argv.index('-d') + 1] +else: + stdin = sys.stdin.read() + +frag = json.loads(stdin) +performer_name = frag.get("name") +if performer_name is None: + announce_result_to_stash(None) +else: + performer_name = str(performer_name) + +regex_obj_parse_name_with_scene = re.compile( + r"(.*?) - Scene (\d+)\. (.*)", re.IGNORECASE | re.MULTILINE) + +parsed_name = regex_obj_parse_name_with_scene.search(performer_name) + + +if parsed_name: + # scene id already available, get scene directly + performer_name = parsed_name.group(1) + scene_id = parsed_name.group(2) + log.debug(f"Using scene {scene_id} to get performer image") + performer_scene = graphql.getSceneScreenshot(scene_id) + performer = {'Name': performer_name, + 'Image': performer_scene['paths']['screenshot'], + 'Images': [performer_scene['paths']['screenshot']]} + announce_result_to_stash(performer) +else: + # search for scenes with the performer + + # first find the id of the performer + performers_data = graphql.getPerformersIdByName(performer_name) + performer_data = None + if performers_data is None or performers_data['count'] < 1: + announce_result_to_stash(None) + elif performers_data['count'] > 1: + for performers_data_element in performers_data['performers']: + if str(performers_data_element['name']).lower().strip() == performer_name.lower().strip(): + performer_data = performers_data_element + break + if performer_data is None: + # No match found by looking into the names, let's loop again and match with the aliases + for performers_data_element in performers_data['performers']: + if performer_name.lower().strip() in str(performers_data_element['aliases']).lower().strip(): + performer_data = performers_data_element + break + else: + performer_data = performers_data['performers'][0] + + if performer_data is None or 'id' not in performer_data or int(performer_data['id']) < 0: + announce_result_to_stash(None) + + # get all scenes with the performer + performer_scenes = graphql.getSceneIdByPerformerId(performer_data['id']) + + image_candidates = [] + for scene in performer_scenes['scenes']: + if 'paths' in scene and 'screenshot' in scene['paths'] and len(scene['paths']['screenshot']) > 0: + if 'query' in sys.argv: + scene_title = scene.get("title") + if scene_title is None: + scene_title = Path(scene["path"]).name + image_candidates.append( + { + 'Name': f'{performer_name} - Scene {scene["id"]}. {scene_title[0:MAX_TITLE_LENGTH]}', + 'Image': scene['paths']['screenshot'], + 'Images': [scene['paths']['screenshot']] + } + ) + announce_result_to_stash(image_candidates) diff --git a/scrapers/performer-image-by-scene.yml b/scrapers/performer-image-by-scene.yml new file mode 100644 index 000000000..fbbacc265 --- /dev/null +++ b/scrapers/performer-image-by-scene.yml @@ -0,0 +1,17 @@ +name: Performer Image by scene cover + +performerByFragment: + action: script + script: + - python + - performer-image-by-scene.py + - fetch + +performerByName: + action: script + script: + - python + - performer-image-by-scene.py + - query + +# Last Updated June 25, 2022 diff --git a/scrapers/py_common/graphql.py b/scrapers/py_common/graphql.py index a263171db..874f4b4dc 100644 --- a/scrapers/py_common/graphql.py +++ b/scrapers/py_common/graphql.py @@ -1,4 +1,5 @@ import sys +from typing import Union try: import requests @@ -48,7 +49,8 @@ def callGraphQL(query, variables=None): if result.get("data"): return result.get("data") elif response.status_code == 401: - log.error("[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder") + log.error( + "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder") return None else: raise ConnectionError( @@ -452,6 +454,7 @@ def getScene(scene_id): weight } """ + variables = { "id": scene_id } @@ -460,6 +463,479 @@ def getScene(scene_id): return result.get('findScene') return None + +def getSceneScreenshot(scene_id): + query = """ + query FindScene($id: ID!, $checksum: String) { + findScene(id: $id, checksum: $checksum) { + id + paths { + screenshot + } + } + } + """ + variables = { + "id": scene_id + } + result = callGraphQL(query, variables) + if result: + return result.get('findScene') + return None + + +def getSceneByPerformerId(performer_id): + query = """ + query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { + findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { + count + filesize + duration + scenes { + ...SceneData + __typename + } + __typename + } + } + + fragment SceneData on Scene { + id + checksum + oshash + title + details + url + date + rating + o_counter + organized + path + phash + interactive + interactive_speed + captions { + language_code + caption_type + __typename + } + created_at + updated_at + file { + size + duration + video_codec + audio_codec + width + height + framerate + bitrate + __typename + } + paths { + screenshot + preview + stream + webp + vtt + chapters_vtt + sprite + funscript + interactive_heatmap + caption + __typename + } + scene_markers { + ...SceneMarkerData + __typename + } + galleries { + ...SlimGalleryData + __typename + } + studio { + ...SlimStudioData + __typename + } + movies { + movie { + ...MovieData + __typename + } + scene_index + __typename + } + tags { + ...SlimTagData + __typename + } + performers { + ...PerformerData + __typename + } + stash_ids { + endpoint + stash_id + __typename + } + sceneStreams { + url + mime_type + label + __typename + } + __typename + } + + fragment SceneMarkerData on SceneMarker { + id + title + seconds + stream + preview + screenshot + scene { + id + __typename + } + primary_tag { + id + name + aliases + __typename + } + tags { + id + name + aliases + __typename + } + __typename + } + + fragment SlimGalleryData on Gallery { + id + checksum + path + title + date + url + details + rating + organized + image_count + cover { + file { + size + width + height + __typename + } + paths { + thumbnail + __typename + } + __typename + } + studio { + id + name + image_path + __typename + } + tags { + id + name + __typename + } + performers { + id + name + gender + favorite + image_path + __typename + } + scenes { + id + title + path + __typename + } + __typename + } + + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + __typename + } + parent_studio { + id + __typename + } + details + rating + aliases + __typename + } + + fragment MovieData on Movie { + id + checksum + name + aliases + duration + date + rating + director + studio { + ...SlimStudioData + __typename + } + synopsis + url + front_image_path + back_image_path + scene_count + scenes { + id + title + path + __typename + } + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + + fragment PerformerData on Performer { + id + checksum + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height + measurements + fake_tits + career_length + tattoos + piercings + aliases + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating + details + death_date + hair_color + weight + __typename + } + """ + variables = { + "filter": { + "page": 1, + "per_page": 20, + "sort": "title", + "direction": "ASC" + }, + "scene_filter": { + "performers": { + "value": [str(performer_id)], + "modifier": "INCLUDES_ALL" + } + } + } + + result = callGraphQL(query, variables) + if result: + return result.get('findScenes') + return None + + +def getSceneIdByPerformerId(performer_id): + query = """ + query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { + findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { + scenes { + id + title + path + paths { + screenshot + } + } + } + } + """ + variables = { + "filter": { + "page": 1, + "per_page": 20, + "sort": "id", + "direction": "DESC" + }, + "scene_filter": { + "performers": { + "value": [str(performer_id)], + "modifier": "INCLUDES_ALL" + } + } + } + + result = callGraphQL(query, variables) + if result: + return result.get('findScenes') + return None + + +def getPerformersByName(performer_name): + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + __typename + } + __typename + } + } + + fragment PerformerData on Performer { + id + checksum + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height + measurements + fake_tits + career_length + tattoos + piercings + aliases + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating + details + death_date + hair_color + weight + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC" + }, + "performer_filter": {} + } + + result = callGraphQL(query, variables) + if result: + return result.get('findPerformers') + return None + + +def getPerformersIdByName(performer_name): + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + } + } + } + + fragment PerformerData on Performer { + id + name + aliases + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC" + }, + "performer_filter": {} + } + + result = callGraphQL(query, variables) + if result: + return result.get('findPerformers') + return None + + def getGallery(gallery_id): query = """ query FindGallery($id: ID!) { @@ -696,7 +1172,8 @@ def getGallery(gallery_id): result = callGraphQL(query, variables) if result: return result.get('findGallery') - return None + return None + def getGalleryPath(gallery_id): query = """ @@ -712,4 +1189,4 @@ def getGalleryPath(gallery_id): result = callGraphQL(query, variables) if result: return result.get('findGallery') - return None + return None