fix: updated torbox scraper to use api key. refactored scrapers slightly. added more logging to scrapers.
dreulavelle committed Nov 8, 2024
1 parent ebd11fd commit afdb9f6
Showing 12 changed files with 83 additions and 95 deletions.
19 changes: 14 additions & 5 deletions src/program/services/scrapers/__init__.py
@@ -25,17 +25,23 @@ def __init__(self):
self.key = "scraping"
self.initialized = False
self.settings = settings_manager.settings.scraping
self.services = {
self.imdb_services = { # If we are missing imdb_id then we cant scrape here
Torrentio: Torrentio(),
Knightcrawler: Knightcrawler(),
Orionoid: Orionoid(),
Jackett: Jackett(),
TorBoxScraper: TorBoxScraper(),
Mediafusion: Mediafusion(),
Prowlarr: Prowlarr(),
Zilean: Zilean(),
Comet: Comet()
}
self.keyword_services = {
Jackett: Jackett(),
Prowlarr: Prowlarr(),
Zilean: Zilean()
}
self.services = {
**self.imdb_services,
**self.keyword_services
}
self.initialized = self.validate()
if not self.initialized:
return
@@ -65,6 +71,9 @@ def scrape(self, item: MediaItem, log = True) -> Dict[str, Stream]:
total_results = 0
results_lock = threading.RLock()

imdb_id = item.get_top_imdb_id()
available_services = self.services if imdb_id else self.keyword_services

def run_service(service, item,):
nonlocal total_results
service_results = service.run(item)
@@ -77,7 +86,7 @@ def run_service(service, item,):
results.update(service_results)
total_results += len(service_results)

for service_name, service in self.services.items():
for service_name, service in available_services.items():
if service.initialized:
thread = threading.Thread(target=run_service, args=(service, item), name=service_name.__name__)
threads.append(thread)
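Note: the idea behind the __init__.py change, as a minimal self-contained sketch (the scraper classes here are stand-ins, not the real service classes): items without an IMDb id can only be handled by keyword-search backends, so the run loop picks from keyword_services in that case.

from typing import Dict, Optional

class StubScraper:
    """Stand-in for a scraper service; the real ones live in this package."""
    initialized = True
    def run(self, item) -> Dict[str, str]:
        return {}

imdb_services = {"torrentio": StubScraper(), "comet": StubScraper()}    # need imdb_id
keyword_services = {"jackett": StubScraper(), "zilean": StubScraper()}  # title search
services = {**imdb_services, **keyword_services}

def pick_services(imdb_id: Optional[str]) -> Dict[str, StubScraper]:
    # Mirrors Scraping.scrape(): fall back to keyword-capable scrapers
    # when the item has no IMDb id to build Stremio-style URLs from.
    return services if imdb_id else keyword_services
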
6 changes: 1 addition & 5 deletions src/program/services/scrapers/comet.py
@@ -88,18 +88,14 @@ def run(self, item: MediaItem) -> Dict[str, str]:
logger.error(f"Comet exception thrown: {str(e)}")
return {}


def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
"""Wrapper for `Comet` scrape method"""
identifier, scrape_type, imdb_id = _get_stremio_identifier(item)
if not imdb_id:
return {}

url = f"{self.settings.url}/{self.encoded_string}/stream/{scrape_type}/{imdb_id}{identifier or ''}.json"

response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout)

if not response.is_ok or not getattr(response.data, "streams", None):
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents: Dict[str, str] = {}
53 changes: 21 additions & 32 deletions src/program/services/scrapers/jackett.py
@@ -98,15 +98,6 @@ def run(self, item: MediaItem) -> Generator[MediaItem, None, None]:

def scrape(self, item: MediaItem) -> Dict[str, str]:
"""Scrape the given media item"""
data, stream_count = self.api_scrape(item)
if data:
logger.log("SCRAPER", f"Found {len(data)} streams out of {stream_count} for {item.log_string}")
else:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return data

def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
"""Wrapper for `Jackett` scrape method"""
results_queue = queue.Queue()
threads = [
threading.Thread(target=self._thread_target, args=(item, indexer, results_queue))
@@ -118,8 +109,22 @@ def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
for thread in threads:
thread.join()

results = self._collect_results(results_queue)
return self._process_results(results)
results = []
while not results_queue.empty():
results.extend(results_queue.get())

torrents: Dict[str, str] = {}
for result in results:
if result[1] is None:
continue
# infohash: raw_title
torrents[result[1]] = result[0]

if torrents:
logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}")
else:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return torrents

def _thread_target(self, item: MediaItem, indexer: JackettIndexer, results_queue: queue.Queue):
"""Thread target for searching indexers"""
@@ -144,23 +149,6 @@ def _search_indexer(self, item: MediaItem, indexer: JackettIndexer) -> List[Tupl
else:
raise TypeError("Only Movie and Series is allowed!")

def _collect_results(self, results_queue: queue.Queue) -> List[Tuple[str, str]]:
"""Collect results from the queue"""
results = []
while not results_queue.empty():
results.extend(results_queue.get())
return results

def _process_results(self, results: List[Tuple[str, str]]) -> Tuple[Dict[str, str], int]:
"""Process the results and return the torrents"""
torrents: Dict[str, str] = {}
for result in results:
if result[1] is None:
continue
# infohash: raw_title
torrents[result[1]] = result[0]
return torrents, len(results)

def _search_movie_indexer(self, item: MediaItem, indexer: JackettIndexer) -> List[Tuple[str, str]]:
"""Search for movies on the given indexer"""
if indexer.movie_search_capabilities is None:
@@ -205,13 +193,14 @@ def _search_series_indexer(self, item: MediaItem, indexer: JackettIndexer) -> Li

def _get_series_search_params(self, item: MediaItem) -> Tuple[str, int, Optional[int]]:
"""Get search parameters for series"""
title = item.get_top_title()
if isinstance(item, Show):
return item.get_top_title(), None, None
return title, None, None
elif isinstance(item, Season):
return item.get_top_title(), item.number, None
return title, item.number, None
elif isinstance(item, Episode):
return item.get_top_title(), item.parent.number, item.number
return "", 0, None
return title, item.parent.number, item.number
return title, None, None

def _get_indexers(self) -> List[JackettIndexer]:
"""Get the indexers from Jackett"""
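Note: jackett.py folds _collect_results and _process_results into scrape(), dropping the separate stream count — which is also why the log line simplifies to "Found N streams". A sketch of the inlined pattern (tuple order (raw_title, infohash) matches the diff's comment; draining with empty() is safe only because every worker thread has been joined first):

import queue
from typing import Dict, List, Tuple

def drain_results(results_queue: queue.Queue) -> Dict[str, str]:
    results: List[Tuple[str, str]] = []
    while not results_queue.empty():
        results.extend(results_queue.get())

    torrents: Dict[str, str] = {}
    for raw_title, infohash in results:
        if infohash is None:
            continue
        torrents[infohash] = raw_title  # keyed by infohash to deduplicate
    return torrents
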
2 changes: 1 addition & 1 deletion src/program/services/scrapers/knightcrawler.py
@@ -89,8 +89,8 @@ def scrape(self, item: MediaItem) -> Dict[str, str]:
url += identifier

response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout)

if not response.is_ok or len(response.data.streams) <= 0:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents = {
1 change: 1 addition & 0 deletions src/program/services/scrapers/mediafusion.py
@@ -133,6 +133,7 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout)

if not response.is_ok or len(response.data.streams) <= 0:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents: Dict[str, str] = {}
1 change: 1 addition & 0 deletions src/program/services/scrapers/orionoid.py
@@ -153,6 +153,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]:
params = self._build_query_params(item)
response = self.request_handler.execute(HttpMethod.GET, self.base_url, params=params, timeout=self.timeout)
if not response.is_ok or not hasattr(response.data, "data"):
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents = {}
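Note: the one-line additions to comet.py, knightcrawler.py, mediafusion.py, orionoid.py, and zilean.py all add the same NOT_FOUND log before the empty-result return. A generic sketch of that guard, assuming the project's loguru logger with a custom NOT_FOUND level registered:

from loguru import logger  # assumed: the project registers a custom NOT_FOUND level

def log_if_empty(response, item) -> bool:
    """Return True when the response carries usable streams; log otherwise."""
    if not response.is_ok or not getattr(response.data, "streams", None):
        logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
        return False
    return True
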
58 changes: 22 additions & 36 deletions src/program/services/scrapers/prowlarr.py
@@ -100,16 +100,7 @@ def run(self, item: MediaItem) -> Dict[str, str]:
return {}

def scrape(self, item: MediaItem) -> Dict[str, str]:
"""Scrape the given media item"""
data, stream_count = self.api_scrape(item)
if data:
logger.log("SCRAPER", f"Found {len(data)} streams out of {stream_count} for {item.log_string}")
else:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return data

def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
"""Wrapper for `Prowlarr` scrape method"""
"""Scrape the given media item using Prowlarr indexers"""
results_queue = queue.Queue()
threads = [
threading.Thread(target=self._thread_target, args=(item, indexer, results_queue))
@@ -121,8 +112,22 @@ def api_scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
for thread in threads:
thread.join()

results = self._collect_results(results_queue)
return self._process_results(results)
results = []
while not results_queue.empty():
results.extend(results_queue.get())

torrents: Dict[str, str] = {}
for result in results:
if result[1] is None:
continue
torrents[result[1]] = result[0]

if torrents:
logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}")
else:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")

return torrents

def _thread_target(self, item: MediaItem, indexer: ProwlarrIndexer, results_queue: queue.Queue):
try:
@@ -146,26 +151,6 @@ def _search_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> List[Tup
else:
raise TypeError("Only Movie and Series is allowed!")

def _collect_results(self, results_queue: queue.Queue) -> List[Tuple[str, str]]:
"""Collect results from the queue"""
results = []
while not results_queue.empty():
results.extend(results_queue.get())
return results

def _process_results(self, results: List[Tuple[str, str]]) -> Tuple[Dict[str, str], int]:
"""Process the results and return the torrents"""
torrents: Dict[str, str] = {}

for result in results:
if result[1] is None:
continue

# infohash: raw_title
torrents[result[1]] = result[0]

return torrents, len(results)

def _search_movie_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> List[Tuple[str, str]]:
"""Search for movies on the given indexer"""
if indexer.movie_search_capabilities is None:
@@ -209,13 +194,14 @@ def _search_series_indexer(self, item: MediaItem, indexer: ProwlarrIndexer) -> L

def _get_series_search_params(self, item: MediaItem) -> Tuple[str, int, Optional[int]]:
"""Get search parameters for series"""
title = item.get_top_title()
if isinstance(item, Show):
return item.get_top_title(), None, None
return title, None, None
elif isinstance(item, Season):
return item.get_top_title(), item.number, None
return title, item.number, None
elif isinstance(item, Episode):
return item.get_top_title(), item.parent.number, item.number
return "", 0, None
return title, item.parent.number, item.number
return title, None, None

def _get_indexers(self) -> List[ProwlarrIndexer]:
"""Get the indexers from Prowlarr"""
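Note: the _get_series_search_params fix is identical in jackett.py and prowlarr.py. The old trailing fallback returned ("", 0, None), i.e. an empty title; hoisting the title lookup means every path, including the fallback, now returns the real title. A sketch of the corrected logic (the media model import path is assumed from the project layout):

from typing import Optional, Tuple
from program.media.item import Episode, Season, Show  # assumed module path

def get_series_search_params(item) -> Tuple[str, Optional[int], Optional[int]]:
    title = item.get_top_title()
    if isinstance(item, Show):
        return title, None, None
    if isinstance(item, Season):
        return title, item.number, None         # season-level search
    if isinstance(item, Episode):
        return title, item.parent.number, item.number
    return title, None, None                    # fallback keeps the real title now
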
2 changes: 1 addition & 1 deletion src/program/services/scrapers/shared.py
@@ -132,7 +132,7 @@ def _parse_results(item: MediaItem, results: Dict[str, str], log_msg: bool = Tru
continue

if torrents:
logger.log("SCRAPER", f"Processed {len(torrents)} matches for {item.log_string}")
logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}")
torrents = sort_torrents(torrents)
torrents_dict = {}
for torrent in torrents.values():
28 changes: 17 additions & 11 deletions src/program/services/scrapers/torbox.py
@@ -15,7 +15,7 @@ def __init__(self):
self.key = "torbox"
self.settings = settings_manager.settings.scraping.torbox_scraper
self.base_url = "http://search-api.torbox.app"
self.user_plan = None
self.headers = {"Authorization": f"Bearer {self.settings.api_key}"}
self.timeout = self.settings.timeout
session = create_service_session()
self.request_handler = ScraperRequestHandler(session)
@@ -28,11 +28,14 @@ def validate(self) -> bool:
"""Validate the TorBox Scraper as a service"""
if not self.settings.enabled:
return False
if not self.settings.api_key:
logger.error("TorBox API key is not set.")
return False
if not isinstance(self.timeout, int) or self.timeout <= 0:
logger.error("TorBox timeout is not set or invalid.")
return False
try:
response = self.request_handler.execute(HttpMethod.GET, f"{self.base_url}/torrents/imdb:tt0944947?metadata=false&season=1&episode=1", timeout=self.timeout)
response = self.request_handler.execute(HttpMethod.GET, f"{self.base_url}/torrents/imdb:tt0944947?metadata=false&season=1&episode=1", headers=self.headers, timeout=self.timeout)
return response.is_ok
except Exception as e:
logger.exception(f"Error validating TorBox Scraper: {e}")
@@ -57,22 +60,25 @@ def run(self, item: MediaItem) -> Dict[str, str]:

def _build_query_params(self, item: MediaItem) -> str:
"""Build the query params for the TorBox API"""
params = [f"imdb:{item.imdb_id}"]
if item.type == "show":
params.append("season=1")
imdb_id = item.get_top_imdb_id()
if item.type == "movie":
return f"torrents/imdb:{imdb_id}"
elif item.type == "show":
return f"torrents/imdb:{imdb_id}?season=1&episode=1"
elif item.type == "season":
params.append(f"season={item.number}")
return f"torrents/imdb:{imdb_id}?season={item.number}&episode=1"
elif item.type == "episode":
params.append(f"season={item.parent.number}&episode={item.number}")
return "&".join(params)
return f"torrents/imdb:{imdb_id}?season={item.parent.number}&episode={item.number}"
return ""

def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
def scrape(self, item: MediaItem) -> Dict[str, str]:
"""Wrapper for `Torbox` scrape method using Torbox API"""
query_params = self._build_query_params(item)
url = f"{self.base_url}/torrents/{query_params}?metadata=false"
url = f"{self.base_url}/{query_params}&metadata=false"

response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout)
response = self.request_handler.execute(HttpMethod.GET, url, headers=self.headers, timeout=self.timeout)
if not response.is_ok or not response.data.data.torrents:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents = {}
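Note: torbox.py now authenticates with a Bearer token and builds the full request path (season/episode included) in _build_query_params, so scrape() only appends the metadata flag. A standalone sketch of the resulting request, using plain requests instead of the project's ScraperRequestHandler for brevity:

import requests

BASE_URL = "http://search-api.torbox.app"

def search_torbox(api_key: str, imdb_id: str, season: int = 1, episode: int = 1,
                  timeout: int = 30) -> dict:
    headers = {"Authorization": f"Bearer {api_key}"}
    url = (f"{BASE_URL}/torrents/imdb:{imdb_id}"
           f"?season={season}&episode={episode}&metadata=false")
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # The diff reads response.data.data.torrents, i.e. {"data": {"torrents": [...]}}
    return response.json()
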
6 changes: 2 additions & 4 deletions src/program/services/scrapers/torrentio.py
@@ -74,10 +74,8 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
url += identifier

response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout)
if not response.is_ok:
return {}

if not hasattr(response.data, 'streams') or not response.data.streams:
if not response.is_ok or not hasattr(response.data, 'streams') or not response.data.streams:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents: Dict[str, str] = {}
1 change: 1 addition & 0 deletions src/program/services/scrapers/zilean.py
@@ -77,6 +77,7 @@ def scrape(self, item: MediaItem) -> Dict[str, str]:

response = self.request_handler.execute(HttpMethod.GET, url, params=params, timeout=self.timeout)
if not response.is_ok or not response.data:
logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
return {}

torrents: Dict[str, str] = {}
1 change: 1 addition & 0 deletions src/program/settings/models.py
@@ -271,6 +271,7 @@ class ProwlarrConfig(Observable):

class TorBoxScraperConfig(Observable):
enabled: bool = False
api_key: str = ""
timeout: int = 30


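Note: the settings change that backs the new header. A minimal usage sketch, assuming TorBoxScraperConfig is a pydantic-style Observable model like its siblings:

from program.settings.models import TorBoxScraperConfig  # module path per the diff

config = TorBoxScraperConfig(enabled=True, api_key="<your-torbox-api-key>", timeout=30)
assert config.api_key, "validate() now rejects an empty api_key"
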
