Skip to content

Commit

Permalink
Feature: Search through multiple pages of results
Browse files Browse the repository at this point in the history
  • Loading branch information
NotSimone committed Jan 25, 2024
1 parent 6b07f10 commit b112644
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
32 changes: 20 additions & 12 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_book_url(self, identifiers) -> Optional[Tuple]:
isbn = identifiers.get("isbn", None)
if isbn:
# Example output:"https://www.kobo.com/au/en/search?query=9781761108105"
return ("isbn", isbn, self._get_search_url(isbn))
return ("isbn", isbn, self._get_search_url(isbn, 1))
return None

def get_cached_cover_url(self, identifiers) -> Optional[str]:
Expand Down Expand Up @@ -193,10 +193,7 @@ def identify(
result_queue.put(metadata)
else:
log.info("KoboMetadata::identify:: Could not find matching book")

index += 1
if index >= self.prefs["num_matches"]:
return
return

def download_cover(
Expand Down Expand Up @@ -233,8 +230,8 @@ def download_cover(

result_queue.put((self, cover))

def _get_search_url(self, search_str: str) -> str:
query = {"query": search_str, "fcmedia": "Book"}
def _get_search_url(self, search_str: str, page_number: int) -> str:
query = {"query": search_str, "fcmedia": "Book", "pageNumber": page_number}
return f"{self.BASE_URL}{self.prefs['country']}/en/search?{urlencode(query)}"

def _generate_query(self, title: str, authors: list[str]) -> str:
Expand Down Expand Up @@ -272,21 +269,32 @@ def _get_webpage(self, url: str, log: Log, timeout: int) -> Tuple[Optional[html.

# Returns a list of urls that match our search
def _perform_query(self, query: str, log: Log, timeout: int) -> list[str]:
url = self._get_search_url(query)
url = self._get_search_url(query, 1)
log.info(f"KoboMetadata::identify: Searching for book with url: {url}")

tree, is_search = self._get_webpage(url, log, timeout)
if tree is None:
log.info(f"KoboMetadata::_lookup_metadata: Could not get url: {url}")
return []

# Some queries (esp ISBN) can redirect straight to the product page
if is_search:
search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
return [x.get("href") for x in search_results_elements]
else:
# Query redirected straight to product page
if not is_search:
return [url]

search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
results = [x.get("href") for x in search_results_elements]

page_num = 2
while len(results) < self.prefs["num_matches"]:
url = self._get_search_url(query, page_num)
tree, is_search = self._get_webpage(url, log, timeout)
assert tree and is_search
search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
results.extend([x.get("href") for x in search_results_elements])
page_num += 1

return results[: self.prefs["num_matches"]]

# Given the url for a book, parse and return the metadata
def _lookup_metadata(self, url: str, log: Log, timeout: int) -> Optional[Metadata]:
tree, is_search = self._get_webpage(url, log, timeout)
Expand Down
2 changes: 2 additions & 0 deletions format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Format the whole repository: black for code layout (120-column lines),
# then isort to normalize import ordering.
black --line-length 120 .
isort .

0 comments on commit b112644

Please sign in to comment.