Skip to content

Commit

Permalink
Feature: Search through multiple pages of results
Browse files Browse the repository at this point in the history
  • Loading branch information
NotSimone committed Jan 25, 2024
1 parent 6b07f10 commit b112644
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
32 changes: 20 additions & 12 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_book_url(self, identifiers) -> Optional[Tuple]:
isbn = identifiers.get("isbn", None)
if isbn:
# Example output:"https://www.kobo.com/au/en/search?query=9781761108105"
return ("isbn", isbn, self._get_search_url(isbn))
return ("isbn", isbn, self._get_search_url(isbn, 1))
return None

def get_cached_cover_url(self, identifiers) -> Optional[str]:
Expand Down Expand Up @@ -193,10 +193,7 @@ def identify(
result_queue.put(metadata)
else:
log.info("KoboMetadata::identify:: Could not find matching book")

index += 1
if index >= self.prefs["num_matches"]:
return
return

def download_cover(
Expand Down Expand Up @@ -233,8 +230,8 @@ def download_cover(

result_queue.put((self, cover))

def _get_search_url(self, search_str: str) -> str:
query = {"query": search_str, "fcmedia": "Book"}
def _get_search_url(self, search_str: str, page_number: int) -> str:
query = {"query": search_str, "fcmedia": "Book", "pageNumber": page_number}
return f"{self.BASE_URL}{self.prefs['country']}/en/search?{urlencode(query)}"

def _generate_query(self, title: str, authors: list[str]) -> str:
Expand Down Expand Up @@ -272,21 +269,32 @@ def _get_webpage(self, url: str, log: Log, timeout: int) -> Tuple[Optional[html.

# Returns a list of urls that match our search
def _perform_query(self, query: str, log: Log, timeout: int) -> list[str]:
url = self._get_search_url(query)
url = self._get_search_url(query, 1)
log.info(f"KoboMetadata::identify: Searching for book with url: {url}")

tree, is_search = self._get_webpage(url, log, timeout)
if tree is None:
log.info(f"KoboMetadata::_lookup_metadata: Could not get url: {url}")
return []

# Some queries (esp ISBN) can redirect straight to the product page
if is_search:
search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
return [x.get("href") for x in search_results_elements]
else:
# Query redirected straight to product page
if not is_search:
return [url]

search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
results = [x.get("href") for x in search_results_elements]

page_num = 2
while len(results) < self.prefs["num_matches"]:
url = self._get_search_url(query, page_num)
tree, is_search = self._get_webpage(url, log, timeout)
assert tree and is_search
search_results_elements = tree.xpath("//h2[@class='title product-field']/a")
results.extend([x.get("href") for x in search_results_elements])
page_num += 1

return results[: self.prefs["num_matches"]]

# Given the url for a book, parse and return the metadata
def _lookup_metadata(self, url: str, log: Log, timeout: int) -> Optional[Metadata]:
tree, is_search = self._get_webpage(url, log, timeout)
Expand Down
2 changes: 2 additions & 0 deletions format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Format the whole repository: black for code layout (120-column lines),
# then isort to normalize import ordering.
black --line-length 120 .
isort .

0 comments on commit b112644

Please sign in to comment.