Commit 3c98cd1

Merge branch 'master' into cover_thumbnail
# Conflicts:
#	test/Calibre-Web TestSummary_Linux.html
OzzieIsaacs committed Apr 20, 2022
2 parents 2f3e5ea + 2303fc0 commit 3c98cd1
Showing 63 changed files with 9,666 additions and 8,753 deletions.
20 changes: 10 additions & 10 deletions SECURITY.md
@@ -24,20 +24,20 @@ To receive fixes for security vulnerabilities it is required to always upgrade t
| V 0.6.13 | JavaScript could get executed in the shelf title ||
| V 0.6.13 | Login with the old session cookie after logout. Thanks to @ibarrionuevo ||
| V 0.6.14 | CSRF was possible. Thanks to @mik317 and Hagai Wechsler (WhiteSource) |CVE-2021-25965|
| V 0.6.14 | Migrated some routes to POST-requests (CSRF protection). Thanks to @scara31 ||
| V 0.6.15 | Fix for "javascript:" script links in identifier. Thanks to @scara31 ||
| V 0.6.14 | Migrated some routes to POST-requests (CSRF protection). Thanks to @scara31 |CVE-2021-4164|
| V 0.6.15 | Fix for "javascript:" script links in identifier. Thanks to @scara31 |CVE-2021-4170|
| V 0.6.15 | Cross-Site Scripting vulnerability on uploaded cover file names. Thanks to @ibarrionuevo ||
| V 0.6.15 | Creating public shelves is now denied if the user is missing the edit public shelves right. Thanks to @ibarrionuevo ||
| V 0.6.15 | Changed the error message shown when trying to delete a shelf without authorization. Thanks to @ibarrionuevo ||
| V 0.6.16 | JavaScript could get executed on the authors page. Thanks to @alicaz ||
| V 0.6.16 | Localhost can no longer be used to upload covers. Thanks to @scara31 ||
| V 0.6.16 | Another case where public shelves could be created without permission is prevented. Thanks to @nhiephon ||
| V 0.6.16 | Retrieving the names of private shelves is now prevented. Thanks to @nhiephon ||
| V 0.6.17 | The SSRF protection can no longer be bypassed via an HTTP redirect. Thanks to @416e6e61 ||
| V 0.6.17 | The SSRF protection can no longer be bypassed via 0.0.0.0 and its IPv6 equivalent. Thanks to @r0hanSH ||
| V 0.6.16 | JavaScript could get executed on the authors page. Thanks to @alicaz |CVE-2022-0352|
| V 0.6.16 | Localhost can no longer be used to upload covers. Thanks to @scara31 |CVE-2022-0339|
| V 0.6.16 | Another case where public shelves could be created without permission is prevented. Thanks to @nhiephon |CVE-2022-0273|
| V 0.6.16 | Retrieving the names of private shelves is now prevented. Thanks to @nhiephon |CVE-2022-0405|
| V 0.6.17 | The SSRF protection can no longer be bypassed via an HTTP redirect. Thanks to @416e6e61 |CVE-2022-0767|
| V 0.6.17 | The SSRF protection can no longer be bypassed via 0.0.0.0 and its IPv6 equivalent. Thanks to @r0hanSH |CVE-2022-0766|
| V 0.6.18 | A possible SQL injection in the user table is prevented. Thanks to Iman Sharafaldin (Forward Security) ||
| V 0.6.18 | The SSRF protection can no longer be bypassed by IPv6/IPv4 embedding. Thanks to @416e6e61 ||
| V 0.6.18 | The SSRF protection can no longer be bypassed to connect to other servers in the local network. Thanks to @michaellrowley ||
| V 0.6.18 | The SSRF protection can no longer be bypassed by IPv6/IPv4 embedding. Thanks to @416e6e61 |CVE-2022-0939|
| V 0.6.18 | The SSRF protection can no longer be bypassed to connect to other servers in the local network. Thanks to @michaellrowley |CVE-2022-0990|


## Statement regarding Log4j (CVE-2021-44228 and related)
8 changes: 6 additions & 2 deletions cps/comic.py
@@ -130,7 +130,9 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
series=loaded_metadata.series or "",
series_id=loaded_metadata.issue or "",
languages=loaded_metadata.language,
publisher="")
publisher="",
pubdate="",
identifiers=[])

return BookMeta(
file_path=tmp_file_path,
@@ -143,4 +145,6 @@ def get_comic_info(tmp_file_path, original_file_name, original_file_extension, r
series="",
series_id="",
languages="",
publisher="")
publisher="",
pubdate="",
identifiers=[])
2 changes: 1 addition & 1 deletion cps/constants.py
@@ -161,7 +161,7 @@ def selected_roles(dictionary):

# :rtype: BookMeta
BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, '
'series_id, languages, publisher')
'series_id, languages, publisher, pubdate, identifiers')

STABLE_VERSION = {'version': '0.6.19 Beta'}

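The widened namedtuple is why so many extractors change in this commit: namedtuple fields have no defaults, so every producer of `BookMeta` (comic.py, epub.py, fb2.py) must now pass `pubdate` and `identifiers` explicitly. A minimal standalone sketch — the field values are invented, not from the commit:

```python
from collections import namedtuple

# Same shape as cps/constants.py after this commit: two new trailing fields.
BookMeta = namedtuple('BookMeta', 'file_path, extension, title, author, cover, description, tags, series, '
                                  'series_id, languages, publisher, pubdate, identifiers')

# Hypothetical values -- omitting any field raises TypeError, which is why
# every extractor in this commit now supplies all 13 fields.
meta = BookMeta(
    file_path='/tmp/upload.epub', extension='.epub', title='Example', author='Jane Doe',
    cover=None, description='', tags='', series='', series_id='', languages='en',
    publisher='', pubdate='2022-04-20', identifiers=[['isbn', '9780000000000']])
```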
34 changes: 32 additions & 2 deletions cps/db.py
@@ -903,9 +903,20 @@ def speaking_language(self, languages=None, return_all_languages=False, with_cou
.join(books_languages_link).join(Books)\
.filter(self.common_filters(return_all_languages=return_all_languages)) \
.group_by(text('books_languages_link.lang_code')).all()
tags = list()
for lang in languages:
lang[0].name = isoLanguages.get_language_name(get_locale(), lang[0].lang_code)
return sorted(languages, key=lambda x: x[0].name, reverse=reverse_order)
tag = Category(isoLanguages.get_language_name(get_locale(), lang[0].lang_code), lang[0].lang_code)
tags.append([tag, lang[1]])
# Append all books without language to list
if not return_all_languages:
no_lang_count = (self.session.query(Books)
.outerjoin(books_languages_link).outerjoin(Languages)
.filter(Languages.lang_code == None)
.filter(self.common_filters())
.count())
if no_lang_count:
tags.append([Category(_("None"), "none"), no_lang_count])
return sorted(tags, key=lambda x: x[0].name, reverse=reverse_order)
else:
if not languages:
languages = self.session.query(Languages) \
@@ -977,3 +988,22 @@ def lcase(s):
_log = logger.create()
_log.error_or_exception(ex)
return s.lower()


class Category:
name = None
id = None
count = None
rating = None

def __init__(self, name, cat_id, rating=None):
self.name = name
self.id = cat_id
self.rating = rating
self.count = 1

'''class Count:
count = None
def __init__(self, count):
self.count = count'''
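With this change `speaking_language` returns `[Category, count]` pairs plus a synthetic "none" bucket instead of raw ORM rows, so languages can render like any other category listing. A rough standalone illustration — the sample data is invented, and `Category` is re-declared so the snippet runs on its own:

```python
# Minimal re-declaration of the Category class above so this snippet is standalone.
class Category:
    def __init__(self, name, cat_id, rating=None):
        self.name = name
        self.id = cat_id
        self.rating = rating
        self.count = 1

# Invented sample data in the [Category, count] shape speaking_language now returns.
tags = [[Category("English", "eng"), 42], [Category("German", "deu"), 7]]
tags.append([Category("None", "none"), 3])  # bucket for books without a language

tags = sorted(tags, key=lambda x: x[0].name, reverse=False)
print([(t[0].name, t[1]) for t in tags])  # [('English', 42), ('German', 7), ('None', 3)]
```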
17 changes: 16 additions & 1 deletion cps/editbooks.py
@@ -984,8 +984,13 @@ def create_book_on_upload(modify_date, meta):
# combine path and normalize path from Windows systems
path = os.path.join(author_dir, title_dir).replace('\\', '/')

try:
pubdate = datetime.strptime(meta.pubdate[:10], "%Y-%m-%d")
except Exception:
pubdate = datetime(101, 1, 1)

# Calibre adds books with utc as timezone
db_book = db.Books(title, "", sort_authors, datetime.utcnow(), datetime(101, 1, 1),
db_book = db.Books(title, "", sort_authors, datetime.utcnow(), pubdate,
'1', datetime.utcnow(), path, meta.cover, db_author, [], "")

modify_date |= modify_database_object(input_authors, db_book.authors, db.Authors, calibre_db.session,
@@ -1018,6 +1023,16 @@ def create_book_on_upload(modify_date, meta):

# flush content, get db_book.id available
calibre_db.session.flush()

# Handle identifiers now that db_book.id is available
identifier_list = []
for type_key, type_value in meta.identifiers:
identifier_list.append(db.Identifiers(type_value, type_key, db_book.id))
modification, warning = modify_identifiers(identifier_list, db_book.identifiers, calibre_db.session)
if warning:
flash(_("Identifiers are not Case Sensitive, Overwriting Old Identifier"), category="warning")
modify_date |= modification

return db_book, input_authors, title_dir, renamed_authors


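The upload path now reads only the leading `YYYY-MM-DD` of `meta.pubdate` and falls back to Calibre's 0101-01-01 "undefined date" placeholder when parsing fails. The same fallback in isolation — the broad except is narrowed here to `TypeError`/`ValueError`:

```python
from datetime import datetime

def parse_pubdate(raw):
    """Mirror of the commit's fallback: accept 'YYYY-MM-DD...' strings,
    otherwise return Calibre's 0101-01-01 'undefined date' placeholder."""
    try:
        return datetime.strptime(raw[:10], "%Y-%m-%d")
    except (TypeError, ValueError):
        return datetime(101, 1, 1)

print(parse_pubdate("2022-04-20T00:00:00+00:00"))  # 2022-04-20 00:00:00
print(parse_pubdate(""))                           # 0101-01-01 00:00:00
```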
22 changes: 20 additions & 2 deletions cps/epub.py
@@ -63,13 +63,15 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):

epub_metadata = {}

for s in ['title', 'description', 'creator', 'language', 'subject']:
for s in ['title', 'description', 'creator', 'language', 'subject', 'publisher', 'date']:
tmp = p.xpath('dc:%s/text()' % s, namespaces=ns)
if len(tmp) > 0:
if s == 'creator':
epub_metadata[s] = ' & '.join(split_authors(tmp))
elif s == 'subject':
epub_metadata[s] = ', '.join(tmp)
elif s == 'date':
epub_metadata[s] = tmp[0][:10]
else:
epub_metadata[s] = tmp[0]
else:
@@ -78,6 +80,12 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
if epub_metadata['subject'] == 'Unknown':
epub_metadata['subject'] = ''

if epub_metadata['publisher'] == u'Unknown':
epub_metadata['publisher'] = ''

if epub_metadata['date'] == u'Unknown':
epub_metadata['date'] = ''

if epub_metadata['description'] == u'Unknown':
description = tree.xpath("//*[local-name() = 'description']/text()")
if len(description) > 0:
@@ -92,6 +100,14 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):

cover_file = parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path)

identifiers = []
for node in p.xpath('dc:identifier', namespaces=ns):
identifier_name = node.attrib.values()[-1]
identifier_value = node.text
if identifier_name in ('uuid', 'calibre'):
    continue
identifiers.append([identifier_name, identifier_value])

if not epub_metadata['title']:
title = original_file_name
else:
@@ -108,7 +124,9 @@ def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
series=epub_metadata['series'].encode('utf-8').decode('utf-8'),
series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'),
languages=epub_metadata['language'],
publisher="")
publisher=epub_metadata['publisher'].encode('utf-8').decode('utf-8'),
pubdate=epub_metadata['date'],
identifiers=identifiers)


def parse_epub_cover(ns, tree, epub_zip, cover_path, tmp_file_path):
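The new identifier loop keys off the last attribute value of each `dc:identifier` node (normally the `opf:scheme` value) and skips Calibre's internal `uuid`/`calibre` entries. A self-contained sketch against an invented OPF fragment:

```python
from lxml import etree

# Invented minimal OPF metadata block, for illustration only.
opf = b"""<metadata xmlns:dc="http://purl.org/dc/elements/1.1/"
                    xmlns:opf="http://www.idpf.org/2007/opf">
  <dc:identifier opf:scheme="calibre">17</dc:identifier>
  <dc:identifier opf:scheme="ISBN">9780000000000</dc:identifier>
</metadata>"""

p = etree.fromstring(opf)
ns = {'dc': 'http://purl.org/dc/elements/1.1/'}

identifiers = []
for node in p.xpath('dc:identifier', namespaces=ns):
    identifier_name = node.attrib.values()[-1]   # last attribute value, e.g. the scheme
    identifier_value = node.text
    if identifier_name in ('uuid', 'calibre'):
        continue                                 # skip Calibre-internal identifiers
    identifiers.append([identifier_name, identifier_value])

print(identifiers)  # [['ISBN', '9780000000000']]
```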
4 changes: 3 additions & 1 deletion cps/fb2.py
@@ -77,4 +77,6 @@ def get_author(element):
series="",
series_id="",
languages="",
publisher="")
publisher="",
pubdate="",
identifiers=[])
63 changes: 37 additions & 26 deletions cps/metadata_provider/amazon.py
@@ -19,18 +19,23 @@
import concurrent.futures
import requests
from bs4 import BeautifulSoup as BS # requirement
from typing import List, Optional

try:
import cchardet #optional for better speed
except ImportError:
pass
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
import cps.logger as logger

#from time import time
from operator import itemgetter
log = logger.create()

log = logger.create()


class Amazon(Metadata):
__name__ = "Amazon"
__id__ = "amazon"
@@ -49,17 +54,21 @@ class Amazon(Metadata):

def search(
self, query: str, generic_cover: str = "", locale: str = "en"
):
) -> Optional[List[MetaRecord]]:
#timer=time()
def inner(link, index) -> [dict, int]:
try:
with self.session as session:
r = session.get(f"https://www.amazon.com{link}")
with self.session as session:
try:
r = session.get(f"https://www.amazon.com/{link}")
r.raise_for_status()
long_soup = BS(r.text, "lxml") #~4sec :/
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
if soup2 is None:
return
except Exception as ex:
log.warning(ex)
return
long_soup = BS(r.text, "lxml") #~4sec :/
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
if soup2 is None:
return
try:
match = MetaRecord(
title = "",
authors = "",
@@ -104,27 +113,29 @@ def inner(link, index) -> [dict, int]:
except (AttributeError, TypeError):
match.cover = ""
return match, index
except Exception as e:
log.error_or_exception(e)
return
except Exception as e:
log.error_or_exception(e)
return

val = list()
try:
if self.active:
if self.active:
try:
results = self.session.get(
f"https://www.amazon.com/s?k={query.replace(' ', '+')}"
f"&i=digital-text&sprefix={query.replace(' ', '+')}"
f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}"
f"%2Cdigital-text&ref=nb_sb_noss",
headers=self.headers)
results.raise_for_status()
soup = BS(results.text, 'html.parser')
links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:5])}
val = list(map(lambda x: x.result(), concurrent.futures.as_completed(fut)))
result = list(filter(lambda x: x, val))
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
except requests.exceptions.HTTPError as e:
log.error_or_exception(e)
return []
except requests.exceptions.HTTPError as e:
log.error_or_exception(e)
return None
except Exception as e:
log.warning(e)
return None
soup = BS(results.text, 'html.parser')
links_list = [next(filter(lambda i: "digital-text" in i["href"], x.findAll("a")))["href"] for x in
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:5])}
val = list(map(lambda x : x.result() ,concurrent.futures.as_completed(fut)))
result = list(filter(lambda x: x, val))
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
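The Amazon provider fans the first five result links out to a thread pool; since `as_completed` yields futures in completion order, `inner()` returns each record together with its original index so the list can be re-sorted back into Amazon's listing order. The pattern in isolation — the worker body is a stand-in, not the real scraper:

```python
import concurrent.futures
from operator import itemgetter

def fetch(link, index):
    # Stand-in for Amazon.inner(): return the parsed record plus its
    # original index, or None when the page cannot be fetched/parsed.
    return f"record for {link}", index

links_list = ["/dp/1", "/dp/2", "/dp/3"]
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    fut = {executor.submit(fetch, link, index) for index, link in enumerate(links_list[:5])}
    val = [f.result() for f in concurrent.futures.as_completed(fut)]  # completion order

result = [x for x in val if x]                        # drop failed lookups (None)
ordered = [x[0] for x in sorted(result, key=itemgetter(1))]
print(ordered)  # records restored to the original listing order
```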
16 changes: 12 additions & 4 deletions cps/metadata_provider/comicvine.py
@@ -21,8 +21,11 @@
from urllib.parse import quote

import requests
from cps import logger
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata

log = logger.create()


class ComicVine(Metadata):
__name__ = "ComicVine"
@@ -46,10 +49,15 @@ def search(
if title_tokens:
tokens = [quote(t.encode("utf-8")) for t in title_tokens]
query = "%20".join(tokens)
result = requests.get(
f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
headers=ComicVine.HEADERS,
)
try:
result = requests.get(
f"{ComicVine.BASE_URL}{query}{ComicVine.QUERY_PARAMS}",
headers=ComicVine.HEADERS,
)
result.raise_for_status()
except Exception as e:
log.warning(e)
return None
for result in result.json()["results"]:
match = self._parse_search_result(
result=result, generic_cover=generic_cover, locale=locale
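Both metadata providers now follow the same defensive pattern: wrap the HTTP call, promote 4xx/5xx responses to exceptions with `raise_for_status()`, log, and return `None` so an unreachable provider degrades gracefully instead of crashing the search path. Reduced to its core — the logger here is a standard-library stand-in for `cps.logger.create()`:

```python
import logging
import requests

log = logging.getLogger(__name__)  # stand-in for cps.logger.create()

def guarded_get(url, headers=None):
    """Fetch a metadata endpoint; return None on any transport/HTTP failure."""
    try:
        result = requests.get(url, headers=headers)
        result.raise_for_status()   # turn 4xx/5xx responses into exceptions
    except Exception as e:
        log.warning(e)
        return None
    return result.json()
```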
(Diff truncated: the remaining 54 changed files are not shown.)
