From dec2d54459a8480c7fd4b3e5f55962d9411c76c4 Mon Sep 17 00:00:00 2001 From: hect0x7 <93357912+hect0x7@users.noreply.github.com> Date: Sat, 6 Apr 2024 10:56:47 +0800 Subject: [PATCH] =?UTF-8?q?v2.5.9:=20=E6=96=B0=E5=A2=9E=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E3=80=90=E8=B7=B3=E8=BF=87=E4=B8=8B=E8=BD=BD=E5=9B=BE=E7=89=87?= =?UTF-8?q?=E6=95=B0=E5=B0=91=E7=9A=84=E7=AB=A0=E8=8A=82=E3=80=91=EF=BC=8C?= =?UTF-8?q?=E5=8F=AF=E8=BF=87=E6=BB=A4=E4=B8=80=E4=BA=9B=E5=85=AC=E5=91=8A?= =?UTF-8?q?=E7=AB=A0=E8=8A=82;=20=E6=96=B0=E5=A2=9EDownloadable=E5=9F=BA?= =?UTF-8?q?=E7=B1=BB=EF=BC=8C=E5=AF=B9=E6=8F=92=E4=BB=B6=E6=89=A9=E5=B1=95?= =?UTF-8?q?=E6=9B=B4=E5=8F=8B=E5=A5=BD;=20=E4=BF=AE=E5=A4=8D=E6=9C=AC?= =?UTF-8?q?=E5=AD=90=E5=90=8D=E8=BF=87=E9=95=BF=E8=B6=85=E5=87=BA=E5=AF=BC?= =?UTF-8?q?=E8=87=B4oserror=E7=9A=84=E9=94=99=E8=AF=AF;=20=E5=8D=87?= =?UTF-8?q?=E7=BA=A7curl=5Fcffi.=20(#222)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assets/docs/sources/option_file_syntax.md | 5 +++ requirements-dev.txt | 2 +- setup.py | 2 +- src/jmcomic/__init__.py | 2 +- src/jmcomic/jm_client_impl.py | 8 ++--- src/jmcomic/jm_config.py | 30 +++++++++-------- src/jmcomic/jm_downloader.py | 13 ++++++-- src/jmcomic/jm_entity.py | 25 +++++++++------ src/jmcomic/jm_exception.py | 10 ++---- src/jmcomic/jm_option.py | 12 +++++-- src/jmcomic/jm_plugin.py | 39 ++++++++++++++++++++--- tests/test_jmcomic/test_jm_client.py | 2 +- 12 files changed, 102 insertions(+), 48 deletions(-) diff --git a/assets/docs/sources/option_file_syntax.md b/assets/docs/sources/option_file_syntax.md index d9fbee8c..6e70b853 100644 --- a/assets/docs/sources/option_file_syntax.md +++ b/assets/docs/sources/option_file_syntax.md @@ -198,6 +198,11 @@ plugins: zip_enable: true # 对收藏夹进行压缩 zip_filepath: ${JM_DOWNLOAD_DIR}/export.zip # 压缩文件路径 zip_password: ${ZIP_PASSWORD} # 压缩密码 + + before_photo: + - plugin: skip_photo_with_few_images # 跳过下载章节图片数量过少的章节。一些韩漫的章节是公告,没有实际内容,就可以用该插件来跳过下载这些章节。 + kwargs: + at_least_image_count: 3 after_photo: - plugin: j2p # jpg图片合成为一个pdf插件 diff --git a/requirements-dev.txt b/requirements-dev.txt index a346c968..7ec180fd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ -commonX curl_cffi +commonX PyYAML Pillow psutil diff --git a/setup.py b/setup.py index 264d7540..ee04dd96 100644 --- a/setup.py +++ b/setup.py @@ -27,8 +27,8 @@ package_dir={"": "src"}, python_requires=">=3.7", install_requires=[ - 'commonX>=0.6.4', 'curl_cffi', + 'commonX', 'PyYAML', 'Pillow', 'pycryptodome', diff --git a/src/jmcomic/__init__.py b/src/jmcomic/__init__.py index 2a21021b..873d0e0c 100644 --- a/src/jmcomic/__init__.py +++ b/src/jmcomic/__init__.py @@ -2,7 +2,7 @@ # 被依赖方 <--- 使用方 # config <--- entity <--- toolkit <--- client <--- option <--- downloader -__version__ = '2.5.8' +__version__ = '2.5.9' from .api import * from .jm_plugin import * diff --git a/src/jmcomic/jm_client_impl.py b/src/jmcomic/jm_client_impl.py index a18c03d0..0eda472b 100644 --- a/src/jmcomic/jm_client_impl.py +++ b/src/jmcomic/jm_client_impl.py @@ -224,7 +224,7 @@ def append_params_to_url(self, url, params): # noinspection PyMethodMayBeStatic def decode(self, url: str): - if not JmModuleConfig.flag_decode_url_when_logging or '/search/' not in url: + if not JmModuleConfig.FLAG_DECODE_URL_WHEN_LOGGING or '/search/' not in url: return url from urllib.parse import unquote @@ -767,7 +767,7 @@ def setting(self) -> JmApiResp: # 检查禁漫最新的版本号 setting_ver = str(resp.model_data.version) # 禁漫接口的版本 > jmcomic库内置版本 - if setting_ver > JmMagicConstants.APP_VERSION and JmModuleConfig.flag_use_version_newer_if_behind: + if setting_ver > JmMagicConstants.APP_VERSION and JmModuleConfig.FLAG_USE_VERSION_NEWER_IF_BEHIND: jm_log('api.setting', f'change APP_VERSION from [{JmMagicConstants.APP_VERSION}] to [{setting_ver}]') JmMagicConstants.APP_VERSION = setting_ver @@ -883,7 +883,7 @@ def decide_headers_and_ts(self, kwargs, url): ts = time_stamp() token, tokenparam = JmCryptoTool.token_and_tokenparam(ts, secret=JmMagicConstants.APP_TOKEN_SECRET_2) - elif JmModuleConfig.flag_use_fix_timestamp: + elif JmModuleConfig.FLAG_USE_FIX_TIMESTAMP: ts, token, tokenparam = JmModuleConfig.get_fix_ts_token_tokenparam() else: @@ -954,7 +954,7 @@ def raise_if_resp_should_retry(self, resp): def after_init(self): # 保证拥有cookies,因为移动端要求必须携带cookies,否则会直接跳转同一本子【禁漫娘】 - if JmModuleConfig.flag_api_client_require_cookies: + if JmModuleConfig.FLAG_API_CLIENT_REQUIRE_COOKIES: self.ensure_have_cookies() client_init_cookies_lock = Lock() diff --git a/src/jmcomic/jm_config.py b/src/jmcomic/jm_config.py index 1cfe1fe9..98d28ad6 100644 --- a/src/jmcomic/jm_config.py +++ b/src/jmcomic/jm_config.py @@ -146,18 +146,18 @@ class JmModuleConfig: REGISTRY_EXCEPTION_LISTENER = {} # 执行log的函数 - executor_log = default_jm_logging + EXECUTOR_LOG = default_jm_logging # 使用固定时间戳 - flag_use_fix_timestamp = True + FLAG_USE_FIX_TIMESTAMP = True # 移动端Client初始化cookies - flag_api_client_require_cookies = True + FLAG_API_CLIENT_REQUIRE_COOKIES = True # log开关标记 - flag_enable_jm_log = True + FLAG_ENABLE_JM_LOG = True # log时解码url - flag_decode_url_when_logging = True + FLAG_DECODE_URL_WHEN_LOGGING = True # 当内置的版本号落后时,使用最新的禁漫app版本号 - flag_use_version_newer_if_behind = True + FLAG_USE_VERSION_NEWER_IF_BEHIND = True # 关联dir_rule的自定义字段与对应的处理函数 # 例如: @@ -165,6 +165,10 @@ class JmModuleConfig: AFIELD_ADVICE = dict() PFIELD_ADVICE = dict() + # 当发生 oserror: [Errno 36] File name too long 时, + # 把文件名限制在指定个字符以内 + VAR_FILE_NAME_LENGTH_LIMIT = 100 + @classmethod def downloader_class(cls): if cls.CLASS_DOWNLOADER is not None: @@ -319,12 +323,12 @@ def get_fix_ts_token_tokenparam(cls): # noinspection PyUnusedLocal @classmethod def jm_log(cls, topic: str, msg: str): - if cls.flag_enable_jm_log is True: - cls.executor_log(topic, msg) + if cls.FLAG_ENABLE_JM_LOG is True: + cls.EXECUTOR_LOG(topic, msg) @classmethod def disable_jm_log(cls): - cls.flag_enable_jm_log = False + cls.FLAG_ENABLE_JM_LOG = False @classmethod def new_postman(cls, session=False, **kwargs): @@ -347,7 +351,7 @@ def new_postman(cls, session=False, **kwargs): DEFAULT_CLIENT_CACHE = None # 默认关闭Client缓存。缓存的配置详见 CacheRegistry DEFAULT_PROXIES = ProxyBuilder.system_proxy() # 默认使用系统代理 - default_option_dict: dict = { + DEFAULT_OPTION_DICT: dict = { 'log': None, 'dir_rule': {'rule': 'Bd_Pname', 'base_dir': None}, 'download': { @@ -364,7 +368,7 @@ def new_postman(cls, session=False, **kwargs): 'postman': { 'type': 'cffi', 'meta_data': { - 'impersonate': 'chrome110', + 'impersonate': 'chrome', 'headers': None, 'proxies': None, } @@ -387,11 +391,11 @@ def option_default_dict(cls) -> dict: """ from copy import deepcopy - option_dict = deepcopy(cls.default_option_dict) + option_dict = deepcopy(cls.DEFAULT_OPTION_DICT) # log if option_dict['log'] is None: - option_dict['log'] = cls.flag_enable_jm_log + option_dict['log'] = cls.FLAG_ENABLE_JM_LOG # dir_rule.base_dir dir_rule = option_dict['dir_rule'] diff --git a/src/jmcomic/jm_downloader.py b/src/jmcomic/jm_downloader.py index 7cbce784..e23edaae 100644 --- a/src/jmcomic/jm_downloader.py +++ b/src/jmcomic/jm_downloader.py @@ -29,7 +29,7 @@ def after_photo(self, photo: JmPhotoDetail): f'章节下载完成: [{photo.id}] ({photo.album_id}[{photo.index}/{len(photo.from_album)}])') def before_image(self, image: JmImageDetail, img_save_path): - if image.is_exists: + if image.exists: jm_log('image.before', f'图片已存在: {image.tag} ← [{img_save_path}]' ) @@ -63,6 +63,8 @@ def download_album(self, album_id): def download_by_album_detail(self, album: JmAlbumDetail, client: JmcomicClient): self.before_album(album) + if album.skip: + return self.execute_by_condition( iter_objs=album, apply=lambda photo: self.download_by_photo_detail(photo, client), @@ -80,6 +82,8 @@ def download_by_photo_detail(self, photo: JmPhotoDetail, client: JmcomicClient): client.check_photo(photo) self.before_photo(photo) + if photo.skip: + return self.execute_by_condition( iter_objs=photo, apply=lambda image: self.download_by_image_detail(image, client), @@ -91,16 +95,19 @@ def download_by_image_detail(self, image: JmImageDetail, client: JmcomicClient): img_save_path = self.option.decide_image_filepath(image) image.save_path = img_save_path - image.is_exists = file_exists(img_save_path) + image.exists = file_exists(img_save_path) self.before_image(image, img_save_path) + if image.skip: + return + # let option decide use_cache and decode_image use_cache = self.option.decide_download_cache(image) decode_image = self.option.decide_download_image_decode(image) # skip download - if use_cache is True and image.is_exists: + if use_cache is True and image.exists: return e = None diff --git a/src/jmcomic/jm_entity.py b/src/jmcomic/jm_entity.py index a86c7487..9861e1ab 100644 --- a/src/jmcomic/jm_entity.py +++ b/src/jmcomic/jm_entity.py @@ -3,6 +3,14 @@ from .jm_config import * +class Downloadable: + + def __init__(self): + self.save_path: str = '' + self.exists: bool = False + self.skip = False + + class JmBaseEntity: def to_file(self, filepath): @@ -117,7 +125,7 @@ def idoname(self): def __str__(self): return f'{self.__class__.__name__}' \ '{' \ - f'{self.id}: {self.title}'\ + f'{self.id}: {self.title}' \ '}' @classmethod @@ -156,7 +164,7 @@ def get_dirname(cls, detail: 'DetailEntity', ref: str) -> str: return getattr(detail, ref) -class JmImageDetail(JmBaseEntity): +class JmImageDetail(JmBaseEntity, Downloadable): def __init__(self, aid, @@ -167,7 +175,8 @@ def __init__(self, from_photo=None, query_params=None, index=-1, - ) -> None: + ): + super().__init__() if scramble_id is None or (isinstance(scramble_id, str) and scramble_id == ''): from .jm_toolkit import ExceptionTool ExceptionTool.raises(f'图片的scramble_id不能为空') @@ -182,10 +191,6 @@ def __init__(self, self.query_params: Optional[str] = query_params self.index = index # 从1开始 - # temp fields, in order to simplify passing parameter - self.save_path: str = '' - self.is_exists: bool = False - @property def filename_without_suffix(self): return self.img_file_name @@ -252,7 +257,7 @@ def is_image(cls): return True -class JmPhotoDetail(DetailEntity): +class JmPhotoDetail(DetailEntity, Downloadable): def __init__(self, photo_id, @@ -267,6 +272,7 @@ def __init__(self, author=None, from_album=None, ): + super().__init__() self.photo_id: str = str(photo_id) self.scramble_id: str = str(scramble_id) self.name: str = str(name).strip() @@ -411,7 +417,7 @@ def is_photo(cls): return True -class JmAlbumDetail(DetailEntity): +class JmAlbumDetail(DetailEntity, Downloadable): def __init__(self, album_id, @@ -430,6 +436,7 @@ def __init__(self, tags, related_list=None, ): + super().__init__() self.album_id: str = str(album_id) self.scramble_id: str = str(scramble_id) self.name: str = name diff --git a/src/jmcomic/jm_exception.py b/src/jmcomic/jm_exception.py index 3752d7ea..a6040298 100644 --- a/src/jmcomic/jm_exception.py +++ b/src/jmcomic/jm_exception.py @@ -72,13 +72,6 @@ class ExceptionTool: CONTEXT_KEY_RE_PATTERN = 'pattern' CONTEXT_KEY_MISSING_JM_ID = 'missing_jm_id' - # 兼容旧版本 - - EXTRA_KEY_RESP = 'resp' - EXTRA_KEY_HTML = 'html' - EXTRA_KEY_RE_PATTERN = 'pattern' - EXTRA_KEY_MISSING_JM_ID = 'missing_jm_id' - @classmethod def raises(cls, msg: str, @@ -144,7 +137,8 @@ def raise_missing(cls, :param resp: 响应对象 :param jmid: 禁漫本子/章节id """ - url = resp.url + from .jm_toolkit import JmcomicText + url = JmcomicText.format_album_url(jmid) req_type = "本子" if "album" in url else "章节" cls.raises( diff --git a/src/jmcomic/jm_option.py b/src/jmcomic/jm_option.py index 90b35e19..e965ae8e 100644 --- a/src/jmcomic/jm_option.py +++ b/src/jmcomic/jm_option.py @@ -285,7 +285,15 @@ def decide_image_save_dir(self, photo, ensure_exists=True) -> str: ) if ensure_exists: - mkdir_if_not_exists(save_dir) + try: + mkdir_if_not_exists(save_dir) + except OSError as e: + if e.errno == 36: + # 目录名过长 + limit = JmModuleConfig.VAR_FILE_NAME_LENGTH_LIMIT + jm_log('error', f'目录名过长,无法创建目录,强制缩短到{limit}个字符并重试') + save_dir = save_dir[0:limit] + mkdir_if_not_exists(save_dir) return save_dir @@ -359,7 +367,7 @@ def compatible_with_old_versions(cls, dic): def deconstruct(self) -> Dict: return { 'version': JmModuleConfig.JM_OPTION_VER, - 'log': JmModuleConfig.flag_enable_jm_log, + 'log': JmModuleConfig.FLAG_ENABLE_JM_LOG, 'dir_rule': { 'rule': self.dir_rule.rule_dsl, 'base_dir': self.dir_rule.base_dir, diff --git a/src/jmcomic/jm_plugin.py b/src/jmcomic/jm_plugin.py index 54dc7b37..7ef2e4f3 100644 --- a/src/jmcomic/jm_plugin.py +++ b/src/jmcomic/jm_plugin.py @@ -445,9 +445,7 @@ def apply_filter_then_decide_cache(image: JmImageDetail): if image.img_file_suffix not in allowed_suffix_set: self.log(f'跳过下载图片: {image.tag},' f'因为其后缀\'{image.img_file_suffix}\'不在允许的后缀集合{allowed_suffix_set}内') - # hook is_exists True to skip download - image.is_exists = True - return True + image.skip = True # let option decide return option_decide_cache(image) @@ -484,7 +482,7 @@ def invoke(self, whitelist) -> None: if whitelist is not None: whitelist = set(whitelist) - old_jm_log = JmModuleConfig.executor_log + old_jm_log = JmModuleConfig.EXECUTOR_LOG def new_jm_log(topic, msg): if whitelist is not None and topic not in whitelist: @@ -492,7 +490,7 @@ def new_jm_log(topic, msg): old_jm_log(topic, msg) - JmModuleConfig.executor_log = new_jm_log + JmModuleConfig.EXECUTOR_LOG = new_jm_log class AutoSetBrowserCookiesPlugin(JmOptionPlugin): @@ -963,3 +961,34 @@ def check_photo_update(self, album_id: str, photo_id: str): is_new_photo = True return len(photo_new_list) != 0, photo_new_list + + +class SkipPhotoWithFewImagesPlugin(JmOptionPlugin): + plugin_key = 'skip_photo_with_few_images' + + def invoke(self, + at_least_image_count: int, + photo: Optional[JmPhotoDetail] = None, + image: Optional[JmImageDetail] = None, + album: Optional[JmAlbumDetail] = None, + **kwargs + ): + self.try_mark_photo_skip_and_log(photo, at_least_image_count) + if image is not None: + self.try_mark_photo_skip_and_log(image.from_photo, at_least_image_count) + + def try_mark_photo_skip_and_log(self, photo: JmPhotoDetail, at_least_image_count: int): + if photo is None: + return + + if len(photo) >= at_least_image_count: + return + + self.log(f'跳过下载章节: {photo.id} ({photo.album_id}[{photo.index}/{len(photo.from_album)}]),' + f'因为其图片数: {len(photo)} < {at_least_image_count} (at_least_image_count)') + photo.skip = True + + @classmethod + @field_cache() # 单例 + def build(cls, option: JmOption) -> 'JmOptionPlugin': + return super().build(option) diff --git a/tests/test_jmcomic/test_jm_client.py b/tests/test_jmcomic/test_jm_client.py index fa79133d..95e328d1 100644 --- a/tests/test_jmcomic/test_jm_client.py +++ b/tests/test_jmcomic/test_jm_client.py @@ -234,7 +234,7 @@ def test_cache_result_equal(self): self.assertEqual(ans, id(photo)) def test_search_generator(self): - JmModuleConfig.flag_decode_url_when_logging = False + JmModuleConfig.FLAG_DECODE_URL_WHEN_LOGGING = False gen = self.client.search_gen('MANA') for i, page in enumerate(gen):