diff --git a/.codecov.yml b/.codecov.yml new file mode 100644 index 00000000..f307895e --- /dev/null +++ b/.codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + target: 88% + threshold: null + patch: false + changes: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ae4899c..77abeaf4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03) + +### Changed +- Moderating the logging impact (since 2.0.8) for specific environments (PR #147) + +### Fixed +- Wrong logging level applied when setting kwarg `explain` to True (PR #146) + ## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24) ### Changed - Improvement over Vietnamese detection (PR #126) diff --git a/charset_normalizer/api.py b/charset_normalizer/api.py index 80e608b4..d9e5866a 100644 --- a/charset_normalizer/api.py +++ b/charset_normalizer/api.py @@ -68,20 +68,21 @@ def from_bytes( ) if explain: + previous_logger_level = logger.level # type: int logger.addHandler(explain_handler) + logger.setLevel(logging.DEBUG) length = len(sequences) # type: int if length == 0: - logger.warning( - "Given content is empty, stopping the process very early, returning empty utf_8 str match" - ) + logger.warning("Encoding detection on empty bytes, assuming utf_8 intention.") if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level or logging.WARNING) return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")]) if cp_isolation is not None: - logger.warning( + logger.debug( "cp_isolation is set. use this flag for debugging purpose. " "limited list of encoding allowed : %s.", ", ".join(cp_isolation), @@ -91,7 +92,7 @@ def from_bytes( cp_isolation = [] if cp_exclusion is not None: - logger.warning( + logger.debug( "cp_exclusion is set. use this flag for debugging purpose. " "limited list of encoding excluded : %s.", ", ".join(cp_exclusion), @@ -101,7 +102,7 @@ def from_bytes( cp_exclusion = [] if length <= (chunk_size * steps): - logger.warning( + logger.debug( "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.", steps, chunk_size, @@ -187,7 +188,7 @@ def from_bytes( ) # type: bool if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available: - logger.info( + logger.debug( "Encoding %s wont be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.", encoding_iana, ) @@ -218,7 +219,7 @@ def from_bytes( ) except (UnicodeDecodeError, LookupError) as e: if not isinstance(e, LookupError): - logger.warning( + logger.debug( "Code page %s does not fit given bytes sequence at ALL. %s", encoding_iana, str(e), @@ -234,7 +235,7 @@ def from_bytes( break if similar_soft_failure_test: - logger.warning( + logger.info( "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!", encoding_iana, encoding_soft_failed, @@ -329,7 +330,7 @@ def from_bytes( ) # type: float if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up: tested_but_soft_failure.append(encoding_iana) - logger.warning( + logger.info( "%s was excluded because of initial chaos probing. Gave up %i time(s). " "Computed mean chaos is %f %%.", encoding_iana, @@ -419,6 +420,7 @@ def from_bytes( ) if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return CharsetMatches([results[encoding_iana]]) if encoding_iana == sig_encoding: @@ -428,6 +430,7 @@ def from_bytes( ) if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return CharsetMatches([results[encoding_iana]]) if len(results) == 0: @@ -458,6 +461,7 @@ def from_bytes( if explain: logger.removeHandler(explain_handler) + logger.setLevel(previous_logger_level) return results diff --git a/charset_normalizer/version.py b/charset_normalizer/version.py index d48da8ab..a1dc4708 100644 --- a/charset_normalizer/version.py +++ b/charset_normalizer/version.py @@ -2,5 +2,5 @@ Expose version """ -__version__ = "2.0.8" +__version__ = "2.0.9" VERSION = __version__.split(".") diff --git a/docs/conf.py b/docs/conf.py index 5091c05a..6fde29c2 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -74,7 +74,7 @@ # The short X.Y version. version = '2.0' # The full version, including alpha/beta/rc tags. -release = '2.0.8' +release = '2.0.9' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/tests/test_logging.py b/tests/test_logging.py index c7ff069c..80d32112 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -10,7 +10,7 @@ def setup(self): self.logger = logging.getLogger("charset_normalizer") self.logger.handlers.clear() self.logger.addHandler(logging.NullHandler()) - self.logger.level = None + self.logger.level = logging.WARNING def test_explain_true_behavior(self, caplog): test_sequence = b'This is a test sequence of bytes that should be sufficient'