Merge branch 'maint/black-v23.11' into 'main'
MAINT: Upgrade black to version 23.11 and run it on the codebase

See merge request heka/medkit!236

changelog: MAINT: Upgrade black to version 23.11 and run it on the codebase
ghisvail committed Nov 23, 2023
2 parents f5119ac + 6f945e9 commit e639bd5
Showing 26 changed files with 103 additions and 106 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/lint.yaml
@@ -2,6 +2,9 @@ name: lint

on:
pull_request_target:
push:
branches:
- main
workflow_dispatch:

permissions:
@@ -15,4 +18,4 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: '3.x'
- uses: pre-commit/action@v3.0.0
- uses: pre-commit/action@v3
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -8,7 +8,7 @@ repos:
- id: check-yaml

- repo: https://github.com/psf/black-pre-commit-mirror
rev: '22.8.0'
rev: '23.11.0'
hooks:
- id: black

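
As a quick, hypothetical sanity check (not part of the commit), the locally installed Black can be compared against the 23.11 series pinned for the hook above; a minimal sketch in Python:

import black

# Hypothetical guard: the local Black install should match the 23.11.x
# series pinned for the pre-commit hook above.
assert black.__version__.startswith("23.11"), black.__version__
print(f"black {black.__version__} matches the pinned 23.11 series")
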
3 changes: 1 addition & 2 deletions medkit/audio/preprocessing/power_normalizer.py
@@ -10,8 +10,7 @@


class PowerNormalizer(PreprocessingOperation):
"""Normalization operation setting the RMS power of each audio signal to a target value.
"""
"""Normalization operation setting the RMS power of each audio signal to a target value."""

def __init__(
self,
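
The same docstring reformatting recurs in medkit/io/spacy.py, test_hf_transcriber.py and test_prov_tracer.py below: the closing triple quotes move from their own line onto the end of the one-line summary. A minimal sketch of the before/after shape, using a hypothetical class:

# Shape before this commit: closing quotes on their own line.
class Example:
    """One-line summary whose closing quotes used to sit on a separate line.
    """

# Shape after Black 23.11: closing quotes joined to the summary line.
class Example:
    """One-line summary whose closing quotes used to sit on a separate line."""
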
4 changes: 1 addition & 3 deletions medkit/io/spacy.py
@@ -3,7 +3,6 @@
To install them, use `pip install medkit-lib[spacy]`.
"""


__all__ = ["SpacyInputConverter", "SpacyOutputConverter"]

from typing import List, Optional
@@ -20,8 +19,7 @@


class SpacyInputConverter:
"""Class in charge of converting spacy documents into a collection of TextDocuments.
"""
"""Class in charge of converting spacy documents into a collection of TextDocuments."""

def __init__(
self,
16 changes: 9 additions & 7 deletions medkit/text/context/family_detector.py
@@ -135,14 +135,16 @@ def __init__(
for rule in self.rules
]
self._exclusion_patterns = [
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
(
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
)
if rule.exclusion_regexps
else None
)
if rule.exclusion_regexps
else None
for rule in self.rules
]
self._has_non_unicode_sensitive_rule = any(
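
The same rewrite — wrapping a long conditional expression in its own parentheses — reappears verbatim in hypothesis_detector.py and negation_detector.py below, and in a simpler form in regexp_matcher.py and mtsamples.py. A self-contained sketch of the resulting pattern, using a hypothetical Rule class in place of medkit's detector rules:

import re
from dataclasses import dataclass, field
from typing import List


@dataclass
class Rule:
    # Hypothetical stand-in for medkit's detector rule objects.
    exclusion_regexps: List[str] = field(default_factory=list)
    case_sensitive: bool = False


rules = [Rule(exclusion_regexps=["foo", "bar"]), Rule()]

# Black 23 parenthesizes the whole "x if cond else y" expression when the
# comprehension element spans several lines, instead of letting the
# "if ... else ..." dangle after the re.compile(...) call.
exclusion_patterns = [
    (
        re.compile(
            "|".join(f"(?:{r})" for r in rule.exclusion_regexps),
            flags=0 if rule.case_sensitive else re.IGNORECASE,
        )
        if rule.exclusion_regexps
        else None
    )
    for rule in rules
]

assert exclusion_patterns[0].search("FOO") is not None
assert exclusion_patterns[1] is None
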
16 changes: 9 additions & 7 deletions medkit/text/context/hypothesis_detector.py
@@ -195,14 +195,16 @@ def __init__(
for rule in self.rules
]
self._exclusion_patterns = [
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
(
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
)
if rule.exclusion_regexps
else None
)
if rule.exclusion_regexps
else None
for rule in self.rules
]
self._has_non_unicode_sensitive_rule = any(
16 changes: 9 additions & 7 deletions medkit/text/context/negation_detector.py
@@ -127,14 +127,16 @@ def __init__(
for rule in self.rules
]
self._exclusion_patterns = [
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
(
re.compile(
"|".join(
f"(?:{r})" for r in rule.exclusion_regexps
), # join all exclusions in one pattern
flags=0 if rule.case_sensitive else re.IGNORECASE,
)
if rule.exclusion_regexps
else None
)
if rule.exclusion_regexps
else None
for rule in self.rules
]
self._has_non_unicode_sensitive_rule = any(
1 change: 1 addition & 0 deletions medkit/text/metrics/classification.py
@@ -2,6 +2,7 @@
This module needs extra-dependencies not installed as core dependencies of medkit.
To install them, use `pip install medkit-lib[metrics-text-classification]`.
"""

__all__ = ["TextClassificationEvaluator"]
import logging
from typing import Dict, List, Union
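
The one-line additions here and in irr_utils.py, hf_entity_matcher_trainable.py and hf_translator.py (together with the blank-line deletions in medkit/io/spacy.py above and syntactic_relation_extractor.py below) normalize the layout to exactly one blank line between the module docstring and the first statement. A minimal sketch of the resulting module layout, with hypothetical contents:

"""Hypothetical module docstring.

Exactly one blank line now separates this docstring from the code below.
"""

__all__ = ["answer"]

answer = 42
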
1 change: 1 addition & 0 deletions medkit/text/metrics/irr_utils.py
@@ -1,4 +1,5 @@
"""Metrics to assess inter-annotator agreement"""

__all__ = ["krippendorff_alpha"]
from typing import List, Union
import numpy as np
1 change: 1 addition & 0 deletions medkit/text/ner/hf_entity_matcher_trainable.py
@@ -2,6 +2,7 @@
This module needs extra-dependencies not installed as core dependencies of medkit.
To install them, use `pip install medkit-lib[hf-entity-matcher]`.
"""

__all__ = ["HFEntityMatcherTrainable"]
import logging
from pathlib import Path
11 changes: 7 additions & 4 deletions medkit/text/ner/regexp_matcher.py
@@ -187,11 +187,14 @@ def __init__(
for rule in self.rules
]
self._exclusion_patterns = [
re.compile(
rule.exclusion_regexp, flags=0 if rule.case_sensitive else re.IGNORECASE
(
re.compile(
rule.exclusion_regexp,
flags=0 if rule.case_sensitive else re.IGNORECASE,
)
if rule.exclusion_regexp is not None
else None
)
if rule.exclusion_regexp is not None
else None
for rule in self.rules
]
self._has_non_unicode_sensitive_rule = any(
66 changes: 33 additions & 33 deletions medkit/text/preprocessing/char_rules.py
@@ -11,15 +11,15 @@
#: Rules for ligatures
LIGATURE_RULES = [
("\u00c6", "AE"),
("\u00E6", "ae"),
("\u00e6", "ae"),
("\u0152", "OE"),
("\u0153", "oe"),
]
#: Rules for fraction characters
FRACTION_RULES = [
("\u00BC", "1/4"),
("\u00BD", "1/2"),
("\u00BE", "3/4"),
("\u00bc", "1/4"),
("\u00bd", "1/2"),
("\u00be", "3/4"),
("\u2150", "1/7"),
("\u2151", "1/9"),
("\u2152", "1/10"),
@@ -30,16 +30,16 @@
("\u2157", "3/5"),
("\u2158", "4/5"),
("\u2159", "1/6"),
("\u215A", "5/6"),
("\u215B", "1/8"),
("\u215C", "3/8"),
("\u215D", "5/8"),
("\u215E", "7/8"),
("\u215a", "5/6"),
("\u215b", "1/8"),
("\u215c", "3/8"),
("\u215d", "5/8"),
("\u215e", "7/8"),
("\u2189", "0/3"),
]
#: Rules for non-standard spaces
SPACE_RULES = [
("\u00A0", " "),
("\u00a0", " "),
("\u1680", " "),
("\u2002", " "),
("\u2003", " "),
@@ -49,52 +49,52 @@
("\u2007", " "),
("\u2008", " "),
("\u2009", " "),
("\u200A", " "),
("\u200B", " "),
("\u202F", " "),
("\u205F", " "),
("\u200a", " "),
("\u200b", " "),
("\u202f", " "),
("\u205f", " "),
("\u2420", " "),
("\u3000", " "),
("\u303F", " "),
("\uFEFF", " "),
("\u303f", " "),
("\ufeff", " "),
]

#: Rules for sign chars
SIGN_RULES = [
("\u00A9", ""), # copyright
("\u00AE", ""), # registered
("\u00a9", ""), # copyright
("\u00ae", ""), # registered
("\u2122", ""), # trade
]

#: Rules for dot chars
DOT_RULES = [
# horizontal ellipsis
("\u2026", "..."),
("\u22EF", "..."),
("\u22ef", "..."),
]

#: RegexpReplacer quotation marks: replace double and single quotation marks
QUOTATION_RULES = [
("»", '"'), # normalize double quotation marks
("«", '"'), # replace double quotation marks
("\u201C", '"'),
("\u201D", '"'),
("\u201E", '"'),
("\u201F", '"'),
("\u201c", '"'),
("\u201d", '"'),
("\u201e", '"'),
("\u201f", '"'),
("\u2039", '"'),
("\u203A", '"'),
("\u02F5", '"'),
("\u02F6", '"'),
("\u02DD", '"'),
("\uFF02", '"'),
("\u201A", ""), # single low quotation (remove)
("\u203a", '"'),
("\u02f5", '"'),
("\u02f6", '"'),
("\u02dd", '"'),
("\uff02", '"'),
("\u201a", ""), # single low quotation (remove)
("\u2018", "'"), # left side single quotation
("\u2019", "'"), # right side single quotation
("\u201B", "'"), # single high reverse quotation
("\u02CA", "'"), # grave accent
("\u201b", "'"), # single high reverse quotation
("\u02ca", "'"), # grave accent
("\u0060", "'"),
("\u02CB", "'"), # acute accent
("\u00B4", "'"),
("\u02cb", "'"), # acute accent
("\u00b4", "'"),
]

#: All pre-defined rules for CharReplacer
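
The char_rules.py hunks above (and the matching lines in test_mtsamples.py below) only lowercase the hex digits inside \u escape sequences; the replacement rules themselves are unchanged. A quick check, runnable without medkit:

# Hex digits in \u escapes are case-insensitive, so lowercasing them is
# purely cosmetic: both spellings denote the same character.
assert "\u00E6" == "\u00e6" == "æ"
assert "\u215A" == "\u215a" == "⅚"
assert "\u202F" == "\u202f" == "\N{NARROW NO-BREAK SPACE}"
print("escape-case normalization does not change any replacement rule")
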
1 change: 0 additions & 1 deletion medkit/text/relations/syntactic_relation_extractor.py
@@ -3,7 +3,6 @@
To install them, use `pip install medkit-lib[syntactic-relation-extractor]`.
"""


__all__ = ["SyntacticRelationExtractor"]
import logging
from pathlib import Path
1 change: 1 addition & 0 deletions medkit/text/translation/hf_translator.py
@@ -2,6 +2,7 @@
This module needs extra-dependencies not installed as core dependencies of medkit.
To install them, use `pip install medkit-lib[hf-translator]`.
"""

from __future__ import annotations

__all__ = ["HFTranslator"]
8 changes: 5 additions & 3 deletions medkit/tools/mtsamples.py
@@ -81,9 +81,11 @@ def load_mtsamples(

return [
TextDocument(
text=sample["transcription_translated"]
if translated
else sample["transcription"],
text=(
sample["transcription_translated"]
if translated
else sample["transcription"]
),
metadata=dict(
id=sample["id"] if translated else sample[""],
description=sample["description"],
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -168,7 +168,7 @@ build-backend = "poetry_dynamic_versioning.backend"

[tool.black]
line-length = 88
preview = true
target-version = ["py38"]
extend-exclude = ".venv"

[tool.flake8]
4 changes: 2 additions & 2 deletions tests/large/test_mtsamples.py
@@ -48,7 +48,7 @@ def test_mt_samples_without_pipeline(caplog):
(r"(?<=[0-9]\s)°", " degrés"),
(r"(?<=[0-9])°", " degrés"),
("\u00c6", "AE"), # ascii
("\u00E6", "ae"), # ascii
("\u00e6", "ae"), # ascii
("\u0152", "OE"), # ascii
("\u0153", "oe"), # ascii
(r"«|»", '"'),
@@ -97,7 +97,7 @@ def test_mt_samples_with_doc_pipeline():
(r"(?<=[0-9]\s)°", " degrés"),
(r"(?<=[0-9])°", " degrés"),
("\u00c6", "AE"), # ascii
("\u00E6", "ae"), # ascii
("\u00e6", "ae"), # ascii
("\u0152", "OE"), # ascii
("\u0153", "oe"), # ascii
(r"«|»", '"'),
3 changes: 1 addition & 2 deletions tests/unit/audio/transcription/test_hf_transcriber.py
@@ -68,8 +68,7 @@ def test_basic():


def test_no_formatting():
"""No reformatting of transcribed text (raw text as returned by transformers pipeline)
"""
"""No reformatting of transcribed text (raw text as returned by transformers pipeline)"""
transcriber = HFTranscriber(
model="mock-model",
output_label="transcribed_text",
9 changes: 3 additions & 6 deletions tests/unit/core/prov_tracer/test_prov_tracer.py
@@ -39,8 +39,7 @@ def test_multiple_items():


def test_multiple_items_with_sources():
"""Several items generated by an operation, then used as input to another operation
"""
"""Several items generated by an operation, then used as input to another operation"""
tracer = ProvTracer()
# generate 2 items then prefix them
generator = Generator(tracer)
Expand All @@ -66,8 +65,7 @@ def test_multiple_items_with_sources():


def test_intermediate_operation():
"""Input items passed to an intermediate operation, then intermediate items passed to another operatio
"""
"""Input items passed to an intermediate operation, then intermediate items passed to another operatio"""
tracer = ProvTracer()
# generate 2 items and prefix them twice with 2 different operations
generator = Generator(tracer)
@@ -158,8 +156,7 @@ def test_multiple_source():


def test_partial_provenance():
"""Data items generated from input items for which no provenance info is available
"""
"""Data items generated from input items for which no provenance info is available"""
tracer = ProvTracer()
# generate 2 items then split them in 2 then merge them
# provenance info will be provided only by Merger operation