refactor: type annotations

akshay-sarbhukan-aera · May 10, 2021 · 62f8e3f · 62f8e3f
1 parent 615db5a
commit 62f8e3f
Show file tree

Hide file tree

Showing 94 changed files with 538 additions and 380 deletions.
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
@@ -37,9 +37,11 @@ jobs:
     - name: Install
       run: make install
 
+    - name: Lint
+      run: make lint
+
     - name: Make distribution
       run: |
-        check-manifest
         python setup.py sdist bdist_wheel
         twine check dist/*
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -14,7 +14,7 @@ repos:
     - id: nbqa-pyupgrade
       args: [ --nbqa-mutate, --py36-plus ]
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.14.0
+    rev: v2.15.0
     hooks:
     -   id: pyupgrade
         args: ['--py36-plus','--exit-zero-even-if-changed']
@@ -39,6 +39,24 @@ repos:
           - flake8-simplify
           - flake8-eradicate
           - flake8-print
+-   repo: https://github.com/PyCQA/flake8
+    rev: "3.9.2"
+    hooks:
+    -   id: flake8
+        name: flake8-annotations
+        args: [ "--select=ANN001,ANN201,ANN202,ANN205,ANN206,ANN301" ]
+        additional_dependencies:
+          - flake8-annotations
+#          - flake8-annotations-complexity
+#          - flake8-type-checking
+        exclude: |
+          (?x)(
+            ^tests/|
+            ^docsrc/|
+            ^src/pandas_profiling/utils/common.py|
+            ^src/pandas_profiling/model/imghdr_patch.py
+          )
+
 -   repo: https://github.com/asottile/blacken-docs
     rev: v1.10.0
     hooks:

diff --git a/Makefile b/Makefile
@@ -15,7 +15,6 @@ test:
 	pytest tests/unit/
 	pytest tests/issues/
 	pytest --nbval tests/notebooks/
-	flake8 . --select=E9,F63,F7,F82 --show-source --statistics
 	pandas_profiling -h
 
 test_cov:
@@ -27,13 +26,6 @@ test_cov:
 examples:
 	find ./examples -maxdepth 2 -type f -name "*.py" -execdir python {} \;
 
-pypi_package:
-	make install
-	check-manifest
-	python setup.py sdist bdist_wheel
-	twine check dist/*
-	twine upload --skip-existing dist/*
-
 install:
 	pip install -e .[notebook]
 

diff --git a/make.bat b/make.bat
@@ -17,7 +17,6 @@ IF "%1" == "test" (
     pytest tests/unit/
     pytest tests/issues/
     pytest --nbval tests/notebooks/
-    flake8 . --select=E9,F63,F7,F82 --show-source --statistics
     ECHO "Tests completed!"
     GOTO end
 )
@@ -37,16 +36,6 @@ IF "%1" == "examples" (
     GOTO end
 )
 
-IF "%1" == "pypi_package" (
-    make install
-    check-manifest
-    python setup.py sdist bdist_wheel
-    twine check dist/*
-    twine upload --skip-existing dist/*
-    ECHO "PyPi package completed"
-    GOTO end
-)
-
 IF "%1" == "lint" (
     pre-commit run --all-files
     GOTO end

diff --git a/mypy.ini b/mypy.ini
diff --git a/requirements-test.txt b/requirements-test.txt
@@ -5,7 +5,5 @@ pytest-cov
 pytest-benchmark~=3.4.1
 nbval
 pyarrow
-flake8
-check-manifest>=0.41
 twine>=3.1.1
 kaggle
diff --git a/src/pandas_profiling/config.py b/src/pandas_profiling/config.py
@@ -1,11 +1,11 @@
 """Configuration for the package."""
 from enum import Enum
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, BaseSettings, Field
 
 
-def _merge_dictionaries(dict1, dict2):
+def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
     """
     Recursive merge dictionaries.
 
@@ -290,13 +290,13 @@ class Settings(BaseSettings):
     html: Html = Html()
     notebook = Notebook()
 
-    def update(self, updates):
+    def update(self, updates: dict) -> "Settings":
         update = _merge_dictionaries(self.dict(), updates)
         return self.parse_obj(self.copy(update=update))
 
 
 class Config:
-    arg_groups = {
+    arg_groups: Dict[str, Any] = {
         "sensitive": {
             "samples": None,
             "duplicates": None,
@@ -361,12 +361,12 @@ class Config:
     }
 
     @staticmethod
-    def get_arg_groups(key):
+    def get_arg_groups(key: str) -> dict:
         kwargs = Config.arg_groups[key]
         return Config.shorthands(kwargs)
 
     @staticmethod
-    def shorthands(kwargs):
+    def shorthands(kwargs: dict) -> dict:
         for key, value in list(kwargs.items()):
             if value is None and key in Config._shorthands:
                 kwargs[key] = Config._shorthands[key]

diff --git a/src/pandas_profiling/controller/console.py b/src/pandas_profiling/controller/console.py
@@ -1,13 +1,13 @@
 """This file add the console interface to the package."""
 import argparse
 from pathlib import Path
-from typing import Union
+from typing import Any, List, Optional
 
 from pandas_profiling.__init__ import ProfileReport, __version__
 from pandas_profiling.utils.dataframe import read_pandas
 
 
-def parse_args(args: Union[list, None] = None) -> argparse.Namespace:
+def parse_args(args: Optional[List[Any]] = None) -> argparse.Namespace:
     """Parse the command line arguments for the `pandas_profiling` binary.
 
     Args:
@@ -96,16 +96,16 @@ def parse_args(args: Union[list, None] = None) -> argparse.Namespace:
     return parser.parse_args(args)
 
 
-def main(args=None) -> None:
+def main(args: Optional[List[Any]] = None) -> None:
     """Run the `pandas_profiling` package.
 
     Args:
       args: Arguments for the programme (Default value=None).
     """
 
     # Parse the arguments
-    args = parse_args(args)
-    kwargs = vars(args)
+    parsed_args = parse_args(args)
+    kwargs = vars(parsed_args)
 
     input_file = Path(kwargs.pop("input_file"))
     output_file = kwargs.pop("output_file")

diff --git a/src/pandas_profiling/controller/pandas_decorator.py b/src/pandas_profiling/controller/pandas_decorator.py
@@ -4,7 +4,7 @@
 from pandas_profiling.profile_report import ProfileReport
 
 
-def profile_report(df, **kwargs) -> ProfileReport:
+def profile_report(df: DataFrame, **kwargs) -> ProfileReport:
     """Profile a DataFrame.
 
     Args:

diff --git a/src/pandas_profiling/expectations_report.py b/src/pandas_profiling/expectations_report.py
@@ -1,12 +1,18 @@
+from typing import Any, Optional
+
+import pandas as pd
+from visions import VisionsTypeset
+
 from pandas_profiling.config import Settings
 from pandas_profiling.model import expectation_algorithms
 from pandas_profiling.model.handler import Handler
 from pandas_profiling.utils.dataframe import slugify
 
 
-# Default handler
 class ExpectationHandler(Handler):
-    def __init__(self, typeset, *args, **kwargs):
+    """Default handler"""
+
+    def __init__(self, typeset: VisionsTypeset, *args, **kwargs):
         mapping = {
             "Unsupported": [expectation_algorithms.generic_expectations],
             "Categorical": [expectation_algorithms.categorical_expectations],
@@ -23,18 +29,21 @@ def __init__(self, typeset, *args, **kwargs):
 
 class ExpectationsReport:
     config: Settings
-    typeset = None
-    df = None
+    df: Optional[pd.DataFrame] = None
+
+    @property
+    def typeset(self) -> Optional[VisionsTypeset]:
+        return None
 
     def to_expectation_suite(
         self,
-        suite_name=None,
-        data_context=None,
-        save_suite=True,
-        run_validation=True,
-        build_data_docs=True,
-        handler=None,
-    ):
+        suite_name: Optional[str] = None,
+        data_context: Optional[Any] = None,
+        save_suite: bool = True,
+        run_validation: bool = True,
+        build_data_docs: bool = True,
+        handler: Optional[Handler] = None,
+    ) -> Any:
         """
         All parameters default to True to make it easier to access the full functionality of Great Expectations out of
         the box.
@@ -77,7 +86,7 @@ def to_expectation_suite(
         batch = ge.dataset.PandasDataset(self.df, expectation_suite=suite)
 
         # Obtain the profiling summary
-        summary = self.get_description()
+        summary = self.get_description()  # type: ignore
 
         # Dispatch to expectations per semantic variable type
         for name, variable_summary in summary["variables"].items():

diff --git a/src/pandas_profiling/model/correlations.py b/src/pandas_profiling/model/correlations.py
@@ -13,31 +13,41 @@
 
 class Correlation:
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         raise NotImplementedError()
 
 
 class Spearman(Correlation):
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         return df.corr(method="spearman")
 
 
 class Pearson(Correlation):
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         return df.corr(method="pearson")
 
 
 class Kendall(Correlation):
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         return df.corr(method="kendall")
 
 
 class Cramers(Correlation):
     @staticmethod
-    def _cramers_corrected_stat(confusion_matrix, correction: bool) -> float:
+    def _cramers_corrected_stat(
+        confusion_matrix: pd.DataFrame, correction: bool
+    ) -> float:
         """Calculate the Cramer's V corrected stat for two variables.
 
         Args:
@@ -66,7 +76,9 @@ def _cramers_corrected_stat(confusion_matrix, correction: bool) -> float:
         return corr
 
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         threshold = config.categorical_maximum_correlation_distinct
 
         categoricals = {
@@ -98,7 +110,9 @@ def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
 
 class PhiK(Correlation):
     @staticmethod
-    def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
+    def compute(
+        config: Settings, df: pd.DataFrame, summary: dict
+    ) -> Optional[pd.DataFrame]:
         threshold = config.categorical_maximum_correlation_distinct
         intcols = {
             key
@@ -128,7 +142,7 @@ def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
         return correlation
 
 
-def warn_correlation(correlation_name: str, error):
+def warn_correlation(correlation_name: str, error: str) -> None:
     warnings.warn(
         f"""There was an attempt to calculate the {correlation_name} correlation, but this failed.
 To hide this warning, disable the calculation
@@ -171,7 +185,7 @@ def calculate_correlation(
             config, df, summary
         )
     except (ValueError, AssertionError, TypeError, DataError, IndexError) as e:
-        warn_correlation(correlation_name, e)
+        warn_correlation(correlation_name, str(e))
 
     if correlation is not None and len(correlation) <= 0:
         correlation = None