Skip to content

Commit

Permalink
refactor: type annotations
Browse files: browse the repository at this point in the history
  • Loading branch information
sbrugman committed May 10, 2021
1 parent 615db5a commit 62f8e3f
Show file tree
Hide file tree
Showing 94 changed files with 538 additions and 380 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ jobs:
- name: Install
run: make install

- name: Lint
run: make lint

- name: Make distribution
run: |
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
Expand Down
20 changes: 19 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
- id: nbqa-pyupgrade
args: [ --nbqa-mutate, --py36-plus ]
- repo: https://github.com/asottile/pyupgrade
rev: v2.14.0
rev: v2.15.0
hooks:
- id: pyupgrade
args: ['--py36-plus','--exit-zero-even-if-changed']
Expand All @@ -39,6 +39,24 @@ repos:
- flake8-simplify
- flake8-eradicate
- flake8-print
- repo: https://github.com/PyCQA/flake8
rev: "3.9.2"
hooks:
- id: flake8
name: flake8-annotations
args: [ "--select=ANN001,ANN201,ANN202,ANN205,ANN206,ANN301" ]
additional_dependencies:
- flake8-annotations
# - flake8-annotations-complexity
# - flake8-type-checking
exclude: |
(?x)(
^tests/|
^docsrc/|
^src/pandas_profiling/utils/common.py|
^src/pandas_profiling/model/imghdr_patch.py
)
- repo: https://github.com/asottile/blacken-docs
rev: v1.10.0
hooks:
Expand Down
8 changes: 0 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ test:
pytest tests/unit/
pytest tests/issues/
pytest --nbval tests/notebooks/
flake8 . --select=E9,F63,F7,F82 --show-source --statistics
pandas_profiling -h

test_cov:
Expand All @@ -27,13 +26,6 @@ test_cov:
examples:
find ./examples -maxdepth 2 -type f -name "*.py" -execdir python {} \;

pypi_package:
make install
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
twine upload --skip-existing dist/*

install:
pip install -e .[notebook]

Expand Down
11 changes: 0 additions & 11 deletions make.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ IF "%1" == "test" (
pytest tests/unit/
pytest tests/issues/
pytest --nbval tests/notebooks/
flake8 . --select=E9,F63,F7,F82 --show-source --statistics
ECHO "Tests completed!"
GOTO end
)
Expand All @@ -37,16 +36,6 @@ IF "%1" == "examples" (
GOTO end
)

IF "%1" == "pypi_package" (
make install
check-manifest
python setup.py sdist bdist_wheel
twine check dist/*
twine upload --skip-existing dist/*
ECHO "PyPi package completed"
GOTO end
)

IF "%1" == "lint" (
pre-commit run --all-files
GOTO end
Expand Down
28 changes: 0 additions & 28 deletions mypy.ini

This file was deleted.

2 changes: 0 additions & 2 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,5 @@ pytest-cov
pytest-benchmark~=3.4.1
nbval
pyarrow
flake8
check-manifest>=0.41
twine>=3.1.1
kaggle
12 changes: 6 additions & 6 deletions src/pandas_profiling/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Configuration for the package."""
from enum import Enum
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, BaseSettings, Field


def _merge_dictionaries(dict1, dict2):
def _merge_dictionaries(dict1: dict, dict2: dict) -> dict:
"""
Recursive merge dictionaries.
Expand Down Expand Up @@ -290,13 +290,13 @@ class Settings(BaseSettings):
html: Html = Html()
notebook = Notebook()

def update(self, updates):
def update(self, updates: dict) -> "Settings":
update = _merge_dictionaries(self.dict(), updates)
return self.parse_obj(self.copy(update=update))


class Config:
arg_groups = {
arg_groups: Dict[str, Any] = {
"sensitive": {
"samples": None,
"duplicates": None,
Expand Down Expand Up @@ -361,12 +361,12 @@ class Config:
}

@staticmethod
def get_arg_groups(key):
def get_arg_groups(key: str) -> dict:
kwargs = Config.arg_groups[key]
return Config.shorthands(kwargs)

@staticmethod
def shorthands(kwargs):
def shorthands(kwargs: dict) -> dict:
for key, value in list(kwargs.items()):
if value is None and key in Config._shorthands:
kwargs[key] = Config._shorthands[key]
Expand Down
10 changes: 5 additions & 5 deletions src/pandas_profiling/controller/console.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
"""This file add the console interface to the package."""
import argparse
from pathlib import Path
from typing import Union
from typing import Any, List, Optional

from pandas_profiling.__init__ import ProfileReport, __version__
from pandas_profiling.utils.dataframe import read_pandas


def parse_args(args: Union[list, None] = None) -> argparse.Namespace:
def parse_args(args: Optional[List[Any]] = None) -> argparse.Namespace:
"""Parse the command line arguments for the `pandas_profiling` binary.
Args:
Expand Down Expand Up @@ -96,16 +96,16 @@ def parse_args(args: Union[list, None] = None) -> argparse.Namespace:
return parser.parse_args(args)


def main(args=None) -> None:
def main(args: Optional[List[Any]] = None) -> None:
"""Run the `pandas_profiling` package.
Args:
args: Arguments for the programme (Default value=None).
"""

# Parse the arguments
args = parse_args(args)
kwargs = vars(args)
parsed_args = parse_args(args)
kwargs = vars(parsed_args)

input_file = Path(kwargs.pop("input_file"))
output_file = kwargs.pop("output_file")
Expand Down
2 changes: 1 addition & 1 deletion src/pandas_profiling/controller/pandas_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pandas_profiling.profile_report import ProfileReport


def profile_report(df, **kwargs) -> ProfileReport:
def profile_report(df: DataFrame, **kwargs) -> ProfileReport:
"""Profile a DataFrame.
Args:
Expand Down
33 changes: 21 additions & 12 deletions src/pandas_profiling/expectations_report.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from typing import Any, Optional

import pandas as pd
from visions import VisionsTypeset

from pandas_profiling.config import Settings
from pandas_profiling.model import expectation_algorithms
from pandas_profiling.model.handler import Handler
from pandas_profiling.utils.dataframe import slugify


# Default handler
class ExpectationHandler(Handler):
def __init__(self, typeset, *args, **kwargs):
"""Default handler"""

def __init__(self, typeset: VisionsTypeset, *args, **kwargs):
mapping = {
"Unsupported": [expectation_algorithms.generic_expectations],
"Categorical": [expectation_algorithms.categorical_expectations],
Expand All @@ -23,18 +29,21 @@ def __init__(self, typeset, *args, **kwargs):

class ExpectationsReport:
config: Settings
typeset = None
df = None
df: Optional[pd.DataFrame] = None

@property
def typeset(self) -> Optional[VisionsTypeset]:
return None

def to_expectation_suite(
self,
suite_name=None,
data_context=None,
save_suite=True,
run_validation=True,
build_data_docs=True,
handler=None,
):
suite_name: Optional[str] = None,
data_context: Optional[Any] = None,
save_suite: bool = True,
run_validation: bool = True,
build_data_docs: bool = True,
handler: Optional[Handler] = None,
) -> Any:
"""
All parameters default to True to make it easier to access the full functionality of Great Expectations out of
the box.
Expand Down Expand Up @@ -77,7 +86,7 @@ def to_expectation_suite(
batch = ge.dataset.PandasDataset(self.df, expectation_suite=suite)

# Obtain the profiling summary
summary = self.get_description()
summary = self.get_description() # type: ignore

# Dispatch to expectations per semantic variable type
for name, variable_summary in summary["variables"].items():
Expand Down
32 changes: 23 additions & 9 deletions src/pandas_profiling/model/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,31 +13,41 @@

class Correlation:
@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
raise NotImplementedError()


class Spearman(Correlation):
@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
return df.corr(method="spearman")


class Pearson(Correlation):
@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
return df.corr(method="pearson")


class Kendall(Correlation):
@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
return df.corr(method="kendall")


class Cramers(Correlation):
@staticmethod
def _cramers_corrected_stat(confusion_matrix, correction: bool) -> float:
def _cramers_corrected_stat(
confusion_matrix: pd.DataFrame, correction: bool
) -> float:
"""Calculate the Cramer's V corrected stat for two variables.
Args:
Expand Down Expand Up @@ -66,7 +76,9 @@ def _cramers_corrected_stat(confusion_matrix, correction: bool) -> float:
return corr

@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
threshold = config.categorical_maximum_correlation_distinct

categoricals = {
Expand Down Expand Up @@ -98,7 +110,9 @@ def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:

class PhiK(Correlation):
@staticmethod
def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
def compute(
config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
threshold = config.categorical_maximum_correlation_distinct
intcols = {
key
Expand Down Expand Up @@ -128,7 +142,7 @@ def compute(config: Settings, df, summary) -> Optional[pd.DataFrame]:
return correlation


def warn_correlation(correlation_name: str, error):
def warn_correlation(correlation_name: str, error: str) -> None:
warnings.warn(
f"""There was an attempt to calculate the {correlation_name} correlation, but this failed.
To hide this warning, disable the calculation
Expand Down Expand Up @@ -171,7 +185,7 @@ def calculate_correlation(
config, df, summary
)
except (ValueError, AssertionError, TypeError, DataError, IndexError) as e:
warn_correlation(correlation_name, e)
warn_correlation(correlation_name, str(e))

if correlation is not None and len(correlation) <= 0:
correlation = None
Expand Down
Loading

0 comments on commit 62f8e3f

Please sign in to comment.