diff --git a/Makefile b/Makefile index d46145d8..6f334b0e 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,6 @@ clean-test: ## remove test artifacts .PHONY: clean clean: clean-build clean-pyc clean-test clean-coverage clean-docs ## remove all build, test, coverage, docs and Python artifacts - # INSTALL TARGETS .PHONY: install @@ -82,23 +81,16 @@ install-test: clean-build clean-pyc ## install the package and test dependencies install-develop: clean-build clean-pyc ## install the package in editable mode and dependencies for development pip install -e .[dev] - # LINT TARGETS .PHONY: lint -lint: ## check style with flake8 and isort +lint: invoke lint -lint-docs: ## check docs formatting with doc8 and pydocstyle - doc8 . docs/ - pydocstyle copulas/ - .PHONY: fix-lint -fix-lint: ## fix lint issues using autoflake, autopep8, and isort - find copulas tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive copulas tests - isort --apply --atomic copulas tests - +fix-lint: + ruff check --fix . + ruff format . # TEST TARGETS diff --git a/copulas/__init__.py b/copulas/__init__.py index 95752d55..c975f55f 100644 --- a/copulas/__init__.py +++ b/copulas/__init__.py @@ -86,7 +86,8 @@ def validate_random_state(random_state): else: raise TypeError( f'`random_state` {random_state} expected to be an int ' - 'or `np.random.RandomState` object.') + 'or `np.random.RandomState` object.' + ) def get_instance(obj, **kwargs): @@ -192,8 +193,7 @@ def decorated(self, X, *args, **kwargs): if len(X.shape) == 2: return np.fromiter( - (function(self, *x, *args, **kwargs) for x in X), - np.dtype('float64') + (function(self, *x, *args, **kwargs) for x in X), np.dtype('float64') ) else: @@ -243,7 +243,6 @@ def check_valid_values(function): """ def decorated(self, X, *args, **kwargs): - if isinstance(X, pd.DataFrame): W = X.to_numpy() diff --git a/copulas/bivariate/__init__.py b/copulas/bivariate/__init__.py index dafbc821..9f869bef 100644 --- a/copulas/bivariate/__init__.py +++ b/copulas/bivariate/__init__.py @@ -47,7 +47,6 @@ def _compute_empirical(X): right = sum(np.logical_and(U >= base[k], V >= base[k])) / N if left > 0: - z_left.append(base[k]) L.append(left / base[k] ** 2) @@ -151,7 +150,8 @@ def select_copula(X): left_tail, empirical_left_aut, right_tail, empirical_right_aut = _compute_empirical(X) candidate_left_auts, candidate_right_auts = _compute_candidates( - copula_candidates, left_tail, right_tail) + copula_candidates, left_tail, right_tail + ) empirical_aut = np.concatenate((empirical_left_aut, empirical_right_aut)) candidate_auts = [ diff --git a/copulas/bivariate/base.py b/copulas/bivariate/base.py index b39095d1..32a37539 100644 --- a/copulas/bivariate/base.py +++ b/copulas/bivariate/base.py @@ -96,7 +96,7 @@ def __new__(cls, *args, **kwargs): return super(Bivariate, cls).__new__(cls) if not isinstance(copula_type, CopulaTypes): - if (isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__): + if isinstance(copula_type, str) and copula_type.upper() in CopulaTypes.__members__: copula_type = CopulaTypes[copula_type.upper()] else: raise ValueError(f'Invalid copula type {copula_type}') @@ -192,11 +192,7 @@ def to_dict(self): dict: Parameters of the copula. """ - return { - 'copula_type': self.copula_type.name, - 'theta': self.theta, - 'tau': self.tau - } + return {'copula_type': self.copula_type.name, 'theta': self.theta, 'tau': self.tau} @classmethod def from_dict(cls, copula_dict): @@ -297,6 +293,7 @@ def percent_point(self, y, V): self.check_fit() result = [] for _y, _v in zip(y, V): + def f(u): return self.partial_derivative_scalar(u, _v) - _y @@ -330,7 +327,7 @@ def partial_derivative(self, X): np.ndarray """ - delta = (-2 * (X[:, 1] > 0.5) + 1) + delta = -2 * (X[:, 1] > 0.5) + 1 delta = 0.0001 * delta X_prime = X.copy() X_prime[:, 1] += delta @@ -411,10 +408,11 @@ def select_copula(cls, X): """ from copulas.bivariate import select_copula # noqa + warnings.warn( '`Bivariate.select_copula` has been deprecated and will be removed in a later ' 'release. Please use `copulas.bivariate.select_copula` instead', - DeprecationWarning + DeprecationWarning, ) return select_copula(X) diff --git a/copulas/bivariate/clayton.py b/copulas/bivariate/clayton.py index 1dd2aae8..c8ca6c67 100644 --- a/copulas/bivariate/clayton.py +++ b/copulas/bivariate/clayton.py @@ -84,9 +84,10 @@ def cumulative_distribution(self, X): cdfs = [ np.power( np.power(U[i], -self.theta) + np.power(V[i], -self.theta) - 1, - -1.0 / self.theta + -1.0 / self.theta, ) - if (U[i] > 0 and V[i] > 0) else 0 + if (U[i] > 0 and V[i] > 0) + else 0 for i in range(len(U)) ] diff --git a/copulas/bivariate/frank.py b/copulas/bivariate/frank.py index c9557cad..64edee05 100644 --- a/copulas/bivariate/frank.py +++ b/copulas/bivariate/frank.py @@ -162,6 +162,7 @@ def compute_theta(self): def _tau_to_theta(self, alpha): """Relationship between tau and theta as a solvable equation.""" + def debye(t): return t / (np.exp(t) - 1) diff --git a/copulas/datasets.py b/copulas/datasets.py index 98f16921..d47b7870 100644 --- a/copulas/datasets.py +++ b/copulas/datasets.py @@ -33,10 +33,7 @@ def sample_bivariate_age_income(size=1000, seed=42): income += np.random.normal(loc=np.log(age) / 100, scale=10, size=size) income[np.random.randint(0, 10, size=size) == 0] /= 1000 - return pd.DataFrame({ - 'age': age, - 'income': income - }) + return pd.DataFrame({'age': age, 'income': income}) def sample_trivariate_xyz(size=1000, seed=42): @@ -61,11 +58,7 @@ def sample_trivariate_xyz(size=1000, seed=42): with set_random_state(validate_random_state(seed), _dummy_fn): x = stats.beta.rvs(a=0.1, b=0.1, size=size) y = stats.beta.rvs(a=0.1, b=0.5, size=size) - return pd.DataFrame({ - 'x': x, - 'y': y, - 'z': np.random.normal(size=size) + y * 10 - }) + return pd.DataFrame({'x': x, 'y': y, 'z': np.random.normal(size=size) + y * 10}) def sample_univariate_bernoulli(size=1000, seed=42): diff --git a/copulas/multivariate/__init__.py b/copulas/multivariate/__init__.py index fa5083b2..1ee9e5e5 100644 --- a/copulas/multivariate/__init__.py +++ b/copulas/multivariate/__init__.py @@ -5,10 +5,4 @@ from copulas.multivariate.tree import Tree, TreeTypes from copulas.multivariate.vine import VineCopula -__all__ = ( - 'Multivariate', - 'GaussianMultivariate', - 'VineCopula', - 'Tree', - 'TreeTypes' -) +__all__ = ('Multivariate', 'GaussianMultivariate', 'VineCopula', 'Tree', 'TreeTypes') diff --git a/copulas/multivariate/gaussian.py b/copulas/multivariate/gaussian.py index 4c1fd414..2c0437be 100644 --- a/copulas/multivariate/gaussian.py +++ b/copulas/multivariate/gaussian.py @@ -8,8 +8,14 @@ from scipy import stats from copulas import ( - EPSILON, check_valid_values, get_instance, get_qualified_name, random_state, store_args, - validate_random_state) + EPSILON, + check_valid_values, + get_instance, + get_qualified_name, + random_state, + store_args, + validate_random_state, +) from copulas.multivariate.base import Multivariate from copulas.univariate import GaussianUnivariate, Univariate @@ -149,8 +155,7 @@ def probability_density(self, X): self.check_fit() transformed = self._transform_to_normal(X) - return stats.multivariate_normal.pdf( - transformed, cov=self.correlation, allow_singular=True) + return stats.multivariate_normal.pdf(transformed, cov=self.correlation, allow_singular=True) def cumulative_distribution(self, X): """Compute the cumulative distribution value for each point in X. diff --git a/copulas/multivariate/tree.py b/copulas/multivariate/tree.py index 62b1a1b6..70ee7d4e 100644 --- a/copulas/multivariate/tree.py +++ b/copulas/multivariate/tree.py @@ -131,7 +131,7 @@ def get_tau_matrix(self): left_parent, right_parent = edge.parents left_u, right_u = Edge.get_conditional_uni(left_parent, right_parent) - tau[i, j], pvalue = scipy.stats.kendalltau(left_u, right_u) + tau[i, j], _pvalue = scipy.stats.kendalltau(left_u, right_u) return tau @@ -212,8 +212,7 @@ def __str__(self): """Produce printable representation of the class.""" template = 'L:{} R:{} D:{} Copula:{} Theta:{}' return '\n'.join([ - template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) - for edge in self.edges + template.format(edge.L, edge.R, edge.D, edge.name, edge.theta) for edge in self.edges ]) def _serialize_previous_tree(self): @@ -237,11 +236,7 @@ def to_dict(self): Parameters of this Tree. """ fitted = self.fitted - result = { - 'tree_type': self.tree_type, - 'type': get_qualified_name(self), - 'fitted': fitted - } + result = {'tree_type': self.tree_type, 'type': get_qualified_name(self), 'fitted': fitted} if not fitted: return result @@ -451,7 +446,7 @@ def get_tree(tree_type): Instance of a Tree of the specified type. """ if not isinstance(tree_type, TreeTypes): - if (isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__): + if isinstance(tree_type, str) and tree_type.upper() in TreeTypes.__members__: tree_type = TreeTypes[tree_type.upper()] else: raise ValueError(f'Invalid tree type {tree_type}') @@ -657,7 +652,7 @@ def to_dict(self): 'theta': self.theta, 'tau': self.tau, 'U': U, - 'likelihood': self.likelihood + 'likelihood': self.likelihood, } @classmethod @@ -674,8 +669,11 @@ def from_dict(cls, edge_dict): Instance of the edge defined on the parameters. """ instance = cls( - edge_dict['index'], edge_dict['L'], edge_dict['R'], - edge_dict['name'], edge_dict['theta'] + edge_dict['index'], + edge_dict['L'], + edge_dict['R'], + edge_dict['name'], + edge_dict['theta'], ) instance.U = np.array(edge_dict['U']) parents = edge_dict['parents'] diff --git a/copulas/multivariate/vine.py b/copulas/multivariate/vine.py index c05adcca..a6ce7173 100644 --- a/copulas/multivariate/vine.py +++ b/copulas/multivariate/vine.py @@ -8,8 +8,13 @@ import pandas as pd from copulas import ( - EPSILON, check_valid_values, get_qualified_name, random_state, store_args, - validate_random_state) + EPSILON, + check_valid_values, + get_qualified_name, + random_state, + store_args, + validate_random_state, +) from copulas.bivariate.base import Bivariate, CopulaTypes from copulas.multivariate.base import Multivariate from copulas.multivariate.tree import Tree, get_tree @@ -103,7 +108,7 @@ def to_dict(self): result = { 'type': get_qualified_name(self), 'vine_type': self.vine_type, - 'fitted': self.fitted + 'fitted': self.fitted, } if not self.fitted: @@ -118,7 +123,7 @@ def to_dict(self): 'tau_mat': self.tau_mat.tolist(), 'u_matrix': self.u_matrix.tolist(), 'unis': [distribution.to_dict() for distribution in self.unis], - 'columns': self.columns + 'columns': self.columns, }) return result @@ -293,8 +298,9 @@ def _sample_row(self): # get index of edge to retrieve for edge in current_tree: if i == 0: - if (edge.L == current and edge.R == visited[0]) or\ - (edge.R == current and edge.L == visited[0]): + if (edge.L == current and edge.R == visited[0]) or ( + edge.R == current and edge.L == visited[0] + ): current_ind = edge.index break else: diff --git a/copulas/optimize/__init__.py b/copulas/optimize/__init__.py index 8c508f7b..71d8f82d 100644 --- a/copulas/optimize/__init__.py +++ b/copulas/optimize/__init__.py @@ -127,7 +127,7 @@ def chandrupatla(f, xmin, xmax, eps_m=None, eps_a=None, maxiter=50): # to determine which method we should use next xi = (a - b) / (c - b) phi = (fa - fb) / (fc - fb) - iqi = np.logical_and(phi**2 < xi, (1 - phi)**2 < 1 - xi) + iqi = np.logical_and(phi**2 < xi, (1 - phi) ** 2 < 1 - xi) if not shape: # scalar case @@ -143,8 +143,9 @@ def chandrupatla(f, xmin, xmax, eps_m=None, eps_a=None, maxiter=50): # array case t = np.full(shape, 0.5) a2, b2, c2, fa2, fb2, fc2 = a[iqi], b[iqi], c[iqi], fa[iqi], fb[iqi], fc[iqi] - t[iqi] = fa2 / (fb2 - fa2) * fc2 / (fb2 - fc2) + (c2 - a2) / \ - (b2 - a2) * fa2 / (fc2 - fa2) * fb2 / (fc2 - fb2) + t[iqi] = fa2 / (fb2 - fa2) * fc2 / (fb2 - fc2) + (c2 - a2) / (b2 - a2) * fa2 / ( + fc2 - fa2 + ) * fb2 / (fc2 - fb2) # limit to the range (tlim, 1-tlim) t = np.minimum(1 - tlim, np.maximum(tlim, t)) diff --git a/copulas/univariate/__init__.py b/copulas/univariate/__init__.py index f756d199..cb68512b 100644 --- a/copulas/univariate/__init__.py +++ b/copulas/univariate/__init__.py @@ -21,5 +21,5 @@ 'ParametricType', 'BoundedType', 'UniformUnivariate', - 'LogLaplace' + 'LogLaplace', ) diff --git a/copulas/univariate/base.py b/copulas/univariate/base.py index 249d605c..79b089e7 100644 --- a/copulas/univariate/base.py +++ b/copulas/univariate/base.py @@ -7,8 +7,13 @@ import numpy as np from copulas import ( - NotFittedError, get_instance, get_qualified_name, random_state, store_args, - validate_random_state) + NotFittedError, + get_instance, + get_qualified_name, + random_state, + store_args, + validate_random_state, +) from copulas.univariate.selection import select_univariate @@ -84,8 +89,14 @@ def _select_candidates(cls, parametric=None, bounded=None): return candidates @store_args - def __init__(self, candidates=None, parametric=None, bounded=None, random_state=None, - selection_sample_size=None): + def __init__( + self, + candidates=None, + parametric=None, + bounded=None, + random_state=None, + selection_sample_size=None, + ): self.candidates = candidates or self._select_candidates(parametric, bounded) self.random_state = validate_random_state(random_state) self.selection_sample_size = selection_sample_size diff --git a/copulas/univariate/beta.py b/copulas/univariate/beta.py index 98eaf95a..d836b826 100644 --- a/copulas/univariate/beta.py +++ b/copulas/univariate/beta.py @@ -28,12 +28,7 @@ def _fit(self, X): loc = np.min(X) scale = np.max(X) - loc a, b, loc, scale = beta.fit(X, loc=loc, scale=scale) - self._params = { - 'loc': loc, - 'scale': scale, - 'a': a, - 'b': b - } + self._params = {'loc': loc, 'scale': scale, 'a': a, 'b': b} def _is_constant(self): return self._params['scale'] == 0 diff --git a/copulas/univariate/gaussian.py b/copulas/univariate/gaussian.py index 5005a782..0c0975b0 100644 --- a/copulas/univariate/gaussian.py +++ b/copulas/univariate/gaussian.py @@ -15,16 +15,10 @@ class GaussianUnivariate(ScipyModel): MODEL_CLASS = norm def _fit_constant(self, X): - self._params = { - 'loc': np.unique(X)[0], - 'scale': 0 - } + self._params = {'loc': np.unique(X)[0], 'scale': 0} def _fit(self, X): - self._params = { - 'loc': np.mean(X), - 'scale': np.std(X) - } + self._params = {'loc': np.mean(X), 'scale': np.std(X)} def _is_constant(self): return self._params['scale'] == 0 diff --git a/copulas/univariate/gaussian_kde.py b/copulas/univariate/gaussian_kde.py index 16259f3a..3b613188 100644 --- a/copulas/univariate/gaussian_kde.py +++ b/copulas/univariate/gaussian_kde.py @@ -129,7 +129,7 @@ def percent_point(self, U, method='chandrupatla'): self.check_fit() if len(U.shape) > 1: - raise ValueError(f'Expected 1d array, got {(U, )}.') + raise ValueError(f'Expected 1d array, got {(U,)}.') if np.any(U > 1.0) or np.any(U < 0.0): raise ValueError('Expected values in range [0.0, 1.0].') @@ -165,11 +165,10 @@ def _fit_constant(self, X): def _fit(self, X): if self._sample_size: - X = gaussian_kde(X, bw_method=self.bw_method, - weights=self.weights).resample(self._sample_size) - self._params = { - 'dataset': X.tolist() - } + X = gaussian_kde(X, bw_method=self.bw_method, weights=self.weights).resample( + self._sample_size + ) + self._params = {'dataset': X.tolist()} self._model = self._get_model() def _is_constant(self): diff --git a/copulas/univariate/student_t.py b/copulas/univariate/student_t.py index 73827c36..6c503d32 100644 --- a/copulas/univariate/student_t.py +++ b/copulas/univariate/student_t.py @@ -22,11 +22,7 @@ def _fit_constant(self, X): def _fit(self, X): dataframe, loc, scale = t.fit(X) - self._params = { - 'df': dataframe, - 'loc': loc, - 'scale': scale - } + self._params = {'df': dataframe, 'loc': loc, 'scale': scale} def _is_constant(self): return self._params['scale'] == 0 diff --git a/copulas/univariate/truncated_gaussian.py b/copulas/univariate/truncated_gaussian.py index bed44e0d..e9aff169 100644 --- a/copulas/univariate/truncated_gaussian.py +++ b/copulas/univariate/truncated_gaussian.py @@ -28,12 +28,7 @@ def __init__(self, minimum=None, maximum=None, random_state=None): def _fit_constant(self, X): constant = np.unique(X)[0] - self._params = { - 'a': constant, - 'b': constant, - 'loc': constant, - 'scale': 0.0 - } + self._params = {'a': constant, 'b': constant, 'loc': constant, 'scale': 0.0} def _fit(self, X): if self.min is None: @@ -51,21 +46,18 @@ def nnlf(params): initial_params = X.mean(), X.std() with warnings.catch_warnings(): warnings.simplefilter('ignore', category=RuntimeWarning) - optimal = fmin_slsqp(nnlf, initial_params, iprint=False, bounds=[ - (self.min, self.max), - (0.0, (self.max - self.min)**2) - ]) + optimal = fmin_slsqp( + nnlf, + initial_params, + iprint=False, + bounds=[(self.min, self.max), (0.0, (self.max - self.min) ** 2)], + ) loc, scale = optimal a = (self.min - loc) / scale b = (self.max - loc) / scale - self._params = { - 'a': a, - 'b': b, - 'loc': loc, - 'scale': scale - } + self._params = {'a': a, 'b': b, 'loc': loc, 'scale': scale} def _is_constant(self): return self._params['a'] == self._params['b'] diff --git a/copulas/univariate/uniform.py b/copulas/univariate/uniform.py index 79ab151f..5805b8ee 100644 --- a/copulas/univariate/uniform.py +++ b/copulas/univariate/uniform.py @@ -15,16 +15,10 @@ class UniformUnivariate(ScipyModel): MODEL_CLASS = uniform def _fit_constant(self, X): - self._params = { - 'loc': np.min(X), - 'scale': np.max(X) - np.min(X) - } + self._params = {'loc': np.min(X), 'scale': np.max(X) - np.min(X)} def _fit(self, X): - self._params = { - 'loc': np.min(X), - 'scale': np.max(X) - np.min(X) - } + self._params = {'loc': np.min(X), 'scale': np.max(X) - np.min(X)} def _is_constant(self): return self._params['scale'] == 0 diff --git a/copulas/visualization.py b/copulas/visualization.py index 60883c34..4b577deb 100644 --- a/copulas/visualization.py +++ b/copulas/visualization.py @@ -31,11 +31,7 @@ def _generate_1d_plot(data, title, labels, colors): plotly.graph_objects._figure.Figure """ fig = ff.create_distplot( - hist_data=data, - group_labels=labels, - show_hist=False, - show_rug=False, - colors=colors + hist_data=data, group_labels=labels, show_hist=False, show_rug=False, colors=colors ) for i, name in enumerate(labels): @@ -52,7 +48,7 @@ def _generate_1d_plot(data, title, labels, colors): font={'size': PlotConfig.FONT_SIZE}, showlegend=True if labels[0] else False, xaxis_title='value', - yaxis_title='frequency' + yaxis_title='frequency', ) return fig @@ -80,10 +76,7 @@ def dist_1d(data, title=None, label=None): title += f" for column '{data.name}'" return _generate_1d_plot( - data=[data], - title=title, - labels=[label], - colors=[PlotConfig.DATACEBO_DARK] + data=[data], title=title, labels=[label], colors=[PlotConfig.DATACEBO_DARK] ) @@ -112,7 +105,7 @@ def compare_1d(real, synth, title=None): data=[real, synth], title=title, labels=['Real', 'Synthetic'], - colors=[PlotConfig.DATACEBO_DARK, PlotConfig.DATACEBO_GREEN] + colors=[PlotConfig.DATACEBO_DARK, PlotConfig.DATACEBO_GREEN], ) @@ -148,7 +141,7 @@ def _generate_scatter_2d_plot(data, columns, color_discrete_map, title): y=columns[1], color='Data', color_discrete_map=color_discrete_map, - symbol='Data' + symbol='Data', ) fig.update_layout( @@ -189,7 +182,7 @@ def scatter_2d(data, columns=None, title=None): data=data, columns=columns, color_discrete_map={'Real': PlotConfig.DATACEBO_DARK}, - title=title + title=title, ) @@ -226,9 +219,9 @@ def compare_2d(real, synth, columns=None, title=None): columns=columns, color_discrete_map={ 'Real': PlotConfig.DATACEBO_DARK, - 'Synthetic': PlotConfig.DATACEBO_GREEN + 'Synthetic': PlotConfig.DATACEBO_GREEN, }, - title=title + title=title, ) @@ -302,14 +295,15 @@ def scatter_3d(data, columns=None, title=None): if columns: title += f" for columns '{columns[0]}', '{columns[1]}' and '{columns[2]}'" elif isinstance(data, pd.DataFrame): - title += \ + title += ( f" for columns '{data.columns[0]}', '{data.columns[1]}' and '{data.columns[2]}'" + ) return _generate_scatter_3d_plot( data=data, columns=columns, color_discrete_map={'Real': PlotConfig.DATACEBO_DARK}, - title=title + title=title, ) @@ -336,15 +330,16 @@ def compare_3d(real, synth, columns=None, title=None): if columns: title += f" for columns '{columns[0]}', '{columns[1]}' and '{columns[2]}'" elif isinstance(data, pd.DataFrame): - title += \ + title += ( f" for columns '{data.columns[0]}', '{data.columns[1]}' and '{data.columns[2]}'" + ) return _generate_scatter_3d_plot( data=data, columns=columns, color_discrete_map={ 'Real': PlotConfig.DATACEBO_DARK, - 'Synthetic': PlotConfig.DATACEBO_GREEN + 'Synthetic': PlotConfig.DATACEBO_GREEN, }, - title=title + title=title, ) diff --git a/pyproject.toml b/pyproject.toml index 76245b4d..d22c242e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,32 +89,7 @@ dev = [ "Jinja2>=2,<4;python_version>='3.12'", # style check - 'flake8>=3.7.7,<8', - 'flake8-absolute-import>=1.0,<2', - 'flake8-builtins>=1.5.3,<3', - 'flake8-comprehensions>=3.6.1,<4', - 'flake8-debugger>=4.0.0,<5', - 'flake8-docstrings>=1.5.0,<2', - 'flake8-eradicate>=1.1.0,<2', - 'flake8-fixme>=1.1.1,<1.2', - 'flake8-mock>=0.3,<1', - 'flake8-multiline-containers>=0.0.18,<0.1', - 'flake8-mutable>=1.2.0,<1.3', - 'flake8-expression-complexity>=0.0.9,<0.1', - 'flake8-print>=4.0.0,<4.1', - 'flake8-pytest-style>=2.0.0,<3', - 'flake8-quotes>=3.3.0,<4', - 'flake8-sfs>=0.0.3,<2', - 'flake8-variables-names>=0.0.4,<0.1', - 'dlint>=0.11.0,<1', - 'isort>=5.13.2,<6', - 'pandas-vet>=0.2.3,<2024', - 'pep8-naming>=0.12.1,<1', - 'pydocstyle>=6.1.1,<7', - - # fix style issues - 'autoflake>=1.1,<3', - 'autopep8>=1.4.3,<3', + 'ruff>=0.3.2,<1', # distribute on PyPI 'twine>=1.10.0,<4', @@ -125,9 +100,6 @@ dev = [ 'tox>=2.9.1,<4', 'invoke', - # Documentation style - 'doc8>=0.8.0,<0.9', - # Large scale evaluation 'urllib3>=1.20,<1.26', 'tabulate>=0.8.3,<0.9', @@ -199,3 +171,52 @@ filename = 'copulas/__init__.py' search = "__version__ = '{current_version}'" replace = "__version__ = '{new_version}'" +[tool.ruff] +preview = true +line-length = 100 +indent-width = 4 +src = ["copulas"] +target-version = "py312" +exclude = [ + "docs", + ".tox", + ".git", + "__pycache__", + ".ipynb_checkpoints" +] + +[tool.ruff.lint] +select = [ + # Pyflakes + "F", + # Pycodestyle + "E", + "W", + "D200", + # isort + "I001", +] +ignore = [ + "E501", + "D107", # Missing docstring in __init__ + "D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449 +] + +[tool.ruff.format] +quote-style = "single" +indent-style = "space" +preview = true +docstring-code-format = true +docstring-code-line-length = "dynamic" + +[tool.ruff.lint.pep8-naming] +extend-ignore-names = ["X", "C", "X_padded", "Y", "Y_padded"] + +[tool.ruff.lint.isort] +known-first-party = ["copulas"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["F401", "E402", "F403", "F405", "E501", "I001"] + +[tool.ruff.lint.pydocstyle] +convention = "google" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 6e9c4d8e..00000000 --- a/setup.cfg +++ /dev/null @@ -1,30 +0,0 @@ -[flake8] -max-line-length = 99 -exclude = docs, .git, __pycache__, .ipynb_checkpoints -extend-ignore = - # Missing docstring in __init__ - D107, - # Missing blank line after last section - D413, - # insecure use of "pickle" or "cPickle" - DUO103 - # argument name 'X' should be lowercase - N803, - # variable 'X' in function should be lowercase - N806, - # String literal formatting using f-string. - SFS3, - # Single letter variable names are not allowed - VNE001, - # TokenError: unterminated string literal - E902 -per-file-ignores = - large_scale_evaluation.py:T001 - - -[aliases] -test = pytest - -[doc8] -max-line-length = 99 - diff --git a/tasks.py b/tasks.py index 7518824c..2fbda22d 100644 --- a/tasks.py +++ b/tasks.py @@ -2,16 +2,17 @@ import inspect import operator import os -import tomli -import sys -from packaging.requirements import Requirement -from packaging.version import Version +import platform +import re import shutil import stat +import sys from pathlib import Path +import tomli from invoke import task - +from packaging.requirements import Requirement +from packaging.version import Version COMPARISONS = { '>=': operator.ge, @@ -48,14 +49,14 @@ def numerical(c): def _validate_python_version(line): is_valid = True - for python_version_match in re.finditer(r"python_version(<=?|>=?|==)\'(\d\.?)+\'", line): + for python_version_match in re.finditer(r'python_version(<=?|>=?|==)\'(\d\.?)+\'', line): python_version = python_version_match.group(0) comparison = re.search(r'(>=?|<=?|==)', python_version).group(0) - version_number = python_version.split(comparison)[-1].replace("'", "") + version_number = python_version.split(comparison)[-1].replace("'", '') comparison_function = COMPARISONS[comparison] is_valid = is_valid and comparison_function( - pkg_resources.parse_version(platform.python_version()), - pkg_resources.parse_version(version_number), + Version(platform.python_version()), + Version(version_number), ) return is_valid @@ -77,17 +78,22 @@ def _get_minimum_versions(dependencies, python_version): if req.name not in min_versions: min_version = next( - (spec.version for spec in req.specifier if spec.operator in ('>=', '==')), None) + (spec.version for spec in req.specifier if spec.operator in ('>=', '==')), + None, + ) if min_version: min_versions[req.name] = f'{req.name}=={min_version}' elif '@' not in min_versions[req.name]: existing_version = Version(min_versions[req.name].split('==')[1]) new_version = next( - (spec.version for spec in req.specifier if spec.operator in ('>=', '==')), existing_version) + (spec.version for spec in req.specifier if spec.operator in ('>=', '==')), + existing_version, + ) if new_version > existing_version: - # Change when a valid newer version is found - min_versions[req.name] = f'{req.name}=={new_version}' + min_versions[req.name] = ( + f'{req.name}=={new_version}' # Change when a valid newer version is found + ) return list(min_versions.values()) @@ -102,7 +108,8 @@ def install_minimum(c): minimum_versions = _get_minimum_versions(dependencies, python_version) if minimum_versions: - c.run(f'python -m pip install {" ".join(minimum_versions)}') + install_deps = ' '.join(minimum_versions) + c.run(f'python -m pip install {install_deps}') @task @@ -133,20 +140,20 @@ def readme(c): def tutorials(c): for ipynb_file in glob.glob('tutorials/*.ipynb') + glob.glob('tutorials/**/*.ipynb'): if '.ipynb_checkpoints' not in ipynb_file: - c.run(( - 'jupyter nbconvert --execute --ExecutePreprocessor.timeout=3600 ' - f'--to=html --stdout "{ipynb_file}"' - ), hide='out') + c.run( + ( + 'jupyter nbconvert --execute --ExecutePreprocessor.timeout=3600 ' + f'--to=html --stdout "{ipynb_file}"' + ), + hide='out', + ) @task def lint(c): check_dependencies(c) - c.run('flake8 copulas') - c.run('pydocstyle copulas') - c.run('flake8 tests --ignore=D,SFS2') - c.run('pydocstyle tests') - c.run('isort -c copulas tests') + c.run('ruff check .') + c.run('ruff format . --check') def remove_readonly(func, path, _): diff --git a/tests/__init__.py b/tests/__init__.py index d0f0b7d2..d5fd6922 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,7 +7,7 @@ NUMPY_NUMERICAL_DTYPES = set('buifc') -def compare_nested_dicts(first, second, epsilon=10E-6): +def compare_nested_dicts(first, second, epsilon=10e-6): """Compare two dictionaries. Raises an assertion error when a difference is found.""" assert first.keys() == second.keys() @@ -30,7 +30,11 @@ def compare_nested_dicts(first, second, epsilon=10E-6): assert _first == _second, "{}: {} doesn't equal {}".format(key, _first, _second) -def compare_values_epsilon(first, second, epsilon=10E-6,): +def compare_values_epsilon( + first, + second, + epsilon=10e-6, +): """Compare epsilons.""" if pd.isna(first) and pd.isna(second): return True @@ -38,12 +42,11 @@ def compare_values_epsilon(first, second, epsilon=10E-6,): return abs(first - second) < epsilon -def compare_nested_iterables(first, second, epsilon=10E-6): +def compare_nested_iterables(first, second, epsilon=10e-6): """Compare iterables.""" assert len(first) == len(second), 'Iterables should have the same length to be compared.' for index, (_first, _second) in enumerate(zip(first, second)): - message = COMPARE_VALUES_ERROR.format(index, _first, _second) if isinstance(_first, (list, tuple)): @@ -65,7 +68,7 @@ def compare_nested_iterables(first, second, epsilon=10E-6): assert _first == _second, message -def copula_zero_if_arg_zero(copula, dimensions=2, steps=10, tolerance=1E-05): +def copula_zero_if_arg_zero(copula, dimensions=2, steps=10, tolerance=1e-05): """Assert that any call with an argument equal to 0, will return 0. This function helps to test the following analytical property of copulas: @@ -102,7 +105,7 @@ def copula_zero_if_arg_zero(copula, dimensions=2, steps=10, tolerance=1E-05): compare_nested_iterables(result, expected_result, tolerance) -def copula_single_arg_not_one(copula, dimensions=2, steps=10, tolerance=1E-05): +def copula_single_arg_not_one(copula, dimensions=2, steps=10, tolerance=1e-05): """Assert that any call where all arguments minus one are 1, will return the non-1 value. This functions helps to test the following analytic property of copulas: @@ -120,7 +123,7 @@ def copula_single_arg_not_one(copula, dimensions=2, steps=10, tolerance=1E-05): AssertionError: If any value doesn't comply with the expected behavior. """ # Setup - step_values = np.linspace(0.0, 1.0, steps + 1)[1: -1] + step_values = np.linspace(0.0, 1.0, steps + 1)[1:-1] values = [] for index in range(dimensions): diff --git a/tests/end-to-end/bivariate/test_base.py b/tests/end-to-end/bivariate/test_base.py index 9f952e93..eb346eb1 100644 --- a/tests/end-to-end/bivariate/test_base.py +++ b/tests/end-to-end/bivariate/test_base.py @@ -4,8 +4,7 @@ from copulas.bivariate import Bivariate -class TestBivariate(): - +class TestBivariate: @pytest.mark.parametrize('model', Bivariate.subclasses()) def test_fixed_random_state(self, model): """Test that the bivariate models work with a fixed seed. diff --git a/tests/end-to-end/multivariate/test_base.py b/tests/end-to-end/multivariate/test_base.py index 6546fc89..4e61be5f 100644 --- a/tests/end-to-end/multivariate/test_base.py +++ b/tests/end-to-end/multivariate/test_base.py @@ -5,8 +5,7 @@ from copulas.multivariate import GaussianMultivariate, VineCopula -class TestMultivariate(): - +class TestMultivariate: @pytest.mark.parametrize('model', [GaussianMultivariate(), VineCopula('direct')]) def test_fixed_random_state(self, model): """Test that the multivariate models work with a fixed seed. diff --git a/tests/end-to-end/multivariate/test_gaussian.py b/tests/end-to-end/multivariate/test_gaussian.py index 996f4779..7b294868 100644 --- a/tests/end-to-end/multivariate/test_gaussian.py +++ b/tests/end-to-end/multivariate/test_gaussian.py @@ -25,19 +25,18 @@ def test_conditional_sampling(): sampled = gm.sample(3000, conditions={'b': 1}) - np.testing.assert_allclose(sampled['a'].mean(), 1, atol=.5) - np.testing.assert_allclose(sampled['b'].mean(), 1, atol=.5) - np.testing.assert_allclose(sampled['c'].mean(), 1, atol=.5) + np.testing.assert_allclose(sampled['a'].mean(), 1, atol=0.5) + np.testing.assert_allclose(sampled['b'].mean(), 1, atol=0.5) + np.testing.assert_allclose(sampled['c'].mean(), 1, atol=0.5) sampled = gm.sample(3000, conditions={'a': 3, 'b': 3}) - np.testing.assert_allclose(sampled['a'].mean(), 3, atol=.5) - np.testing.assert_allclose(sampled['b'].mean(), 3, atol=.5) - np.testing.assert_allclose(sampled['c'].mean(), 3, atol=.5) + np.testing.assert_allclose(sampled['a'].mean(), 3, atol=0.5) + np.testing.assert_allclose(sampled['b'].mean(), 3, atol=0.5) + np.testing.assert_allclose(sampled['c'].mean(), 3, atol=0.5) class TestGaussian(TestCase): - def setUp(self): self.test_dir = tempfile.TemporaryDirectory() @@ -84,9 +83,7 @@ def test_fit_sample_distribution_instance(self): def test_fit_sample_distribution_dict(self): data = sample_trivariate_xyz() - model = GaussianMultivariate(distribution={ - 'x': GaussianKDE() - }) + model = GaussianMultivariate(distribution={'x': GaussianKDE()}) model.fit(data) sampled_data = model.sample(10) @@ -94,11 +91,13 @@ def test_fit_sample_distribution_dict(self): def test_fit_sample_distribution_dict_multiple(self): data = sample_trivariate_xyz() - model = GaussianMultivariate(distribution={ - 'x': Univariate(parametric=ParametricType.PARAMETRIC), - 'y': BetaUnivariate(), - 'z': GaussianKDE() - }) + model = GaussianMultivariate( + distribution={ + 'x': Univariate(parametric=ParametricType.PARAMETRIC), + 'y': BetaUnivariate(), + 'z': GaussianKDE(), + } + ) model.fit(data) sampled_data = model.sample(10) diff --git a/tests/end-to-end/multivariate/test_vine.py b/tests/end-to-end/multivariate/test_vine.py index e1dd5522..8df375c6 100644 --- a/tests/end-to-end/multivariate/test_vine.py +++ b/tests/end-to-end/multivariate/test_vine.py @@ -7,7 +7,6 @@ class TestGaussian(TestCase): - def setUp(self): self.test_dir = tempfile.TemporaryDirectory() diff --git a/tests/end-to-end/univariate/test_beta.py b/tests/end-to-end/univariate/test_beta.py index f07f0673..1f453001 100644 --- a/tests/end-to-end/univariate/test_beta.py +++ b/tests/end-to-end/univariate/test_beta.py @@ -9,7 +9,6 @@ class TestGaussian(TestCase): - def setUp(self): self.data = beta.rvs(a=1.0, b=1.0, loc=1.0, scale=1.0, size=50000) self.constant = np.full(100, fill_value=5) @@ -30,7 +29,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = BetaUnivariate() @@ -39,7 +38,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_equal(np.full(50, 5), model.sample(50)) @@ -116,7 +115,7 @@ def test_to_dict_constant(self): 'loc': 5, 'scale': 0, 'a': 1, - 'b': 1 + 'b': 1, } def test_save_load(self): diff --git a/tests/end-to-end/univariate/test_gamma.py b/tests/end-to-end/univariate/test_gamma.py index c365aa37..31c8f22f 100644 --- a/tests/end-to-end/univariate/test_gamma.py +++ b/tests/end-to-end/univariate/test_gamma.py @@ -9,7 +9,6 @@ class TestGaussian(TestCase): - def setUp(self): self.data = gamma.rvs(a=1.0, loc=1.0, scale=1.0, size=50000) self.constant = np.full(100, fill_value=5) @@ -29,7 +28,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = GammaUnivariate() @@ -38,7 +37,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_equal(np.full(50, 5), model.sample(50)) diff --git a/tests/end-to-end/univariate/test_gaussian.py b/tests/end-to-end/univariate/test_gaussian.py index c5d0e649..6c160ddc 100644 --- a/tests/end-to-end/univariate/test_gaussian.py +++ b/tests/end-to-end/univariate/test_gaussian.py @@ -9,7 +9,6 @@ class TestGaussian(TestCase): - def setUp(self): self.data = norm.rvs(loc=1.0, scale=0.5, size=50000) self.constant = np.full(100, fill_value=5) @@ -28,7 +27,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = GaussianUnivariate() @@ -37,7 +36,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_equal(np.full(50, 5), model.sample(50)) diff --git a/tests/end-to-end/univariate/test_gaussian_kde.py b/tests/end-to-end/univariate/test_gaussian_kde.py index ed116443..f1c77f73 100644 --- a/tests/end-to-end/univariate/test_gaussian_kde.py +++ b/tests/end-to-end/univariate/test_gaussian_kde.py @@ -11,7 +11,6 @@ class TestGaussian(TestCase): - def setUp(self): self.data = sample_univariate_bimodal() self.constant = np.full(100, fill_value=5) @@ -27,7 +26,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = GaussianKDE() @@ -36,7 +35,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_equal(np.full(50, 5), model.sample(50)) @@ -101,7 +100,7 @@ def test_to_dict_constant(self): assert params == { 'type': 'copulas.univariate.gaussian_kde.GaussianKDE', - 'dataset': [5] * 100 + 'dataset': [5] * 100, } def test_to_dict_from_dict_constant(self): @@ -148,7 +147,7 @@ def test_gaussiankde_arguments(self): dist = GaussianMultivariate(distribution=GaussianKDE(bw_method=0.01)) dist.fit(data) samples = dist.sample(size).to_numpy()[0] - d, p = ks_2samp(data, samples) + _d, p = ks_2samp(data, samples) assert p >= 0.05 def test_fixed_random_state(self): diff --git a/tests/end-to-end/univariate/test_student_t.py b/tests/end-to-end/univariate/test_student_t.py index ec374a5d..a1f5c6af 100644 --- a/tests/end-to-end/univariate/test_student_t.py +++ b/tests/end-to-end/univariate/test_student_t.py @@ -9,7 +9,6 @@ class TestStudentT(TestCase): - def setUp(self): self.data = t.rvs(df=3.0, loc=1.0, scale=0.5, size=50000) self.constant = np.full(100, fill_value=5) @@ -29,7 +28,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = StudentTUnivariate() @@ -38,7 +37,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_allclose(np.full(50, 5), model.sample(50), atol=0.2) diff --git a/tests/end-to-end/univariate/test_truncated_gaussian.py b/tests/end-to-end/univariate/test_truncated_gaussian.py index 29e77cd6..707519c2 100644 --- a/tests/end-to-end/univariate/test_truncated_gaussian.py +++ b/tests/end-to-end/univariate/test_truncated_gaussian.py @@ -9,7 +9,6 @@ class TestGaussian(TestCase): - def setUp(self): self.data = truncnorm.rvs(a=0.0, b=4.0, loc=1.0, scale=1.0, size=50000) self.constant = np.full(100, fill_value=5) @@ -30,7 +29,7 @@ def test_fit_sample(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) def test_fit_sample_constant(self): model = TruncatedGaussian() @@ -39,7 +38,7 @@ def test_fit_sample_constant(self): sampled_data = model.sample(50) assert isinstance(sampled_data, np.ndarray) - assert sampled_data.shape == (50, ) + assert sampled_data.shape == (50,) assert model._constant_value == 5 np.testing.assert_equal(np.full(50, 5), model.sample(50)) diff --git a/tests/large_scale_evaluation.py b/tests/large_scale_evaluation.py index 942ab768..0a5d6471 100644 --- a/tests/large_scale_evaluation.py +++ b/tests/large_scale_evaluation.py @@ -30,6 +30,7 @@ Name of the model to test. Can be passed multiple times to evaluate more than one model. """ + import argparse import logging import random @@ -58,7 +59,7 @@ 'GaussianMultivariate()': GaussianMultivariate(), 'VineCopula("center")': VineCopula('center'), 'VineCopula("direct")': VineCopula('direct'), - 'VineCopula("regular")': VineCopula('regular') + 'VineCopula("regular")': VineCopula('regular'), } OUTPUT_COLUMNS = [ 'model_name', @@ -101,8 +102,9 @@ def get_dataset_url(name): def load_data(dataset_name, max_rows, max_columns): """Load the data.""" - LOGGER.debug('Loading dataset %s (max_rows: %s, max_columns: %s)', - dataset_name, max_rows, max_columns) + LOGGER.debug( + 'Loading dataset %s (max_rows: %s, max_columns: %s)', dataset_name, max_rows, max_columns + ) dataset_url = get_dataset_url(dataset_name) data = pd.read_csv(dataset_url, nrows=max_rows) if max_columns: @@ -164,7 +166,7 @@ def evaluate_model_dataset(model_name, dataset_name, max_rows, max_columns): 'error_message': error_message, 'score': score, 'num_columns': len(data.columns), - 'num_rows': len(data) + 'num_rows': len(data), } @@ -192,12 +194,14 @@ def run_evaluation(model_names, dataset_names, max_rows, max_columns): results.append(result) elapsed_time = datetime.utcnow() - start - LOGGER.info('%s datasets tested using model %s in %s', - len(dataset_names), model_name, elapsed_time) + LOGGER.info( + '%s datasets tested using model %s in %s', len(dataset_names), model_name, elapsed_time + ) elapsed_time = datetime.utcnow() - start - LOGGER.info('%s datasets tested %s models in %s', - len(dataset_names), len(model_names), elapsed_time) + LOGGER.info( + '%s datasets tested %s models in %s', len(dataset_names), len(model_names), elapsed_time + ) return pd.DataFrame(results, columns=OUTPUT_COLUMNS) @@ -214,29 +218,48 @@ def _get_parser(): # Parser parser = argparse.ArgumentParser(description='Large scale Copulas evaluation') - parser.add_argument('-v', '--verbose', action='count', default=0, - help='Be verbose. Use -vv for increased verbosity.') - parser.add_argument('-o', '--output-path', type=str, required=False, - help='Path to the CSV file where the report will be dumped') - parser.add_argument('-s', '--sample', type=int, - help=( - 'Limit the test to a number of datasets (sampled randomly)' - ' specified by SAMPLE.' - )) - parser.add_argument('-r', '--max-rows', type=int, - help='Limit the number of rows per dataset.') - parser.add_argument('-c', '--max-columns', type=int, - help='Limit the number of columns per dataset.') - parser.add_argument('-m', '--model', nargs='+', type=_valid_model, - help=( - 'Name of the model to test. Can be passed multiple ' - 'times to evaluate more than one model.' - )) - parser.add_argument('datasets', nargs='*', - help=( - 'Name of the datasets/s to test. If no names are given ' - 'all the available datasets are tested.' - )) + parser.add_argument( + '-v', + '--verbose', + action='count', + default=0, + help='Be verbose. Use -vv for increased verbosity.', + ) + parser.add_argument( + '-o', + '--output-path', + type=str, + required=False, + help='Path to the CSV file where the report will be dumped', + ) + parser.add_argument( + '-s', + '--sample', + type=int, + help=('Limit the test to a number of datasets (sampled randomly)' ' specified by SAMPLE.'), + ) + parser.add_argument('-r', '--max-rows', type=int, help='Limit the number of rows per dataset.') + parser.add_argument( + '-c', '--max-columns', type=int, help='Limit the number of columns per dataset.' + ) + parser.add_argument( + '-m', + '--model', + nargs='+', + type=_valid_model, + help=( + 'Name of the model to test. Can be passed multiple ' + 'times to evaluate more than one model.' + ), + ) + parser.add_argument( + 'datasets', + nargs='*', + help=( + 'Name of the datasets/s to test. If no names are given ' + 'all the available datasets are tested.' + ), + ) return parser @@ -262,12 +285,7 @@ def main(): results = run_evaluation(model_names, dataset_names, args.max_rows, args.max_columns) - print(tabulate.tabulate( - results, - tablefmt='github', - headers=results.columns, - showindex=False - )) + print(tabulate.tabulate(results, tablefmt='github', headers=results.columns, showindex=False)) if args.output_path: LOGGER.info('Saving report to %s', args.output_path) diff --git a/tests/unit/bivariate/test___init__.py b/tests/unit/bivariate/test___init__.py index 737979b7..140d61e0 100644 --- a/tests/unit/bivariate/test___init__.py +++ b/tests/unit/bivariate/test___init__.py @@ -8,12 +8,7 @@ def test_select_copula_negative_tau(): """If tau is negative, should choose frank copula.""" # Setup - X = np.array([ - [0.1, 0.6], - [0.2, 0.5], - [0.3, 0.4], - [0.4, 0.3] - ]) + X = np.array([[0.1, 0.6], [0.2, 0.5], [0.3, 0.4], [0.4, 0.3]]) assert stats.kendalltau(X[:, 0], X[:, 1])[0] < 0 # Run diff --git a/tests/unit/bivariate/test_base.py b/tests/unit/bivariate/test_base.py index 0f42fe61..6fb2a400 100644 --- a/tests/unit/bivariate/test_base.py +++ b/tests/unit/bivariate/test_base.py @@ -7,7 +7,6 @@ class TestBivariate(TestCase): - def setUp(self): self.X = np.array([ [0.2, 0.3], @@ -30,11 +29,7 @@ def test___init__random_state(self): def test_from_dict(self): """From_dict sets the values of a dictionary as attributes of the instance.""" # Setup - parameters = { - 'copula_type': 'FRANK', - 'tau': 0.15, - 'theta': 0.8 - } + parameters = {'copula_type': 'FRANK', 'tau': 0.15, 'theta': 0.8} # Run instance = Bivariate.from_dict(parameters) @@ -53,7 +48,7 @@ def test_to_dict(self): expected_result = { 'copula_type': 'FRANK', 'tau': 0.9128709291752769, - 'theta': 44.2003852484162 + 'theta': 44.2003852484162, } # Run @@ -73,7 +68,7 @@ def test_save(self, json_mock, open_mock): expected_content = { 'copula_type': 'FRANK', 'tau': 0.9128709291752769, - 'theta': 44.2003852484162 + 'theta': 44.2003852484162, } # Run @@ -92,7 +87,7 @@ def test_load_from_file(self, json_mock, open_mock): json_mock.return_value = { 'copula_type': 'FRANK', 'tau': -0.33333333333333337, - 'theta': -3.305771759329249 + 'theta': -3.305771759329249, } # Run diff --git a/tests/unit/bivariate/test_clayton.py b/tests/unit/bivariate/test_clayton.py index 1dbe8187..d43d9542 100644 --- a/tests/unit/bivariate/test_clayton.py +++ b/tests/unit/bivariate/test_clayton.py @@ -7,7 +7,6 @@ class TestClayton(TestCase): - def setUp(self): self.copula = Clayton() self.X = np.array([ @@ -32,10 +31,7 @@ def test_probability_density(self): expected_result = np.array([9.5886, 3.2394]) # Run - result = self.copula.probability_density(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.probability_density(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -48,10 +44,7 @@ def test_cumulative_distribution(self): expected_result = np.array([0.1821, 0.5517]) # Run - result = self.copula.cumulative_distribution(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.cumulative_distribution(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -86,7 +79,7 @@ def test_cdf_zero_if_single_arg_is_zero(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Clayton() - tau_values = np.linspace(0.0, 1.0, 20)[1: -1] + tau_values = np.linspace(0.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: @@ -98,7 +91,7 @@ def test_cdf_value_if_all_other_arg_are_one(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Clayton() - tau_values = np.linspace(0.0, 1.0, 20)[1: -1] + tau_values = np.linspace(0.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: diff --git a/tests/unit/bivariate/test_frank.py b/tests/unit/bivariate/test_frank.py index f4dea3e5..b053511e 100644 --- a/tests/unit/bivariate/test_frank.py +++ b/tests/unit/bivariate/test_frank.py @@ -8,7 +8,6 @@ class TestFrank(TestCase): - def setUp(self): self.X = np.array([ [0.2, 0.1], @@ -33,10 +32,7 @@ def test_probability_density(self): expected_result = np.array([4.4006, 4.2302]) # Run - result = self.copula.probability_density(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.probability_density(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -49,10 +45,7 @@ def test_cumulative_distribution(self): expected_result = np.array([0.1602, 0.5641]) # Run - result = self.copula.cumulative_distribution(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.cumulative_distribution(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -73,13 +66,10 @@ def test_sample(self, uniform_mock): [0.1007998170183327, 0.2], [0.3501836319841291, 0.4], [0.6498163680158703, 0.6], - [0.8992001829816683, 0.8] + [0.8992001829816683, 0.8], ]) - expected_uniform_call_args_list = [ - ((0, 1, 5), {}), - ((0, 1, 5), {}) - ] + expected_uniform_call_args_list = [((0, 1, 5), {}), ((0, 1, 5), {})] # Run result = instance.sample(5) @@ -94,7 +84,7 @@ def test_cdf_zero_if_single_arg_is_zero(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Frank() - tau_values = np.linspace(-1.0, 1.0, 20)[1: -1] + tau_values = np.linspace(-1.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: @@ -106,10 +96,10 @@ def test_cdf_value_if_all_other_arg_are_one(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Frank() - tau_values = np.linspace(-1.0, 1.0, 20)[1: -1] + tau_values = np.linspace(-1.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: instance.tau = tau instance.theta = instance.compute_theta() - copula_single_arg_not_one(instance, tolerance=1E-03) + copula_single_arg_not_one(instance, tolerance=1e-03) diff --git a/tests/unit/bivariate/test_gumbel.py b/tests/unit/bivariate/test_gumbel.py index bca7c628..e70f077a 100644 --- a/tests/unit/bivariate/test_gumbel.py +++ b/tests/unit/bivariate/test_gumbel.py @@ -8,7 +8,6 @@ class TestGumbel(TestCase): - def setUp(self): self.copula = Gumbel() self.X = np.array([ @@ -33,10 +32,7 @@ def test_probability_density(self): expected_result = np.array([3.8870, 3.7559]) # Run - result = self.copula.probability_density(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.probability_density(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -49,10 +45,7 @@ def test_cumulative_distribution(self): expected_result = np.array([0.1549, 0.5584]) # Run - result = self.copula.cumulative_distribution(np.array([ - [0.2, 0.2], - [0.6, 0.61] - ])) + result = self.copula.cumulative_distribution(np.array([[0.2, 0.2], [0.6, 0.61]])) # Check assert isinstance(result, np.ndarray) @@ -73,13 +66,10 @@ def test_sample(self, uniform_mock): [0.1142629649994753, 0.2], [0.3446610994349153, 0.4], [0.6171955667476859, 0.6], - [0.8636748995382857, 0.8] + [0.8636748995382857, 0.8], ]) - expected_uniform_call_args_list = [ - ((0, 1, 5), {}), - ((0, 1, 5), {}) - ] + expected_uniform_call_args_list = [((0, 1, 5), {}), ((0, 1, 5), {})] # Run result = instance.sample(5) @@ -93,7 +83,7 @@ def test_cdf_zero_if_single_arg_is_zero(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Gumbel() - tau_values = np.linspace(0.0, 1.0, 20)[1: -1] + tau_values = np.linspace(0.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: @@ -105,7 +95,7 @@ def test_cdf_value_if_all_other_arg_are_one(self): """Test of the analytical properties of copulas on a range of values of theta.""" # Setup instance = Gumbel() - tau_values = np.linspace(0.0, 1.0, 20)[1: -1] + tau_values = np.linspace(0.0, 1.0, 20)[1:-1] # Run/Check for tau in tau_values: diff --git a/tests/unit/bivariate/test_independence.py b/tests/unit/bivariate/test_independence.py index 1461b5f8..5d3a7009 100644 --- a/tests/unit/bivariate/test_independence.py +++ b/tests/unit/bivariate/test_independence.py @@ -6,7 +6,6 @@ class TestIndependence(TestCase): - def test___init__(self): """Independence copula can be instantiated directly.""" # Setup / Run @@ -21,10 +20,7 @@ def test_fit(self): """Fit checks that the given values are independent.""" # Setup instance = Independence() - data = np.array([ - [1, 2], - [4, 3] - ]) + data = np.array([[1, 2], [4, 3]]) # Run instance.fit(data) @@ -37,14 +33,7 @@ def test_cumulative_distribution(self): """cumulative_distribution is the product of both probabilities.""" # Setup instance = Independence() - data = np.array([ - [0.0, 0.0], - [0.1, 0.1], - [0.2, 0.2], - [0.5, 0.5], - [0.9, 0.9], - [1.0, 1.0] - ]) + data = np.array([[0.0, 0.0], [0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.9, 0.9], [1.0, 1.0]]) expected_result = np.array([ 0.00, diff --git a/tests/unit/multivariate/test_base.py b/tests/unit/multivariate/test_base.py index 298ab59b..00575ae7 100644 --- a/tests/unit/multivariate/test_base.py +++ b/tests/unit/multivariate/test_base.py @@ -4,7 +4,6 @@ class TestMultivariate: - def test_set_random_state(self): """Test `set_random_state` works as expected""" # Setup diff --git a/tests/unit/multivariate/test_gaussian.py b/tests/unit/multivariate/test_gaussian.py index 7c31e774..16d52f8f 100644 --- a/tests/unit/multivariate/test_gaussian.py +++ b/tests/unit/multivariate/test_gaussian.py @@ -10,34 +10,88 @@ class TestGaussianMultivariate(TestCase): - def setUp(self): - """Defines random variable to use on tests. """ + """Defines random variable to use on tests.""" self.data = pd.DataFrame({ 'column1': np.array([ - 2641.16233666, 921.14476418, -651.32239137, 1223.63536668, - 3233.37342355, 1373.22400821, 1959.28188858, 1076.99295365, - 2029.25100261, 1835.52188141, 1170.03850556, 739.42628394, - 1866.65810627, 3703.49786503, 1719.45232017, 258.90206528, - 219.42363944, 609.90212377, 1618.44207239, 2323.2775272, - 3251.78732274, 1430.63989981, -180.57028875, -592.84497457, + 2641.16233666, + 921.14476418, + -651.32239137, + 1223.63536668, + 3233.37342355, + 1373.22400821, + 1959.28188858, + 1076.99295365, + 2029.25100261, + 1835.52188141, + 1170.03850556, + 739.42628394, + 1866.65810627, + 3703.49786503, + 1719.45232017, + 258.90206528, + 219.42363944, + 609.90212377, + 1618.44207239, + 2323.2775272, + 3251.78732274, + 1430.63989981, + -180.57028875, + -592.84497457, ]), 'column2': np.array([ - 180.2425623, 192.35609972, 150.24830291, 156.62123653, - 173.80311908, 191.0922843, 163.22252158, 190.73280428, - 158.52982435, 163.0101334, 205.24904026, 175.42916046, - 208.31821984, 178.98351969, 160.50981075, 163.19294974, - 173.30395132, 215.18996298, 164.71141696, 178.84973821, - 182.99902513, 217.5796917, 201.56983421, 174.92272693 + 180.2425623, + 192.35609972, + 150.24830291, + 156.62123653, + 173.80311908, + 191.0922843, + 163.22252158, + 190.73280428, + 158.52982435, + 163.0101334, + 205.24904026, + 175.42916046, + 208.31821984, + 178.98351969, + 160.50981075, + 163.19294974, + 173.30395132, + 215.18996298, + 164.71141696, + 178.84973821, + 182.99902513, + 217.5796917, + 201.56983421, + 174.92272693, ]), 'column3': np.array([ - -1.42432446, -0.14759864, 0.66476302, -0.04061445, 0.64305762, - 1.79615407, 0.70450457, -0.05886671, -0.36794788, 1.39331262, - 0.39792831, 0.0676313, -0.96761759, 0.67286132, -0.55013279, - -0.53118328, 1.23969655, -0.35985016, -0.03568531, 0.91456357, - 0.49077378, -0.27428204, 0.45857406, 2.29614033 - ]) + -1.42432446, + -0.14759864, + 0.66476302, + -0.04061445, + 0.64305762, + 1.79615407, + 0.70450457, + -0.05886671, + -0.36794788, + 1.39331262, + 0.39792831, + 0.0676313, + -0.96761759, + 0.67286132, + -0.55013279, + -0.53118328, + 1.23969655, + -0.35985016, + -0.03568531, + 0.91456357, + 0.49077378, + -0.27428204, + 0.45857406, + 2.29614033, + ]), }) def test__transform_to_normal_numpy_1d(self): @@ -97,11 +151,7 @@ def test__transform_to_normal_numpy_2d(self): # Failures may occurr on different cpytonn implementations # with different float precision values. # If that happens, atol might need to be increased - expected = np.array([ - [-5.166579, -0.524401], - [0.0, 0.0], - [5.166579, 0.524401] - ]) + expected = np.array([[-5.166579, -0.524401], [0.0, 0.0], [5.166579, 0.524401]]) np.testing.assert_allclose(returned, expected, atol=1e-6) assert dist_a.cdf.call_count == 1 @@ -158,21 +208,14 @@ def test__transform_to_normal_dataframe(self): gm.univariates = [dist_a, dist_b] # Run - data = pd.DataFrame({ - 'a': [3, 4, 5], - 'b': [5, 6, 7] - }) + data = pd.DataFrame({'a': [3, 4, 5], 'b': [5, 6, 7]}) returned = gm._transform_to_normal(data) # Check # Failures may occurr on different cpytonn implementations # with different float precision values. # If that happens, atol might need to be increased - expected = np.array([ - [-5.166579, -0.524401], - [0.0, 0.0], - [5.166579, 0.524401] - ]) + expected = np.array([[-5.166579, -0.524401], [0.0, 0.0], [5.166579, 0.524401]]) np.testing.assert_allclose(returned, expected, atol=1e-6) assert dist_a.cdf.call_count == 1 @@ -192,9 +235,9 @@ def test__get_correlation(self): copula.fit(self.data) expected_correlation = np.array([ - [1., -0.01261819, -0.19821644], - [-0.01261819, 1., -0.16896087], - [-0.19821644, -0.16896087, 1.] + [1.0, -0.01261819, -0.19821644], + [-0.01261819, 1.0, -0.16896087], + [-0.19821644, -0.16896087, 1.0], ]) # Run @@ -242,24 +285,31 @@ def test_fit_distribution_selector(self): On fit, it should use the correct distributions for those that are specified and default to using the base class otherwise. """ - copula = GaussianMultivariate(distribution={ - 'column1': 'copulas.univariate.beta.BetaUnivariate', - 'column2': 'copulas.univariate.gaussian_kde.GaussianKDE', - }) + copula = GaussianMultivariate( + distribution={ + 'column1': 'copulas.univariate.beta.BetaUnivariate', + 'column2': 'copulas.univariate.gaussian_kde.GaussianKDE', + } + ) copula.fit(self.data) - assert get_qualified_name( - copula.univariates[0].__class__) == 'copulas.univariate.beta.BetaUnivariate' - assert get_qualified_name( - copula.univariates[1].__class__) == 'copulas.univariate.gaussian_kde.GaussianKDE' - assert get_qualified_name( - copula.univariates[2].__class__) == 'copulas.univariate.base.Univariate' + assert ( + get_qualified_name(copula.univariates[0].__class__) + == 'copulas.univariate.beta.BetaUnivariate' + ) + assert ( + get_qualified_name(copula.univariates[1].__class__) + == 'copulas.univariate.gaussian_kde.GaussianKDE' + ) + assert ( + get_qualified_name(copula.univariates[2].__class__) + == 'copulas.univariate.base.Univariate' + ) def test_fit_numpy_array(self): - """Fit should work indistinctly with numpy arrays and pandas dataframes """ + """Fit should work indistinctly with numpy arrays and pandas dataframes""" # Setup - copula = GaussianMultivariate( - distribution='copulas.univariate.gaussian.GaussianUnivariate') + copula = GaussianMultivariate(distribution='copulas.univariate.gaussian.GaussianUnivariate') # Run copula.fit(self.data.to_numpy()) @@ -305,7 +355,7 @@ def test_probability_density(self): # Setup copula = GaussianMultivariate(GaussianUnivariate) copula.fit(self.data) - X = np.array([2000., 200., 0.]) + X = np.array([2000.0, 200.0, 0.0]) expected_result = 0.032245296420409846 # Run @@ -319,7 +369,7 @@ def test_cumulative_distribution_fit_df_call_np_array(self): # Setup copula = GaussianMultivariate(GaussianUnivariate) copula.fit(self.data) - X = np.array([2000., 200., 1.]) + X = np.array([2000.0, 200.0, 1.0]) expected_result = 0.4550595153746892 # Run @@ -333,7 +383,7 @@ def test_cumulative_distribution_fit_call_np_array(self): # Setup copula = GaussianMultivariate(GaussianUnivariate) copula.fit(self.data.to_numpy()) - X = np.array([2000., 200., 1.]) + X = np.array([2000.0, 200.0, 1.0]) expected_result = 0.4550595153746892 # Run @@ -347,7 +397,7 @@ def test_cumulative_distribution_fit_call_pd(self): # Setup copula = GaussianMultivariate(GaussianUnivariate) copula.fit(self.data.to_numpy()) - X = np.array([2000., 200., 1.]) + X = np.array([2000.0, 200.0, 1.0]) expected_result = 0.4550595153746892 # Run @@ -366,7 +416,7 @@ def test_sample(self, normal_mock): {'A': 30, 'B': 60, 'C': 250}, {'A': 10, 'B': 65, 'C': 350}, {'A': 20, 'B': 80, 'C': 150}, - {'A': 25, 'B': 70, 'C': 500} + {'A': 25, 'B': 70, 'C': 500}, ]) instance.fit(data) @@ -375,7 +425,7 @@ def test_sample(self, normal_mock): [0.2, 0.2, 0.2], [0.4, 0.4, 0.4], [0.6, 0.6, 0.6], - [0.8, 0.8, 0.8] + [0.8, 0.8, 0.8], ]) expected_result = pd.DataFrame([ @@ -383,7 +433,7 @@ def test_sample(self, normal_mock): {'A': 23.356465996625055, 'B': 71.41421356237309, 'C': 298.7054001888146}, {'A': 24.712931993250110, 'B': 72.82842712474618, 'C': 327.4108003776293}, {'A': 26.069397989875164, 'B': 74.24264068711929, 'C': 356.116200566444}, - {'A': 27.425863986500215, 'B': 75.65685424949238, 'C': 384.8216007552586} + {'A': 27.425863986500215, 'B': 75.65685424949238, 'C': 384.8216007552586}, ]) # Run @@ -393,13 +443,9 @@ def test_sample(self, normal_mock): assert result.equals(expected_result) np.testing.assert_array_equal( - normal_mock.call_args[0][0], - np.zeros(instance.correlation.shape[0]) - ) - np.testing.assert_array_equal( - normal_mock.call_args[0][1], - instance.correlation + normal_mock.call_args[0][0], np.zeros(instance.correlation.shape[0]) ) + np.testing.assert_array_equal(normal_mock.call_args[0][1], instance.correlation) assert normal_mock.call_args[1] == {'size': 5} def test_sample_random_state(self): @@ -411,7 +457,7 @@ def test_sample_random_state(self): {'A': 30, 'B': 60, 'C': 250}, {'A': 10, 'B': 65, 'C': 350}, {'A': 20, 'B': 80, 'C': 150}, - {'A': 25, 'B': 70, 'C': 500} + {'A': 25, 'B': 70, 'C': 500}, ]) instance.fit(data) @@ -421,9 +467,9 @@ def test_sample_random_state(self): [31.50262306, 49.70971698, 429.06537124], [20.31636799, 64.3492326, 384.27561823], [25.00302427, 72.06019812, 415.85215123], - [23.07525773, 66.70901743, 390.8226672] + [23.07525773, 66.70901743, 390.8226672], ]), - columns=['A', 'B', 'C'] + columns=['A', 'B', 'C'], ) # Run @@ -477,12 +523,7 @@ def test_sample_constant_column(self): """ # Setup instance = GaussianMultivariate() - X = np.array([ - [1.0, 2.0], - [1.0, 3.0], - [1.0, 4.0], - [1.0, 5.0] - ]) + X = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 4.0], [1.0, 5.0]]) instance.fit(X) # Run @@ -502,20 +543,18 @@ def test_sample_constant_column(self): def test__get_conditional_distribution(self): gm = GaussianMultivariate() - gm.correlation = pd.DataFrame({ - 'a': [1, 0.2, 0.3], - 'b': [0.2, 1, 0.4], - 'c': [0.3, 0.4, 1], - }, index=['a', 'b', 'c']) - - conditions = pd.Series({ - 'b': 1 - }) + gm.correlation = pd.DataFrame( + { + 'a': [1, 0.2, 0.3], + 'b': [0.2, 1, 0.4], + 'c': [0.3, 0.4, 1], + }, + index=['a', 'b', 'c'], + ) + + conditions = pd.Series({'b': 1}) means, covariance, columns = gm._get_conditional_distribution(conditions) np.testing.assert_allclose(means, [0.2, 0.4]) - np.testing.assert_allclose(covariance, [ - [0.96, 0.22], - [0.22, 0.84] - ]) + np.testing.assert_allclose(covariance, [[0.96, 0.22], [0.22, 0.84]]) assert columns.tolist() == ['a', 'c'] diff --git a/tests/unit/multivariate/test_tree.py b/tests/unit/multivariate/test_tree.py index 53aff016..19ee0071 100644 --- a/tests/unit/multivariate/test_tree.py +++ b/tests/unit/multivariate/test_tree.py @@ -14,16 +14,11 @@ class TestTree(TestCase): - @pytest.mark.skipif(sys.version_info > (3, 8), reason='Fails on py38. To be reviewed.') def test_to_dict_fit_model(self): # Setup instance = get_tree(TreeTypes.REGULAR) - X = pd.DataFrame(data=[ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]) + X = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) index = 0 n_nodes = X.shape[1] tau_matrix = X.corr(method='kendall').to_numpy() @@ -43,12 +38,12 @@ def test_to_dict_fit_model(self): 'previous_tree': [ [0.8230112726144534, 0.3384880496294825, 0.3384880496294825], [0.3384880496294825, 0.8230112726144534, 0.3384880496294825], - [0.3384880496294825, 0.3384880496294825, 0.8230112726144534] + [0.3384880496294825, 0.3384880496294825, 0.8230112726144534], ], 'tau_matrix': [ [1.0, -0.49999999999999994, -0.49999999999999994], [-0.49999999999999994, 1.0, -0.49999999999999994], - [-0.49999999999999994, -0.49999999999999994, 1.0] + [-0.49999999999999994, -0.49999999999999994, 1.0], ], 'tree_type': TreeTypes.REGULAR, 'edges': [ @@ -59,14 +54,14 @@ def test_to_dict_fit_model(self): 'R': 1, 'U': [ [0.7969636014074211, 0.6887638642325501, 0.12078520049364487], - [0.6887638642325501, 0.7969636014074211, 0.12078520049364487] + [0.6887638642325501, 0.7969636014074211, 0.12078520049364487], ], 'likelihood': None, 'name': CopulaTypes.FRANK, 'neighbors': [], 'parents': None, 'tau': -0.49999999999999994, - 'theta': -5.736282443655552 + 'theta': -5.736282443655552, }, { 'index': 1, @@ -75,15 +70,15 @@ def test_to_dict_fit_model(self): 'R': 2, 'U': [ [0.12078520049364491, 0.7969636014074213, 0.6887638642325501], - [0.12078520049364491, 0.6887638642325503, 0.7969636014074211] + [0.12078520049364491, 0.6887638642325503, 0.7969636014074211], ], 'likelihood': None, 'name': CopulaTypes.FRANK, 'neighbors': [], 'parents': None, 'tau': -0.49999999999999994, - 'theta': -5.736282443655552 - } + 'theta': -5.736282443655552, + }, ], } @@ -95,10 +90,7 @@ def test_to_dict_fit_model(self): def test_from_dict_unfitted_model(self): # Setup - params = { - 'tree_type': TreeTypes.REGULAR, - 'fitted': False - } + params = {'tree_type': TreeTypes.REGULAR, 'fitted': False} # Run result = Tree.from_dict(params) @@ -120,11 +112,7 @@ def test_serialization_unfitted_model(self): def test_serialization_fit_model(self): # Setup instance = get_tree(TreeTypes.REGULAR) - X = pd.DataFrame(data=[ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]) + X = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) index = 0 n_nodes = X.shape[1] tau_matrix = X.corr(method='kendall').to_numpy() @@ -149,10 +137,7 @@ def test_prepare_next_tree_first_level(self, bivariate_mock): # Setup instance = get_tree(TreeTypes.REGULAR) instance.level = 1 - instance.u_matrix = np.array([ - [0.1, 0.2], - [0.3, 0.4] - ]) + instance.u_matrix = np.array([[0.1, 0.2], [0.3, 0.4]]) edge = MagicMock(spec=Edge) edge.L = 0 @@ -166,13 +151,13 @@ def test_prepare_next_tree_first_level(self, bivariate_mock): expected_univariate = np.array([ [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON], - [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON] + [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON], ]) flipped_u_matrix = instance.u_matrix[:, [1, 0]] expected_partial_derivative_call_args = [ ((instance.u_matrix,), {}), - ((flipped_u_matrix,), {}) + ((flipped_u_matrix,), {}), ] # Run @@ -185,8 +170,7 @@ def test_prepare_next_tree_first_level(self, bivariate_mock): assert copula_mock.theta == 'copula_theta' compare_nested_iterables( - copula_mock.partial_derivative.call_args_list, - expected_partial_derivative_call_args + copula_mock.partial_derivative.call_args_list, expected_partial_derivative_call_args ) @patch('copulas.multivariate.tree.Edge.get_conditional_uni') @@ -206,25 +190,19 @@ def test_prepare_next_tree_regular_level(self, bivariate_mock, conditional_mock) copula_mock = bivariate_mock.return_value copula_mock.partial_derivative.return_value = np.array([0.0, 0.25, 0.5, 0.75, 1.0]) - conditional_mock.return_value = ( - ['left_u_1', 'left_u_2'], - ['right_u_1', 'right_u_2'] - ) + conditional_mock.return_value = (['left_u_1', 'left_u_2'], ['right_u_1', 'right_u_2']) expected_univariate = np.array([ [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON], - [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON] + [EPSILON, 0.25, 0.50, 0.75, 1 - EPSILON], ]) - conditional_univariates = np.array([ - ['left_u_1', 'right_u_1'], - ['left_u_2', 'right_u_2'] - ]) + conditional_univariates = np.array([['left_u_1', 'right_u_1'], ['left_u_2', 'right_u_2']]) flipped_conditional_univariates = conditional_univariates[:, [1, 0]] expected_partial_derivative_call_args = [ ((conditional_univariates,), {}), - ((flipped_conditional_univariates,), {}) + ((flipped_conditional_univariates,), {}), ] # Run @@ -239,8 +217,7 @@ def test_prepare_next_tree_regular_level(self, bivariate_mock, conditional_mock) assert copula_mock.theta == 'copula_theta' compare_nested_iterables( - copula_mock.partial_derivative.call_args_list, - expected_partial_derivative_call_args + copula_mock.partial_derivative.call_args_list, expected_partial_derivative_call_args ) @@ -268,10 +245,10 @@ def test_first_tree_likelihood(self): """Assert first tree likehood is correct.""" uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - value, new_u = self.tree.get_likelihood(uni_matrix) + value, _new_u = self.tree.get_likelihood(uni_matrix) expected = -0.19988720707143634 - assert abs(value - expected) < 10E-3 + assert abs(value - expected) < 10e-3 def test_get_constraints(self): """Assert get constraint gets correct neighbor nodes.""" @@ -305,7 +282,7 @@ def test_second_tree_likelihood(self): first_tree = get_tree(TreeTypes.CENTER) first_tree.fit(0, 4, tau_mat, u_matrix) uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix) + _likelihood_first_tree, conditional_uni_first = first_tree.get_likelihood(uni_matrix) tau = first_tree.get_tau_matrix() # Build second tree @@ -314,7 +291,7 @@ def test_second_tree_likelihood(self): expected_likelihood_second_tree = 0.4888802429313932 # Run - likelihood_second_tree, out_u = second_tree.get_likelihood(conditional_uni_first) + likelihood_second_tree, _out_u = second_tree.get_likelihood(conditional_uni_first) # Check assert compare_values_epsilon(likelihood_second_tree, expected_likelihood_second_tree) @@ -336,7 +313,7 @@ def setUp(self): self.tree.fit(0, 4, self.tau_mat, self.u_matrix) def test_first_tree(self): - """ Assert the construction of first tree is correct + """Assert the construction of first tree is correct The first tree should be: 1 0--2--3 @@ -352,23 +329,23 @@ def test_first_tree(self): @pytest.mark.xfail def test_first_tree_likelihood(self): - """ Assert first tree likehood is correct""" + """Assert first tree likehood is correct""" uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - value, new_u = self.tree.get_likelihood(uni_matrix) + value, _new_u = self.tree.get_likelihood(uni_matrix) expected = 0.9545348664739628 - assert abs(value - expected) < 10E-3 + assert abs(value - expected) < 10e-3 def test_get_constraints(self): - """ Assert get constraint gets correct neighbor nodes""" + """Assert get constraint gets correct neighbor nodes""" self.tree._get_constraints() assert self.tree.edges[0].neighbors == [1, 2] assert self.tree.edges[1].neighbors == [0, 2] def test_get_tau_matrix(self): - """ Assert second tree likelihood is correct """ + """Assert second tree likelihood is correct""" self.tau = self.tree.get_tau_matrix() test = np.isnan(self.tau) @@ -382,8 +359,8 @@ def test_second_tree_likelihood(self): second_tree.fit(1, 3, tau, self.tree) uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - first_value, new_u = self.tree.get_likelihood(uni_matrix) - second_value, out_u = second_tree.get_likelihood(new_u) + _first_value, new_u = self.tree.get_likelihood(uni_matrix) + _second_value, _out_u = second_tree.get_likelihood(new_u) # assert second_value < 0 @@ -404,21 +381,21 @@ def setUp(self): self.tree.fit(0, 4, self.tau_mat, self.u_matrix) def test_first_tree(self): - """ Assert 0 is the center node""" + """Assert 0 is the center node""" assert self.tree.edges[0].L == 0 @pytest.mark.xfail def test_first_tree_likelihood(self): - """ Assert first tree likehood is correct""" + """Assert first tree likehood is correct""" uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - value, new_u = self.tree.get_likelihood(uni_matrix) + value, _new_u = self.tree.get_likelihood(uni_matrix) expected = -0.1207611551427385 - assert abs(value - expected) < 10E-3 + assert abs(value - expected) < 10e-3 def test_get_constraints(self): - """ Assert get constraint gets correct neighbor nodes""" + """Assert get constraint gets correct neighbor nodes""" self.tree._get_constraints() assert self.tree.edges[0].neighbors == [1] @@ -454,11 +431,11 @@ def test_second_tree_likelihood(self): uni_matrix = np.array([[0.1, 0.2, 0.3, 0.4]]) - first_value, new_u = self.tree.get_likelihood(uni_matrix) - second_value, out_u = second_tree.get_likelihood(new_u) + _first_value, new_u = self.tree.get_likelihood(uni_matrix) + second_value, _out_u = second_tree.get_likelihood(new_u) expected = 0.24428294700258632 - assert abs(second_value - expected) < 10E-3 + assert abs(second_value - expected) < 10e-3 class TestEdge(TestCase): @@ -529,21 +506,15 @@ def test_get_conditional_uni(self, adjacent_mock): """get_conditional_uni return the corresponding univariate adjacent to the parents.""" # Setup left = Edge(None, 1, 2, None, None) - left.U = np.array([ - ['left_0_0', 'left_0_1'], - ['left_1_0', 'left_1_1'] - ]) + left.U = np.array([['left_0_0', 'left_0_1'], ['left_1_0', 'left_1_1']]) right = Edge(None, 4, 2, None, None) - right.U = np.array([ - ['right_0_0', 'right_0_1'], - ['right_1_0', 'right_1_1'] - ]) + right.U = np.array([['right_0_0', 'right_0_1'], ['right_1_0', 'right_1_1']]) adjacent_mock.return_value = (0, 1, None) expected_result = ( np.array(['left_1_0', 'left_1_1']), - np.array(['right_1_0', 'right_1_1']) + np.array(['right_1_0', 'right_1_1']), ) # Run @@ -586,7 +557,7 @@ def test_to_dict(self): 'likelihood': None, 'neighbors': [], 'parents': None, - 'tau': None + 'tau': None, } # Run @@ -609,7 +580,7 @@ def test_from_dict(self): 'likelihood': None, 'neighbors': [1], 'parents': None, - 'tau': None + 'tau': None, } # Run @@ -647,11 +618,7 @@ def test_get_likelihood_no_parents(self, bivariate_mock): copula_theta = 'copula_theta' instance = Edge(index, left, right, copula_name, copula_theta) - univariates = np.array([ - [0.25, 0.75], - [0.50, 0.50], - [0.75, 0.25] - ]).T + univariates = np.array([[0.25, 0.75], [0.50, 0.50], [0.75, 0.25]]).T instance_mock = bivariate_mock.return_value instance_mock.probability_density.return_value = [0] @@ -665,12 +632,7 @@ def test_get_likelihood_no_parents(self, bivariate_mock): ] ]) - array2 = np.array([ - [ - [0.50, 0.50], - [0.25, 0.75] - ] - ]) + array2 = np.array([[[0.50, 0.50], [0.25, 0.75]]]) expected_partial_derivative_call_args = [((array1,), {}), ((array2,), {})] @@ -687,8 +649,7 @@ def test_get_likelihood_no_parents(self, bivariate_mock): assert instance_mock.theta == 'copula_theta' compare_nested_iterables( - instance_mock.partial_derivative.call_args_list, - expected_partial_derivative_call_args + instance_mock.partial_derivative.call_args_list, expected_partial_derivative_call_args ) @patch('copulas.multivariate.tree.Bivariate', autospec=True) @@ -709,11 +670,7 @@ def test_get_likelihood_with_parents(self, bivariate_mock): parent_2 = MagicMock(spec=Edge) parent_2.D = {0, 2, 3} - univariates = np.array([ - [0.25, 0.75], - [0.50, 0.50], - [0.75, 0.25] - ]).T + univariates = np.array([[0.25, 0.75], [0.50, 0.50], [0.75, 0.25]]).T instance_mock = bivariate_mock.return_value instance_mock.probability_density.return_value = [0] @@ -726,12 +683,7 @@ def test_get_likelihood_with_parents(self, bivariate_mock): ] ]) - array2 = np.array([ - [ - [0.50, 0.50], - [0.25, 0.75] - ] - ]) + array2 = np.array([[[0.50, 0.50], [0.25, 0.75]]]) expected_partial_derivative_call_args = [((array1,), {}), ((array2,), {})] @@ -748,6 +700,5 @@ def test_get_likelihood_with_parents(self, bivariate_mock): assert instance_mock.theta == 'copula_theta' compare_nested_iterables( - instance_mock.partial_derivative.call_args_list, - expected_partial_derivative_call_args + instance_mock.partial_derivative.call_args_list, expected_partial_derivative_call_args ) diff --git a/tests/unit/multivariate/test_vine.py b/tests/unit/multivariate/test_vine.py index a5d5c054..79e8d2fc 100644 --- a/tests/unit/multivariate/test_vine.py +++ b/tests/unit/multivariate/test_vine.py @@ -12,32 +12,86 @@ class TestVine(TestCase): - def setUp(self): data = pd.DataFrame({ 'column1': np.array([ - 2641.16233666, 921.14476418, -651.32239137, 1223.63536668, - 3233.37342355, 1373.22400821, 1959.28188858, 1076.99295365, - 2029.25100261, 1835.52188141, 1170.03850556, 739.42628394, - 1866.65810627, 3703.49786503, 1719.45232017, 258.90206528, - 219.42363944, 609.90212377, 1618.44207239, 2323.2775272, - 3251.78732274, 1430.63989981, -180.57028875, -592.84497457, + 2641.16233666, + 921.14476418, + -651.32239137, + 1223.63536668, + 3233.37342355, + 1373.22400821, + 1959.28188858, + 1076.99295365, + 2029.25100261, + 1835.52188141, + 1170.03850556, + 739.42628394, + 1866.65810627, + 3703.49786503, + 1719.45232017, + 258.90206528, + 219.42363944, + 609.90212377, + 1618.44207239, + 2323.2775272, + 3251.78732274, + 1430.63989981, + -180.57028875, + -592.84497457, ]), 'column2': np.array([ - 180.2425623, 192.35609972, 150.24830291, 156.62123653, - 173.80311908, 191.0922843, 163.22252158, 190.73280428, - 158.52982435, 163.0101334, 205.24904026, 175.42916046, - 208.31821984, 178.98351969, 160.50981075, 163.19294974, - 173.30395132, 215.18996298, 164.71141696, 178.84973821, - 182.99902513, 217.5796917, 201.56983421, 174.92272693 + 180.2425623, + 192.35609972, + 150.24830291, + 156.62123653, + 173.80311908, + 191.0922843, + 163.22252158, + 190.73280428, + 158.52982435, + 163.0101334, + 205.24904026, + 175.42916046, + 208.31821984, + 178.98351969, + 160.50981075, + 163.19294974, + 173.30395132, + 215.18996298, + 164.71141696, + 178.84973821, + 182.99902513, + 217.5796917, + 201.56983421, + 174.92272693, ]), 'column3': np.array([ - -1.42432446, -0.14759864, 0.66476302, -0.04061445, 0.64305762, - 1.79615407, 0.70450457, -0.05886671, -0.36794788, 1.39331262, - 0.39792831, 0.0676313, -0.96761759, 0.67286132, -0.55013279, - -0.53118328, 1.23969655, -0.35985016, -0.03568531, 0.91456357, - 0.49077378, -0.27428204, 0.45857406, 2.29614033 - ]) + -1.42432446, + -0.14759864, + 0.66476302, + -0.04061445, + 0.64305762, + 1.79615407, + 0.70450457, + -0.05886671, + -0.36794788, + 1.39331262, + 0.39792831, + 0.0676313, + -0.96761759, + 0.67286132, + -0.55013279, + -0.53118328, + 1.23969655, + -0.35985016, + -0.03568531, + 0.91456357, + 0.49077378, + -0.27428204, + 0.45857406, + 2.29614033, + ]), }) self.rvine = VineCopula(TreeTypes.REGULAR) @@ -55,15 +109,15 @@ def test_get_likelihood(self): # FIX ME: there is some randomness in rvine, will do another test rvalue = self.rvine.get_likelihood(uni_matrix) expected = -0.26888124854583245 - assert abs(rvalue - expected) < 10E-3 + assert abs(rvalue - expected) < 10e-3 cvalue = self.cvine.get_likelihood(uni_matrix) expected = -0.27565584158521045 - assert abs(cvalue - expected) < 10E-3 + assert abs(cvalue - expected) < 10e-3 dvalue = self.dvine.get_likelihood(uni_matrix) expected = -0.27565584158521045 - assert abs(dvalue - expected) < 10E-3 + assert abs(dvalue - expected) < 10e-3 def test_serialization_unfitted_model(self): """An unfitted vine can be serialized and deserialized and kept unchanged.""" @@ -80,11 +134,7 @@ def test_serialization_fit_model(self): """A fitted vine can be serialized and deserialized and kept unchanged.""" # Setup instance = VineCopula('regular') - X = pd.DataFrame(data=[ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]) + X = pd.DataFrame(data=[[1, 0, 0], [0, 1, 0], [0, 0, 1]]) instance.fit(X) # Run @@ -100,12 +150,9 @@ def test_sample_row(self, uniform_mock, randint_mock): """After being fit, a vine can sample new data.""" # Setup instance = VineCopula(TreeTypes.REGULAR) - X = pd.DataFrame([ - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1] - ], columns=list('ABCD')) + X = pd.DataFrame( + [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], columns=list('ABCD') + ) instance.fit(X) uniform_mock.return_value = np.array([0.1, 0.25, 0.5, 0.75]) @@ -126,12 +173,9 @@ def test_sample(self, sample_mock): """After being fit, a vine can sample new data.""" # Setup vine = VineCopula(TreeTypes.REGULAR) - X = pd.DataFrame([ - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1] - ], columns=list('ABCD')) + X = pd.DataFrame( + [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], columns=list('ABCD') + ) vine.fit(X) expected_result = pd.DataFrame([ @@ -156,18 +200,10 @@ def test_sample_random_state(self): """When random_state is set, the generated samples are always the same.""" # Setup vine = VineCopula(TreeTypes.REGULAR, random_state=0) - X = pd.DataFrame([ - [1, 0, 0, 0], - [0, 1, 0, 0], - [0, 0, 1, 0], - [0, 0, 0, 1] - ]) + X = pd.DataFrame([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]) vine.fit(X) - expected_result = pd.DataFrame( - [[0.101933, 0.527734, 0.080266, 0.078328]], - columns=range(4) - ) + expected_result = pd.DataFrame([[0.101933, 0.527734, 0.080266, 0.078328]], columns=range(4)) # Run result = vine.sample(1) diff --git a/tests/unit/optimize/test___init__.py b/tests/unit/optimize/test___init__.py index d45e2814..47de5a6b 100644 --- a/tests/unit/optimize/test___init__.py +++ b/tests/unit/optimize/test___init__.py @@ -6,7 +6,6 @@ class TestOptimize(TestCase): - def test_uniform(self): """Find the zero of a line.""" N = 100 @@ -14,6 +13,7 @@ def test_uniform(self): def _f(x): return x - target + for optimizer in [bisect, chandrupatla]: with self.subTest(optimizer=optimizer): x = optimizer(_f, np.zeros(shape=N), np.ones(shape=N)) @@ -21,8 +21,10 @@ def _f(x): def test_polynomial(self): """Find the zero of a polynomial.""" + def _f(x): return np.power(x - 10.0, 3.0) + for optimizer in [bisect, chandrupatla]: with self.subTest(optimizer=optimizer): x = optimizer(_f, np.array([0.0]), np.array([100.0])) diff --git a/tests/unit/test___init__.py b/tests/unit/test___init__.py index fd861328..2d9a4d68 100644 --- a/tests/unit/test___init__.py +++ b/tests/unit/test___init__.py @@ -9,13 +9,18 @@ import copulas from copulas import ( - _find_addons, check_valid_values, get_instance, random_state, scalarize, validate_random_state, - vectorize) + _find_addons, + check_valid_values, + get_instance, + random_state, + scalarize, + validate_random_state, + vectorize, +) from copulas.multivariate import GaussianMultivariate class TestVectorize(TestCase): - def test_1d_array(self): """When applied to a function it allows it to work with 1-d vectors.""" # Setup @@ -27,15 +32,13 @@ def test_1d_array(self): vector = np.array([1, 2, 3]) args = ['positional', 'arguments'] - kwargs = { - 'keyword': 'arguments' - } + kwargs = {'keyword': 'arguments'} expected_result = np.ones(3) expected_function_call_args_list = [ ((instance, 1, 'positional', 'arguments'), {'keyword': 'arguments'}), ((instance, 2, 'positional', 'arguments'), {'keyword': 'arguments'}), - ((instance, 3, 'positional', 'arguments'), {'keyword': 'arguments'}) + ((instance, 3, 'positional', 'arguments'), {'keyword': 'arguments'}), ] # Run Decorator @@ -72,15 +75,13 @@ def test_2d_array(self): [7, 8, 9], ]) args = ['positional', 'arguments'] - kwargs = { - 'keyword': 'arguments' - } + kwargs = {'keyword': 'arguments'} expected_result = np.ones(3) expected_function_call_args_list = [ ((instance, 1, 2, 3, 'positional', 'arguments'), {'keyword': 'arguments'}), ((instance, 4, 5, 6, 'positional', 'arguments'), {'keyword': 'arguments'}), - ((instance, 7, 8, 9, 'positional', 'arguments'), {'keyword': 'arguments'}) + ((instance, 7, 8, 9, 'positional', 'arguments'), {'keyword': 'arguments'}), ] # Run Decorator @@ -106,9 +107,7 @@ def test_raises_valueerror(self): """If given an array of dimensionality higher than 2 a ValueError is raised.""" # Setup function = MagicMock() - X = np.array([ - [[1, 2, 3]] - ]) + X = np.array([[[1, 2, 3]]]) instance = MagicMock() args = () kwargs = {} @@ -123,7 +122,6 @@ def test_raises_valueerror(self): class TestScalarize(TestCase): - def test_decorator(self): """When applied to a function it allows it to work with scalars.""" # Setup @@ -133,9 +131,7 @@ def test_decorator(self): instance = MagicMock() args = ['positional', 'arguments'] - kwargs = { - 'keyword': 'arguments' - } + kwargs = {'keyword': 'arguments'} expected_result = 'return_value' @@ -159,14 +155,10 @@ def test_decorator(self): class TestCheckValidValues(TestCase): - def test_check_valid_values_raises_valuerror_if_nans(self): """check_valid_values raises a ValueError if is given data with nans.""" # Setup - X = np.array([ - [1.0, np.nan], - [0.0, 1.0] - ]) + X = np.array([[1.0, np.nan], [0.0, 1.0]]) instance_mock = MagicMock() function_mock = MagicMock() @@ -185,10 +177,7 @@ def test_check_valid_values_raises_valuerror_if_nans(self): def test_check_valid_values_raises_valueerror_if_not_numeric(self): """check_valid_values raises a ValueError if is given data with non numeric values.""" # Setup - X = np.array([ - [1.0, 'A'], - [0.0, 1.0] - ]) + X = np.array([[1.0, 'A'], [0.0, 1.0]]) instance_mock = MagicMock() function_mock = MagicMock() @@ -225,7 +214,6 @@ def test_check_valid_values_raises_valueerror_empty_dataset(self): class TestRandomStateDecorator(TestCase): - @patch('copulas.np.random') def test_valid_random_state(self, random_mock): """The decorated function use the random_state attribute if present.""" @@ -251,10 +239,11 @@ def test_valid_random_state(self, random_mock): instance.assert_not_called random_mock.get_state.assert_has_calls([call(), call()]) random_mock.get_state.call_count == 2 - random_mock.RandomState.assert_has_calls( - [call(), call().set_state('random state')]) - random_mock.set_state.assert_has_calls( - [call('desired random state'), call('random state')]) + random_mock.RandomState.assert_has_calls([call(), call().set_state('random state')]) + random_mock.set_state.assert_has_calls([ + call('desired random state'), + call('random state'), + ]) assert random_mock.set_state.call_count == 2 @patch('copulas.np.random') @@ -354,14 +343,14 @@ def test_validate_random_state_invalid(self): # Run with pytest.raises( - TypeError, - match=f'`random_state` {state} expected to be an int or ' - '`np.random.RandomState` object.'): + TypeError, + match=f'`random_state` {state} expected to be an int or ' + '`np.random.RandomState` object.', + ): validate_random_state(state) class TestGetInstance(TestCase): - def test_get_instance_str(self): """Try to get a new instance from a str""" # Run @@ -416,8 +405,7 @@ def test_get_instance_with_kwargs(self): """Try to get a new instance with kwargs""" # Run instance = get_instance( - GaussianMultivariate, - distribution='copulas.univariate.truncnorm.TruncNorm' + GaussianMultivariate, distribution='copulas.univariate.truncnorm.TruncNorm' ) # Asserts @@ -473,6 +461,7 @@ def test__find_addons_object(entry_points_mock, mock_copulas): @patch('copulas.entry_points') def test__find_addons_bad_addon(entry_points_mock, warning_mock): """Test failing to load an add-on generates a warning.""" + # Setup def entry_point_error(): raise ValueError() @@ -563,7 +552,7 @@ def test__find_addons_missing_object(entry_points_mock, warning_mock, mock_copul bad_entry_point = MagicMock() bad_entry_point.name = 'copulas.submodule:missing_object.new_method' entry_points_mock.return_value = [bad_entry_point] - msg = ("Failed to set 'copulas.submodule:missing_object.new_method': missing_object.") + msg = "Failed to set 'copulas.submodule:missing_object.new_method': missing_object." del mock_copulas.submodule.missing_object diff --git a/tests/unit/univariate/test_base.py b/tests/unit/univariate/test_base.py index e2d63a36..26fb89c1 100644 --- a/tests/unit/univariate/test_base.py +++ b/tests/unit/univariate/test_base.py @@ -15,7 +15,6 @@ class TestUnivariate: - def test__select_candidates(self): # Run candidates = Univariate._select_candidates() @@ -29,7 +28,7 @@ def test__select_candidates(self): GammaUnivariate, StudentTUnivariate, UniformUnivariate, - LogLaplace + LogLaplace, } def test__select_candidates_parametric(self): @@ -44,7 +43,7 @@ def test__select_candidates_parametric(self): GammaUnivariate, StudentTUnivariate, UniformUnivariate, - LogLaplace + LogLaplace, } def test__select_candidates_non_parametric(self): @@ -59,32 +58,21 @@ def test__select_candidates_bounded(self): candidates = Univariate._select_candidates(bounded=BoundedType.BOUNDED) # Assert - assert set(candidates) == { - TruncatedGaussian, - BetaUnivariate, - UniformUnivariate - } + assert set(candidates) == {TruncatedGaussian, BetaUnivariate, UniformUnivariate} def test__select_candidates_unbounded(self): # Run candidates = Univariate._select_candidates(bounded=BoundedType.UNBOUNDED) # Assert - assert set(candidates) == { - GaussianKDE, - GaussianUnivariate, - StudentTUnivariate - } + assert set(candidates) == {GaussianKDE, GaussianUnivariate, StudentTUnivariate} def test__select_candidates_semibounded(self): # Run candidates = Univariate._select_candidates(bounded=BoundedType.SEMI_BOUNDED) # Assert - assert set(candidates) == { - GammaUnivariate, - LogLaplace - } + assert set(candidates) == {GammaUnivariate, LogLaplace} def test_fit_constant(self): """If constant values, replace methods.""" diff --git a/tests/unit/univariate/test_beta.py b/tests/unit/univariate/test_beta.py index c5ebbad7..4bf30d3e 100644 --- a/tests/unit/univariate/test_beta.py +++ b/tests/unit/univariate/test_beta.py @@ -8,18 +8,12 @@ class TestBetaUnivariate(TestCase): - def test__fit_constant(self): distribution = BetaUnivariate() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'a': 1, - 'b': 1, - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'a': 1, 'b': 1, 'loc': 1, 'scale': 0} def test__fit(self): distribution = BetaUnivariate() @@ -27,12 +21,7 @@ def test__fit(self): data = beta.rvs(size=10000, a=1, b=1, loc=1, scale=1) distribution._fit(data) - expected = { - 'loc': 1, - 'scale': 1, - 'a': 1, - 'b': 1 - } + expected = {'loc': 1, 'scale': 1, 'a': 1, 'b': 1} for key, value in distribution._params.items(): np.testing.assert_allclose(value, expected[key], atol=0.3) @@ -52,12 +41,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = BetaUnivariate() - distribution._params = { - 'loc': 1, - 'scale': 1, - 'a': 1, - 'b': 1 - } + distribution._params = {'loc': 1, 'scale': 1, 'a': 1, 'b': 1} constant = distribution._extract_constant() diff --git a/tests/unit/univariate/test_gamma.py b/tests/unit/univariate/test_gamma.py index a686e386..df21ae2f 100644 --- a/tests/unit/univariate/test_gamma.py +++ b/tests/unit/univariate/test_gamma.py @@ -7,17 +7,12 @@ class TestGammaUnivariate(TestCase): - def test__fit_constant(self): distribution = GammaUnivariate() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'a': 0, - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'a': 0, 'loc': 1, 'scale': 0} def test__fit(self): distribution = GammaUnivariate() diff --git a/tests/unit/univariate/test_gaussian.py b/tests/unit/univariate/test_gaussian.py index 57a25492..801fecd6 100644 --- a/tests/unit/univariate/test_gaussian.py +++ b/tests/unit/univariate/test_gaussian.py @@ -7,16 +7,12 @@ class TestGaussianUnivariate(TestCase): - def test__fit_constant(self): distribution = GaussianUnivariate() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'loc': 1, 'scale': 0} def test__fit(self): distribution = GaussianUnivariate() @@ -45,10 +41,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = GaussianUnivariate() - distribution._params = { - 'loc': 1, - 'scale': 0 - } + distribution._params = {'loc': 1, 'scale': 0} constant = distribution._extract_constant() diff --git a/tests/unit/univariate/test_gaussian_kde.py b/tests/unit/univariate/test_gaussian_kde.py index 283de849..92675006 100644 --- a/tests/unit/univariate/test_gaussian_kde.py +++ b/tests/unit/univariate/test_gaussian_kde.py @@ -15,13 +15,10 @@ class TestGaussianKDE(TestCase): - def test__get_model_no_sample_size(self): self = MagicMock() self._sample_size = None - self._params = { - 'dataset': np.array([1, 2, 3, 4, 5]) - } + self._params = {'dataset': np.array([1, 2, 3, 4, 5])} self.bw_method = None self.weights = None model = GaussianKDE._get_model(self) @@ -33,9 +30,7 @@ def test__get_model_no_sample_size(self): def test__get_model_sample_size(self): self = MagicMock() self._sample_size = 3 - self._params = { - 'dataset': np.array([1, 2, 3, 4, 5]) - } + self._params = {'dataset': np.array([1, 2, 3, 4, 5])} self.bw_method = None self.weights = None model = GaussianKDE._get_model(self) @@ -46,9 +41,7 @@ def test__get_model_sample_size(self): def test__get_bounds(self): self = MagicMock() - self._params = { - 'dataset': np.array([1, 2, 3, 4, 5]) - } + self._params = {'dataset': np.array([1, 2, 3, 4, 5])} lower, upper = GaussianKDE._get_bounds(self) @@ -110,12 +103,14 @@ def test_cumulative_distribution(self): instance = GaussianKDE() instance.fit(np.array([0.9, 1.0, 1.1])) - cdf = instance.cumulative_distribution(np.array([ - 0.0, # There is no data below this (cdf = 0.0). - 1.0, # Half the data is below this (cdf = 0.5). - 2.0, # All the data is below this (cdf = 1.0). - -1.0 # There is no data below this (cdf = 0). - ])) + cdf = instance.cumulative_distribution( + np.array([ + 0.0, # There is no data below this (cdf = 0.0). + 1.0, # Half the data is below this (cdf = 0.5). + 2.0, # All the data is below this (cdf = 1.0). + -1.0, # There is no data below this (cdf = 0). + ]) + ) assert np.all(np.isclose(cdf, np.array([0.0, 0.5, 1.0, 0.0]), atol=1e-3)) @@ -149,7 +144,7 @@ def test_percent_point_invalid_value(self): error_msg = r'Expected values in range \[0.0, 1.0\].' with pytest.raises(ValueError, match=error_msg): - instance.percent_point(np.array([2.])) + instance.percent_point(np.array([2.0])) def test_percent_point_invertibility(self): instance = GaussianKDE() diff --git a/tests/unit/univariate/test_log_laplace.py b/tests/unit/univariate/test_log_laplace.py index b9de86f3..80381159 100644 --- a/tests/unit/univariate/test_log_laplace.py +++ b/tests/unit/univariate/test_log_laplace.py @@ -7,17 +7,12 @@ class TestLogLaplaceUnivariate(TestCase): - def test__fit_constant(self): distribution = LogLaplace() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'c': 2, - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'c': 2, 'loc': 1, 'scale': 0} def test__fit(self): distribution = LogLaplace() @@ -49,11 +44,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = LogLaplace() - distribution._params = { - 'c': 2, - 'loc': 1, - 'scale': 0 - } + distribution._params = {'c': 2, 'loc': 1, 'scale': 0} constant = distribution._extract_constant() diff --git a/tests/unit/univariate/test_selection.py b/tests/unit/univariate/test_selection.py index b6921a37..8aef778d 100644 --- a/tests/unit/univariate/test_selection.py +++ b/tests/unit/univariate/test_selection.py @@ -77,11 +77,7 @@ def test_select_univariate_failures(get_instance_mock): """ gaussian_mock = Mock() gaussian_mock.fit.side_effect = Exception() - get_instance_mock.side_effect = [ - gaussian_mock, - BetaUnivariate(), - BetaUnivariate() - ] + get_instance_mock.side_effect = [gaussian_mock, BetaUnivariate(), BetaUnivariate()] normal_data = np.random.normal(size=1000) candidates = [GaussianUnivariate, BetaUnivariate] diff --git a/tests/unit/univariate/test_student_t.py b/tests/unit/univariate/test_student_t.py index 24e90099..a98e4003 100644 --- a/tests/unit/univariate/test_student_t.py +++ b/tests/unit/univariate/test_student_t.py @@ -7,7 +7,6 @@ class TestStudentTUnivariate(TestCase): - def test__fit(self): distribution = StudentTUnivariate() @@ -38,11 +37,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = StudentTUnivariate() - distribution._params = { - 'df': 1, - 'loc': 1, - 'scale': 0 - } + distribution._params = {'df': 1, 'loc': 1, 'scale': 0} constant = distribution._extract_constant() diff --git a/tests/unit/univariate/test_truncated_gaussian.py b/tests/unit/univariate/test_truncated_gaussian.py index 31e07663..8107de92 100644 --- a/tests/unit/univariate/test_truncated_gaussian.py +++ b/tests/unit/univariate/test_truncated_gaussian.py @@ -9,18 +9,12 @@ class TestTruncatedGaussian(TestCase): - def test__fit_constant(self): distribution = TruncatedGaussian() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'a': 1, - 'b': 1, - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'a': 1, 'b': 1, 'loc': 1, 'scale': 0} def test__fit(self): distribution = TruncatedGaussian() @@ -28,24 +22,17 @@ def test__fit(self): data = truncnorm.rvs(size=10000, a=0, b=3, loc=3, scale=1) distribution._fit(data) - expected = { - 'loc': 3, - 'scale': 1, - 'a': 0, - 'b': 3 - } + expected = {'loc': 3, 'scale': 1, 'a': 0, 'b': 3} for key, value in distribution._params.items(): np.testing.assert_allclose(value, expected[key], atol=0.3) @patch('copulas.univariate.truncated_gaussian.fmin_slsqp') def test__fit_silences_warnings(self, mocked_wrapper): """Test the ``_fit`` method does not emit RuntimeWarnings.""" + # Setup def mock_fmin_sqlsqp(*args, **kwargs): - warnings.warn( - message='Runtime Warning occured!', - category=RuntimeWarning - ) + warnings.warn(message='Runtime Warning occured!', category=RuntimeWarning) return 0, 1 mocked_wrapper.side_effect = mock_fmin_sqlsqp @@ -74,12 +61,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = TruncatedGaussian() - distribution._params = { - 'a': 1, - 'b': 1, - 'loc': 1, - 'scale': 0 - } + distribution._params = {'a': 1, 'b': 1, 'loc': 1, 'scale': 0} constant = distribution._extract_constant() diff --git a/tests/unit/univariate/test_uniform.py b/tests/unit/univariate/test_uniform.py index aedffc1b..ca81d896 100644 --- a/tests/unit/univariate/test_uniform.py +++ b/tests/unit/univariate/test_uniform.py @@ -7,15 +7,11 @@ class TestUniformUnivariate(TestCase): - def test__fit_constant(self): distribution = UniformUnivariate() distribution._fit_constant(np.array([1, 1, 1, 1])) - assert distribution._params == { - 'loc': 1, - 'scale': 0 - } + assert distribution._params == {'loc': 1, 'scale': 0} def test__fit(self): distribution = UniformUnivariate() @@ -47,10 +43,7 @@ def test__is_constant_false(self): def test__extract_constant(self): distribution = UniformUnivariate() - distribution._params = { - 'loc': 1, - 'scale': 0 - } + distribution._params = {'loc': 1, 'scale': 0} constant = distribution._extract_constant()