[ADD] Coverage calculation (#224)
* [ADD] Coverage calculation

* [Fix] Flake8

* [fix] rebase artifacts

* [Fix] smac reqs

* [Fix] Make traditional test robust

* [Fix] unit test

* [Fix] test_evaluate

* [Fix] Try more time for cross validation

* Fix mypy post rebase

* Fix unit test
franchuterivera authored Jun 2, 2021
1 parent f9fe056 commit 9e7d3e2
Showing 19 changed files with 426 additions and 209 deletions.
42 changes: 42 additions & 0 deletions .codecov.yml
@@ -0,0 +1,42 @@
#see https://github.com/codecov/support/wiki/Codecov-Yaml
codecov:
  notify:
    require_ci_to_pass: yes

coverage:
  precision: 2 # 2 = xx.xx%, 0 = xx%
  round: nearest # how coverage is rounded: down/up/nearest
  range: 10...90 # custom range of coverage colors from red -> yellow -> green
  status:
    # https://codecov.readme.io/v1.0/docs/commit-status
    project:
      default:
        against: auto
        target: 70% # specify the target coverage for each commit status
        threshold: 50% # allow this little decrease on project
        # https://github.com/codecov/support/wiki/Filtering-Branches
        # branches: master
        if_ci_failed: error
    # https://github.com/codecov/support/wiki/Patch-Status
    patch:
      default:
        against: auto
        target: 30% # specify the target "X%" coverage to hit
        threshold: 50% # allow this much decrease on patch
    changes: false

parsers:
  gcov:
    branch_detection:
      conditional: true
      loop: true
      macro: false
      method: false
  javascript:
    enable_partials: false

comment:
  layout: header, diff
  require_changes: false
  behavior: default # update if exists else create new
  branches: *
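The project status above gates the whole repository at a 70% target while the patch status only requires 30% on the changed lines. Before pushing changes to this file, the configuration can be sanity-checked against Codecov's YAML validation endpoint; a minimal Python sketch (assuming the requests package is installed, the https://codecov.io/validate endpoint is still available, and the script runs from the repository root):

    # Post .codecov.yml to Codecov's validator and print the verdict.
    import requests

    with open(".codecov.yml", "rb") as fh:
        response = requests.post("https://codecov.io/validate", data=fh.read())

    print(response.status_code)
    print(response.text)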
26 changes: 26 additions & 0 deletions .coveragerc
@@ -0,0 +1,26 @@
# .coveragerc to control coverage.py
[run]
branch = True

[report]
# Regexes for lines to exclude from consideration
exclude_lines =
    # Have to re-enable the standard pragma
    pragma: no cover

    # Don't complain about missing debug-only code:
    def __repr__
    if self\.debug

    # Don't complain if tests don't hit defensive assertion code:
    raise AssertionError
    raise NotImplementedError

    # Don't complain if non-runnable code isn't run:
    if 0:
    if __name__ == .__main__.:

ignore_errors = True

[html]
directory = coverage_html_report
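To make the exclusion rules concrete, here is a small standalone module (illustrative only, not part of the repository): under the configuration above, coverage.py does not count the __repr__ body or the __main__ guard as missed lines even if the test suite never executes them.

    # toy_greeter.py -- illustrative module, not part of autoPyTorch.
    class Greeter:
        def __init__(self, name: str) -> None:
            self.name = name

        def greet(self) -> str:
            return f"Hello, {self.name}!"

        def __repr__(self) -> str:  # excluded via the "def __repr__" pattern
            return f"Greeter({self.name!r})"


    if __name__ == "__main__":  # excluded via the "__main__" pattern
        print(Greeter("coverage").greet())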
7 changes: 5 additions & 2 deletions .github/workflows/pytest.yml
@@ -9,7 +9,10 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8]
-      fail-fast: false
+        include:
+          - python-version: 3.8
+            code-cov: true
+      fail-fast: false
       max-parallel: 2

     steps:
@@ -29,7 +32,7 @@ jobs:
         echo "::set-output name=BEFORE::$(git status --porcelain -b)"
     - name: Run tests
       run: |
-        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi
+        if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml --cov-config=.coveragerc'; fi
         python -m pytest --forked --durations=20 --timeout=600 --timeout-method=signal -v $codecov test
     - name: Check for files left behind by test
       if: ${{ always() }}
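Roughly the same invocation can be reproduced locally from Python (a sketch that assumes pytest plus the pytest-forked, pytest-timeout and pytest-cov plugins are installed and the working directory is the repository root):

    # Rough local equivalent of the CI command above; pytest.main accepts the same CLI flags.
    import pytest

    exit_code = pytest.main([
        "--forked", "--durations=20", "--timeout=600", "--timeout-method=signal", "-v",
        "--cov=autoPyTorch", "--cov-report=xml", "--cov-config=.coveragerc",
        "test",
    ])
    print("pytest exit code:", exit_code)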
7 changes: 5 additions & 2 deletions autoPyTorch/api/base_task.py
@@ -34,6 +34,7 @@
     STRING_TO_OUTPUT_TYPES,
     STRING_TO_TASK_TYPES,
 )
+from autoPyTorch.data.base_validator import BaseInputValidator
 from autoPyTorch.datasets.base_dataset import BaseDataset
 from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes
 from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager
@@ -203,6 +204,8 @@ def __init__(
         self._multiprocessing_context = 'fork'
         self._dask_client = SingleThreadedClient()

+        self.InputValidator: Optional[BaseInputValidator] = None
+
         self.search_space_updates = search_space_updates
         if search_space_updates is not None:
             if not isinstance(self.search_space_updates,
@@ -273,8 +276,8 @@ def get_search_space(self, dataset: BaseDataset = None) -> ConfigurationSpace:
                 include=self.include_components,
                 exclude=self.exclude_components,
                 search_space_updates=self.search_space_updates)
-        raise Exception("No search space initialised and no dataset passed. "
-                        "Can't create default search space without the dataset")
+        raise ValueError("No search space initialised and no dataset passed. "
+                         "Can't create default search space without the dataset")

     def _get_logger(self, name: str) -> PicklableClientLogger:
         """
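Two things happen in this file: an attribute is declared as Optional up front so type checkers know it may still be None, and the generic Exception is replaced by a specific ValueError that callers can catch deliberately. In isolation the pattern looks roughly like this (a toy sketch with made-up names, not the real autoPyTorch API):

    # Toy sketch -- the class and attribute names are illustrative only.
    from typing import Optional


    class ToyTask:
        def __init__(self) -> None:
            # Declared here so mypy knows the attribute exists and may be None
            # until it is populated later.
            self.validator: Optional[str] = None

        def get_search_space(self, dataset: Optional[str] = None) -> str:
            if dataset is not None:
                return f"search space built from {dataset}"
            raise ValueError("No search space initialised and no dataset passed.")


    task = ToyTask()
    try:
        task.get_search_space()
    except ValueError as err:  # narrower than a bare Exception, easier to handle precisely
        print(f"caught: {err}")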
5 changes: 4 additions & 1 deletion autoPyTorch/datasets/base_dataset.py
@@ -129,7 +129,10 @@ def __init__(
         if len(self.train_tensors) == 2 and self.train_tensors[1] is not None:
             self.output_type: str = type_of_target(self.train_tensors[1])

-            if STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS:
+            if (
+                self.output_type in STRING_TO_OUTPUT_TYPES
+                and STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS
+            ):
                 self.output_shape = len(np.unique(self.train_tensors[1]))
             else:
                 self.output_shape = self.train_tensors[1].shape[-1] if self.train_tensors[1].ndim > 1 else 1
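The new membership check guards the dictionary lookup for target types that have no entry in the mapping, falling back to the regression-style shape instead of raising a KeyError. A self-contained sketch with a toy mapping (not the real STRING_TO_OUTPUT_TYPES):

    # Toy illustration of the guard; the mapping here is made up, not autoPyTorch's.
    import numpy as np
    from sklearn.utils.multiclass import type_of_target

    TOY_OUTPUT_TYPES = {"binary": 1, "multiclass": 2}

    for y in (np.array([0, 1, 1, 0]), np.array([0.3, 1.7, 2.2])):
        output_type = type_of_target(y)  # 'binary', then 'continuous'
        if output_type in TOY_OUTPUT_TYPES:
            print(output_type, "-> classification: count unique labels")
        else:
            print(output_type, "-> not in the mapping: use the tensor's last dimension")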
5 changes: 4 additions & 1 deletion autoPyTorch/datasets/resampling_strategy.py
@@ -162,7 +162,10 @@ def stratified_k_fold_cross_validation(random_state: np.random.RandomState,
                                            indices: np.ndarray,
                                            **kwargs: Any
                                            ) -> List[Tuple[np.ndarray, np.ndarray]]:
-        cv = StratifiedKFold(n_splits=num_splits, random_state=random_state)
+
+        shuffle = kwargs.get('shuffle', True)
+        cv = StratifiedKFold(n_splits=num_splits, shuffle=shuffle,
+                             random_state=random_state if not shuffle else None)
         splits = list(cv.split(indices, kwargs["stratify"]))
         return splits

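A self-contained sketch of how the updated call is exercised (the toy data, fold count and shuffle value below are illustrative, not taken from the repository):

    # Toy data: 20 samples with two balanced classes so stratified folds are possible.
    import numpy as np
    from sklearn.model_selection import StratifiedKFold

    indices = np.arange(20)
    stratify = np.array([0, 1] * 10)

    shuffle = True  # mirrors the default picked up via kwargs.get('shuffle', True)
    cv = StratifiedKFold(n_splits=5, shuffle=shuffle,
                         random_state=np.random.RandomState(1) if not shuffle else None)
    splits = list(cv.split(indices, stratify))
    print(len(splits), [len(test) for _, test in splits])  # 5 folds, 4 test indices each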
12 changes: 0 additions & 12 deletions autoPyTorch/datasets/tabular_dataset.py
@@ -24,18 +24,6 @@
 )


-class Value2Index(object):
-    def __init__(self, values: list):
-        assert all(not (pd.isna(v)) for v in values)
-        self.values = {v: i for i, v in enumerate(values)}
-
-    def __getitem__(self, item: Any) -> int:
-        if pd.isna(item):
-            return 0
-        else:
-            return self.values[item] + 1
-
-
 class TabularDataset(BaseDataset):
     """
     Base class for datasets used in AutoPyTorch
Empty file.
153 changes: 0 additions & 153 deletions autoPyTorch/search_space/search_space.py

This file was deleted.

5 changes: 4 additions & 1 deletion setup.py
@@ -48,7 +48,10 @@
         "codecov",
         "pep8",
         "mypy",
-        "openml"
+        "openml",
+        "emcee",
+        "scikit-optimize",
+        "pyDOE",
     ],
     "examples": [
         "matplotlib",
2 changes: 2 additions & 0 deletions test/conftest.py
@@ -1,3 +1,4 @@
+import logging.handlers
 import os
 import re
 import shutil
@@ -299,6 +300,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'metrics_during_training': True,
         'split_id': 0,
         'backend': backend,
+        'logger_port': logging.handlers.DEFAULT_TCP_LOGGING_PORT,
     }
     backend.save_datamanager(datamanager)
     return fit_dictionary
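For context, the port wired into the fit dictionary comes straight from the standard library; a two-line check:

    # DEFAULT_TCP_LOGGING_PORT is the default port of logging's SocketHandler (9020 in CPython).
    import logging.handlers

    print(logging.handlers.DEFAULT_TCP_LOGGING_PORT)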