Skip to content

Commit 1ad6ec2

Browse files
authored
Merge pull request #225 from DoubleML/s-restructure-doubleml
Restructure doubleml
2 parents 8431daf + ece1b45 commit 1ad6ec2

File tree

116 files changed

+1079
-837
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+1079
-837
lines changed

.github/workflows/codeql.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,18 @@ jobs:
2424

2525
steps:
2626
- name: Checkout
27-
uses: actions/checkout@v3
27+
uses: actions/checkout@v4
2828

2929
- name: Initialize CodeQL
30-
uses: github/codeql-action/init@v2
30+
uses: github/codeql-action/init@v3
3131
with:
3232
languages: ${{ matrix.language }}
3333
queries: +security-and-quality
3434

3535
- name: Autobuild
36-
uses: github/codeql-action/autobuild@v2
36+
uses: github/codeql-action/autobuild@v3
3737

3838
- name: Perform CodeQL Analysis
39-
uses: github/codeql-action/analyze@v2
39+
uses: github/codeql-action/analyze@v3
4040
with:
4141
category: "/language:${{ matrix.language }}"

.github/workflows/deploy_pkg.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ jobs:
1212
runs-on: ubuntu-latest
1313

1414
steps:
15-
- uses: actions/checkout@v3
15+
- uses: actions/checkout@v4
1616
with:
1717
persist-credentials: false
1818

1919
- name: Install python
20-
uses: actions/setup-python@v4
20+
uses: actions/setup-python@v5
2121
with:
2222
python-version: '3.8'
2323

@@ -32,7 +32,7 @@ jobs:
3232
pip install wheel
3333
python setup.py sdist bdist_wheel
3434
35-
- uses: actions/upload-artifact@v3
35+
- uses: actions/upload-artifact@v4
3636
with:
3737
name: DoubleML-pkg
3838
path: dist/

.github/workflows/pytest.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ jobs:
3030
- {os: 'ubuntu-latest', python-version: '3.11'}
3131

3232
steps:
33-
- uses: actions/checkout@v3
33+
- uses: actions/checkout@v4
3434
with:
3535
fetch-depth: 2
3636
- name: Set up Python ${{ matrix.config.python-version }}
37-
uses: actions/setup-python@v4
37+
uses: actions/setup-python@v5
3838
with:
3939
python-version: ${{ matrix.config.python-version }}
4040
- name: Install OpenMP runtime for unit tests with xgboost learners

doubleml/__init__.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,33 @@
11
from pkg_resources import get_distribution
22

3-
from .double_ml_plr import DoubleMLPLR
4-
from .double_ml_pliv import DoubleMLPLIV
5-
from .double_ml_irm import DoubleMLIRM
6-
from .double_ml_iivm import DoubleMLIIVM
3+
from .plm.plr import DoubleMLPLR
4+
from .plm.pliv import DoubleMLPLIV
5+
from .irm.irm import DoubleMLIRM
6+
from .irm.iivm import DoubleMLIIVM
77
from .double_ml_data import DoubleMLData, DoubleMLClusterData
8-
from .double_ml_blp import DoubleMLBLP
9-
from .double_ml_did import DoubleMLDID
10-
from .double_ml_did_cs import DoubleMLDIDCS
11-
from .double_ml_qte import DoubleMLQTE
12-
from .double_ml_pq import DoubleMLPQ
13-
from .double_ml_lpq import DoubleMLLPQ
14-
from .double_ml_cvar import DoubleMLCVAR
15-
from .double_ml_policytree import DoubleMLPolicyTree
8+
from .did.did import DoubleMLDID
9+
from .did.did_cs import DoubleMLDIDCS
10+
from .irm.qte import DoubleMLQTE
11+
from .irm.pq import DoubleMLPQ
12+
from .irm.lpq import DoubleMLLPQ
13+
from .irm.cvar import DoubleMLCVAR
14+
15+
from .utils.blp import DoubleMLBLP
16+
from .utils.policytree import DoubleMLPolicyTree
1617

1718
__all__ = ['DoubleMLPLR',
1819
'DoubleMLPLIV',
1920
'DoubleMLIRM',
2021
'DoubleMLIIVM',
2122
'DoubleMLData',
2223
'DoubleMLClusterData',
23-
'DoubleMLBLP',
2424
'DoubleMLDID',
2525
'DoubleMLDIDCS',
2626
'DoubleMLPQ',
2727
'DoubleMLQTE',
2828
'DoubleMLLPQ',
2929
'DoubleMLCVAR',
30+
'DoubleMLBLP',
3031
'DoubleMLPolicyTree']
3132

3233
__version__ = get_distribution('doubleml').version

doubleml/did/__init__.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
"""
2+
The :mod:`doubleml.did` module implements double machine learning estimates based on difference-in-differences models.
3+
"""
4+
5+
from .did import DoubleMLDID
6+
from .did_cs import DoubleMLDIDCS
7+
8+
__all__ = [
9+
"DoubleMLDID",
10+
"DoubleMLDIDCS",
11+
]

doubleml/double_ml_did.py renamed to doubleml/did/did.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from sklearn.utils.multiclass import type_of_target
44
import warnings
55

6-
from .double_ml import DoubleML
7-
from .double_ml_data import DoubleMLData
8-
from .double_ml_score_mixins import LinearScoreMixin
6+
from ..double_ml import DoubleML
7+
from ..double_ml_data import DoubleMLData
8+
from ..double_ml_score_mixins import LinearScoreMixin
99

10-
from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm
11-
from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
10+
from ..utils._estimation import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm
11+
from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
1212

1313

1414
class DoubleMLDID(LinearScoreMixin, DoubleML):
@@ -117,6 +117,8 @@ def __init__(self,
117117

118118
# set stratification for resampling
119119
self._strata = self._dml_data.d
120+
if draw_sample_splitting:
121+
self.draw_sample_splitting()
120122

121123
# check learners
122124
ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)

doubleml/double_ml_did_cs.py renamed to doubleml/did/did_cs.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from sklearn.utils.multiclass import type_of_target
44
import warnings
55

6-
from .double_ml import DoubleML
7-
from .double_ml_data import DoubleMLData
8-
from .double_ml_score_mixins import LinearScoreMixin
6+
from ..double_ml import DoubleML
7+
from ..double_ml_data import DoubleMLData
8+
from ..double_ml_score_mixins import LinearScoreMixin
99

10-
from ._utils import _dml_cv_predict, _trimm, _get_cond_smpls_2d, _dml_tune
11-
from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
10+
from ..utils._estimation import _dml_cv_predict, _trimm, _get_cond_smpls_2d, _dml_tune
11+
from ..utils._checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity
1212

1313

1414
class DoubleMLDIDCS(LinearScoreMixin, DoubleML):
@@ -117,6 +117,8 @@ def __init__(self,
117117

118118
# set stratification for resampling
119119
self._strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.t.reshape(-1, 1)
120+
if draw_sample_splitting:
121+
self.draw_sample_splitting()
120122

121123
# check learners
122124
ml_g_is_classifier = self._check_learner(ml_g, 'ml_g', regressor=True, classifier=True)

doubleml/did/tests/__init__.py

Whitespace-only changes.

doubleml/tests/_utils_did_cs_manual.py renamed to doubleml/did/tests/_utils_did_cs_manual.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
from sklearn.base import clone
33

4-
from ._utils import fit_predict, fit_predict_proba, tune_grid_search
4+
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
55
from ._utils_did_manual import did_dml1, did_dml2
66

77

doubleml/tests/_utils_did_manual.py renamed to doubleml/did/tests/_utils_did_manual.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import numpy as np
22
from sklearn.base import clone
33

4-
from ._utils_boot import boot_manual, draw_weights
5-
from ._utils import fit_predict, fit_predict_proba, tune_grid_search
4+
from ...tests._utils_boot import boot_manual, draw_weights
5+
from ...tests._utils import fit_predict, fit_predict_proba, tune_grid_search
66

77

88
def fit_did(y, x, d,

doubleml/did/tests/conftest.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import numpy as np
2+
import pytest
3+
4+
from doubleml.datasets import make_did_SZ2020
5+
6+
7+
@pytest.fixture(scope='session',
8+
params=[(500, 1),
9+
(1000, 1),
10+
(1000, 2)])
11+
def generate_data_did(request):
12+
params = request.param
13+
np.random.seed(1111)
14+
# setting parameters
15+
n = params[0]
16+
dpg = params[1]
17+
18+
# generating data
19+
data = make_did_SZ2020(n, dgp_type=dpg, return_type='array')
20+
21+
return data
22+
23+
24+
@pytest.fixture(scope='session',
25+
params=[(500, 1),
26+
(1000, 1),
27+
(1000, 2)])
28+
def generate_data_did_cs(request):
29+
params = request.param
30+
np.random.seed(1111)
31+
# setting parameters
32+
n = params[0]
33+
dpg = params[1]
34+
35+
# generating data
36+
data = make_did_SZ2020(n, dgp_type=dpg, cross_sectional_data=True, return_type='array')
37+
38+
return data

doubleml/tests/test_did.py renamed to doubleml/did/tests/test_did.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import doubleml as dml
1111

12-
from ._utils import draw_smpls
12+
from ...tests._utils import draw_smpls
1313
from ._utils_did_manual import fit_did, boot_did, fit_sensitivity_elements_did
1414

1515

@@ -122,14 +122,14 @@ def dml_did_fixture(generate_data_did, learner, score, in_sample_normalization,
122122

123123
@pytest.mark.ci
124124
def test_dml_did_coef(dml_did_fixture):
125-
assert math.isclose(dml_did_fixture['coef'],
125+
assert math.isclose(dml_did_fixture['coef'][0],
126126
dml_did_fixture['coef_manual'],
127127
rel_tol=1e-9, abs_tol=1e-4)
128128

129129

130130
@pytest.mark.ci
131131
def test_dml_did_se(dml_did_fixture):
132-
assert math.isclose(dml_did_fixture['se'],
132+
assert math.isclose(dml_did_fixture['se'][0],
133133
dml_did_fixture['se_manual'],
134134
rel_tol=1e-9, abs_tol=1e-4)
135135

@@ -189,8 +189,8 @@ def test_dml_did_experimental(generate_data_did, in_sample_normalization, learne
189189
score='experimental',
190190
in_sample_normalization=in_sample_normalization)
191191
dml_did_obj_with_ml_m.fit()
192-
assert math.isclose(dml_did_obj_with_ml_m.coef,
193-
dml_did_obj_without_ml_m.coef,
192+
assert math.isclose(dml_did_obj_with_ml_m.coef[0],
193+
dml_did_obj_without_ml_m.coef[0],
194194
rel_tol=1e-9, abs_tol=1e-4)
195195

196196
msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '

doubleml/tests/test_did_cs.py renamed to doubleml/did/tests/test_did_cs.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import doubleml as dml
1111

12-
from ._utils import draw_smpls
12+
from ...tests._utils import draw_smpls
1313
from ._utils_did_cs_manual import fit_did_cs, fit_sensitivity_elements_did_cs
1414
from ._utils_did_manual import boot_did
1515

@@ -63,7 +63,8 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
6363

6464
np.random.seed(3141)
6565
n_obs = len(y)
66-
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d)
66+
67+
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)
6768
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
6869

6970
np.random.seed(3141)
@@ -122,14 +123,14 @@ def dml_did_cs_fixture(generate_data_did_cs, learner, score, in_sample_normaliza
122123

123124
@pytest.mark.ci
124125
def test_dml_did_cs_coef(dml_did_cs_fixture):
125-
assert math.isclose(dml_did_cs_fixture['coef'],
126+
assert math.isclose(dml_did_cs_fixture['coef'][0],
126127
dml_did_cs_fixture['coef_manual'],
127128
rel_tol=1e-9, abs_tol=1e-4)
128129

129130

130131
@pytest.mark.ci
131132
def test_dml_did_cs_se(dml_did_cs_fixture):
132-
assert math.isclose(dml_did_cs_fixture['se'],
133+
assert math.isclose(dml_did_cs_fixture['se'][0],
133134
dml_did_cs_fixture['se_manual'],
134135
rel_tol=1e-9, abs_tol=1e-4)
135136

@@ -189,8 +190,8 @@ def test_dml_did_cs_experimental(generate_data_did_cs, in_sample_normalization,
189190
score='experimental',
190191
in_sample_normalization=in_sample_normalization)
191192
dml_did_obj_with_ml_m.fit()
192-
assert math.isclose(dml_did_obj_with_ml_m.coef,
193-
dml_did_obj_without_ml_m.coef,
193+
assert math.isclose(dml_did_obj_with_ml_m.coef[0],
194+
dml_did_obj_without_ml_m.coef[0],
194195
rel_tol=1e-9, abs_tol=1e-4)
195196

196197
msg = ('A learner ml_m has been provided for score = "experimental" but will be ignored. '

doubleml/tests/test_did_cs_tune.py renamed to doubleml/did/tests/test_did_cs_tune.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
import doubleml as dml
1111

12-
from ._utils import draw_smpls
12+
from ...tests._utils import draw_smpls
1313
from ._utils_did_manual import boot_did
1414
from ._utils_did_cs_manual import fit_did_cs, tune_nuisance_did_cs
1515

@@ -77,15 +77,22 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
7777
ml_g = clone(learner_g)
7878
ml_m = clone(learner_m)
7979

80+
n_obs = len(y)
81+
all_smpls = draw_smpls(n_obs, n_folds, n_rep=1, groups=d+2*t)
82+
8083
np.random.seed(3141)
8184
obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d, t=t)
8285
dml_did_cs_obj = dml.DoubleMLDIDCS(obj_dml_data,
8386
ml_g, ml_m,
8487
n_folds,
8588
score=score,
8689
in_sample_normalization=in_sample_normalization,
87-
dml_procedure=dml_procedure)
90+
dml_procedure=dml_procedure,
91+
draw_sample_splitting=False)
92+
# synchronize the sample splitting
93+
dml_did_cs_obj.set_sample_splitting(all_smpls=all_smpls)
8894

95+
np.random.seed(3141)
8996
# tune hyperparameters
9097
tune_res = dml_did_cs_obj.tune(par_grid, tune_on_folds=tune_on_folds,
9198
n_folds_tune=n_folds_tune,
@@ -95,8 +102,6 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
95102
dml_did_cs_obj.fit()
96103

97104
np.random.seed(3141)
98-
n_obs = len(y)
99-
all_smpls = draw_smpls(n_obs, n_folds)
100105
smpls = all_smpls[0]
101106

102107
if tune_on_folds:
@@ -152,14 +157,14 @@ def dml_did_cs_fixture(generate_data_did_cs, learner_g, learner_m, score, in_sam
152157

153158
@pytest.mark.ci
154159
def test_dml_did_cs_coef(dml_did_cs_fixture):
155-
assert math.isclose(dml_did_cs_fixture['coef'],
160+
assert math.isclose(dml_did_cs_fixture['coef'][0],
156161
dml_did_cs_fixture['coef_manual'],
157162
rel_tol=1e-9, abs_tol=1e-4)
158163

159164

160165
@pytest.mark.ci
161166
def test_dml_did_cs_se(dml_did_cs_fixture):
162-
assert math.isclose(dml_did_cs_fixture['se'],
167+
assert math.isclose(dml_did_cs_fixture['se'][0],
163168
dml_did_cs_fixture['se_manual'],
164169
rel_tol=1e-9, abs_tol=1e-4)
165170

doubleml/tests/test_did_external_predictions.py renamed to doubleml/did/tests/test_did_external_predictions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from doubleml import DoubleMLDID
66
from doubleml.datasets import make_did_SZ2020
77
from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier
8-
from ._utils import draw_smpls
8+
from ...tests._utils import draw_smpls
99

1010

1111
@pytest.fixture(scope="module", params=["observational", "experimental"])

0 commit comments

Comments
 (0)