diff --git a/artifacts/model/.gitignore b/artifacts/model/.gitignore new file mode 100644 index 0000000..0cb31b2 --- /dev/null +++ b/artifacts/model/.gitignore @@ -0,0 +1 @@ +/model.pkl diff --git a/dvc.lock b/dvc.lock index 0ee0b6a..046f478 100644 --- a/dvc.lock +++ b/dvc.lock @@ -4,8 +4,8 @@ stages: cmd: python src/stage_01_prepare.py --config=configs/config.yaml --params=params.yaml deps: - path: configs/config.yaml - md5: c76f754cebca2dcd58bed9c403bbf7ee - size: 329 + md5: 508d0ae144ca6fa812180f6d0057a3f2 + size: 374 - path: data/data.xml md5: a304afb96060aad90176268345e10355 size: 37891850 @@ -39,8 +39,8 @@ stages: md5: 553034c4cf40efc63c99c19fe98610d0 size: 18986541 - path: configs/config.yaml - md5: c76f754cebca2dcd58bed9c403bbf7ee - size: 329 + md5: 508d0ae144ca6fa812180f6d0057a3f2 + size: 374 - path: src/stage_02_featurization.py md5: e07203a587149c1f889134b58d8112a3 size: 2991 @@ -61,3 +61,27 @@ stages: - path: artifacts/features/train.pkl md5: ecc944089c3a29ca33e6cfd158f81b0d size: 10134795 + train: + cmd: python src/stage_03_train.py --config=configs/config.yaml --params=params.yaml + deps: + - path: artifacts/features/train.pkl + md5: ecc944089c3a29ca33e6cfd158f81b0d + size: 10134795 + - path: configs/config.yaml + md5: 508d0ae144ca6fa812180f6d0057a3f2 + size: 374 + - path: src/stage_03_train.py + md5: bb5dcfadc16299ec07fd19fd9ceba2fd + size: 2223 + - path: src/utils/common.py + md5: ebe79d372c0b5256c6dacc26b27a7acf + size: 840 + params: + params.yaml: + train.min_split: 16 + train.n_est: 100 + train.seed: 2021 + outs: + - path: artifacts/model/model.pkl + md5: 20ac77b4d7237b3cf0c6e676bdc7dbbc + size: 3558016 diff --git a/dvc.yaml b/dvc.yaml index 5f9fd26..94f39e2 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -29,4 +29,18 @@ stages: - featurize.ngrams outs: - artifacts/features/train.pkl - - artifacts/features/test.pkl \ No newline at end of file + - artifacts/features/test.pkl + + train: + cmd: python src/stage_03_train.py --config=configs/config.yaml --params=params.yaml + deps: + - src/stage_03_train.py + - artifacts/features/train.pkl + - src/utils/common.py + - configs/config.yaml + params: + - train.seed + - train.n_est + - train.min_split + outs: + - artifacts/model/model.pkl \ No newline at end of file