From bc36daeb8591cd84c59db5863a573a399295ea79 Mon Sep 17 00:00:00 2001 From: shivpalSW Date: Thu, 13 Jul 2023 17:37:25 +0530 Subject: [PATCH] stage -1-dvc updated --- artifacts/prepared/.gitignore | 2 ++ dvc.lock | 31 +++++++++++++++++++++++++++++++ dvc.yaml | 22 +++++++++++++++------- 3 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 artifacts/prepared/.gitignore create mode 100644 dvc.lock diff --git a/artifacts/prepared/.gitignore b/artifacts/prepared/.gitignore new file mode 100644 index 0000000..4478cdd --- /dev/null +++ b/artifacts/prepared/.gitignore @@ -0,0 +1,2 @@ +/train.tsv +/test.tsv diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..0d46e9f --- /dev/null +++ b/dvc.lock @@ -0,0 +1,31 @@ +schema: '2.0' +stages: + prepare_data: + cmd: python src/stage_01_prepare.py --config=configs/config.yaml --params=params.yaml + deps: + - path: configs/config.yaml + md5: ef78262a4a0652b7f26da188dec74419 + size: 230 + - path: data/data.xml + md5: a304afb96060aad90176268345e10355 + size: 37891850 + - path: src/stage_01_prepare.py + md5: 47a1121c82f1e79ff3004abb9c4d5d4d + size: 2096 + - path: src/utils/common.py + md5: 48b676b0d599169fc5e054b0829a5a4f + size: 519 + - path: src/utils/data_mgmt.py + md5: c985e579010a81c5462ebf8184649fe4 + size: 871 + params: + params.yaml: + prepare.seed: 2021 + prepare.split: 0.2 + outs: + - path: artifacts/prepared/test.tsv + md5: 60c757f3b30604473de7fa775dfc00d9 + size: 4899778 + - path: artifacts/prepared/train.tsv + md5: 553034c4cf40efc63c99c19fe98610d0 + size: 18986541 diff --git a/dvc.yaml b/dvc.yaml index ad80635..d1e53f7 100644 --- a/dvc.yaml +++ b/dvc.yaml @@ -1,8 +1,16 @@ # add stages here -# stages: -# stage_name: -# cmd: python src/stage_01______.py -# deps: -# - src/stage_01______.py -# outs: -# - output_data_of_file_path \ No newline at end of file +stages: + prepare_data: + cmd: python src/stage_01_prepare.py --config=configs/config.yaml --params=params.yaml + deps: + - src/stage_01_prepare.py + - data/data.xml + - src/utils/common.py + - src/utils/data_mgmt.py + - configs/config.yaml + params: + - prepare.seed + - prepare.split + outs: + - artifacts/prepared/train.tsv + - artifacts/prepared/test.tsv \ No newline at end of file