Skip to content

Commit

Permalink
stage-02 base file
Browse files Browse the repository at this point in the history
  • Loading branch information
shivpalSW committed Jul 13, 2023
1 parent bc19c56 commit c0d028a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 1 deletion.
4 changes: 4 additions & 0 deletions configs/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ artifacts:
PREPARED_DATA: prepared
TRAIN_DATA: train.tsv
TEST_DATA: test.tsv
FEATURIZED_DATA: features
FEATURIZED_OUT_TRAIN: train.pkl
FEATURIZED_OUT_TEST: test.pkl


source_data:
data_dir : data
Expand Down
2 changes: 1 addition & 1 deletion src/stage_00_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import random


STAGE = "STAGE_NAME" ## <<< change stage name
STAGE = "Templates Stage"## <<< change stage name

logging.basicConfig(
filename=os.path.join("logs", 'running_logs.log'),
Expand Down
48 changes: 48 additions & 0 deletions src/stage_02_featurization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import argparse
import os
import shutil
from tqdm import tqdm
import logging
from src.utils.common import read_yaml, create_directories, get_df


# Stage label interpolated into the "started"/"completed" log messages
# emitted by the script entry point.
STAGE = "Two"

# logging.basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError if its parent directory is missing, so make sure the
# directory exists before configuring logging.
os.makedirs("logs", exist_ok=True)

logging.basicConfig(
    filename=os.path.join("logs", 'running_logs.log'),
    level=logging.INFO,
    format="[%(asctime)s: %(levelname)s: %(module)s]: %(message)s",
    filemode="a"  # append, so earlier entries in the log file are kept
)

def main(config_path, params_path):
    """Resolve the file locations used by the featurization stage.

    Loads both YAML files with ``read_yaml``, builds the train/test input
    paths under the prepared-data directory and the train/test output paths
    under the featurized-data directory, and hands the featurized-data
    directory to ``create_directories``.

    The computed paths and the loaded params are not consumed yet; this
    function currently performs no featurization and returns ``None``.

    Args:
        config_path: Path to the YAML config file; it must provide an
            ``artifacts`` mapping with the keys read below.
        params_path: Path to the YAML params file.
    """
    config = read_yaml(config_path)
    # Loaded up front but not read anywhere in this function yet.
    params = read_yaml(params_path)

    artifacts = config["artifacts"]
    artifacts_root = artifacts["ARTIFACTS_DIR"]

    # Inputs: train/test files inside the prepared-data directory.
    prepared_dir = os.path.join(artifacts_root, artifacts["PREPARED_DATA"])
    train_data_path = os.path.join(prepared_dir, artifacts["TRAIN_DATA"])
    test_data_path = os.path.join(prepared_dir, artifacts["TEST_DATA"])

    # Outputs: the featurized-data directory is passed to create_directories
    # before the output file paths inside it are assembled.
    featurized_dir = os.path.join(artifacts_root, artifacts["FEATURIZED_DATA"])
    create_directories([featurized_dir])

    featurized_train_data_path = os.path.join(
        featurized_dir, artifacts["FEATURIZED_OUT_TRAIN"]
    )
    featurized_test_data_path = os.path.join(
        featurized_dir, artifacts["FEATURIZED_OUT_TEST"]
    )



if __name__ == '__main__':
    # Command-line options as (long flag, short flag, default value); both
    # are optional and fall back to the defaults listed here.
    cli_options = (
        ("--config", "-c", "configs/config.yaml"),
        ("--params", "-p", "params.yaml"),
    )
    parser = argparse.ArgumentParser()
    for long_flag, short_flag, fallback in cli_options:
        parser.add_argument(long_flag, short_flag, default=fallback)
    parsed = parser.parse_args()

    try:
        logging.info("\n********************")
        logging.info(f">>>>> stage {STAGE} started <<<<<")
        main(config_path=parsed.config, params_path=parsed.params)
        logging.info(f">>>>> stage {STAGE} completed!<<<<<\n")
    except Exception as err:
        # Write the traceback to the log, then let the failure propagate.
        logging.exception(err)
        raise err

0 comments on commit c0d028a

Please sign in to comment.