Skip to content

Commit

Permalink
Close #192.
Browse files: browse the repository at this point in the history
  • Loading branch information
xehu committed Apr 10, 2024
1 parent ce0e662 commit 768d818
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
2 changes: 1 addition & 1 deletion feature_engine/featurize.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# )
# feature_builder.featurize(col="message")

# # Tiny multi-task
# Tiny multi-task
# tiny_multi_task_feature_builder = FeatureBuilder(
# input_file_path = "../feature_engine/tpm-data/cleaned_data/test_data/multi_task_TINY.csv",
# vector_directory = "../feature_engine/tpm-data/vector_data/",
Expand Down
4 changes: 4 additions & 0 deletions feature_engine/utils/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ def assert_key_columns_present(df):
# Assert that key columns are present
if {'conversation_num', 'message', 'speaker_nickname'}.issubset(df.columns):
print("Confirmed that data has `conversation_num`, `message`, and `speaker_nickname` columns!")
# ensure no NA's in essential columns
df['message'] = df['message'].fillna('')
df['conversation_num'] = df['conversation_num'].fillna(0)
df['speaker_nickname'] = df['speaker_nickname'].fillna(0)
else:
print("One of `conversation_num`, `message`, or `speaker_nickname` is missing! Raising error...")
print("Columns available: ")
Expand Down

0 comments on commit 768d818

Please sign in to comment.