Skip to content

Commit

Permalink
encoding & vect/bert paths
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhouhelena committed Apr 26, 2024
1 parent 17ad142 commit 006bb42
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
7 changes: 4 additions & 3 deletions feature_engine/feature_builder.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,6 +18,7 @@
import re
import numpy as np
from pathlib import Path
import time

# Imports from feature files and classes
# from utils.summarize_chat_level_features import *
Expand Down
Expand Up
@@ -92,9 +93,9 @@ def __init__(
df_type = df_type + "/cumulative/within_task/"
df_type = df_type + "/cumulative/"

file_name = re.findall("\/([^\/]+)$", self.input_file_path)[0]
self.vect_path = self.vector_directory + "sentence/" + df_type + "/"+ file_name
self.bert_path = self.vector_directory + "sentiment/" + df_type + "/"+ file_name
base_file_name = f"features_{int(time.time())}.csv"
self.vect_path = vector_directory + "sentence/" + ("turns" if self.turns else "chats") + "/" + base_file_name
self.bert_path = vector_directory + "sentiment/" + ("turns" if self.turns else "chats") + "/" + base_file_name
self.output_file_path_chat_level = re.sub('chat', 'turn', output_file_path_chat_level) if self.turns else output_file_path_chat_level

# Check + generate embeddings
Expand Down
11 changes: 9 additions & 2 deletions feature_engine/featurize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,11 +8,18 @@
# Importing the Feature Generating Class
from feature_builder import FeatureBuilder
import pandas as pd
import chardet

# Main Function
if __name__ == "__main__":
chat_df = pd.read_csv("../feature_engine/testing/data/cleaned_data/test_chat_level.csv")
conv_df = pd.read_csv("../feature_engine/testing/data/cleaned_data/test_conv_level.csv")
with open("../feature_engine/testing/data/cleaned_data/test_chat_level.csv", 'rb') as file:
chat_encoding = chardet.detect(file.read())

with open("../feature_engine/testing/data/cleaned_data/test_conv_level.csv", 'rb') as file:
conv_encoding = chardet.detect(file.read())

chat_df = pd.read_csv("../feature_engine/testing/data/cleaned_data/test_chat_level.csv", encoding=chat_encoding['encoding'])
conv_df = pd.read_csv("../feature_engine/testing/data/cleaned_data/test_conv_level.csv", encoding=conv_encoding['encoding'])

# Instantiating the Feature Generating Class
# Calling the "engine"/"driver" function of the FeatureBuilder class
Expand Down

0 comments on commit 006bb42

Please sign in to comment.