Skip to content

Commit

Permalink
stage -02 train data tfidf matrix saved
Browse files Browse the repository at this point in the history
  • Loading branch information
shivpalSW committed Jul 16, 2023
1 parent cee8e82 commit bfc8ef3
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/stage_02_featurization.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ def main(config_path, params_path):
# print(train_words[: 5])

bag_of_words = CountVectorizer(
- stop_words="enlish",
+ stop_words="english",
max_features= max_features,
ngram_range=(1,ngrams)
)

bag_of_words.fit(train_words)
- train_words_binary_matrix = bag_of_words..transform(train_words)
+ train_words_binary_matrix = bag_of_words.transform(train_words)

tfidf = TfidfTransformer(smooth_idf=False)
tfidf.fit(train_words_binary_matrix)
Expand Down

0 comments on commit bfc8ef3

Please sign in to comment.