Skip to content

Commit

Permalink
fix: 124 solution
Browse files (browse the repository at this point in the history)
  • Loading branch information
terryyz committed May 11, 2024
1 parent a642dba commit 8b04c94
Show file tree
Hide file tree
Showing 5 changed files with 1,053 additions and 1,072 deletions.
15 changes: 3 additions & 12 deletions data/clean/f_124_armel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,18 +30,9 @@ def f_124(text):
words 1
dtype: int64
"""
# Normalize the text to lowercase
text = text.lower()

# Use regex to find words, considering words as sequences of alphabetic characters
words = re.findall(r'\b\p{L}+\b', text)

# Filter out stopwords
filtered_words = [word for word in words if word not in STOPWORDS]

# Count the frequency of each word using pandas Series
word_counts = pd.Series(filtered_words).value_counts()

words = re.findall(r"\b\w+\b", text.lower())
words = [word for word in words if word not in STOPWORDS]
word_counts = pd.Series(words).value_counts().rename(None)
return word_counts


Expand Down
7 changes: 3 additions & 4 deletions data/processed/52_w_doc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,10 +30,9 @@ def task_func(text):
words 1
dtype: int64
"""
text = text.lower()
words = re.findall(r'\b\p{L}+\b', text)
filtered_words = [word for word in words if word not in STOPWORDS]
word_counts = pd.Series(filtered_words).value_counts()
words = re.findall(r"\b\w+\b", text.lower())
words = [word for word in words if word not in STOPWORDS]
word_counts = pd.Series(words).value_counts().rename(None)
return word_counts

import unittest
Expand Down
15 changes: 3 additions & 12 deletions data/raw/f_124_armel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,18 +30,9 @@ def f_124(text):
words 1
dtype: int64
"""
# Normalize the text to lowercase
text = text.lower()

# Use regex to find words, considering words as sequences of alphabetic characters
words = re.findall(r'\b\p{L}+\b', text)

# Filter out stopwords
filtered_words = [word for word in words if word not in STOPWORDS]

# Count the frequency of each word using pandas Series
word_counts = pd.Series(filtered_words).value_counts()

words = re.findall(r"\b\w+\b", text.lower())
words = [word for word in words if word not in STOPWORDS]
word_counts = pd.Series(words).value_counts().rename(None)
return word_counts


Expand Down
Loading

0 comments on commit 8b04c94

Please sign in to comment.