Skip to content

Commit 8b04c94

Browse files
committed
fix: 124 solution
1 parent a642dba commit 8b04c94

File tree

5 files changed

+1053
-1072
lines changed

5 files changed

+1053
-1072
lines changed

data/clean/f_124_armel.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,9 @@ def f_124(text):
3030
words 1
3131
dtype: int64
3232
"""
33-
# Normalize the text to lowercase
34-
text = text.lower()
35-
36-
# Use regex to find words, considering words as sequences of alphabetic characters
37-
words = re.findall(r'\b\p{L}+\b', text)
38-
39-
# Filter out stopwords
40-
filtered_words = [word for word in words if word not in STOPWORDS]
41-
42-
# Count the frequency of each word using pandas Series
43-
word_counts = pd.Series(filtered_words).value_counts()
44-
33+
words = re.findall(r"\b\w+\b", text.lower())
34+
words = [word for word in words if word not in STOPWORDS]
35+
word_counts = pd.Series(words).value_counts().rename(None)
4536
return word_counts
4637

4738

data/processed/52_w_doc.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,9 @@ def task_func(text):
3030
words 1
3131
dtype: int64
3232
"""
33-
text = text.lower()
34-
words = re.findall(r'\b\p{L}+\b', text)
35-
filtered_words = [word for word in words if word not in STOPWORDS]
36-
word_counts = pd.Series(filtered_words).value_counts()
33+
words = re.findall(r"\b\w+\b", text.lower())
34+
words = [word for word in words if word not in STOPWORDS]
35+
word_counts = pd.Series(words).value_counts().rename(None)
3736
return word_counts
3837

3938
import unittest

data/raw/f_124_armel.py

Lines changed: 3 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -30,18 +30,9 @@ def f_124(text):
3030
words 1
3131
dtype: int64
3232
"""
33-
# Normalize the text to lowercase
34-
text = text.lower()
35-
36-
# Use regex to find words, considering words as sequences of alphabetic characters
37-
words = re.findall(r'\b\p{L}+\b', text)
38-
39-
# Filter out stopwords
40-
filtered_words = [word for word in words if word not in STOPWORDS]
41-
42-
# Count the frequency of each word using pandas Series
43-
word_counts = pd.Series(filtered_words).value_counts()
44-
33+
words = re.findall(r"\b\w+\b", text.lower())
34+
words = [word for word in words if word not in STOPWORDS]
35+
word_counts = pd.Series(words).value_counts().rename(None)
4536
return word_counts
4637

4738

0 commit comments

Comments
 (0)