bigcode-project
diff --git a/data/clean/f_124_armel.py b/data/clean/f_124_armel.py
Lines changed: 3 additions & 12 deletions
diff --git a/data/processed/52_w_doc.py b/data/processed/52_w_doc.py
Lines changed: 3 additions & 4 deletions
diff --git a/data/raw/f_124_armel.py b/data/raw/f_124_armel.py
Lines changed: 3 additions & 12 deletions
@@ -30,18 +30,9 @@ def f_124(text):
     words       1
     dtype: int64
     """
-    # Normalize the text to lowercase
-    text = text.lower()
-    
-    # Use regex to find words, considering words as sequences of alphabetic characters
-    words = re.findall(r'\b\p{L}+\b', text)
-    
-    # Filter out stopwords
-    filtered_words = [word for word in words if word not in STOPWORDS]
-    
-    # Count the frequency of each word using pandas Series
-    word_counts = pd.Series(filtered_words).value_counts()
-    
+    words = re.findall(r"\b\w+\b", text.lower())
+    words = [word for word in words if word not in STOPWORDS]
+    word_counts = pd.Series(words).value_counts().rename(None)
     return word_counts
 
 
 
@@ -30,10 +30,9 @@ def task_func(text):
     words       1
     dtype: int64
     """
-    text = text.lower()
-    words = re.findall(r'\b\p{L}+\b', text)
-    filtered_words = [word for word in words if word not in STOPWORDS]
-    word_counts = pd.Series(filtered_words).value_counts()
+    words = re.findall(r"\b\w+\b", text.lower())
+    words = [word for word in words if word not in STOPWORDS]
+    word_counts = pd.Series(words).value_counts().rename(None)
     return word_counts
 
 import unittest
 
@@ -30,18 +30,9 @@ def f_124(text):
     words       1
     dtype: int64
     """
-    # Normalize the text to lowercase
-    text = text.lower()
-    
-    # Use regex to find words, considering words as sequences of alphabetic characters
-    words = re.findall(r'\b\p{L}+\b', text)
-    
-    # Filter out stopwords
-    filtered_words = [word for word in words if word not in STOPWORDS]
-    
-    # Count the frequency of each word using pandas Series
-    word_counts = pd.Series(filtered_words).value_counts()
-    
+    words = re.findall(r"\b\w+\b", text.lower())
+    words = [word for word in words if word not in STOPWORDS]
+    word_counts = pd.Series(words).value_counts().rename(None)
     return word_counts