Skip to content

Commit ddb5813

Browse files
authored
Create remove stop words.py
1 parent e05562e commit ddb5813

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

remove stop words.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from nltk import word_tokenize
2+
from nltk.corpus import stopwords
3+
4+
5+
# English stop words from the NLTK corpus, built once at import time and kept
# as a set so the per-token membership checks in remove_stopwords are hash
# lookups rather than list scans.
# NOTE: stopwords.words() needs the NLTK "stopwords" corpus to be available
# locally (e.g. after nltk.download('stopwords')).
stop_words = set(stopwords.words('english'))
6+
7+
8+
def remove_stopwords(sentence, *, ignore_case=False):
    """Return *sentence* with English stop words (e.g. "is", "the", "a") removed.

    The sentence is split with ``word_tokenize`` and every token found in the
    module-level ``stop_words`` set is dropped. The surviving tokens --
    punctuation tokens included -- are joined with single spaces, so the
    original spacing of the input is not preserved.

    Args:
        sentence: Text to filter.
        ignore_case: When true, the lower-cased form of each token is looked
            up in ``stop_words``, so capitalised stop words such as "He" are
            removed as well. Defaults to ``False``, which keeps the original
            exact-match (case-sensitive) behaviour.

    Returns:
        The remaining tokens joined by a single space.
    """
    tokens = word_tokenize(sentence)
    if ignore_case:
        # Only the lookup key is lower-cased; kept tokens retain their casing.
        kept = [w for w in tokens if w.lower() not in stop_words]
    else:
        kept = [w for w in tokens if w not in stop_words]
    return " ".join(kept)
16+
17+
# Sample passage fed to remove_stopwords in the demo call that follows.
# It uses typographic apostrophes (’); the recorded OUTPUT further down shows
# them coming back as separate tokens.
text = """Harry Potter is the most miserable, lonely boy you can imagine. He’s shunned by his relatives, the Dursley’s, that have raised him since he was an infant. He’s forced to live in the cupboard under the stairs, forced to wear his cousin Dudley’s hand-me-down clothes, and forced to go to his neighbour’s house when the rest of the family is doing something fun. Yes, he’s just about as miserable as you can get."""
18+
# Demo call. As a bare expression statement its return value is discarded when
# the file runs as a script; the OUTPUT string that follows was presumably
# captured from an interactive session, where the value is echoed.
remove_stopwords(text)
19+
20+
"""
21+
OUTPUT:
22+
'Harry Potter miserable , lonely boy imagine . He ’ shunned relatives , Dursley ’ , raised since infant . He ’ forced live cupboard stairs , forced wear cousin Dudley ’ hand-me-down clothes , forced go neighbour ’ house rest family something fun . Yes , ’ miserable get .'
23+
"""

0 commit comments

Comments
 (0)