Skip to content

Commit c0040c4

Browse files
authored
Create remove_tags.py
1 parent c30b85e commit c0040c4

File tree

1 file changed

+19
-0
lines changed

1 file changed

+19
-0
lines changed

remove_tags.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
1+
# coding: utf-8
2+
import nltk
3+
from nltk import word_tokenize
4+
5+
def remove_Tags(text):
    """Return *text* with HTML-style tags stripped out.

    A tag is anything matching ``<[^<]+?>``: a ``<``, then one or more
    characters that are not ``<`` (matched lazily), then a ``>``. Every such
    match is replaced with the empty string; all other characters are kept
    unchanged.

    Args:
        text: The string to clean.

    Returns:
        A new string with every matched tag removed.

    Note:
        This is a plain regex substitution, not an HTML parser. Text between
        tags is kept verbatim, an empty ``<>`` is left in place, and free
        text such as ``a < b > c`` loses the ``< b >`` part because it
        matches the tag pattern.
    """
    # The original code called re.sub without importing re anywhere, which
    # raised NameError on every call; import it here so the function works
    # regardless of what the rest of the file imports.
    import re

    cleaned_text = re.sub(r'<[^<]+?>', '', text)
    return cleaned_text
12+
13+
def main():
    """Print the result of running remove_Tags on a small HTML sample."""
    sample_html = """<head><body>hello world!</body></head>"""
    stripped = remove_Tags(sample_html)
    print(stripped)
16+
17+
# Run the demo only when this file is executed directly as a script.
if __name__ == '__main__':
    main()
19+

0 commit comments

Comments
 (0)