Skip to content

Commit

Permalink
clean description text from html unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
iamaziz committed Oct 16, 2015
1 parent 63eafc4 commit 0c4654f
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions termfeed/feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,12 @@ def open_it():
return False

# Named HTML entities decoded by clean_txt, mapped to plain-text equivalents.
_HTML_ENTITIES = {
    '&lt;': '<',     # keeps escaped markup (e.g. code samples) readable
    '&gt;': '>',
    '&quot;': '"',
    '&rsquo;': "'",  # right single quote rendered as an ASCII apostrophe
    '&nbsp;': ' ',   # non-breaking space rendered as a regular space
    '&amp;': '&',
}


def clean_txt(txt):
    """Return txt with HTML tags removed and common HTML entities decoded.

    Tags are stripped first, so markup that was escaped in the input
    (``&lt;code&gt;``) survives and is shown literally as ``<code>``.
    Entities are then decoded in a single pass, so double-escaped text such
    as ``&amp;lt;`` becomes ``&lt;`` rather than being decoded twice.
    Entities that are not in ``_HTML_ENTITIES`` are left unchanged.
    """
    # Remove anything that looks like an html tag (non-greedy, per tag).
    without_tags = re.sub(r'<.*?>', '', txt)

    def _decode(match):
        entity = match.group(0)
        # Unknown entities fall through untouched.
        return _HTML_ENTITIES.get(entity, entity)

    return re.sub(r'&[a-z]+;', _decode, without_tags)

def _continue():
Expand Down

0 comments on commit 0c4654f

Please sign in to comment.