Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ Usage::

If you need different HTML output just subclass and override the ``format_*`` methods.

"Broadcast" mentions and old-style retweets are now available as well::

>>> from ttp import ttp
>>> p = ttp.Parser()
>>> result = p.parse(".@eadmundo has added broadcast mentions!")
>>> result.broadcast
'eadmundo'
>>> result = p.parse("RT @eadmundo, also old-style retweets")
>>> result.retweet
'eadmundo'

You can also ask for the span tags to be returned for each entity::

>>> p = ttp.Parser(include_spans=True)
Expand Down
44 changes: 44 additions & 0 deletions ttp/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,50 @@ def test_username_non_reply(self):
self.assertEqual(result.users, [u'username'])
self.assertEqual(result.reply, None)

# Broadcast mentions
def test_username_broadcast_mention_at_start(self):
    """A tweet that opens with ``.@username`` is linked and reported as a broadcast."""
    expected_html = u'.<a href="http://twitter.com/username">@username</a>'
    parsed = self.parser.parse(u'.@username')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.users, [u'username'])
    self.assertEqual(parsed.broadcast, u'username')

def test_username_broadcast_mention_in_middle(self):
    """A ``.@username`` that follows other text is still reported as a broadcast."""
    expected_html = u'something .<a href="http://twitter.com/username">@username</a>'
    parsed = self.parser.parse(u'something .@username')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.users, [u'username'])
    self.assertEqual(parsed.broadcast, u'username')

# Retweets
def test_username_old_style_retweet(self):
    """``RT @username`` in the middle of a tweet sets ``retweet`` to that username."""
    expected_html = u'retweet RT <a href="http://twitter.com/username">@username</a> something'
    parsed = self.parser.parse(u'retweet RT @username something')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

def test_username_old_style_retweet_at_beginning(self):
    """``RT @username`` at the very start of a tweet sets ``retweet`` to that username."""
    expected_html = u'RT <a href="http://twitter.com/username">@username</a> something'
    parsed = self.parser.parse(u'RT @username something')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

def test_username_quoted_retweet(self):
    """A mention directly after a straight double quote is reported as a retweet."""
    expected_html = u'retweet "<a href="http://twitter.com/username">@username</a> something"'
    parsed = self.parser.parse(u'retweet "@username something"')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

def test_username_curly_quoted_retweet(self):
    """A mention directly after a left curly double quote is reported as a retweet."""
    expected_html = u'retweet “<a href="http://twitter.com/username">@username</a> something”'
    parsed = self.parser.parse(u'retweet “@username something”')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

def test_username_quoted_retweet_at_beginning(self):
    """A tweet that opens with ``"@username`` is reported as a retweet."""
    expected_html = u'"<a href="http://twitter.com/username">@username</a> something"'
    parsed = self.parser.parse(u'"@username something"')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

def test_username_curly_quoted_retweet_at_beginning(self):
    """A tweet that opens with a left curly quote and a mention is reported as a retweet."""
    expected_html = u'“<a href="http://twitter.com/username">@username</a> something”'
    parsed = self.parser.parse(u'“@username something”')
    self.assertEqual(parsed.html, expected_html)
    self.assertEqual(parsed.retweet, u'username')

# List tests ---------------------------------------------------------------
# --------------------------------------------------------------------------
def test_list_preceeded(self):
Expand Down
24 changes: 22 additions & 2 deletions ttp/ttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
# Character class for the sign that introduces a mention: ASCII "@" or U+FF20.
AT_SIGNS = ur'[@\uff20]'
# Character-class *contents* (no surrounding brackets, so it can be embedded
# in a larger class): a-z, 0-9, "_" and the ranges U+00C0-U+00D6,
# U+00D8-U+00F6 and U+00F8-U+00FF.
UTF_CHARS = ur'a-z0-9_\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff'
# Character class of the code points this module treats as a space.
SPACES = ur'[\u0020\u00A0\u1680\u180E\u2002-\u202F\u205F\u2060\u3000]'
# Character class of opening double quotes: U+0022 (") and U+201C (left curly).
QUOTES = ur'[\u0022\u201C]'

# Lists
# Capturing group for what may precede a list: one character that is not
# a-z, 0-9 or "_", or the start of the string.
LIST_PRE_CHARS = ur'([^a-z0-9_]|^)'
Expand All @@ -41,6 +42,8 @@
# Mention: an at sign at a non-word boundary (\B) followed by LIST_END_CHARS,
# matched case-insensitively.  LIST_END_CHARS is defined elsewhere in this
# module.
USERNAME_REGEX = re.compile(ur'\B' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
# Reply: optional leading SPACES, then an at sign and a captured run of 1-20
# characters from [a-z0-9_] (case-insensitive); the trailing ".*" consumes the
# rest of the line.  Anchored at the start of the text by "^".
REPLY_REGEX = re.compile(ur'^(?:' + SPACES + ur')*' + AT_SIGNS
+ ur'([a-z0-9_]{1,20}).*', re.IGNORECASE)
# Broadcast mention: a literal dot immediately before the mention, as in
# ".@username".  The dot has to be escaped -- a bare "." matches any
# character, which would report every ordinary " @username" mention as a
# broadcast.
BROADCAST_REGEX = re.compile(r'\.' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)
# Old-style retweet: either "RT" plus one SPACES character, or an opening
# quote from QUOTES, directly before the mention.  The \b stops words that
# merely end in "rt" (e.g. "smart @username") from being read as "RT".
# Both prefixes are non-capturing, so the groups callers read are still the
# ones contributed by LIST_END_CHARS.
RETWEET_REGEX = re.compile(r'(?:\bRT' + SPACES + '|' + QUOTES + ')' + AT_SIGNS + LIST_END_CHARS, re.IGNORECASE)

# Hashtags
# Hashtag pattern source with three capturing groups:
#   1. start of string, or a run of characters other than 0-9, A-Z, "&", "/"
#   2. the hash sign: "#" or U+FF03
#   3. the tag text: optional [0-9A-Z_] characters, at least one of [A-Z_],
#      then any number of UTF_CHARS characters
# The classes are written upper-case only; presumably this is compiled with
# re.IGNORECASE where it is used -- confirm at the compile site.
HASHTAG_EXP = ur'(^|[^0-9A-Z&/]+)(#|\uff03)([0-9A-Z_]*[A-Z_]+[%s]*)' % UTF_CHARS
Expand Down Expand Up @@ -86,6 +89,15 @@ class ParseResult(object):
Note: It's generally better to rely on the Tweet JSON/XML in order to
find out if it's a reply or not.

- broadcast
A string containing the username this tweet was a broadcast mention to
(i.e. preceded by a dot to ensure that the mention is broadcast even to
non-followers of that username)

- retweet
A string containing the username this tweet was a retweet of (as indicated by
either old-style RT or encased in quotes)

- lists
A list containing all the valid lists in the Tweet.
Each list item is a tuple in the format (username, listname).
Expand All @@ -100,11 +112,13 @@ class ParseResult(object):

'''

def __init__(self, urls, users, reply, lists, tags, html):
def __init__(self, urls, users, reply, broadcast, retweet, lists, tags, html):
    """Store the entities found in a tweet, normalising empty values.

    Falsy ``urls``, ``users``, ``lists`` and ``tags`` are stored as a new
    empty list; falsy ``reply``, ``broadcast`` and ``retweet`` are stored as
    ``None``.  ``html`` is stored unchanged.
    """
    # Collection-valued results: never leave a falsy value on the instance.
    self.urls = urls or []
    self.users = users or []
    self.lists = lists or []
    self.tags = tags or []

    # Single-username results: anything falsy means "not present".
    self.reply = reply or None
    self.broadcast = broadcast or None
    self.retweet = retweet or None

    self.html = html

Expand All @@ -127,8 +141,14 @@ def parse(self, text, html=True):
reply = REPLY_REGEX.match(text)
reply = reply.groups(0)[0] if reply is not None else None

broadcast = BROADCAST_REGEX.search(text)
broadcast = broadcast.groups(0)[0] if broadcast is not None else None

retweet = RETWEET_REGEX.search(text)
retweet = retweet.groups(0)[0] if retweet is not None else None

parsed_html = self._html(text) if html else self._text(text)
return ParseResult(self._urls, self._users, reply,
return ParseResult(self._urls, self._users, reply, broadcast, retweet,
self._lists, self._tags, parsed_html)

def _text(self, text):
Expand Down