Skip to content

Commit

Permalink
[twitter] add 'syndication' option (#2354)
Browse files Browse the repository at this point in the history
to fetch age-restricted content using Twitter's  syndication API
  • Loading branch information
mikf committed Apr 1, 2022
1 parent a53cfc8 commit 1171911
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 32 deletions.
10 changes: 10 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2182,6 +2182,16 @@ Description
``4096x4096``, ``orig``, ``large``, ``medium``, and ``small``.


extractor.twitter.syndication
-----------------------------
Type
``bool``
Default
``false``
Description
Retrieve age-restricted content using Twitter's syndication API.


extractor.twitter.logout
------------------------
Type
Expand Down
112 changes: 80 additions & 32 deletions gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,23 +217,24 @@ def _transform_tweet(self, tweet):
if "legacy" in tweet:
tweet = tweet["legacy"]

tget = tweet.get
entities = tweet["entities"]
tdata = {
"tweet_id" : text.parse_int(tweet["id_str"]),
"retweet_id" : text.parse_int(
tweet.get("retweeted_status_id_str")),
tget("retweeted_status_id_str")),
"quote_id" : text.parse_int(
tweet.get("quoted_status_id_str")),
tget("quoted_status_id_str")),
"reply_id" : text.parse_int(
tweet.get("in_reply_to_status_id_str")),
tget("in_reply_to_status_id_str")),
"date" : text.parse_datetime(
tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"user" : user,
"lang" : tweet["lang"],
"favorite_count": tweet["favorite_count"],
"quote_count" : tweet["quote_count"],
"reply_count" : tweet["reply_count"],
"retweet_count" : tweet["retweet_count"],
"favorite_count": tget("favorite_count"),
"quote_count" : tget("quote_count"),
"reply_count" : tget("reply_count"),
"retweet_count" : tget("retweet_count"),
}

hashtags = entities.get("hashtags")
Expand All @@ -248,7 +249,7 @@ def _transform_tweet(self, tweet):
"nick": u["name"],
} for u in mentions]

content = tweet["full_text"]
content = tget("full_text") or tget("text") or ""
urls = entities.get("urls")
if urls:
for url in urls:
Expand All @@ -269,33 +270,36 @@ def _transform_tweet(self, tweet):
return tdata

def _transform_user(self, user):
uid = user.get("rest_id") or user["id_str"]

try:
return self._user_cache[user.get("rest_id") or user["id_str"]]
return self._user_cache[uid]
except KeyError:
pass

uid = user.get("rest_id") or user["id_str"]
if "legacy" in user:
user = user["legacy"]

uget = user.get
entities = user["entities"]

self._user_cache[uid] = udata = {
"id" : text.parse_int(uid),
"name" : user["screen_name"],
"nick" : user["name"],
"location" : user["location"],
"location" : uget("location"),
"date" : text.parse_datetime(
user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
"verified" : user.get("verified", False),
"profile_banner" : user.get("profile_banner_url", ""),
"profile_image" : user.get(
uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
"verified" : uget("verified", False),
"profile_banner" : uget("profile_banner_url", ""),
"profile_image" : uget(
"profile_image_url_https", "").replace("_normal.", "."),
"favourites_count": user["favourites_count"],
"followers_count" : user["followers_count"],
"friends_count" : user["friends_count"],
"listed_count" : user["listed_count"],
"media_count" : user["media_count"],
"statuses_count" : user["statuses_count"],
"favourites_count": uget("favourites_count"),
"followers_count" : uget("followers_count"),
"friends_count" : uget("friends_count"),
"listed_count" : uget("listed_count"),
"media_count" : uget("media_count"),
"statuses_count" : uget("statuses_count"),
}

descr = user["description"]
Expand Down Expand Up @@ -653,6 +657,11 @@ class TwitterTweetExtractor(TwitterExtractor):
("https://twitter.com/i/web/status/1486373748911575046", {
"count": 4,
}),
# age-restricted (#2354)
("https://twitter.com/mightbecursed/status/1492954264909479936", {
"options": (("syndication", True),),
"count": 1,
}),
)

def __init__(self, match):
Expand Down Expand Up @@ -770,6 +779,7 @@ def __init__(self, extractor):
}

self._nsfw_warning = True
self._syndication = extractor.config("syndication")
self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
self._user = None

Expand Down Expand Up @@ -1153,9 +1163,10 @@ def _pagination_tweets(self, endpoint, variables, path=None):
elif esw("conversationthread-"):
tweets.extend(entry["content"]["items"])
elif esw("tombstone-"):
self._report_tombstone(
entry,
entry["content"]["itemContent"]["tombstoneInfo"])
item = entry["content"]["itemContent"]
item["tweet_results"] = \
{"result": {"tombstone": item["tombstoneInfo"]}}
tweets.append(entry)
elif esw("cursor-bottom-"):
cursor = entry["content"]
if not cursor.get("stopOnEmptyResponse", True):
Expand All @@ -1168,8 +1179,10 @@ def _pagination_tweets(self, endpoint, variables, path=None):
tweet = ((entry.get("content") or entry["item"])
["itemContent"]["tweet_results"]["result"])
if "tombstone" in tweet:
self._report_tombstone(entry, tweet["tombstone"])
continue
tweet = self._process_tombstone(
entry, tweet["tombstone"])
if not tweet:
continue
if "tweet" in tweet:
tweet = tweet["tweet"]
legacy = tweet["legacy"]
Expand Down Expand Up @@ -1259,10 +1272,45 @@ def _pagination_users(self, endpoint, variables, path=None):
return
variables["cursor"] = cursor

def _report_tombstone(self, entry, tombstone):
def _process_tombstone(self, entry, tombstone):
text = (tombstone.get("richText") or tombstone["text"])["text"]
if text.startswith("Age-restricted") and self._nsfw_warning:
self.extractor.log.warning(text)
self._nsfw_warning = False
self.extractor.log.debug(
"Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text)
tweet_id = entry["entryId"].rpartition("-")[2]

if text.startswith("Age-restricted"):
if self._syndication:
return self._syndication_tweet(tweet_id)
elif self._nsfw_warning:
self._nsfw_warning = False
self.extractor.log.warning('"%s"', text)

self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)

def _syndication_tweet(self, tweet_id):
tweet = self.extractor.request(
"https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json()

tweet["user"]["description"] = ""
tweet["user"]["entities"] = {"description": {}}

if "video" in tweet:
video = tweet["video"]
del video["variants"][:-1]
video["variants"][0]["url"] = video["variants"][0]["src"]
tweet["extended_entities"] = {"media": [{
"video_info" : video,
"original_info": {"width" : 0, "height": 0},
}]}
elif "photos" in tweet:
for p in tweet["photos"]:
p["media_url_https"] = p["url"]
p["original_info"] = {
"width" : p["width"],
"height": p["height"],
}
tweet["extended_entities"] = {"media": tweet["photos"]}

return {
"rest_id": tweet["id_str"],
"legacy" : tweet,
"user" : tweet["user"],
}

0 comments on commit 1171911

Please sign in to comment.