This repository was archived by the owner on Oct 2, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinstagram.py
89 lines (71 loc) · 2.66 KB
/
instagram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# coding=utf-8
from __future__ import unicode_literals, absolute_import, print_function, division
try:
from ujson import loads
except ImportError:
from json import loads
from sopel.module import rule
from sopel.tools import SopelMemory
from requests import get
from datetime import datetime
import re
# Matches a message that contains a link to an Instagram post
# (http or https, with an optional "www." prefix, under the /p/ path).
# Group 1 captures the post URL up to the end of its shortcode; the leading
# ".*" and trailing "\s?.*" let the link appear anywhere in the message.
instagram_regex = r'.*(https?:\/\/(?:www\.){0,1}instagram\.com\/p\/[a-zA-Z0-9_-]+)\s?.*'
# Pre-compiled form of the same expression; used as the key under which the
# handler is stored in bot.memory['url_callbacks'].
instagram_pattern = re.compile(instagram_regex)
def setup(bot):
    """Register the Instagram handler in the bot's URL callback table.

    Creates ``bot.memory['url_callbacks']`` when it does not exist yet, then
    maps the compiled Instagram pattern to :func:`instaparse`.

    :param bot: the bot instance whose ``memory`` mapping is updated
    """
    # Use the ``in`` operator rather than the legacy ``contains()`` helper:
    # it works on any dict-like memory object.
    if 'url_callbacks' not in bot.memory:
        bot.memory['url_callbacks'] = SopelMemory()
    bot.memory['url_callbacks'][instagram_pattern] = instaparse
def shutdown(bot):
    """Remove the Instagram handler from the bot's URL callback table.

    :param bot: the bot instance whose ``memory`` mapping is updated
    :raises KeyError: if the callback table or the Instagram entry is missing
    """
    url_callbacks = bot.memory['url_callbacks']
    del url_callbacks[instagram_pattern]
# TODO: Parse Instagram profile page
@rule(instagram_regex)
def instaparse(bot, trigger):
    """Reply with a one-line summary of the Instagram post linked in a message.

    :param bot: the bot instance used to send the reply
    :param trigger: the matched message; group 1 holds the post URL
    """
    # Group 1 of the rule's regex is the post URL itself.
    post_url = trigger.group(1)
    # Fetch the JSON embedded in the post page, then turn it into text.
    shared_data = get_insta_json(post_url)
    summary = parse_insta_json(shared_data)
    bot.say(summary)
def get_insta_json(url, timeout=10):
    """Download an Instagram post page and decode its embedded JSON.

    The page source is searched for the ``window._sharedData`` assignment and
    the text between that assignment and the closing ``</script>`` tag is
    parsed as JSON.

    :param str url: URL of the Instagram post
    :param timeout: seconds to wait for the HTTP request before giving up
    :return: the decoded JSON object
    :raises ValueError: if the page does not contain the expected
                        ``window._sharedData`` script, or its content is not
                        valid JSON
    """
    marker = "window._sharedData"
    headers = {"Accept-Language": "en"}
    # NOTE(review): the original code downgrades the scheme to plain HTTP;
    # kept as-is to preserve behavior -- confirm whether it is still needed.
    url = url.replace("https://", "http://")
    # Bound the request so a stalled connection cannot block the handler.
    r = get(url, headers=headers, timeout=timeout)
    # Read the body once and reuse it for every search below.
    html = r.text

    # Extract JSON from html source code
    marker_at = html.find(marker)
    if marker_at == -1:
        raise ValueError("window._sharedData not found in Instagram page")
    # Same offset as the original constant 21: the 18-character marker plus
    # 3 more characters (presumably the " = " of the assignment).
    json_start = marker_at + len(marker) + 3
    script_end = html.find("</script>", json_start)
    if script_end == -1:
        raise ValueError("unterminated window._sharedData script in Instagram page")
    # Drop the last character before the closing tag (presumably the ";"
    # that ends the assignment).
    json_astxt = html[json_start:script_end - 1]
    return loads(json_astxt)
def parse_insta_json(json):
    """Build the one-line summary message for an Instagram post.

    :param json: decoded page data; the post is read from
                 ``json['entry_data']['PostPage'][0]['graphql']['shortcode_media']``
    :return: text of the form
             ``<icon> Photo|Video by <author> [| <caption>] | <W>x<H> |
             Likes: <n> | Comments: <n> | Uploaded: <UTC date time>``
    :raises KeyError: if a mandatory field is missing from the data
    :raises IndexError: if ``PostPage`` is empty

    The caption is optional: a missing, malformed or empty caption is simply
    left out of the message.
    """
    # Parse JSON content
    needed = json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
    iwidth = needed['dimensions']['width']
    iheight = needed['dimensions']['height']
    iuser = needed['owner']['username']
    ifname = needed['owner']['full_name']
    ilikes = needed['edge_media_preview_like']['count']
    icomms = needed['edge_media_to_comment']['count']
    idate = needed['taken_at_timestamp']
    # NOTE: utcfromtimestamp() is deprecated on recent Python 3 releases; it is
    # kept because the module's __future__ imports indicate Python 2 support.
    pubdate = datetime.utcfromtimestamp(idate).strftime('%Y-%m-%d %H:%M:%S')
    ivideo = needed['is_video']

    # Does the post have a caption?
    try:
        icap = needed['edge_media_to_caption']['edges'][0]['node']['text']
        # Collapse newlines, carriage returns and whitespace runs so the
        # caption fits on a single line.
        icap = ' '.join(icap.split())
    except (KeyError, IndexError, TypeError, AttributeError):
        # No caption edge, or the caption structure is not what we expect.
        icap = ''
    # Truncate caption
    if len(icap) > 256:
        icap = icap[:256] + u'…'

    # Build bot response
    if ivideo:
        botmessage = "📹 Video by "
    else:
        botmessage = "📸 Photo by "
    # The full name may be None or an empty string; fall back to the handle.
    if ifname:
        botmessage += "%s (@%s)" % (ifname, iuser)
    else:
        botmessage += "@%s" % iuser

    parts = [botmessage]
    if icap:
        parts.append(icap)
    parts.append("{}x{}".format(iwidth, iheight))
    parts.append(u"Likes: {:,}".format(ilikes))
    parts.append(u"Comments: {:,}".format(icomms))
    parts.append(u"Uploaded: " + pubdate)
    # Ta-da!
    return u" | ".join(parts)