Skip to content

Commit

Permalink
Merge branch 'release/0.1.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
turicas committed Feb 24, 2025
2 parents 5954eba + 451b564 commit e8bc70f
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 75 deletions.
82 changes: 29 additions & 53 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,20 +53,19 @@ profile = client.profile_from_user_id(user_id)
pprint(profile)

# Result:
# ThreadsProfile(id=18133069,
# username='wsj',
# full_name='The Wall Street Journal',
# biography='Since 1889 🗞️',
# ThreadsProfile(id=787132,
# username='natgeo',
# full_name='National Geographic',
# biography='Inspiring the explorer in everyone 🌎',
# is_verified=True,
# followers=1221734,
# followers=15594746,
# pictures=[Picture(width=150, height=150, content_type=None),
# Picture(width=320, height=320, content_type=None),
# Picture(width=640, height=640, content_type=None)],
# Picture(width=320, height=320, content_type=None)],
# is_private=False,
# bio_links=[Link(url='http://wsj.com/',
# bio_links=[Link(url='https://on.natgeo.com/instagram',
# title=None,
# is_verified=False,
# id=17987258489277581,
# id=17953782509778540,
# display_text=None)],
# is_threads_only_user=False)
```
Expand All @@ -85,55 +84,33 @@ for index, post in enumerate(client.profile_posts(user_id), start=1):

# Result:
# Got post 01:
# ThreadsPost(id='3570337216739782440_18133069',
# user_id=18133069,
# username='wsj',
# ThreadsPost(id='3574333699320466889_787132',
# user_id=787132,
# username='natgeo',
# is_verified=True,
# text='After Allison Pomeroy lost most of her vision two years ago, '
# 'her husband began reading menus, signage and other text out '
# 'loud to her. He doesn’t need to anymore.🔗 '
# 'https://on.wsj.com/3QqLWz2',
# links=[Link(url='https://on.wsj.com/3QqLWz2',
# title=None,
# is_verified=None,
# id=None,
# display_text='on.wsj.com/3QqLW…')],
# published_at=datetime.datetime(2025, 2, 18, 0, 9, 56, tzinfo=datetime.timezone.utc),
# likes=5,
# replies=0,
# reposts=0,
# quotes=0,
# text="Three-toed sloths can hang onto trees even when they're "
# "asleep 🦥 What's your favorite tree-climbing animal?",
# links=[],
# published_at=datetime.datetime(2025, 2, 23, 9, 30, 20, tzinfo=datetime.timezone.utc),
# likes=761,
# replies=17,
# reposts=24,
# quotes=3,
# is_private=False,
# pictures=[Picture(width=1440, height=1440, content_type=None),
# Picture(width=1080, height=1080, content_type=None),
# Picture(width=720, height=720, content_type=None),
# Picture(width=640, height=640, content_type=None),
# Picture(width=480, height=480, content_type=None),
# Picture(width=320, height=320, content_type=None),
# Picture(width=240, height=240, content_type=None),
# Picture(width=1080, height=1080, content_type=None),
# Picture(width=750, height=750, content_type=None),
# Picture(width=640, height=640, content_type=None),
# Picture(width=480, height=480, content_type=None),
# Picture(width=320, height=320, content_type=None),
# Picture(width=240, height=240, content_type=None),
# Picture(width=150, height=150, content_type=None)],
# media=[Picture(width=1439, height=1873, content_type=None)],
# reply_control='everyone',
# media_type=1,
# accessibility_caption='Photo by The Wall Street Journal on '
# 'February 17, 2025. May be an image of 2 '
# 'people, sunglasses, glasses and text that '
# "says 'Meta's AI-P Powered Ray Ray-Bans Bans "
# 'Are Life- Life-Enhancing for the Blind '
# "WSJ'.",
# accessibility_caption='Photo by National Geographic on February '
# '23, 2025. May be an image of slow loris, '
# "sloth and text that says 'ロ BRUCE BRUCEDALE "
# "DALE'.",
# is_paid_partnership=None,
# like_and_view_counts_disabled=False,
# videos=[],
# has_audio=None,
# original_width=1440,
# original_height=1440,
# code='DGMYsg8Ia8o',
# reshares=None)
# original_width=1439,
# original_height=1873,
# code='DGalY_YtE3J',
# reshares=6)
# [...]
```

Expand Down Expand Up @@ -317,13 +294,12 @@ The `data/threads-profile-posts.csv` CSV file will be created with the following
- `reposts`
- `quotes`
- `is_private`
- `pictures`
- `media`
- `reply_control`
- `media_type`
- `accessibility_caption`
- `is_paid_partnership`
- `like_and_view_counts_disabled`
- `videos`
- `has_audio`
- `original_width`
- `original_height`
Expand Down
2 changes: 1 addition & 1 deletion outgram/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .instagram import Instagram # noqa
from .threads import Threads # noqa

__version__ = (0, 1, 0) # noqa
__version__ = (0, 1, 1) # noqa
7 changes: 3 additions & 4 deletions outgram/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,12 @@ class ThreadsPost(BasePost):
reposts: int
quotes: int
is_private: bool
pictures: List[Picture] = field(default_factory=list)
media: List[Union[Picture, Video]] = field(default_factory=list)
reply_control: Optional[str] = None
media_type: Optional[int] = None
accessibility_caption: Optional[str] = None
is_paid_partnership: Optional[bool] = None
like_and_view_counts_disabled: Optional[bool] = None
videos: List[Video] = field(default_factory=list)
has_audio: Optional[bool] = None
original_width: Optional[int] = None
original_height: Optional[int] = None
Expand All @@ -237,7 +236,7 @@ class ThreadsPost(BasePost):

def serialize(self) -> Dict[str, Any]:
row = super().serialize()
for key in ("links", "pictures", "videos"):
for key in ("links", "media"):
row[key] = "\n".join(link["url"] for link in row[key])
return row

Expand All @@ -247,7 +246,7 @@ def url(self):

def get_media(self) -> List[Type[BaseMedia]]:
"""Return all media objects of this post"""
return (self.pictures or []) + (self.videos or [])
return self.media or []


# Instagram-related dataclasses
Expand Down
34 changes: 24 additions & 10 deletions outgram/threads.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,29 @@ def profile(self, username_or_id: Union[str, int], raw: bool = False) -> Union[T
return self.profile_from_username(username_or_id)

def _extract_post(self, post_data: dict) -> ThreadsPost:
pictures = []
if "image_versions2" in post_data and "candidates" in post_data["image_versions2"]:
pictures = [
Picture(pic["url"], pic["width"], pic["height"]) for pic in post_data["image_versions2"]["candidates"]
]
if len(post_data.get("carousel_media") or []) > 0:
inner_media = post_data["carousel_media"]
else:
inner_media = [post_data]
post_media = []
for obj in inner_media:
obj_picture = None
if "image_versions2" in obj and "candidates" in obj["image_versions2"]:
images = [
Picture(pic["url"], pic["width"], pic["height"]) for pic in obj["image_versions2"]["candidates"]
]
images.sort(key=lambda obj: obj.width, reverse=True)
if images:
obj_picture = images[0]
if len(obj.get("video_versions") or []) > 0:
video = obj["video_versions"][0]
video = Video(type=video["type"], url=video["url"])
if obj_picture is not None:
video.thumbnail = obj_picture
post_media.append(video)
elif obj_picture is not None:
post_media.append(obj_picture)

text = ""
links = []
if "text_post_app_info" in post_data and "text_fragments" in post_data["text_post_app_info"]:
Expand All @@ -133,9 +151,6 @@ def _extract_post(self, post_data: dict) -> ThreadsPost:
display_text=fragment["link_fragment"]["display_text"],
)
)
videos = []
if post_data.get("video_versions"):
videos = [Video(type=v["type"], url=v["url"]) for v in post_data["video_versions"]]
user = post_data["user"]
return ThreadsPost(
id=post_data["id"],
Expand All @@ -150,13 +165,12 @@ def _extract_post(self, post_data: dict) -> ThreadsPost:
reposts=post_data["text_post_app_info"].get("repost_count", 0),
quotes=post_data["text_post_app_info"].get("quote_count", 0),
is_private=user.get("text_post_app_is_private", False),
pictures=pictures,
media=post_media,
reply_control=post_data["text_post_app_info"].get("reply_control"),
media_type=post_data.get("media_type"),
accessibility_caption=post_data.get("accessibility_caption"),
is_paid_partnership=post_data.get("is_paid_partnership"),
like_and_view_counts_disabled=post_data.get("like_and_view_counts_disabled"),
videos=videos,
has_audio=post_data.get("has_audio"),
original_width=post_data.get("original_width"),
original_height=post_data.get("original_height"),
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = outgram
version = 0.1.0
version = 0.1.1
description = Client for Instagram and Threads public GraphQL APIs (Python library + CLI)
long_description = file: README.md
long_description_content_type = text/markdown
Expand Down
13 changes: 8 additions & 5 deletions tests/test_instagram.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""End-to-end tests for the main command-line interface functions, which exercise most of the codebase"""

import csv
import random
from zipfile import ZipFile

from outgram.cli import archive, collect_profile, collect_profile_posts, instagram_post
Expand All @@ -21,14 +22,16 @@
INSTAGRAM_USER_MIX.append(username)
else:
INSTAGRAM_USER_MIX.append(user_id)
INSTAGRAM_POST_CODES = (
INSTAGRAM_POST_CODES = [
"DEhf2uTJUs0", # zuck
"DF-rojvO4g-", # oficialfernandatorres
"C2SuqhGv3U0", # crio.cafe
"DGLQHNhOoke", # oficialfernandatorres
"COWY0ydHUrI", # crio.cafe
"DFWFT4LyfSJ", # ficcoesespetaculo
)
]
random.shuffle(INSTAGRAM_USERNAMES)
random.shuffle(INSTAGRAM_USER_IDS)
random.shuffle(INSTAGRAM_USER_MIX)
random.shuffle(INSTAGRAM_POST_CODES)


def test_instagram_profile_with_usernames(temp_dir):
Expand Down Expand Up @@ -155,4 +158,4 @@ def test_instagram_post(temp_dir):
data = list(csv.DictReader(fobj))
assert len(data) == max_posts
user_ids_found = set(row["author_id"] for row in data)
assert len(user_ids_found) == 3 # The first 3 post codes are from different user each one
assert len(user_ids_found) == max_posts # There are no post codes from the same profile more than once
7 changes: 6 additions & 1 deletion tests/test_threads.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""End-to-end tests for the main command-line interface functions, which exercise most of the codebase"""

import csv
import random
from collections import Counter
from zipfile import ZipFile

Expand All @@ -15,6 +16,7 @@
"wsj": "18133069",
"nonoinvestidor": "42799100757",
"filosofia.liquida": "6828796459",
"mulheresdocafemataotomazina": "3540507650",
}
THREADS_USERNAMES = list(THREADS_USERNAME_ID.keys())
THREADS_USER_IDS = list(THREADS_USERNAME_ID.values())
Expand All @@ -24,6 +26,9 @@
THREADS_USER_MIX.append(username)
else:
THREADS_USER_MIX.append(user_id)
random.shuffle(THREADS_USERNAMES)
random.shuffle(THREADS_USER_IDS)
random.shuffle(THREADS_USER_MIX)


def test_threads_profile_with_usernames(temp_dir):
Expand Down Expand Up @@ -103,7 +108,7 @@ def test_threads_profile_posts(temp_dir):
data = list(csv.DictReader(fobj))
counter = Counter(row["user_id"] for row in data)
user_ids_found = set(counter.keys())
assert user_ids_found == set(THREADS_USER_IDS)
assert user_ids_found.issubset(THREADS_USER_IDS)
assert len(data) == max_posts
assert set(counter.values()) == {max_posts_per_user}

Expand Down

0 comments on commit e8bc70f

Please sign in to comment.