Merge branch 'release/0.1.1'

PythonicCafe · Feb 24, 2025 · e8bc70f
2 parents 5954eba + 451b564
commit e8bc70f
Show file tree

Hide file tree

Showing 7 changed files with 72 additions and 75 deletions.
diff --git a/README.md b/README.md
@@ -53,20 +53,19 @@ profile = client.profile_from_user_id(user_id)
 pprint(profile)
 
 # Result:
-# ThreadsProfile(id=18133069,
-#                username='wsj',
-#                full_name='The Wall Street Journal',
-#                biography='Since 1889 🗞️',
+# ThreadsProfile(id=787132,
+#                username='natgeo',
+#                full_name='National Geographic',
+#                biography='Inspiring the explorer in everyone 🌎',
 #                is_verified=True,
-#                followers=1221734,
+#                followers=15594746,
 #                pictures=[Picture(width=150, height=150, content_type=None),
-#                          Picture(width=320, height=320, content_type=None),
-#                          Picture(width=640, height=640, content_type=None)],
+#                          Picture(width=320, height=320, content_type=None)],
 #                is_private=False,
-#                bio_links=[Link(url='http://wsj.com/',
+#                bio_links=[Link(url='https://on.natgeo.com/instagram',
 #                                title=None,
 #                                is_verified=False,
-#                                id=17987258489277581,
+#                                id=17953782509778540,
 #                                display_text=None)],
 #                is_threads_only_user=False)
 ```
@@ -85,55 +84,33 @@ for index, post in enumerate(client.profile_posts(user_id), start=1):
 
 # Result:
 # Got post 01:
-# ThreadsPost(id='3570337216739782440_18133069',
-#             user_id=18133069,
-#             username='wsj',
+# ThreadsPost(id='3574333699320466889_787132',
+#             user_id=787132,
+#             username='natgeo',
 #             is_verified=True,
-#             text='After Allison Pomeroy lost most of her vision two years ago, '
-#                  'her husband began reading menus, signage and other text out '
-#                  'loud to her. He doesn’t need to anymore.🔗 '
-#                  'https://on.wsj.com/3QqLWz2',
-#             links=[Link(url='https://on.wsj.com/3QqLWz2',
-#                         title=None,
-#                         is_verified=None,
-#                         id=None,
-#                         display_text='on.wsj.com/3QqLW…')],
-#             published_at=datetime.datetime(2025, 2, 18, 0, 9, 56, tzinfo=datetime.timezone.utc),
-#             likes=5,
-#             replies=0,
-#             reposts=0,
-#             quotes=0,
+#             text="Three-toed sloths can hang onto trees even when they're "
+#                  "asleep 🦥 What's your favorite tree-climbing animal?",
+#             links=[],
+#             published_at=datetime.datetime(2025, 2, 23, 9, 30, 20, tzinfo=datetime.timezone.utc),
+#             likes=761,
+#             replies=17,
+#             reposts=24,
+#             quotes=3,
 #             is_private=False,
-#             pictures=[Picture(width=1440, height=1440, content_type=None),
-#                       Picture(width=1080, height=1080, content_type=None),
-#                       Picture(width=720, height=720, content_type=None),
-#                       Picture(width=640, height=640, content_type=None),
-#                       Picture(width=480, height=480, content_type=None),
-#                       Picture(width=320, height=320, content_type=None),
-#                       Picture(width=240, height=240, content_type=None),
-#                       Picture(width=1080, height=1080, content_type=None),
-#                       Picture(width=750, height=750, content_type=None),
-#                       Picture(width=640, height=640, content_type=None),
-#                       Picture(width=480, height=480, content_type=None),
-#                       Picture(width=320, height=320, content_type=None),
-#                       Picture(width=240, height=240, content_type=None),
-#                       Picture(width=150, height=150, content_type=None)],
+#             media=[Picture(width=1439, height=1873, content_type=None)],
 #             reply_control='everyone',
 #             media_type=1,
-#             accessibility_caption='Photo by The Wall Street Journal on '
-#                                   'February 17, 2025. May be an image of 2 '
-#                                   'people, sunglasses, glasses and text that '
-#                                   "says 'Meta's AI-P Powered Ray Ray-Bans Bans "
-#                                   'Are Life- Life-Enhancing for the Blind '
-#                                   "WSJ'.",
+#             accessibility_caption='Photo by National Geographic on February '
+#                                   '23, 2025. May be an image of slow loris, '
+#                                   "sloth and text that says 'ロ BRUCE BRUCEDALE "
+#                                   "DALE'.",
 #             is_paid_partnership=None,
 #             like_and_view_counts_disabled=False,
-#             videos=[],
 #             has_audio=None,
-#             original_width=1440,
-#             original_height=1440,
-#             code='DGMYsg8Ia8o',
-#             reshares=None)
+#             original_width=1439,
+#             original_height=1873,
+#             code='DGalY_YtE3J',
+#             reshares=6)
 # [...]
 ```
 
@@ -317,13 +294,12 @@ The `data/threads-profile-posts.csv` CSV file will be created with the following
 - `reposts`
 - `quotes`
 - `is_private`
-- `pictures`
+- `media`
 - `reply_control`
 - `media_type`
 - `accessibility_caption`
 - `is_paid_partnership`
 - `like_and_view_counts_disabled`
-- `videos`
 - `has_audio`
 - `original_width`
 - `original_height`

diff --git a/outgram/__init__.py b/outgram/__init__.py
@@ -1,4 +1,4 @@
 from .instagram import Instagram  # noqa
 from .threads import Threads  # noqa
 
-__version__ = (0, 1, 0)  # noqa
+__version__ = (0, 1, 1)  # noqa
diff --git a/outgram/models.py b/outgram/models.py
@@ -222,13 +222,12 @@ class ThreadsPost(BasePost):
     reposts: int
     quotes: int
     is_private: bool
-    pictures: List[Picture] = field(default_factory=list)
+    media: List[Union[Picture, Video]] = field(default_factory=list)
     reply_control: Optional[str] = None
     media_type: Optional[int] = None
     accessibility_caption: Optional[str] = None
     is_paid_partnership: Optional[bool] = None
     like_and_view_counts_disabled: Optional[bool] = None
-    videos: List[Video] = field(default_factory=list)
     has_audio: Optional[bool] = None
     original_width: Optional[int] = None
     original_height: Optional[int] = None
@@ -237,7 +236,7 @@ class ThreadsPost(BasePost):
 
     def serialize(self) -> Dict[str, Any]:
         row = super().serialize()
-        for key in ("links", "pictures", "videos"):
+        for key in ("links", "media"):
             row[key] = "\n".join(link["url"] for link in row[key])
         return row
 
@@ -247,7 +246,7 @@ def url(self):
 
     def get_media(self) -> List[Type[BaseMedia]]:
         """Return all media objects of this post"""
-        return (self.pictures or []) + (self.videos or [])
+        return self.media or []
 
 
 # Instagram-related dataclasses

diff --git a/outgram/threads.py b/outgram/threads.py
@@ -116,11 +116,29 @@ def profile(self, username_or_id: Union[str, int], raw: bool = False) -> Union[T
             return self.profile_from_username(username_or_id)
 
     def _extract_post(self, post_data: dict) -> ThreadsPost:
-        pictures = []
-        if "image_versions2" in post_data and "candidates" in post_data["image_versions2"]:
-            pictures = [
-                Picture(pic["url"], pic["width"], pic["height"]) for pic in post_data["image_versions2"]["candidates"]
-            ]
+        if len(post_data.get("carousel_media") or []) > 0:
+            inner_media = post_data["carousel_media"]
+        else:
+            inner_media = [post_data]
+        post_media = []
+        for obj in inner_media:
+            obj_picture = None
+            if "image_versions2" in obj and "candidates" in obj["image_versions2"]:
+                images = [
+                    Picture(pic["url"], pic["width"], pic["height"]) for pic in obj["image_versions2"]["candidates"]
+                ]
+                images.sort(key=lambda obj: obj.width, reverse=True)
+                if images:
+                    obj_picture = images[0]
+            if len(obj.get("video_versions") or []) > 0:
+                video = obj["video_versions"][0]
+                video = Video(type=video["type"], url=video["url"])
+                if obj_picture is not None:
+                    video.thumbnail = obj_picture
+                post_media.append(video)
+            elif obj_picture is not None:
+                post_media.append(obj_picture)
+
         text = ""
         links = []
         if "text_post_app_info" in post_data and "text_fragments" in post_data["text_post_app_info"]:
@@ -133,9 +151,6 @@ def _extract_post(self, post_data: dict) -> ThreadsPost:
                             display_text=fragment["link_fragment"]["display_text"],
                         )
                     )
-        videos = []
-        if post_data.get("video_versions"):
-            videos = [Video(type=v["type"], url=v["url"]) for v in post_data["video_versions"]]
         user = post_data["user"]
         return ThreadsPost(
             id=post_data["id"],
@@ -150,13 +165,12 @@ def _extract_post(self, post_data: dict) -> ThreadsPost:
             reposts=post_data["text_post_app_info"].get("repost_count", 0),
             quotes=post_data["text_post_app_info"].get("quote_count", 0),
             is_private=user.get("text_post_app_is_private", False),
-            pictures=pictures,
+            media=post_media,
             reply_control=post_data["text_post_app_info"].get("reply_control"),
             media_type=post_data.get("media_type"),
             accessibility_caption=post_data.get("accessibility_caption"),
             is_paid_partnership=post_data.get("is_paid_partnership"),
             like_and_view_counts_disabled=post_data.get("like_and_view_counts_disabled"),
-            videos=videos,
             has_audio=post_data.get("has_audio"),
             original_width=post_data.get("original_width"),
             original_height=post_data.get("original_height"),

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = outgram
-version = 0.1.0
+version = 0.1.1
 description = Client for Instagram and Threads public GraphQL APIs (Python library + CLI)
 long_description = file: README.md
 long_description_content_type = text/markdown

diff --git a/tests/test_instagram.py b/tests/test_instagram.py
@@ -1,6 +1,7 @@
 """End-to-end tests for the command-line interface main functions, which will run most part of the codebase"""
 
 import csv
+import random
 from zipfile import ZipFile
 
 from outgram.cli import archive, collect_profile, collect_profile_posts, instagram_post
@@ -21,14 +22,16 @@
         INSTAGRAM_USER_MIX.append(username)
     else:
         INSTAGRAM_USER_MIX.append(user_id)
-INSTAGRAM_POST_CODES = (
+INSTAGRAM_POST_CODES = [
     "DEhf2uTJUs0",  # zuck
     "DF-rojvO4g-",  # oficialfernandatorres
     "C2SuqhGv3U0",  # crio.cafe
-    "DGLQHNhOoke",  # oficialfernandatorres
-    "COWY0ydHUrI",  # crio.cafe
     "DFWFT4LyfSJ",  # ficcoesespetaculo
-)
+]
+random.shuffle(INSTAGRAM_USERNAMES)
+random.shuffle(INSTAGRAM_USER_IDS)
+random.shuffle(INSTAGRAM_USER_MIX)
+random.shuffle(INSTAGRAM_POST_CODES)
 
 
 def test_instagram_profile_with_usernames(temp_dir):
@@ -155,4 +158,4 @@ def test_instagram_post(temp_dir):
         data = list(csv.DictReader(fobj))
     assert len(data) == max_posts
     user_ids_found = set(row["author_id"] for row in data)
-    assert len(user_ids_found) == 3  # The first 3 post codes are from different user each one
+    assert len(user_ids_found) == max_posts  # No two post codes belong to the same profile
diff --git a/tests/test_threads.py b/tests/test_threads.py
@@ -1,6 +1,7 @@
 """End-to-end tests for the command-line interface main functions, which will run most part of the codebase"""
 
 import csv
+import random
 from collections import Counter
 from zipfile import ZipFile
 
@@ -15,6 +16,7 @@
     "wsj": "18133069",
     "nonoinvestidor": "42799100757",
     "filosofia.liquida": "6828796459",
+    "mulheresdocafemataotomazina": "3540507650",
 }
 THREADS_USERNAMES = list(THREADS_USERNAME_ID.keys())
 THREADS_USER_IDS = list(THREADS_USERNAME_ID.values())
@@ -24,6 +26,9 @@
         THREADS_USER_MIX.append(username)
     else:
         THREADS_USER_MIX.append(user_id)
+random.shuffle(THREADS_USERNAMES)
+random.shuffle(THREADS_USER_IDS)
+random.shuffle(THREADS_USER_MIX)
 
 
 def test_threads_profile_with_usernames(temp_dir):
@@ -103,7 +108,7 @@ def test_threads_profile_posts(temp_dir):
         data = list(csv.DictReader(fobj))
     counter = Counter(row["user_id"] for row in data)
     user_ids_found = set(counter.keys())
-    assert user_ids_found == set(THREADS_USER_IDS)
+    assert user_ids_found.issubset(THREADS_USER_IDS)
     assert len(data) == max_posts
     assert set(counter.values()) == {max_posts_per_user}