Merge pull request #31 from birdhouses/feature/discover-popular-posts…

…-based-on-hashtags Feature/discover popular posts based on hashtags
birdhouses · Apr 27, 2023 · 60ebcac · 60ebcac
2 parents a57cdee + d13234e
commit 60ebcac
Show file tree

Hide file tree

Showing 5 changed files with 191 additions and 27 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,6 +10,7 @@ session.json
 /followed_users
 /comments
 /src/artifacts/*
+/src/saved_posts/
 # C extensions
 *.so
 

diff --git a/example.config.json b/example.config.json
@@ -1,30 +1,62 @@
 {
-    "accounts": [
-      {
-        "username": "username",
-        "password": "password",
-        "follow_users": {
-          "enabled": true,
-          "follows_per_day": 100,
-          "source_account": "instagram",
-          "engagement": {
-            "like_recent_posts": true,
-            "like_count": 3
-          }
-        },
-        "unfollow_users": {
-          "enabled": true,
-          "unfollow_after": "days-hours-minutes-seconds"
+  "accounts": [
+    {
+      "username": "username",
+      "password": "password",
+      "follow_users": {
+        "enabled": true,
+        "follows_per_day": 20,
+        "source_account": "instagram",
+        "engagement": {
+          "like_recent_posts": true,
+          "like_count": 3
+        }
+      },
+      "unfollow_users": {
+        "enabled": true,
+        "unfollow_after": "0-12-0-0"
+      },
+      "comment_on_media": {
+        "enabled": true,
+        "comment_on_tag": "instagram",
+        "amount_per_day": 50,
+        "comments": [
+          "Comments ..."
+        ]
+      },
+      "media_auto_discovery": {
+        "enabled": true,
+        "from_tag": "instagram",
+        "amount_per_day": 2,
+        "save_captions": true,
+        "avoid_duplicates": true,
+        "post_requirements": {
+          "min_likes": 1,
+          "min_comments": 1,
+          "detect_caption_language": true,
+          "languages": ["en"],
+          "allowed_post_types": [
+            "photo",
+            "video",
+            "igtv",
+            "reel",
+            "album"
+          ]
         },
-        "comment_on_media": {
+        "author_requirements": {
           "enabled": true,
-          "comment_on_tag": "hashtag",
-          "amount_per_day": 5,
-          "comments": [
-            "Comment 1.."
-          ]
+          "detect_biography_keywords": false,
+          "biography_keywords": [
+            "keyword",
+            "another one"
+          ],
+          "detect_biography_language": true,
+          "languages": ["en"],
+          "min_followers": 1,
+          "max_following": 1000
         }
       }
-    ]
-  }
+    }
+  ]
+}
 
diff --git a/src/instabot/__init__.py b/src/instabot/__init__.py
@@ -1,4 +1,5 @@
-from .utils import get_client, load_config, get_user_id, get_followers, calculate_sleep_time, parse_time_string, logger
+from .utils import next_proxy, get_client, load_config, get_user_id, get_followers, calculate_sleep_time, parse_time_string, logger
 from .follow import follow_user_followers, unfollow_users, save_followed_user, load_followed_users, filter_users_to_unfollow, remove_unfollowed_user, mark_unfollowed_user, user_not_followed_before
 from .like_media import like_recent_posts
-from .comment import comment_on_media
+from .comment import comment_on_media
+from .media_discovery import media_auto_discovery
diff --git a/src/instabot/media_discovery.py b/src/instabot/media_discovery.py
@@ -0,0 +1,122 @@
+import instabot
+import asyncio
+import langdetect
+from instagrapi import Client
+import os
+
+# Keys looked up in the requirement sections of an account's
+# "media_auto_discovery" config. LANGUAGES is read from both
+# "post_requirements" and "author_requirements"; the other keys are read
+# from "post_requirements" only.
+MIN_LIKES = "min_likes"
+MIN_COMMENTS = "min_comments"
+DETECT_CAPTION_LANGUAGE = "detect_caption_language"
+LANGUAGES = "languages"
+ALLOWED_POST_TYPES = "allowed_post_types"
+
+async def media_auto_discovery(client, account):
+    """Continuously store top posts for the configured hashtag.
+
+    Reads ``account['media_auto_discovery']``, fetches the top posts for
+    ``from_tag`` and stores every post that passes the post and author
+    requirements. After each stored post the coroutine sleeps for the
+    interval returned by ``instabot.calculate_sleep_time(amount_per_day)``.
+
+    The loop has no exit condition, so this only ends when the task is
+    cancelled or an exception propagates.
+    """
+    config = account['media_auto_discovery']
+
+    while True:
+        posts = client.hashtag_medias_top(config['from_tag'])
+        sleep_time = instabot.calculate_sleep_time(config['amount_per_day'])
+        stored_any = False
+        for post in posts:
+            if await passes_requirements(client, post, config):
+                store_post(client, account, post)
+                stored_any = True
+                await asyncio.sleep(sleep_time)
+
+        if not stored_any:
+            # Nothing in this batch qualified (or the batch was empty).
+            # Without a pause the loop would immediately re-query the
+            # hashtag and never hand control back to the event loop,
+            # starving the other tasks running for this account.
+            await asyncio.sleep(sleep_time)
+
+async def passes_requirements(cl, post, config):
+    """Return whether ``post`` satisfies both the post and author checks.
+
+    The author check is only evaluated when the post check succeeds.
+    """
+    post_ok = await check_post_requirements(cl, post, config)
+    if not post_ok:
+        return post_ok
+    return await check_author_requirements(cl, post, config)
+
+async def check_post_requirements(cl, post, config):
+    """Return whether ``post`` meets the ``post_requirements`` section.
+
+    The media is re-fetched through ``cl.media_info`` and then checked, in
+    order, against the minimum like count, the minimum comment count, the
+    optional caption-language filter and the allowed post types.
+    """
+    requirements = config['post_requirements']
+    media = cl.media_info(post.id)
+
+    if media.like_count < requirements[MIN_LIKES]:
+        return False
+    if media.comment_count < requirements[MIN_COMMENTS]:
+        return False
+
+    # The language filter only applies when detection is switched on.
+    language_rejected = (
+        requirements[DETECT_CAPTION_LANGUAGE]
+        and not is_language_allowed(media.caption_text, requirements[LANGUAGES])
+    )
+    if language_rejected:
+        return False
+
+    return bool(is_post_type_allowed(media, requirements[ALLOWED_POST_TYPES]))
+
+async def check_author_requirements(cl, post, config):
+    """Return whether the author of ``post`` meets ``author_requirements``.
+
+    Always returns True when the section's ``enabled`` flag is false.
+    Otherwise the author's profile is fetched via
+    ``cl.user_info_by_username`` and checked against the follower and
+    following bounds plus the optional biography keyword and language
+    filters. ``min_followers`` and ``max_following`` are inclusive bounds,
+    matching how ``min_likes``/``min_comments`` are treated for posts.
+    """
+    author_req = config['author_requirements']
+    if not author_req['enabled']:
+        return True
+
+    author = cl.user_info_by_username(post.user.username)
+
+    # Reject only authors strictly outside the configured bounds; the
+    # previous >=/<= comparison also rejected authors sitting exactly on
+    # the configured minimum or maximum.
+    if (author.follower_count < author_req['min_followers'] or
+            author.following_count > author_req['max_following']):
+        return False
+
+    # Defensive: fall back to empty text if the profile carries no
+    # biography value, so the text filters below cannot fail on None.
+    biography = author.biography or ""
+
+    if author_req['detect_biography_keywords']:
+        if not has_keywords(biography, author_req['biography_keywords']):
+            return False
+
+    if author_req['detect_biography_language']:
+        if not is_language_allowed(biography, author_req[LANGUAGES]):
+            return False
+
+    return True
+
+def is_language_allowed(text, allowed_languages):
+    """Return whether the detected language of ``text`` is allowed.
+
+    Detection is best-effort: when there is no text, or the detection
+    fails for any reason, the text is accepted (returns True) rather than
+    rejected.
+    """
+    if not text:
+        # Nothing to detect; same permissive outcome as a failed detection.
+        return True
+    try:
+        detected_language = langdetect.detect(text)
+        return detected_language in allowed_languages
+    except Exception:
+        # Narrowed from a bare ``except`` so KeyboardInterrupt and
+        # SystemExit are no longer swallowed here.
+        return True
+
+def is_post_type_allowed(post, allowed_post_types):
+    """Return whether the post's type is permitted.
+
+    Posts that ``get_post_type`` cannot classify (``None``) are always
+    allowed; otherwise the lower-cased type name must be present in
+    ``allowed_post_types``.
+    """
+    kind = get_post_type(post)
+    if kind is None:
+        return True
+    return kind.lower() in allowed_post_types
+
+def get_post_type(post):
+    """Map a media object to the post type name used in the config.
+
+    Returns one of ``'photo'``, ``'video'``, ``'igtv'``, ``'reel'`` or
+    ``'album'`` based on ``post.media_type`` and ``post.product_type``, or
+    ``None`` when the combination is not recognised.
+    """
+    if post.media_type == 1:
+        return 'photo'
+    if post.media_type == 8:
+        return 'album'
+    if post.media_type == 2:
+        # Videos are told apart by product_type. 'clips' is the value
+        # instagrapi documents for reels; without it reels were never
+        # classified (None) and so always passed the allowed-types filter.
+        video_kinds = {'feed': 'video', 'igtv': 'igtv', 'clips': 'reel'}
+        return video_kinds.get(post.product_type)
+    return None
+
+def has_keywords(biography, keywords):
+    """Return whether ``biography`` contains any of ``keywords``.
+
+    Matching is a case-insensitive substring test. A missing (``None``)
+    biography is treated as empty text instead of raising.
+    """
+    biography_lower = (biography or "").lower()
+    return any(keyword.lower() in biography_lower for keyword in keywords)
+
+def store_post(cl, account, post):
+    """Download ``post`` into the account's ``saved_posts`` folder.
+
+    Photos (media_type 1), videos (media_type 2) and albums (media_type 8)
+    each go to their own sub-directory; any other media type is ignored.
+    """
+    photo_dir = f"./saved_posts/{account['username']}/photo_downloads"
+    video_dir = f"./saved_posts/{account['username']}/video_downloads"
+    album_dir = f"./saved_posts/{account['username']}/album_downloads"
+
+    # media_type -> (download function, target directory)
+    handlers = {
+        1: (download_photo, photo_dir),
+        2: (download_video, video_dir),
+        8: (download_album, album_dir),
+    }
+    handler = handlers.get(post.media_type)
+    if handler is None:
+        return
+    download, target_dir = handler
+    download(cl, post, target_dir)
+
+def download_photo(cl, post, photo_download_path):
+    """Download the photo of ``post`` into ``photo_download_path``.
+
+    The directory is created if it does not exist. A failed download is
+    logged together with its cause and otherwise ignored, so the caller
+    keeps running.
+    """
+    os.makedirs(photo_download_path, exist_ok=True)
+    try:
+        cl.photo_download(post.pk, photo_download_path)
+    except Exception as exc:
+        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
+        # propagate; the cause is logged instead of being discarded.
+        instabot.logger.info(f"Failed to download photo for post {post.id}: {exc!r}")
+
+def download_video(cl, post, video_download_path):
+    """Download the video of ``post`` into ``video_download_path``.
+
+    The directory is created if it does not exist. A failed download is
+    logged together with its cause and otherwise ignored, so the caller
+    keeps running.
+    """
+    os.makedirs(video_download_path, exist_ok=True)
+    try:
+        cl.video_download(post.pk, video_download_path)
+    except Exception as exc:
+        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
+        # propagate; the cause is logged instead of being discarded.
+        instabot.logger.info(f"Failed to download video for post {post.id}: {exc!r}")
+
+def download_album(cl, post, album_download_path):
+    """Download the album of ``post`` into ``album_download_path``.
+
+    The directory is created if it does not exist. A failed download is
+    logged together with its cause and otherwise ignored, so the caller
+    keeps running.
+    """
+    os.makedirs(album_download_path, exist_ok=True)
+    try:
+        cl.album_download(post.pk, album_download_path)
+    except Exception as exc:
+        # Narrowed from a bare ``except`` so KeyboardInterrupt/SystemExit
+        # propagate; the cause is logged instead of being discarded.
+        instabot.logger.info(f"Failed to download album for post {post.id}: {exc!r}")
diff --git a/src/main.py b/src/main.py
@@ -1,8 +1,9 @@
 import instabot
 import asyncio
-from instabot import follow_user_followers, unfollow_users, comment_on_media
+from instabot import follow_user_followers, unfollow_users, comment_on_media, media_auto_discovery, logger
 from instabot import logger
 from threading import Thread
+from instagrapi import Client
 
 async def main(account):
     username = account['username']
@@ -26,6 +27,13 @@ async def main(account):
             comment_task = tg.create_task(
                 comment_on_media(cl, account)
             )
+        if account['media_auto_discovery']['enabled']:
+            ### TODO: Check if unauthenticated IG account proxy requests are working
+            ### If they are not working, use authenticated IG account for proxy
+            cl = instabot.get_client(account['username'], account['password'])
+            media_task = tg.create_task(
+                media_auto_discovery(cl, account)
+            )
 
 def run_account(account):
     asyncio.run(main(account))