-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Get comment replies concurrently using asyncio
- Loading branch information
Showing 9 changed files with 164 additions and 51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,102 @@ | ||
import asyncio | ||
from glom import glom | ||
|
||
from contentapi.youtube import comment_threads | ||
from contentapi.youtube import comment_threads, comments | ||
from contentapi.youtube.youtube_helpers import api_pages_iterator | ||
from .db import insert_many_comments | ||
|
||
WORKERS_NUM = 13 | ||
|
||
def run_all(video_id: str, api_key: str, max_items: int = None): | ||
return keep_all_video_comment_thread_ids(video_id, api_key, max_items) | ||
|
||
async def run_all(video_id: str, api_key: str, max_items: int = None): | ||
if not max_items: | ||
max_items = 999_999_999 | ||
shared_data = {'comments_count': 0} | ||
top_level_comment_queue = asyncio.Queue(maxsize=max_items or 0) | ||
|
||
def keep_all_video_comment_thread_ids(video_id: str, api_key: str, max_items: int = None): | ||
all_comment_thread_ids = [] | ||
# Start the replies workers; each one waits for ids from the top_level_comment_queue, | ||
# fetches that comment's replies from the API, and saves them to the db | ||
replies_workers = [asyncio.create_task(replies_worker( | ||
top_level_comment_queue, api_key, shared_data, max_items)) for _ in range(WORKERS_NUM)] | ||
|
||
# Read top level comments, save content to db and fill the top_level_comment_queue | ||
await get_all_top_level_comments( | ||
top_level_comment_queue, video_id, api_key, shared_data, max_items) | ||
await asyncio.gather(*replies_workers, return_exceptions=True) | ||
|
||
|
||
async def get_all_top_level_comments( | ||
top_level_comment_queue: asyncio.Queue, | ||
video_id: str, | ||
api_key: str, | ||
shared_data: dict, | ||
max_items: int | ||
): | ||
comment_threads_api = comment_threads.CommentThreads(api_key) | ||
pages_iterator = api_pages_iterator( | ||
comment_threads_api.list, | ||
max_results=max_items, | ||
part={comment_threads.ParamPart.ID}, | ||
part={comment_threads.ParamPart.ID, comment_threads.ParamPart.SNIPPET}, | ||
order=comment_threads.ParamOrder.RELEVANCE, | ||
video_id=video_id, | ||
) | ||
for page, err in pages_iterator: | ||
async for items, err in pages_iterator: | ||
if err: | ||
# TODO: handle error | ||
break # TODO: handle error | ||
top_level_comments = [comments.parse_api_comment( | ||
glom(item, 'snippet.topLevelComment')) for item in items] | ||
if not top_level_comments: | ||
continue | ||
if shared_data['comments_count'] >= max_items: | ||
break | ||
page_ids: list[str] = glom(page, ('items', ['id']), default=[]) | ||
if not page_ids: | ||
# TODO: log warning about empty page | ||
pass | ||
else: | ||
all_comment_thread_ids.extend(page_ids) | ||
return all_comment_thread_ids | ||
comments_left: int = max_items - shared_data['comments_count'] | ||
top_level_comments_to_send = top_level_comments[:comments_left] | ||
shared_data['comments_count'] += len(top_level_comments_to_send) | ||
|
||
# Save in db | ||
insert_many_comments(top_level_comments_to_send) | ||
|
||
# Send to queue | ||
for tl_comment in top_level_comments_to_send: | ||
await top_level_comment_queue.put(tl_comment.comment_id) | ||
|
||
|
||
async def get_all_comment_replies( | ||
comment_id: str, | ||
api_key: str, | ||
shared_data: dict, | ||
max_items: int | ||
): | ||
comments_api = comments.Comments(api_key) | ||
pages_iterator = api_pages_iterator( | ||
comments_api.list, | ||
max_results=max_items, | ||
part={comments.ParamPart.ID, comments.ParamPart.SNIPPET}, | ||
parent_id=comment_id, | ||
) | ||
async for items, err in pages_iterator: | ||
if err: | ||
break # TODO: handle error | ||
replies = [comments.parse_api_comment(item) for item in items] | ||
if not replies: | ||
continue | ||
if shared_data['comments_count'] >= max_items: | ||
break | ||
comments_left: int = max_items - shared_data['comments_count'] | ||
replies_to_send = replies[:comments_left] | ||
shared_data['comments_count'] += len(replies_to_send) | ||
|
||
# Save in db | ||
insert_many_comments(replies_to_send) | ||
|
||
|
||
async def replies_worker( | ||
top_level_comment_queue: asyncio.Queue, | ||
api_key: str, | ||
shared_data: dict, | ||
max_items: int | ||
): | ||
while shared_data['comments_count'] < max_items: | ||
comment_id = await top_level_comment_queue.get() | ||
await get_all_comment_replies(comment_id, api_key, shared_data, max_items) | ||
top_level_comment_queue.task_done() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
def insert_many_comments(many): | ||
pass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,18 @@ | ||
import os | ||
import time | ||
from flaskr import app | ||
from flask import request | ||
from .bl import run_all | ||
|
||
|
||
@app.route('/youtube/videos/<id>/comments', methods=['PUT']) | ||
def scan_youtube_video_comments(id: str): | ||
async def scan_youtube_video_comments(id: str): | ||
max_items = request.args.get('max', None, int) | ||
API_KEY = os.environ.get('YOUTUBE_API_KEY', '') | ||
re = run_all(video_id=id, api_key=API_KEY, max_items=max_items) | ||
return {'res': re}, 202 | ||
|
||
# TODO: Change this | ||
re = comments_api.list( | ||
part={comments.ParamPart.ID, comments.ParamPart.SNIPPET}, | ||
id={id}, | ||
max_results=10 | ||
) | ||
return re, 202 | ||
start_time = time.perf_counter() | ||
try: | ||
await run_all(video_id=id, api_key=API_KEY, max_items=max_items) | ||
elapsed = time.perf_counter() - start_time | ||
return {"message": "Accepted", "time": elapsed}, 202 | ||
except Exception: | ||
return {"message": "Failed"}, 500 |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from dataclasses import dataclass | ||
|
||
|
||
@dataclass | ||
class YoutubeComment: | ||
comment_id: str | ||
video_id: str | ||
text_original: str | ||
parent_id: str | ||
author_channel_id: str | ||
like_count: int | ||
published_at: str | ||
updated_at: str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters