Skip to content

Commit b63dce7

Browse files
authored
Merge pull request #343 from AccelerationConsortium/copilot/fix-341
Add Playwright-based YouTube video downloader with Google authentication
2 parents fe071f1 + 98e8501 commit b63dce7

File tree

3 files changed

+201
-0
lines changed

3 files changed

+201
-0
lines changed
Lines changed: 169 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,169 @@
1+
import json
2+
from pathlib import Path
3+
4+
import pyotp
5+
from google.oauth2.credentials import Credentials
6+
from googleapiclient.discovery import build
7+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
8+
from playwright.sync_api import sync_playwright
9+
10+
from src.ac_training_lab.video_editing.my_secrets import (
11+
EMAIL,
12+
PASSWORD,
13+
TOTP_SECRET,
14+
YOUTUBE_CLIENT_ID,
15+
YOUTUBE_CLIENT_SECRET,
16+
YOUTUBE_REFRESH_TOKEN,
17+
YOUTUBE_TOKEN,
18+
YOUTUBE_TOKEN_URI,
19+
)
20+
21+
# One-time-password generator used to answer the 2FA step of the Google login.
totp = pyotp.TOTP(TOTP_SECRET)

# Both paths are siblings of this script file: the folder that receives the
# downloaded files and the JSON record read by load_processed().
OUTPUT_DIR = Path(__file__).with_name("downloaded_videos")
PROCESSED_JSON = Path(__file__).with_name("processed.json")
26+
27+
28+
def list_my_playlists(youtube):
    """Collect the ids of every playlist owned by the authenticated account.

    Pages through ``playlists().list(mine=True)`` 50 entries at a time and
    prints ``"<title>: <id>"`` for each playlist as it is encountered.

    Args:
        youtube: YouTube Data API client exposing ``playlists()``.

    Returns:
        List of playlist ids in the order the API returned them.
    """
    collected = []
    page_request = youtube.playlists().list(part="snippet", mine=True, maxResults=50)

    while page_request:
        page = page_request.execute()
        for entry in page.get("items", []):
            entry_id, entry_title = entry["id"], entry["snippet"]["title"]
            print(f"{entry_title}: {entry_id}")
            collected.append(entry_id)

        # list_next() yields a falsy value once there is no further page.
        page_request = youtube.playlists().list_next(page_request, page)

    return collected
43+
44+
45+
def list_videos_in_playlist(youtube, playlist_id):
    """Collect the video ids contained in one playlist.

    Pages through ``playlistItems().list`` 50 entries at a time and prints
    ``"  <title>: <video id>"`` for each entry as it is encountered.

    Args:
        youtube: YouTube Data API client exposing ``playlistItems()``.
        playlist_id: Id of the playlist to enumerate.

    Returns:
        List of video ids in the order the API returned them.
    """
    video_ids = []
    request = youtube.playlistItems().list(
        part="snippet", playlistId=playlist_id, maxResults=50
    )

    while request:
        response = request.execute()
        # Tolerate a page without an "items" key, matching list_my_playlists,
        # instead of aborting the whole listing with a KeyError.
        for item in response.get("items", []):
            snippet = item["snippet"]
            video_id = snippet["resourceId"]["videoId"]
            print(f"  {snippet['title']}: {video_id}")
            video_ids.append(video_id)

        # list_next() yields a falsy value once there is no further page.
        request = youtube.playlistItems().list_next(request, response)

    return video_ids
62+
63+
64+
def setup_youtube_client():
    """Build a YouTube Data API v3 client from the stored OAuth values.

    Returns:
        The service object produced by ``build("youtube", "v3", ...)``.
    """
    oauth_scopes = ["https://www.googleapis.com/auth/youtube.force-ssl"]
    creds = Credentials(
        token=YOUTUBE_TOKEN,
        refresh_token=YOUTUBE_REFRESH_TOKEN,
        token_uri=YOUTUBE_TOKEN_URI,
        client_id=YOUTUBE_CLIENT_ID,
        client_secret=YOUTUBE_CLIENT_SECRET,
        scopes=oauth_scopes,
    )
    service = build("youtube", "v3", credentials=creds)
    return service
74+
75+
76+
def load_processed():
    """Read the record of already-processed videos from ``PROCESSED_JSON``.

    Returns:
        The decoded JSON content of the file, or an empty dict when the file
        does not exist. (Callers in this module treat it as a mapping from
        playlist id to a collection of video ids.)
    """
    if not PROCESSED_JSON.exists():
        return {}
    raw_text = PROCESSED_JSON.read_text()
    return json.loads(raw_text)
81+
82+
83+
def get_pending_downloads(youtube, processed_videos, downloaded_ids):
    """Work out, per playlist, which videos still have to be downloaded.

    Args:
        youtube: YouTube Data API client passed on to the listing helpers.
        processed_videos: Mapping of playlist id to the video ids already
            processed for that playlist.
        downloaded_ids: Collection of video ids that are already on disk.

    Returns:
        Dict mapping every playlist id of the account to the list of its
        video ids that are neither processed nor downloaded.
    """
    pending_by_playlist = {}
    for pl_id in list_my_playlists(youtube):
        candidates = list_videos_in_playlist(youtube, pl_id)
        already_processed = processed_videos.get(pl_id, [])
        pending_by_playlist[pl_id] = [
            candidate
            for candidate in candidates
            if not (candidate in already_processed or candidate in downloaded_ids)
        ]
    return pending_by_playlist
95+
96+
97+
def login_google(page):
    """Sign the given Playwright page into Google, answering TOTP if asked.

    Fills in ``EMAIL`` and ``PASSWORD`` on accounts.google.com. If the
    authenticator-app link shows up within 5 seconds, the current TOTP code is
    submitted and the function waits for the account landing page; otherwise
    it prints ``"No TOTP prompt"`` and returns right away.

    Args:
        page: Playwright (sync API) page used to drive the login.
    """
    # Step 1: account identifier.
    page.goto("https://accounts.google.com/")
    page.get_by_role("textbox", name="Email or phone").fill(EMAIL)
    page.get_by_role("button", name="Next").click()

    # Step 2: password, once the password field has been rendered.
    page.wait_for_selector('input[name="Passwd"]')
    page.get_by_role("textbox", name="Enter your password").fill(PASSWORD)
    page.get_by_role("button", name="Next").click()

    # Step 3 (optional): second factor via the authenticator app.
    authenticator_link = page.get_by_role(
        "link", name="Get a verification code from the Google Authenticator app"
    )
    try:
        authenticator_link.wait_for(timeout=5000)
    except PlaywrightTimeoutError:
        print("No TOTP prompt")
        return

    authenticator_link.click()
    pin_selector = 'input[name="totpPin"]'
    page.wait_for_selector(pin_selector, timeout=5000)
    page.fill(pin_selector, totp.now())
    page.get_by_role("button", name="Next").click()
    page.wait_for_url("https://myaccount.google.com/?pli=1", timeout=10000)
121+
122+
123+
def download_video(page, video_id):
    """Download one video through YouTube Studio into ``OUTPUT_DIR``.

    Opens the video's Studio edit page, uses the "Options" -> "Download" menu
    and stores the file as ``<video_id><ext>``. The file is named after the
    video id (not the browser's suggested, title-based name) because ``main``
    recognises finished downloads by comparing the ``*.mp4`` stems in
    ``OUTPUT_DIR`` with video ids; a title-based name would never match and
    the video would be fetched again on every run.

    Failures are reported on stdout and swallowed so that one bad video does
    not stop the remaining downloads.

    Args:
        page: Logged-in Playwright (sync API) page.
        video_id: Id of the video to download.
    """
    try:
        print(f"Navigating to video {video_id}...")
        page.goto(f"https://studio.youtube.com/video/{video_id}/edit/", timeout=15000)
        options_button = page.get_by_role("button", name="Options")
        options_button.wait_for(timeout=5000)
        options_button.click()
        print(f"Opened video {video_id} options.")

        download_item = page.get_by_role("menuitem", name="Download")
        download_item.wait_for(timeout=5000)
        with page.expect_download(timeout=10000) as download_info:
            download_item.click()
            print(f"Began downloading video {video_id}...")

        download = download_info.value
        OUTPUT_DIR.mkdir(exist_ok=True)
        # Keep the extension the browser suggested; fall back to .mp4, which is
        # the extension main() globs for.
        suffix = Path(download.suggested_filename).suffix or ".mp4"
        file_path = OUTPUT_DIR / f"{video_id}{suffix}"
        download.save_as(file_path)
        print(f"Downloaded: {file_path}")

    except Exception as e:  # best-effort per video: report and carry on
        print(f"Failed to download video {video_id}: {e}")
144+
145+
146+
def main():
    """Download every playlist video that is neither processed nor on disk.

    Lists the account's playlists through the YouTube Data API, filters out
    videos recorded in ``PROCESSED_JSON`` or already present as ``*.mp4`` in
    ``OUTPUT_DIR``, then logs into Google in a headed Chromium window and
    downloads the remainder one by one.
    """
    youtube = setup_youtube_client()
    processed_videos = load_processed()
    downloaded_ids = {f.stem for f in OUTPUT_DIR.glob("*.mp4")}

    pending = get_pending_downloads(youtube, processed_videos, downloaded_ids)
    print(f"Pending downloads: {sum(len(v) for v in pending.values())}")

    # A video can sit in several playlists; download it only once, keeping
    # the first-seen order.
    unique_video_ids = list(
        dict.fromkeys(vid for videos in pending.values() for vid in videos)
    )
    if not unique_video_ids:
        # Nothing to fetch: skip the browser launch and the Google login.
        return

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(accept_downloads=True)
            page = context.new_page()

            login_google(page)

            for video_id in unique_video_ids:
                download_video(page, video_id)
        finally:
            # Close the browser even when the login step raises.
            browser.close()
166+
167+
168+
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
# Template for local credentials: fill in the values on your machine and keep
# the populated file out of version control.

# Google account used for the browser login.
EMAIL = ""
PASSWORD = ""
# Secret handed to pyotp.TOTP to generate the 2FA codes.
TOTP_SECRET = ""

# OAuth values used to build the YouTube Data API credentials.
YOUTUBE_TOKEN = ""
YOUTUBE_REFRESH_TOKEN = ""
YOUTUBE_TOKEN_URI = ""
YOUTUBE_CLIENT_ID = ""
YOUTUBE_CLIENT_SECRET = ""
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
cachetools==5.5.2
2+
certifi==2025.7.14
3+
charset-normalizer==3.4.2
4+
google-api-core==2.25.1
5+
google-api-python-client==2.177.0
6+
google-auth==2.40.3
7+
google-auth-httplib2==0.2.0
8+
googleapis-common-protos==1.70.0
9+
greenlet==3.2.3
10+
httplib2==0.22.0
11+
idna==3.10
12+
playwright==1.54.0
13+
proto-plus==1.26.1
14+
protobuf==6.31.1
15+
pyasn1==0.6.1
16+
pyasn1_modules==0.4.2
17+
pyee==13.0.0
18+
pyotp==2.9.0
19+
pyparsing==3.2.3
20+
requests==2.32.4
21+
rsa==4.9.1
22+
typing_extensions==4.14.1
23+
uritemplate==4.2.0
24+
urllib3==2.5.0

0 commit comments

Comments
 (0)