Skip to content

Commit

Permalink
[go] Improve video id extraction (closes #25207, closes #25216, closes …)
Browse files Browse the repository at this point in the history
  • Loading branch information
dstftw committed Apr 20, 2021
1 parent c4a451b commit ac19c3a
Showing 1 changed file with 38 additions and 8 deletions.
46 changes: 38 additions & 8 deletions youtube_dl/extractor/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import re

from .adobepass import AdobePassIE
from ..compat import compat_str
from ..utils import (
int_or_none,
determine_ext,
parse_age_limit,
try_get,
urlencode_postdata,
ExtractorError,
)
Expand Down Expand Up @@ -116,6 +118,18 @@ class GoIE(AdobePassIE):
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot',
'info_dict': {
'id': 'VDKA22600213',
'ext': 'mp4',
'title': 'Pilot',
'description': 'md5:74306df917cfc199d76d061d66bebdb4',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True,
Expand Down Expand Up @@ -149,14 +163,30 @@ def _real_extract(self, url):
brand = site_info.get('brand')
if not video_id or not site_info:
webpage = self._download_webpage(url, display_id or video_id)
video_id = self._search_regex(
(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
), webpage, 'video id', default=video_id)
data = self._parse_json(
self._search_regex(
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
'data', default='{}'),
display_id or video_id, fatal=False)
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
video_id = None
if layout:
video_id = try_get(
layout,
(lambda x: x['videoid'], lambda x: x['video']['id']),
compat_str)
if not video_id:
video_id = self._search_regex(
(
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
r'data-video-id=["\']*(VDKA\w+)',
# page.analytics.videoIdCode
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
), webpage, 'video id', default=video_id)
if not site_info:
brand = self._search_regex(
(r'data-brand=\s*["\']\s*(\d+)',
Expand Down

0 comments on commit ac19c3a

Please sign in to comment.