[bot] AutoMerging: merge all upstream's changes:

* https://github.com/ytdl-org/youtube-dl: [RAI] Fix extraction of http formats [RAI] Extend formats with direct http mp4 link (PR ytdl-org#27990) [RAI] Added checks for DRM protected content (PR ytdl-org#27657)
hellopony · Apr 5, 2022 · 466c401 · 466c401
2 parents 83123d3 + 871645a
commit 466c401
Showing 1 changed file with 119 additions and 3 deletions.
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
@@ -5,15 +5,16 @@
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_urlparse,
     compat_str,
+    compat_urlparse,
 )
 from ..utils import (
-    ExtractorError,
     determine_ext,
+    ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     parse_duration,
     remove_start,
@@ -96,12 +97,100 @@ def _extract_relinker_info(self, relinker_url, video_id):
         if not formats and geoprotection is True:
             self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
+        formats.extend(self._create_http_urls(relinker_url, formats))
+
         return dict((k, v) for k, v in {
             'is_live': is_live,
             'duration': duration,
             'formats': formats,
         }.items() if v is not None)
 
+    def _create_http_urls(self, relinker_url, fmts):
+        _RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
+        _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s'
+        _QUALITY = {
+            # tbr: w, h
+            '250': [352, 198],
+            '400': [512, 288],
+            '700': [512, 288],
+            '800': [700, 394],
+            '1200': [736, 414],
+            '1800': [1024, 576],
+            '2400': [1280, 720],
+            '3200': [1440, 810],
+            '3600': [1440, 810],
+            '5000': [1920, 1080],
+            '10000': [1920, 1080],
+        }
+
+        def test_url(url):
+            resp = self._request_webpage(
+                HEADRequest(url), None, headers={'User-Agent': 'Rai'},
+                fatal=False, errnote=False, note=False)
+
+            if resp is False:
+                return False
+
+            if resp.code == 200:
+                return False if resp.url == url else resp.url
+            return None
+
+        def get_format_info(tbr):
+            import math
+            br = int_or_none(tbr)
+            if len(fmts) == 1 and not br:
+                br = fmts[0].get('tbr')
+            if br > 300:
+                tbr = compat_str(math.floor(br / 100) * 100)
+            else:
+                tbr = '250'
+
+            # try extracting info from available m3u8 formats
+            format_copy = None
+            for f in fmts:
+                if f.get('tbr'):
+                    br_limit = math.floor(br / 100)
+                    if br_limit - 1 <= math.floor(f['tbr'] / 100) <= br_limit + 1:
+                        format_copy = f.copy()
+            return {
+                'width': format_copy.get('width'),
+                'height': format_copy.get('height'),
+                'tbr': format_copy.get('tbr'),
+                'vcodec': format_copy.get('vcodec'),
+                'acodec': format_copy.get('acodec'),
+                'fps': format_copy.get('fps'),
+                'format_id': 'https-%s' % tbr,
+            } if format_copy else {
+                'width': _QUALITY[tbr][0],
+                'height': _QUALITY[tbr][1],
+                'format_id': 'https-%s' % tbr,
+                'tbr': int(tbr),
+            }
+
+        loc = test_url(_MP4_TMPL % (relinker_url, '*'))
+        if not isinstance(loc, compat_str):
+            return []
+
+        mobj = re.match(
+            _RELINKER_REG,
+            test_url(relinker_url) or '')
+        if not mobj:
+            return []
+
+        available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*']
+        available_qualities = [i for i in available_qualities if i]
+
+        formats = []
+        for q in available_qualities:
+            fmt = {
+                'url': _MP4_TMPL % (relinker_url, q),
+                'protocol': 'https',
+                'ext': 'mp4',
+            }
+            fmt.update(get_format_info(q))
+            formats.append(fmt)
+        return formats
+
     @staticmethod
     def _extract_subtitles(url, video_data):
         STL_EXT = 'stl'
@@ -151,13 +240,33 @@ class RaiPlayIE(RaiBaseIE):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # 1080p direct mp4 url
+        'url': 'https://www.raiplay.it/video/2021/03/Leonardo-S1E1-b5703b02-82ee-475a-85b6-c9e4a8adf642.html',
+        'md5': '2e501e8651d72f05ffe8f5d286ad560b',
+        'info_dict': {
+            'id': 'b5703b02-82ee-475a-85b6-c9e4a8adf642',
+            'ext': 'mp4',
+            'title': 'Leonardo - S1E1',
+            'alt_title': 'St 1 Ep 1 - Episodio 1',
+            'description': 'md5:f5360cd267d2de146e4e3879a5a47d31',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Rai 1',
+            'duration': 3229,
+            'series': 'Leonardo',
+            'season': 'Season 1',
+        },
     }, {
         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
         'only_matching': True,
     }, {
         # subtitles at 'subtitlesArray' key (see #27698)
         'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html',
         'only_matching': True,
+    }, {
+        # DRM protected
+        'url': 'https://www.raiplay.it/video/2020/09/Lo-straordinario-mondo-di-Zoey-S1E1-Lo-straordinario-potere-di-Zoey-ed493918-1d32-44b7-8454-862e473d00ff.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -166,6 +275,13 @@ def _real_extract(self, url):
         media = self._download_json(
             base + '.json', video_id, 'Downloading video JSON')
 
+        if try_get(
+                media,
+                (lambda x: x['rights_management']['rights']['drm'],
+                 lambda x: x['program_info']['rights_management']['rights']['drm']),
+                dict):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
         title = media['name']
 
         video = media['video']
@@ -307,7 +423,7 @@ class RaiIE(RaiBaseIE):
     }, {
         # with ContentItem in og:url
         'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html',
-        'md5': '6865dd00cf0bbf5772fdd89d59bd768a',
+        'md5': '06345bd97c932f19ffb129973d07a020',
         'info_dict': {
             'id': 'efb17665-691c-45d5-a60c-5301333cbb0c',
             'ext': 'mp4',