[Cleanup] Remove some unnecessary groups in regexes (yt-dlp#1738)

Authored by: Ashish0804
masfake0 · Nov 30, 2021 · 73f035e
1 parent 0cbed93
commit 73f035e
Show file tree

Hide file tree

Showing 22 changed files with 31 additions and 30 deletions.
diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py
@@ -4,7 +4,7 @@
 
 
 class AmazonStoreIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
+    _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
 
     _TESTS = [{
         'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',

diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
@@ -11,7 +11,7 @@
 
 
 class CanalAlphaIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
+    _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
 
     _TESTS = [{
         'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',

diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
@@ -67,7 +67,7 @@ def _get_post(self, id, post_data):
 
 
 class ChingariIE(ChingariBaseIE):
-    _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
     _TESTS = [{
         'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
         'info_dict': {
@@ -102,7 +102,7 @@ def _real_extract(self, url):
 
 
 class ChingariUserIE(ChingariBaseIE):
-    _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
+    _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
     _TESTS = [{
         'url': 'https://chingari.io/dada1023',
         'playlist_mincount': 3,

diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py
@@ -6,7 +6,7 @@
 
 
 class CozyTVIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
+    _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
 
     _TESTS = [{
         'url': 'https://cozy.tv/beardson/replays/2021-11-19_1',

diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
@@ -8,7 +8,7 @@
 
 
 class EpiconIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
         'info_dict': {
@@ -84,7 +84,7 @@ def _real_extract(self, url):
 
 
 class EpiconSeriesIE(InfoExtractor):
-    _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+    _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.epicon.in/tv-shows/1-of-something',
         'playlist_mincount': 5,

diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
@@ -10,7 +10,7 @@
 
 
 class EUScreenIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
+    _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
 
     _TESTS = [{
         'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',

diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
@@ -15,7 +15,7 @@
 
 
 class GabTVIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
+    _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
     _TESTS = [{
         'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
         'info_dict': {

diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py
@@ -6,7 +6,7 @@
 
 
 class GronkhIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'https://gronkh.tv/stream/536',

diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py
@@ -296,7 +296,7 @@ def _real_extract(self, url):
 
 class HotStarSeriesIE(HotStarBaseIE):
     IE_NAME = 'hotstar:series'
-    _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
     _TESTS = [{
         'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
         'info_dict': {

diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
@@ -8,7 +8,7 @@
 
 
 class KooIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
+    _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
     _TESTS = [{  # Test for video in the comments
         'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
         'info_dict': {

diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py
@@ -6,7 +6,7 @@
 
 class MLSSoccerIE(InfoExtractor):
     _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
-    _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+    _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
 
     _TESTS = [{
         'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',

diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py
@@ -5,15 +5,15 @@
 
 
 class MuseScoreIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
+    _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
     _TESTS = [{
         'url': 'https://musescore.com/user/73797/scores/142975',
         'info_dict': {
             'id': '142975',
             'ext': 'mp3',
             'title': 'WA Mozart Marche Turque (Turkish March fingered)',
             'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'PapyPiano',
             'creator': 'Wolfgang Amadeus Mozart',
         }
@@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'Sweet Child O\' Mine  – Guns N\' Roses sweet child',
             'description': 'md5:4dca71191c14abc312a0a4192492eace',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'roxbelviolin',
             'creator': 'Guns N´Roses Arr. Roxbel Violin',
         }
@@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor):
             'ext': 'mp3',
             'title': 'Für Elise – Beethoven',
             'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
-            'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+            'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
             'uploader': 'ClassicMan',
             'creator': 'Ludwig van Beethoven (1770–1827)',
         }

diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py
@@ -180,7 +180,7 @@ def _real_extract(self, url):
 
 
 class MxplayerShowIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
+    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
     _TESTS = [{
         'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417',
         'playlist_mincount': 440,

diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py
@@ -5,7 +5,7 @@
 
 
 class OneFootballIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',

diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py
@@ -9,7 +9,7 @@
 
 
 class PlanetMarathiIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
+    _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
     _TESTS = [{
         'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
         'playlist_mincount': 2,

diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
@@ -10,7 +10,7 @@
 
 
 class ProjectVeritasIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
         'info_dict': {

diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
@@ -16,7 +16,7 @@
 
 
 class ShemarooMeIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
     _TESTS = [{
         'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara',
         'info_dict': {
@@ -78,7 +78,7 @@ def _real_extract(self, url):
         iv = [0] * 16
         m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))
         m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii')
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
+        formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
         self._sort_formats(formats)
 
         release_date = self._html_search_regex(
@@ -91,6 +91,7 @@ def _real_extract(self, url):
             subtitles.setdefault('EN', []).append({
                 'url': self._proto_relative_url(sub_url),
             })
+        subtitles = self._merge_subtitles(subtitles, m3u8_subs)
         description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False)
 
         return {

diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py
@@ -9,7 +9,7 @@
 
 
 class SkyNewsAUIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
 
     _TESTS = [{
         'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71',

diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
@@ -11,7 +11,7 @@
 
 
 class ThreeSpeakIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
+    _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
 
     _TESTS = [{
         'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
@@ -75,7 +75,7 @@ def _real_extract(self, url):
 
 
 class ThreeSpeakUserIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
 
     _TESTS = [{
         'url': 'https://3speak.tv/user/theycallmedan',

diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py
@@ -13,7 +13,7 @@
 
 
 class UtreonIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
     _TESTS = [{
         'url': 'https://utreon.com/v/z_I7ikQbuDw',
         'info_dict': {

diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
@@ -15,7 +15,7 @@ class VootIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     (?:
                         voot:|
-                        (?:https?://)(?:www\.)?voot\.com/?
+                        https?://(?:www\.)?voot\.com/?
                         (?:
                             movies/[^/]+/|
                             (?:shows|kids)/(?:[^/]+/){4}

diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
@@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                         zee5:|
-                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                         (?:
                             (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
                             |movies/[^#/?]+
@@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                         zee5:series:|
-                        (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                        https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                         (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
                      )
                      (?P<id>[^#/?]+)/?(?:$|[?#])