Skip to content

Commit

Permalink
Add all format filtering operators also to --match-filter
Browse files Browse the repository at this point in the history
PR: ytdl-org/youtube-dl#27361

Authored by: max-te
  • Loading branch information
max-te authored and pukkandan committed Aug 4, 2021
1 parent 678da2f commit 77b87f0
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 42 deletions.
34 changes: 15 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -338,25 +338,21 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
--match-filter FILTER Generic video filter. Specify any key (see
"OUTPUT TEMPLATE" for a list of available
keys) to match if the key is present, !key
to check if the key is not present,
key>NUMBER (like "view_count > 12", also
works with >=, <, <=, !=, =) to compare
against a number, key = 'LITERAL' (like
"uploader = 'Mike Smith'", also works with
!=) to match against a string literal and &
to require multiple matches. Values which
are not known are excluded unless you put a
question mark (?) after the operator. For
example, to only match videos that have
been liked more than 100 times and disliked
less than 50 times (or the dislike
functionality is not available at the given
service), but who also have a description,
use --match-filter "like_count > 100 &
dislike_count <? 50 & description"
--match-filter FILTER Generic video filter. Any field (see
"OUTPUT TEMPLATE") can be compared with a
number or a quoted string using the
operators defined in "Filtering formats".
You can also simply specify a field to
match if the field is present and "!field"
to check if the field is not present.
Multiple filters can be checked using "&".
For example, to only match videos that are
not live, has a like count more than 100, a
dislike count less than 50 (or the dislike
field is not available), and also has a
description that contains "python", use
--match-filter "!is_live & like_count>100 &
dislike_count<?50 & description*='python'"
--no-match-filter Do not use generic video filter (default)
--no-playlist Download only the video, if the URL refers
to a video and a playlist
Expand Down
12 changes: 11 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1207,7 +1207,6 @@ def test_render_table(self):
'9999 51')

def test_match_str(self):
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200}))
Expand All @@ -1224,6 +1223,17 @@ def test_match_str(self):
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'}))
Expand Down
26 changes: 10 additions & 16 deletions yt_dlp/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,22 +375,16 @@ def _dict_from_options_callback(
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
'Generic video filter. '
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
'match if the key is present, '
'!key to check if the key is not present, '
'key>NUMBER (like "view_count > 12", also works with '
'>=, <, <=, !=, =) to compare against a number, '
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
'to match against a string literal '
'and & to require multiple matches. '
'Values which are not known are excluded unless you '
'put a question mark (?) after the operator. '
'For example, to only match videos that have been liked more than '
'100 times and disliked less than 50 times (or the dislike '
'functionality is not available at the given service), but who '
'also have a description, use --match-filter '
'"like_count > 100 & dislike_count <? 50 & description"'))
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
'number or a string using the operators defined in "Filtering formats". '
'You can also simply specify a field to match if the field is present '
'and "!field" to check if the field is not present. '
'Multiple filters can be checked using "&". '
'For example, to only match videos that are not live, '
'has a like count more than 100, a dislike count less than 50 '
'(or the dislike field is not available), and also has a description '
'that contains "python", use --match-filter "!is_live & '
'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
Expand Down
19 changes: 13 additions & 6 deletions yt_dlp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4663,17 +4663,20 @@ def filter_using_list(row, filterArray):


def _match_one(filter_part, dct):
# TODO: Generalize code with YoutubeDL._build_format_filter
COMPARISON_OPERATORS = {
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'=': operator.eq,
'!=': operator.ne,
'*=': operator.contains,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
Expand All @@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct):
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
unnegated_op = COMPARISON_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not unnegated_op(attr, value)
else:
op = unnegated_op
actual_value = dct.get(m.group('key'))
if (m.group('quotedstrval') is not None
or m.group('strval') is not None
Expand All @@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct):
# https://github.com/ytdl-org/youtube-dl/issues/11082).
or actual_value is not None and m.group('intval') is not None
and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
quote = m.group('quote')
if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else:
if m.group('op') in ('*=', '^=', '$='):
raise ValueError(
'Operator %s only supports string values!' % m.group('op'))
try:
comparison_value = int(m.group('intval'))
except ValueError:
Expand Down

0 comments on commit 77b87f0

Please sign in to comment.