From 253fd24d31b3cdec667ca6d733483930b50f00d6 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 13 May 2024 20:32:11 -0400 Subject: [PATCH 1/7] Use contextlib.suppress --- cssutils/css/selector.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index 4fb82408..987287db 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -8,6 +8,7 @@ __all__ = ['Selector'] +import contextlib import xml.dom import cssutils @@ -221,12 +222,11 @@ def _setSelectorText(self, selectorText): # noqa: C901 # might be (selectorText, namespaces) selectorText, namespaces = self._splitNamespacesOff(selectorText) - try: + with contextlib.suppress(AttributeError): # uses parent stylesheets namespaces if available, # otherwise given ones namespaces = self.parent.parentRule.parentStyleSheet.namespaces - except AttributeError: - pass + tokenizer = self._tokenize2(selectorText) if not tokenizer: self._log.error('Selector: No selectorText given.') From cec52e376deedb27974ecc62c0bd87c27c2d7e64 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 13 May 2024 20:37:25 -0400 Subject: [PATCH 2/7] Short circuit and reduce indentation. --- cssutils/css/selector.py | 1088 +++++++++++++++++++------------------- 1 file changed, 541 insertions(+), 547 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index 987287db..b6f1119e 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -230,605 +230,599 @@ def _setSelectorText(self, selectorText): # noqa: C901 tokenizer = self._tokenize2(selectorText) if not tokenizer: self._log.error('Selector: No selectorText given.') - else: - # prepare tokenlist: - # "*" -> type "universal" - # "*"|IDENT + "|" -> combined to "namespace_prefix" - # "|" -> type "namespace_prefix" - # "." + IDENT -> combined to "class" - # ":" + IDENT, ":" + FUNCTION -> pseudo-class - # FUNCTION "not(" -> negation - # "::" + IDENT, "::" + FUNCTION -> pseudo-element - tokens = [] - for t in tokenizer: - typ, val, lin, col = t - if val == ':' and tokens and self._tokenvalue(tokens[-1]) == ':': - # combine ":" and ":" - tokens[-1] = (typ, '::', lin, col) - - elif typ == 'IDENT' and tokens and self._tokenvalue(tokens[-1]) == '.': - # class: combine to .IDENT - tokens[-1] = ('class', '.' + val, lin, col) - elif ( - typ == 'IDENT' - and tokens - and self._tokenvalue(tokens[-1]).startswith(':') - and not self._tokenvalue(tokens[-1]).endswith('(') - ): - # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" - if self._tokenvalue(tokens[-1]).startswith('::'): - t = 'pseudo-element' - else: - t = 'pseudo-class' - tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) - - elif ( - typ == 'FUNCTION' - and val == 'not(' - and tokens - and ':' == self._tokenvalue(tokens[-1]) - ): - tokens[-1] = ('negation', ':' + val, lin, tokens[-1][3]) - elif ( - typ == 'FUNCTION' - and tokens - and self._tokenvalue(tokens[-1]).startswith(':') - ): - # pseudo-X: combine to :FUNCTION( or ::FUNCTION( - if self._tokenvalue(tokens[-1]).startswith('::'): - t = 'pseudo-element' - else: - t = 'pseudo-class' - tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) - - elif ( - val == '*' - and tokens - and self._type(tokens[-1]) == 'namespace_prefix' - and self._tokenvalue(tokens[-1]).endswith('|') - ): - # combine prefix|* - tokens[-1] = ( - 'universal', - self._tokenvalue(tokens[-1]) + val, - lin, - col, - ) - elif val == '*': - # universal: "*" - tokens.append(('universal', val, lin, col)) - - elif ( - val == '|' - and tokens - and self._type(tokens[-1]) in (self._prods.IDENT, 'universal') - and self._tokenvalue(tokens[-1]).find('|') == -1 - ): - # namespace_prefix: "IDENT|" or "*|" - tokens[-1] = ( - 'namespace_prefix', - self._tokenvalue(tokens[-1]) + '|', - lin, - col, - ) - elif val == '|': - # namespace_prefix: "|" - tokens.append(('namespace_prefix', val, lin, col)) - + return + + # prepare tokenlist: + # "*" -> type "universal" + # "*"|IDENT + "|" -> combined to "namespace_prefix" + # "|" -> type "namespace_prefix" + # "." + IDENT -> combined to "class" + # ":" + IDENT, ":" + FUNCTION -> pseudo-class + # FUNCTION "not(" -> negation + # "::" + IDENT, "::" + FUNCTION -> pseudo-element + tokens = [] + for t in tokenizer: + typ, val, lin, col = t + if val == ':' and tokens and self._tokenvalue(tokens[-1]) == ':': + # combine ":" and ":" + tokens[-1] = (typ, '::', lin, col) + + elif typ == 'IDENT' and tokens and self._tokenvalue(tokens[-1]) == '.': + # class: combine to .IDENT + tokens[-1] = ('class', '.' + val, lin, col) + elif ( + typ == 'IDENT' + and tokens + and self._tokenvalue(tokens[-1]).startswith(':') + and not self._tokenvalue(tokens[-1]).endswith('(') + ): + # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" + if self._tokenvalue(tokens[-1]).startswith('::'): + t = 'pseudo-element' else: - tokens.append(t) - - tokenizer = iter(tokens) - - # for closures: must be a mutable - new = { - 'context': [''], # stack of: 'attrib', 'negation', 'pseudo' - 'element': None, - '_PREFIX': None, - 'specificity': [0, 0, 0, 0], # mutable, finally a tuple! - 'wellformed': True, - } - # used for equality checks and setting of a space combinator - S = ' ' - - def append(seq, val, typ=None, token=None): # noqa: C901 - """ - appends to seq - - namespace_prefix, IDENT will be combined to a tuple - (prefix, name) where prefix might be None, the empty string - or a prefix. - - Saved are also: - - specificity definition: style, id, class/att, type - - element: the element this Selector is for - """ - context = new['context'][-1] - if token: - line, col = token[2], token[3] + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) + + elif ( + typ == 'FUNCTION' + and val == 'not(' + and tokens + and ':' == self._tokenvalue(tokens[-1]) + ): + tokens[-1] = ('negation', ':' + val, lin, tokens[-1][3]) + elif ( + typ == 'FUNCTION' + and tokens + and self._tokenvalue(tokens[-1]).startswith(':') + ): + # pseudo-X: combine to :FUNCTION( or ::FUNCTION( + if self._tokenvalue(tokens[-1]).startswith('::'): + t = 'pseudo-element' else: - line, col = None, None - - if typ == '_PREFIX': - # SPECIAL TYPE: save prefix for combination with next - new['_PREFIX'] = val[:-1] - # handle next time - return - - if new['_PREFIX'] is not None: - # as saved from before and reset to None - prefix, new['_PREFIX'] = new['_PREFIX'], None - elif typ == 'universal' and '|' in val: - # val == *|* or prefix|* - prefix, val = val.split('|') + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) + + elif ( + val == '*' + and tokens + and self._type(tokens[-1]) == 'namespace_prefix' + and self._tokenvalue(tokens[-1]).endswith('|') + ): + # combine prefix|* + tokens[-1] = ( + 'universal', + self._tokenvalue(tokens[-1]) + val, + lin, + col, + ) + elif val == '*': + # universal: "*" + tokens.append(('universal', val, lin, col)) + + elif ( + val == '|' + and tokens + and self._type(tokens[-1]) in (self._prods.IDENT, 'universal') + and self._tokenvalue(tokens[-1]).find('|') == -1 + ): + # namespace_prefix: "IDENT|" or "*|" + tokens[-1] = ( + 'namespace_prefix', + self._tokenvalue(tokens[-1]) + '|', + lin, + col, + ) + elif val == '|': + # namespace_prefix: "|" + tokens.append(('namespace_prefix', val, lin, col)) + + else: + tokens.append(t) + + tokenizer = iter(tokens) + + # for closures: must be a mutable + new = { + 'context': [''], # stack of: 'attrib', 'negation', 'pseudo' + 'element': None, + '_PREFIX': None, + 'specificity': [0, 0, 0, 0], # mutable, finally a tuple! + 'wellformed': True, + } + # used for equality checks and setting of a space combinator + S = ' ' + + def append(seq, val, typ=None, token=None): # noqa: C901 + """ + appends to seq + + namespace_prefix, IDENT will be combined to a tuple + (prefix, name) where prefix might be None, the empty string + or a prefix. + + Saved are also: + - specificity definition: style, id, class/att, type + - element: the element this Selector is for + """ + context = new['context'][-1] + if token: + line, col = token[2], token[3] + else: + line, col = None, None + + if typ == '_PREFIX': + # SPECIAL TYPE: save prefix for combination with next + new['_PREFIX'] = val[:-1] + # handle next time + return + + if new['_PREFIX'] is not None: + # as saved from before and reset to None + prefix, new['_PREFIX'] = new['_PREFIX'], None + elif typ == 'universal' and '|' in val: + # val == *|* or prefix|* + prefix, val = val.split('|') + else: + prefix = None + + # namespace + if (typ.endswith('-selector') or typ == 'universal') and not ( + 'attribute-selector' == typ and not prefix + ): + # att **IS NOT** in default ns + if prefix == '*': + # *|name: in ANY_NS + namespaceURI = cssutils._ANYNS + elif prefix is None: + # e or *: default namespace with prefix u'' + # or local-name() + namespaceURI = namespaces.get('', None) + elif prefix == '': + # |name or |*: in no (or the empty) namespace + namespaceURI = '' else: - prefix = None - - # namespace - if (typ.endswith('-selector') or typ == 'universal') and not ( - 'attribute-selector' == typ and not prefix + # explicit namespace prefix + # does not raise KeyError, see _SimpleNamespaces + namespaceURI = namespaces[prefix] + + if namespaceURI is None: + new['wellformed'] = False + self._log.error( + 'Selector: No namespaceURI found ' 'for prefix %r' % prefix, + token=token, + error=xml.dom.NamespaceErr, + ) + return + + # val is now (namespaceprefix, name) tuple + val = (namespaceURI, val) + + # specificity + if not context or context == 'negation': + if 'id' == typ: + new['specificity'][1] += 1 + elif 'class' == typ or '[' == val: + new['specificity'][2] += 1 + elif typ in ( + 'type-selector', + 'negation-type-selector', + 'pseudo-element', ): - # att **IS NOT** in default ns - if prefix == '*': - # *|name: in ANY_NS - namespaceURI = cssutils._ANYNS - elif prefix is None: - # e or *: default namespace with prefix u'' - # or local-name() - namespaceURI = namespaces.get('', None) - elif prefix == '': - # |name or |*: in no (or the empty) namespace - namespaceURI = '' - else: - # explicit namespace prefix - # does not raise KeyError, see _SimpleNamespaces - namespaceURI = namespaces[prefix] - - if namespaceURI is None: - new['wellformed'] = False - self._log.error( - 'Selector: No namespaceURI found ' - 'for prefix %r' % prefix, - token=token, - error=xml.dom.NamespaceErr, - ) - return - - # val is now (namespaceprefix, name) tuple - val = (namespaceURI, val) - - # specificity - if not context or context == 'negation': - if 'id' == typ: - new['specificity'][1] += 1 - elif 'class' == typ or '[' == val: - new['specificity'][2] += 1 - elif typ in ( - 'type-selector', - 'negation-type-selector', - 'pseudo-element', - ): - new['specificity'][3] += 1 - if not context and typ in ('type-selector', 'universal'): - # define element - new['element'] = val - - seq.append(val, typ, line=line, col=col) - - # expected constants - simple_selector_sequence = ( - 'type_selector universal HASH class ' 'attrib pseudo negation ' - ) - simple_selector_sequence2 = 'HASH class attrib pseudo negation ' + new['specificity'][3] += 1 + if not context and typ in ('type-selector', 'universal'): + # define element + new['element'] = val + + seq.append(val, typ, line=line, col=col) - element_name = 'element_name' + # expected constants + simple_selector_sequence = ( + 'type_selector universal HASH class ' 'attrib pseudo negation ' + ) + simple_selector_sequence2 = 'HASH class attrib pseudo negation ' - negation_arg = 'type_selector universal HASH class attrib pseudo' - negationend = ')' + element_name = 'element_name' - attname = 'prefix attribute' - attname2 = 'attribute' - attcombinator = 'combinator ]' # optional - attvalue = 'value' # optional - attend = ']' + negation_arg = 'type_selector universal HASH class attrib pseudo' + negationend = ')' - expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' - expression = expressionstart + ' )' + attname = 'prefix attribute' + attname2 = 'attribute' + attcombinator = 'combinator ]' # optional + attvalue = 'value' # optional + attend = ']' - combinator = ' combinator' + expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' + expression = expressionstart + ' )' - def _COMMENT(expected, seq, token, tokenizer=None): - "special implementation for comment token" - append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) + combinator = ' combinator' + + def _COMMENT(expected, seq, token, tokenizer=None): + "special implementation for comment token" + append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) + return expected + + def _S(expected, seq, token, tokenizer=None): + # S + context = new['context'][-1] + if context.startswith('pseudo-'): + if seq and seq[-1].value not in '+-': + # e.g. x:func(a + b) + append(seq, S, 'S', token=token) return expected - def _S(expected, seq, token, tokenizer=None): - # S - context = new['context'][-1] - if context.startswith('pseudo-'): - if seq and seq[-1].value not in '+-': - # e.g. x:func(a + b) - append(seq, S, 'S', token=token) - return expected - - elif context != 'attrib' and 'combinator' in expected: - append(seq, S, 'descendant', token=token) - return simple_selector_sequence + combinator + elif context != 'attrib' and 'combinator' in expected: + append(seq, S, 'descendant', token=token) + return simple_selector_sequence + combinator + + else: + return expected + def _universal(expected, seq, token, tokenizer=None): + # *|* or prefix|* + context = new['context'][-1] + val = self._tokenvalue(token) + if 'universal' in expected: + append(seq, val, 'universal', token=token) + + if 'negation' == context: + return negationend else: - return expected + return simple_selector_sequence2 + combinator - def _universal(expected, seq, token, tokenizer=None): - # *|* or prefix|* - context = new['context'][-1] - val = self._tokenvalue(token) - if 'universal' in expected: - append(seq, val, 'universal', token=token) + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected universal.', token=token) + return expected - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator + def _namespace_prefix(expected, seq, token, tokenizer=None): + # prefix| => element_name + # or prefix| => attribute_name if attrib + context = new['context'][-1] + val = self._tokenvalue(token) + if 'attrib' == context and 'prefix' in expected: + # [PREFIX|att] + append(seq, val, '_PREFIX', token=token) + return attname2 + elif 'type_selector' in expected: + # PREFIX|* + append(seq, val, '_PREFIX', token=token) + return element_name + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected namespace prefix.', token=token) + return expected + def _pseudo(expected, seq, token, tokenizer=None): + # pseudo-class or pseudo-element :a ::a :a( ::a( + """ + /* '::' starts a pseudo-element, ':' a pseudo-class */ + /* Exceptions: :first-line, :first-letter, :before and + :after. */ + /* Note that pseudo-elements are restricted to one per selector + and */ + /* occur only in the last simple_selector_sequence. */ + """ + context = new['context'][-1] + val, typ = self._tokenvalue(token, normalize=True), self._type(token) + if 'pseudo' in expected: + if val in (':first-line', ':first-letter', ':before', ':after'): + # always pseudo-element ??? + typ = 'pseudo-element' + append(seq, val, typ, token=token) + + if val.endswith('('): + # function + # "pseudo-" "class" or "element" + new['context'].append(typ) + return expressionstart + elif 'negation' == context: + return negationend + elif 'pseudo-element' == typ: + # only one per element, check at ) also! + return combinator else: - new['wellformed'] = False - self._log.error('Selector: Unexpected universal.', token=token) - return expected + return simple_selector_sequence2 + combinator - def _namespace_prefix(expected, seq, token, tokenizer=None): - # prefix| => element_name - # or prefix| => attribute_name if attrib - context = new['context'][-1] - val = self._tokenvalue(token) - if 'attrib' == context and 'prefix' in expected: - # [PREFIX|att] - append(seq, val, '_PREFIX', token=token) - return attname2 - elif 'type_selector' in expected: - # PREFIX|* - append(seq, val, '_PREFIX', token=token) - return element_name - else: - new['wellformed'] = False - self._log.error( - 'Selector: Unexpected namespace prefix.', token=token - ) - return expected - - def _pseudo(expected, seq, token, tokenizer=None): - # pseudo-class or pseudo-element :a ::a :a( ::a( - """ - /* '::' starts a pseudo-element, ':' a pseudo-class */ - /* Exceptions: :first-line, :first-letter, :before and - :after. */ - /* Note that pseudo-elements are restricted to one per selector - and */ - /* occur only in the last simple_selector_sequence. */ - """ - context = new['context'][-1] - val, typ = self._tokenvalue(token, normalize=True), self._type(token) - if 'pseudo' in expected: - if val in (':first-line', ':first-letter', ':before', ':after'): - # always pseudo-element ??? - typ = 'pseudo-element' - append(seq, val, typ, token=token) - - if val.endswith('('): - # function - # "pseudo-" "class" or "element" - new['context'].append(typ) - return expressionstart - elif 'negation' == context: - return negationend - elif 'pseudo-element' == typ: - # only one per element, check at ) also! - return combinator - else: - return simple_selector_sequence2 + combinator + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected start of pseudo.', token=token) + return expected - else: - new['wellformed'] = False - self._log.error( - 'Selector: Unexpected start of pseudo.', token=token - ) - return expected - - def _expression(expected, seq, token, tokenizer=None): - # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ - context = new['context'][-1] - val, typ = self._tokenvalue(token), self._type(token) - if context.startswith('pseudo-'): - append(seq, val, typ, token=token) - return expression - else: - new['wellformed'] = False - self._log.error('Selector: Unexpected %s.' % typ, token=token) - return expected - - def _attcombinator(expected, seq, token, tokenizer=None): - # context: attrib - # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | - # DASHMATCH - context = new['context'][-1] - val, typ = self._tokenvalue(token), self._type(token) - if 'attrib' == context and 'combinator' in expected: - # combinator in attrib - append(seq, val, typ.lower(), token=token) - return attvalue - else: - new['wellformed'] = False - self._log.error('Selector: Unexpected %s.' % typ, token=token) - return expected + def _expression(expected, seq, token, tokenizer=None): + # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + if context.startswith('pseudo-'): + append(seq, val, typ, token=token) + return expression + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected %s.' % typ, token=token) + return expected - def _string(expected, seq, token, tokenizer=None): - # identifier - context = new['context'][-1] - typ, val = self._type(token), self._stringtokenvalue(token) + def _attcombinator(expected, seq, token, tokenizer=None): + # context: attrib + # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | + # DASHMATCH + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + if 'attrib' == context and 'combinator' in expected: + # combinator in attrib + append(seq, val, typ.lower(), token=token) + return attvalue + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected %s.' % typ, token=token) + return expected - # context: attrib - if 'attrib' == context and 'value' in expected: - # attrib: [...=VALUE] - append(seq, val, typ, token=token) - return attend + def _string(expected, seq, token, tokenizer=None): + # identifier + context = new['context'][-1] + typ, val = self._type(token), self._stringtokenvalue(token) - # context: pseudo - elif context.startswith('pseudo-'): - # :func(...) - append(seq, val, typ, token=token) - return expression + # context: attrib + if 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + append(seq, val, typ, token=token) + return attend - else: - new['wellformed'] = False - self._log.error('Selector: Unexpected STRING.', token=token) - return expected + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) + append(seq, val, typ, token=token) + return expression - def _ident(expected, seq, token, tokenizer=None): - # identifier - context = new['context'][-1] - val, typ = self._tokenvalue(token), self._type(token) + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected STRING.', token=token) + return expected - # context: attrib - if 'attrib' == context and 'attribute' in expected: - # attrib: [...|ATT...] - append(seq, val, 'attribute-selector', token=token) - return attcombinator + def _ident(expected, seq, token, tokenizer=None): + # identifier + context = new['context'][-1] + val, typ = self._tokenvalue(token), self._type(token) + + # context: attrib + if 'attrib' == context and 'attribute' in expected: + # attrib: [...|ATT...] + append(seq, val, 'attribute-selector', token=token) + return attcombinator + + elif 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + append(seq, val, 'attribute-value', token=token) + return attend + + # context: negation + elif 'negation' == context: + # negation: (prefix|IDENT) + append(seq, val, 'negation-type-selector', token=token) + return negationend + + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) + append(seq, val, typ, token=token) + return expression + + elif 'type_selector' in expected or element_name == expected: + # element name after ns or complete type_selector + append(seq, val, 'type-selector', token=token) + return simple_selector_sequence2 + combinator + + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected IDENT.', token=token) + return expected - elif 'attrib' == context and 'value' in expected: - # attrib: [...=VALUE] - append(seq, val, 'attribute-value', token=token) - return attend + def _class(expected, seq, token, tokenizer=None): + # .IDENT + context = new['context'][-1] + val = self._tokenvalue(token) + if 'class' in expected: + append(seq, val, 'class', token=token) - # context: negation - elif 'negation' == context: - # negation: (prefix|IDENT) - append(seq, val, 'negation-type-selector', token=token) + if 'negation' == context: return negationend + else: + return simple_selector_sequence2 + combinator - # context: pseudo - elif context.startswith('pseudo-'): - # :func(...) - append(seq, val, typ, token=token) - return expression + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected class.', token=token) + return expected - elif 'type_selector' in expected or element_name == expected: - # element name after ns or complete type_selector - append(seq, val, 'type-selector', token=token) - return simple_selector_sequence2 + combinator + def _hash(expected, seq, token, tokenizer=None): + # #IDENT + context = new['context'][-1] + val = self._tokenvalue(token) + if 'HASH' in expected: + append(seq, val, 'id', token=token) + if 'negation' == context: + return negationend else: - new['wellformed'] = False - self._log.error('Selector: Unexpected IDENT.', token=token) - return expected - - def _class(expected, seq, token, tokenizer=None): - # .IDENT - context = new['context'][-1] - val = self._tokenvalue(token) - if 'class' in expected: - append(seq, val, 'class', token=token) + return simple_selector_sequence2 + combinator - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected HASH.', token=token) + return expected - else: - new['wellformed'] = False - self._log.error('Selector: Unexpected class.', token=token) - return expected + def _char(expected, seq, token, tokenizer=None): # noqa: C901 + # + > ~ ) [ ] + - + context = new['context'][-1] + val = self._tokenvalue(token) - def _hash(expected, seq, token, tokenizer=None): - # #IDENT + # context: attrib + if ']' == val and 'attrib' == context and ']' in expected: + # end of attrib + append(seq, val, 'attribute-end', token=token) + context = new['context'].pop() # attrib is done context = new['context'][-1] - val = self._tokenvalue(token) - if 'HASH' in expected: - append(seq, val, 'id', token=token) + if 'negation' == context: + return negationend + else: + return simple_selector_sequence2 + combinator - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator + elif '=' == val and 'attrib' == context and 'combinator' in expected: + # combinator in attrib + append(seq, val, 'equals', token=token) + return attvalue + # context: negation + elif ')' == val and 'negation' == context and ')' in expected: + # not(negation_arg)" + append(seq, val, 'negation-end', token=token) + new['context'].pop() # negation is done + context = new['context'][-1] + return simple_selector_sequence + combinator + + # context: pseudo (at least one expression) + elif val in '+-' and context.startswith('pseudo-'): + # :func(+ -)" + _names = {'+': 'plus', '-': 'minus'} + if val == '+' and seq and seq[-1].value == S: + seq.replace(-1, val, _names[val]) else: - new['wellformed'] = False - self._log.error('Selector: Unexpected HASH.', token=token) - return expected + append(seq, val, _names[val], token=token) + return expression - def _char(expected, seq, token, tokenizer=None): # noqa: C901 - # + > ~ ) [ ] + - - context = new['context'][-1] - val = self._tokenvalue(token) - - # context: attrib - if ']' == val and 'attrib' == context and ']' in expected: - # end of attrib - append(seq, val, 'attribute-end', token=token) - context = new['context'].pop() # attrib is done - context = new['context'][-1] - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator - - elif '=' == val and 'attrib' == context and 'combinator' in expected: - # combinator in attrib - append(seq, val, 'equals', token=token) - return attvalue - - # context: negation - elif ')' == val and 'negation' == context and ')' in expected: - # not(negation_arg)" - append(seq, val, 'negation-end', token=token) - new['context'].pop() # negation is done - context = new['context'][-1] + elif ( + ')' == val and context.startswith('pseudo-') and expression == expected + ): + # :func(expression)" + append(seq, val, 'function-end', token=token) + new['context'].pop() # pseudo is done + if 'pseudo-element' == context: + return combinator + else: return simple_selector_sequence + combinator - # context: pseudo (at least one expression) - elif val in '+-' and context.startswith('pseudo-'): - # :func(+ -)" - _names = {'+': 'plus', '-': 'minus'} - if val == '+' and seq and seq[-1].value == S: - seq.replace(-1, val, _names[val]) - else: - append(seq, val, _names[val], token=token) - return expression - - elif ( - ')' == val - and context.startswith('pseudo-') - and expression == expected - ): - # :func(expression)" - append(seq, val, 'function-end', token=token) - new['context'].pop() # pseudo is done - if 'pseudo-element' == context: - return combinator - else: - return simple_selector_sequence + combinator - - # context: ROOT - elif '[' == val and 'attrib' in expected: - # start of [attrib] - append(seq, val, 'attribute-start', token=token) - new['context'].append('attrib') - return attname - - elif val in '+>~' and 'combinator' in expected: - # no other combinator except S may be following - _names = { - '>': 'child', - '+': 'adjacent-sibling', - '~': 'following-sibling', - } - if seq and seq[-1].value == S: - seq.replace(-1, val, _names[val]) - else: - append(seq, val, _names[val], token=token) - return simple_selector_sequence - - elif ',' == val: - # not a selectorlist - new['wellformed'] = False - self._log.error( - 'Selector: Single selector only.', - error=xml.dom.InvalidModificationErr, - token=token, - ) - return expected - + # context: ROOT + elif '[' == val and 'attrib' in expected: + # start of [attrib] + append(seq, val, 'attribute-start', token=token) + new['context'].append('attrib') + return attname + + elif val in '+>~' and 'combinator' in expected: + # no other combinator except S may be following + _names = { + '>': 'child', + '+': 'adjacent-sibling', + '~': 'following-sibling', + } + if seq and seq[-1].value == S: + seq.replace(-1, val, _names[val]) else: - new['wellformed'] = False - self._log.error('Selector: Unexpected CHAR.', token=token) - return expected - - def _negation(expected, seq, token, tokenizer=None): - # not( - val = self._tokenvalue(token, normalize=True) - if 'negation' in expected: - new['context'].append('negation') - append(seq, val, 'negation-start', token=token) - return negation_arg - else: - new['wellformed'] = False - self._log.error('Selector: Unexpected negation.', token=token) - return expected + append(seq, val, _names[val], token=token) + return simple_selector_sequence - def _atkeyword(expected, seq, token, tokenizer=None): - "invalidates selector" + elif ',' == val: + # not a selectorlist new['wellformed'] = False - self._log.error('Selector: Unexpected ATKEYWORD.', token=token) + self._log.error( + 'Selector: Single selector only.', + error=xml.dom.InvalidModificationErr, + token=token, + ) return expected - # expected: only|not or mediatype, mediatype, feature, and - newseq = self._tempSeq() - - wellformed, expected = self._parse( - expected=simple_selector_sequence, - seq=newseq, - tokenizer=tokenizer, - productions={ - 'CHAR': _char, - 'class': _class, - 'HASH': _hash, - 'STRING': _string, - 'IDENT': _ident, - 'namespace_prefix': _namespace_prefix, - 'negation': _negation, - 'pseudo-class': _pseudo, - 'pseudo-element': _pseudo, - 'universal': _universal, - # pseudo - 'NUMBER': _expression, - 'DIMENSION': _expression, - # attribute - 'PREFIXMATCH': _attcombinator, - 'SUFFIXMATCH': _attcombinator, - 'SUBSTRINGMATCH': _attcombinator, - 'DASHMATCH': _attcombinator, - 'INCLUDES': _attcombinator, - 'S': _S, - 'COMMENT': _COMMENT, - 'ATKEYWORD': _atkeyword, - }, - ) - wellformed = wellformed and new['wellformed'] + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected CHAR.', token=token) + return expected - # post condition - if len(new['context']) > 1 or not newseq: - wellformed = False - self._log.error( - 'Selector: Invalid or incomplete selector: %s' - % self._valuestr(selectorText) - ) + def _negation(expected, seq, token, tokenizer=None): + # not( + val = self._tokenvalue(token, normalize=True) + if 'negation' in expected: + new['context'].append('negation') + append(seq, val, 'negation-start', token=token) + return negation_arg + else: + new['wellformed'] = False + self._log.error('Selector: Unexpected negation.', token=token) + return expected - if expected == 'element_name': - wellformed = False - self._log.error( - 'Selector: No element name found: %s' % self._valuestr(selectorText) - ) + def _atkeyword(expected, seq, token, tokenizer=None): + "invalidates selector" + new['wellformed'] = False + self._log.error('Selector: Unexpected ATKEYWORD.', token=token) + return expected + + # expected: only|not or mediatype, mediatype, feature, and + newseq = self._tempSeq() + + wellformed, expected = self._parse( + expected=simple_selector_sequence, + seq=newseq, + tokenizer=tokenizer, + productions={ + 'CHAR': _char, + 'class': _class, + 'HASH': _hash, + 'STRING': _string, + 'IDENT': _ident, + 'namespace_prefix': _namespace_prefix, + 'negation': _negation, + 'pseudo-class': _pseudo, + 'pseudo-element': _pseudo, + 'universal': _universal, + # pseudo + 'NUMBER': _expression, + 'DIMENSION': _expression, + # attribute + 'PREFIXMATCH': _attcombinator, + 'SUFFIXMATCH': _attcombinator, + 'SUBSTRINGMATCH': _attcombinator, + 'DASHMATCH': _attcombinator, + 'INCLUDES': _attcombinator, + 'S': _S, + 'COMMENT': _COMMENT, + 'ATKEYWORD': _atkeyword, + }, + ) + wellformed = wellformed and new['wellformed'] + + # post condition + if len(new['context']) > 1 or not newseq: + wellformed = False + self._log.error( + 'Selector: Invalid or incomplete selector: %s' + % self._valuestr(selectorText) + ) - if expected == simple_selector_sequence and newseq: - wellformed = False - self._log.error( - 'Selector: Cannot end with combinator: %s' - % self._valuestr(selectorText) - ) + if expected == 'element_name': + wellformed = False + self._log.error( + 'Selector: No element name found: %s' % self._valuestr(selectorText) + ) - if ( - newseq - and hasattr(newseq[-1].value, 'strip') - and newseq[-1].value.strip() == '' - ): - del newseq[-1] - - # set - if wellformed: - self.__namespaces = namespaces - self._element = new['element'] - self._specificity = tuple(new['specificity']) - self._setSeq(newseq) - # filter that only used ones are kept - self.__namespaces = self._getUsedNamespaces() + if expected == simple_selector_sequence and newseq: + wellformed = False + self._log.error( + 'Selector: Cannot end with combinator: %s' + % self._valuestr(selectorText) + ) + + if ( + newseq + and hasattr(newseq[-1].value, 'strip') + and newseq[-1].value.strip() == '' + ): + del newseq[-1] + + # set + if wellformed: + self.__namespaces = namespaces + self._element = new['element'] + self._specificity = tuple(new['specificity']) + self._setSeq(newseq) + # filter that only used ones are kept + self.__namespaces = self._getUsedNamespaces() selectorText = property( _getSelectorText, From 4775886139c36ff783996a485583e351dea9a77c Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 13 May 2024 20:47:16 -0400 Subject: [PATCH 3/7] Extract _prepare_tokens --- cssutils/css/selector.py | 180 ++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 88 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index b6f1119e..fa428865 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -232,94 +232,7 @@ def _setSelectorText(self, selectorText): # noqa: C901 self._log.error('Selector: No selectorText given.') return - # prepare tokenlist: - # "*" -> type "universal" - # "*"|IDENT + "|" -> combined to "namespace_prefix" - # "|" -> type "namespace_prefix" - # "." + IDENT -> combined to "class" - # ":" + IDENT, ":" + FUNCTION -> pseudo-class - # FUNCTION "not(" -> negation - # "::" + IDENT, "::" + FUNCTION -> pseudo-element - tokens = [] - for t in tokenizer: - typ, val, lin, col = t - if val == ':' and tokens and self._tokenvalue(tokens[-1]) == ':': - # combine ":" and ":" - tokens[-1] = (typ, '::', lin, col) - - elif typ == 'IDENT' and tokens and self._tokenvalue(tokens[-1]) == '.': - # class: combine to .IDENT - tokens[-1] = ('class', '.' + val, lin, col) - elif ( - typ == 'IDENT' - and tokens - and self._tokenvalue(tokens[-1]).startswith(':') - and not self._tokenvalue(tokens[-1]).endswith('(') - ): - # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" - if self._tokenvalue(tokens[-1]).startswith('::'): - t = 'pseudo-element' - else: - t = 'pseudo-class' - tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) - - elif ( - typ == 'FUNCTION' - and val == 'not(' - and tokens - and ':' == self._tokenvalue(tokens[-1]) - ): - tokens[-1] = ('negation', ':' + val, lin, tokens[-1][3]) - elif ( - typ == 'FUNCTION' - and tokens - and self._tokenvalue(tokens[-1]).startswith(':') - ): - # pseudo-X: combine to :FUNCTION( or ::FUNCTION( - if self._tokenvalue(tokens[-1]).startswith('::'): - t = 'pseudo-element' - else: - t = 'pseudo-class' - tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) - - elif ( - val == '*' - and tokens - and self._type(tokens[-1]) == 'namespace_prefix' - and self._tokenvalue(tokens[-1]).endswith('|') - ): - # combine prefix|* - tokens[-1] = ( - 'universal', - self._tokenvalue(tokens[-1]) + val, - lin, - col, - ) - elif val == '*': - # universal: "*" - tokens.append(('universal', val, lin, col)) - - elif ( - val == '|' - and tokens - and self._type(tokens[-1]) in (self._prods.IDENT, 'universal') - and self._tokenvalue(tokens[-1]).find('|') == -1 - ): - # namespace_prefix: "IDENT|" or "*|" - tokens[-1] = ( - 'namespace_prefix', - self._tokenvalue(tokens[-1]) + '|', - lin, - col, - ) - elif val == '|': - # namespace_prefix: "|" - tokens.append(('namespace_prefix', val, lin, col)) - - else: - tokens.append(t) - - tokenizer = iter(tokens) + tokenizer = self._prepare_tokens(tokenizer) # for closures: must be a mutable new = { @@ -824,6 +737,97 @@ def _atkeyword(expected, seq, token, tokenizer=None): # filter that only used ones are kept self.__namespaces = self._getUsedNamespaces() + def _prepare_tokens(self, tokenizer): # noqa: C901 + """ + "*" -> type "universal" + "*"|IDENT + "|" -> combined to "namespace_prefix" + "|" -> type "namespace_prefix" + "." + IDENT -> combined to "class" + ":" + IDENT, ":" + FUNCTION -> pseudo-class + FUNCTION "not(" -> negation + "::" + IDENT, "::" + FUNCTION -> pseudo-element + """ + tokens = [] + for t in tokenizer: + typ, val, lin, col = t + if val == ':' and tokens and self._tokenvalue(tokens[-1]) == ':': + # combine ":" and ":" + tokens[-1] = (typ, '::', lin, col) + + elif typ == 'IDENT' and tokens and self._tokenvalue(tokens[-1]) == '.': + # class: combine to .IDENT + tokens[-1] = ('class', '.' + val, lin, col) + elif ( + typ == 'IDENT' + and tokens + and self._tokenvalue(tokens[-1]).startswith(':') + and not self._tokenvalue(tokens[-1]).endswith('(') + ): + # pseudo-X: combine to :IDENT or ::IDENT but not ":a(" + "b" + if self._tokenvalue(tokens[-1]).startswith('::'): + t = 'pseudo-element' + else: + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) + + elif ( + typ == 'FUNCTION' + and val == 'not(' + and tokens + and ':' == self._tokenvalue(tokens[-1]) + ): + tokens[-1] = ('negation', ':' + val, lin, tokens[-1][3]) + elif ( + typ == 'FUNCTION' + and tokens + and self._tokenvalue(tokens[-1]).startswith(':') + ): + # pseudo-X: combine to :FUNCTION( or ::FUNCTION( + if self._tokenvalue(tokens[-1]).startswith('::'): + t = 'pseudo-element' + else: + t = 'pseudo-class' + tokens[-1] = (t, self._tokenvalue(tokens[-1]) + val, lin, col) + + elif ( + val == '*' + and tokens + and self._type(tokens[-1]) == 'namespace_prefix' + and self._tokenvalue(tokens[-1]).endswith('|') + ): + # combine prefix|* + tokens[-1] = ( + 'universal', + self._tokenvalue(tokens[-1]) + val, + lin, + col, + ) + elif val == '*': + # universal: "*" + tokens.append(('universal', val, lin, col)) + + elif ( + val == '|' + and tokens + and self._type(tokens[-1]) in (self._prods.IDENT, 'universal') + and self._tokenvalue(tokens[-1]).find('|') == -1 + ): + # namespace_prefix: "IDENT|" or "*|" + tokens[-1] = ( + 'namespace_prefix', + self._tokenvalue(tokens[-1]) + '|', + lin, + col, + ) + elif val == '|': + # namespace_prefix: "|" + tokens.append(('namespace_prefix', val, lin, col)) + + else: + tokens.append(t) + + return iter(tokens) + selectorText = property( _getSelectorText, _setSelectorText, From 80af16bcfd41a3ab94cc3514fd9ecb4aeb7f289b Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 14 May 2024 14:48:23 -0400 Subject: [PATCH 4/7] Move the 'New' object into a dataclass. --- cssutils/css/selector.py | 114 +++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 53 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index fa428865..2ca87663 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -6,9 +6,12 @@ - .isSubselector(selector) """ +from __future__ import annotations + __all__ = ['Selector'] import contextlib +import dataclasses import xml.dom import cssutils @@ -16,6 +19,17 @@ from cssutils.util import _SimpleNamespaces +@dataclasses.dataclass +class New: + context: list[str] = dataclasses.field(default_factory=lambda: ['']) + "stack of: 'attrib', 'negation', 'pseudo'" + element: str | None = None + _PREFIX: str | None = None + specificity: list[int] = dataclasses.field(default_factory=lambda: [0] * 4) + "mutable, finally a tuple!" + wellformed: bool = True + + class Selector(cssutils.util.Base2): """ (cssutils) a single selector in a :class:`~cssutils.css.SelectorList` @@ -234,14 +248,8 @@ def _setSelectorText(self, selectorText): # noqa: C901 tokenizer = self._prepare_tokens(tokenizer) - # for closures: must be a mutable - new = { - 'context': [''], # stack of: 'attrib', 'negation', 'pseudo' - 'element': None, - '_PREFIX': None, - 'specificity': [0, 0, 0, 0], # mutable, finally a tuple! - 'wellformed': True, - } + new = New() + # used for equality checks and setting of a space combinator S = ' ' @@ -257,7 +265,7 @@ def append(seq, val, typ=None, token=None): # noqa: C901 - specificity definition: style, id, class/att, type - element: the element this Selector is for """ - context = new['context'][-1] + context = new.context[-1] if token: line, col = token[2], token[3] else: @@ -265,13 +273,13 @@ def append(seq, val, typ=None, token=None): # noqa: C901 if typ == '_PREFIX': # SPECIAL TYPE: save prefix for combination with next - new['_PREFIX'] = val[:-1] + new._PREFIX = val[:-1] # handle next time return - if new['_PREFIX'] is not None: + if new._PREFIX is not None: # as saved from before and reset to None - prefix, new['_PREFIX'] = new['_PREFIX'], None + prefix, new._PREFIX = new._PREFIX, None elif typ == 'universal' and '|' in val: # val == *|* or prefix|* prefix, val = val.split('|') @@ -299,7 +307,7 @@ def append(seq, val, typ=None, token=None): # noqa: C901 namespaceURI = namespaces[prefix] if namespaceURI is None: - new['wellformed'] = False + new.wellformed = False self._log.error( 'Selector: No namespaceURI found ' 'for prefix %r' % prefix, token=token, @@ -313,18 +321,18 @@ def append(seq, val, typ=None, token=None): # noqa: C901 # specificity if not context or context == 'negation': if 'id' == typ: - new['specificity'][1] += 1 + new.specificity[1] += 1 elif 'class' == typ or '[' == val: - new['specificity'][2] += 1 + new.specificity[2] += 1 elif typ in ( 'type-selector', 'negation-type-selector', 'pseudo-element', ): - new['specificity'][3] += 1 + new.specificity[3] += 1 if not context and typ in ('type-selector', 'universal'): # define element - new['element'] = val + new.element = val seq.append(val, typ, line=line, col=col) @@ -357,7 +365,7 @@ def _COMMENT(expected, seq, token, tokenizer=None): def _S(expected, seq, token, tokenizer=None): # S - context = new['context'][-1] + context = new.context[-1] if context.startswith('pseudo-'): if seq and seq[-1].value not in '+-': # e.g. x:func(a + b) @@ -373,7 +381,7 @@ def _S(expected, seq, token, tokenizer=None): def _universal(expected, seq, token, tokenizer=None): # *|* or prefix|* - context = new['context'][-1] + context = new.context[-1] val = self._tokenvalue(token) if 'universal' in expected: append(seq, val, 'universal', token=token) @@ -384,14 +392,14 @@ def _universal(expected, seq, token, tokenizer=None): return simple_selector_sequence2 + combinator else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected universal.', token=token) return expected def _namespace_prefix(expected, seq, token, tokenizer=None): # prefix| => element_name # or prefix| => attribute_name if attrib - context = new['context'][-1] + context = new.context[-1] val = self._tokenvalue(token) if 'attrib' == context and 'prefix' in expected: # [PREFIX|att] @@ -402,7 +410,7 @@ def _namespace_prefix(expected, seq, token, tokenizer=None): append(seq, val, '_PREFIX', token=token) return element_name else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected namespace prefix.', token=token) return expected @@ -416,7 +424,7 @@ def _pseudo(expected, seq, token, tokenizer=None): and */ /* occur only in the last simple_selector_sequence. */ """ - context = new['context'][-1] + context = new.context[-1] val, typ = self._tokenvalue(token, normalize=True), self._type(token) if 'pseudo' in expected: if val in (':first-line', ':first-letter', ':before', ':after'): @@ -427,7 +435,7 @@ def _pseudo(expected, seq, token, tokenizer=None): if val.endswith('('): # function # "pseudo-" "class" or "element" - new['context'].append(typ) + new.context.append(typ) return expressionstart elif 'negation' == context: return negationend @@ -438,19 +446,19 @@ def _pseudo(expected, seq, token, tokenizer=None): return simple_selector_sequence2 + combinator else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected start of pseudo.', token=token) return expected def _expression(expected, seq, token, tokenizer=None): # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ - context = new['context'][-1] + context = new.context[-1] val, typ = self._tokenvalue(token), self._type(token) if context.startswith('pseudo-'): append(seq, val, typ, token=token) return expression else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected %s.' % typ, token=token) return expected @@ -458,20 +466,20 @@ def _attcombinator(expected, seq, token, tokenizer=None): # context: attrib # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | # DASHMATCH - context = new['context'][-1] + context = new.context[-1] val, typ = self._tokenvalue(token), self._type(token) if 'attrib' == context and 'combinator' in expected: # combinator in attrib append(seq, val, typ.lower(), token=token) return attvalue else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected %s.' % typ, token=token) return expected def _string(expected, seq, token, tokenizer=None): # identifier - context = new['context'][-1] + context = new.context[-1] typ, val = self._type(token), self._stringtokenvalue(token) # context: attrib @@ -487,13 +495,13 @@ def _string(expected, seq, token, tokenizer=None): return expression else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected STRING.', token=token) return expected def _ident(expected, seq, token, tokenizer=None): # identifier - context = new['context'][-1] + context = new.context[-1] val, typ = self._tokenvalue(token), self._type(token) # context: attrib @@ -525,13 +533,13 @@ def _ident(expected, seq, token, tokenizer=None): return simple_selector_sequence2 + combinator else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected IDENT.', token=token) return expected def _class(expected, seq, token, tokenizer=None): # .IDENT - context = new['context'][-1] + context = new.context[-1] val = self._tokenvalue(token) if 'class' in expected: append(seq, val, 'class', token=token) @@ -542,13 +550,13 @@ def _class(expected, seq, token, tokenizer=None): return simple_selector_sequence2 + combinator else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected class.', token=token) return expected def _hash(expected, seq, token, tokenizer=None): # #IDENT - context = new['context'][-1] + context = new.context[-1] val = self._tokenvalue(token) if 'HASH' in expected: append(seq, val, 'id', token=token) @@ -559,21 +567,21 @@ def _hash(expected, seq, token, tokenizer=None): return simple_selector_sequence2 + combinator else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected HASH.', token=token) return expected def _char(expected, seq, token, tokenizer=None): # noqa: C901 # + > ~ ) [ ] + - - context = new['context'][-1] + context = new.context[-1] val = self._tokenvalue(token) # context: attrib if ']' == val and 'attrib' == context and ']' in expected: # end of attrib append(seq, val, 'attribute-end', token=token) - context = new['context'].pop() # attrib is done - context = new['context'][-1] + context = new.context.pop() # attrib is done + context = new.context[-1] if 'negation' == context: return negationend else: @@ -588,8 +596,8 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 elif ')' == val and 'negation' == context and ')' in expected: # not(negation_arg)" append(seq, val, 'negation-end', token=token) - new['context'].pop() # negation is done - context = new['context'][-1] + new.context.pop() # negation is done + context = new.context[-1] return simple_selector_sequence + combinator # context: pseudo (at least one expression) @@ -607,7 +615,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 ): # :func(expression)" append(seq, val, 'function-end', token=token) - new['context'].pop() # pseudo is done + new.context.pop() # pseudo is done if 'pseudo-element' == context: return combinator else: @@ -617,7 +625,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 elif '[' == val and 'attrib' in expected: # start of [attrib] append(seq, val, 'attribute-start', token=token) - new['context'].append('attrib') + new.context.append('attrib') return attname elif val in '+>~' and 'combinator' in expected: @@ -635,7 +643,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 elif ',' == val: # not a selectorlist - new['wellformed'] = False + new.wellformed = False self._log.error( 'Selector: Single selector only.', error=xml.dom.InvalidModificationErr, @@ -644,7 +652,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 return expected else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected CHAR.', token=token) return expected @@ -652,17 +660,17 @@ def _negation(expected, seq, token, tokenizer=None): # not( val = self._tokenvalue(token, normalize=True) if 'negation' in expected: - new['context'].append('negation') + new.context.append('negation') append(seq, val, 'negation-start', token=token) return negation_arg else: - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected negation.', token=token) return expected def _atkeyword(expected, seq, token, tokenizer=None): "invalidates selector" - new['wellformed'] = False + new.wellformed = False self._log.error('Selector: Unexpected ATKEYWORD.', token=token) return expected @@ -698,10 +706,10 @@ def _atkeyword(expected, seq, token, tokenizer=None): 'ATKEYWORD': _atkeyword, }, ) - wellformed = wellformed and new['wellformed'] + wellformed = wellformed and new.wellformed # post condition - if len(new['context']) > 1 or not newseq: + if len(new.context) > 1 or not newseq: wellformed = False self._log.error( 'Selector: Invalid or incomplete selector: %s' @@ -731,8 +739,8 @@ def _atkeyword(expected, seq, token, tokenizer=None): # set if wellformed: self.__namespaces = namespaces - self._element = new['element'] - self._specificity = tuple(new['specificity']) + self._element = new.element + self._specificity = tuple(new.specificity) self._setSeq(newseq) # filter that only used ones are kept self.__namespaces = self._getUsedNamespaces() From 8afae87aa6f474bfb18c66c1650ac8b25fe8bfc3 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 14 May 2024 15:02:09 -0400 Subject: [PATCH 5/7] Moved append function into 'New' class. --- cssutils/css/selector.py | 227 ++++++++++++++++++++------------------- 1 file changed, 116 insertions(+), 111 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index 2ca87663..090fca50 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -20,7 +20,12 @@ @dataclasses.dataclass -class New: +class New(cssutils.util._BaseClass): + """ + Derives from _BaseClass to provide self._log. + """ + + namespaces: dict[str, str] context: list[str] = dataclasses.field(default_factory=lambda: ['']) "stack of: 'attrib', 'negation', 'pseudo'" element: str | None = None @@ -29,6 +34,89 @@ class New: "mutable, finally a tuple!" wellformed: bool = True + def append(self, seq, val, typ=None, token=None): # noqa: C901 + """ + appends to seq + + namespace_prefix, IDENT will be combined to a tuple + (prefix, name) where prefix might be None, the empty string + or a prefix. + + Saved are also: + - specificity definition: style, id, class/att, type + - element: the element this Selector is for + """ + context = self.context[-1] + if token: + line, col = token[2], token[3] + else: + line, col = None, None + + if typ == '_PREFIX': + # SPECIAL TYPE: save prefix for combination with next + self._PREFIX = val[:-1] + # handle next time + return + + if self._PREFIX is not None: + # as saved from before and reset to None + prefix, self._PREFIX = self._PREFIX, None + elif typ == 'universal' and '|' in val: + # val == *|* or prefix|* + prefix, val = val.split('|') + else: + prefix = None + + # namespace + if (typ.endswith('-selector') or typ == 'universal') and not ( + 'attribute-selector' == typ and not prefix + ): + # att **IS NOT** in default ns + if prefix == '*': + # *|name: in ANY_NS + namespaceURI = cssutils._ANYNS + elif prefix is None: + # e or *: default namespace with prefix u'' + # or local-name() + namespaceURI = self.namespaces.get('', None) + elif prefix == '': + # |name or |*: in no (or the empty) namespace + namespaceURI = '' + else: + # explicit namespace prefix + # does not raise KeyError, see _SimpleNamespaces + namespaceURI = self.namespaces[prefix] + + if namespaceURI is None: + self.wellformed = False + self._log.error( + 'Selector: No namespaceURI found ' 'for prefix %r' % prefix, + token=token, + error=xml.dom.NamespaceErr, + ) + return + + # val is now (namespaceprefix, name) tuple + val = (namespaceURI, val) + + # specificity + if not context or context == 'negation': + if 'id' == typ: + self.specificity[1] += 1 + elif 'class' == typ or '[' == val: + self.specificity[2] += 1 + elif typ in ( + 'type-selector', + 'negation-type-selector', + 'pseudo-element', + ): + self.specificity[3] += 1 + if not context and typ in ('type-selector', 'universal'): + # define element + self.element = val + + seq.append(val, typ, line=line, col=col) + class Selector(cssutils.util.Base2): """ @@ -248,94 +336,11 @@ def _setSelectorText(self, selectorText): # noqa: C901 tokenizer = self._prepare_tokens(tokenizer) - new = New() + new = New(namespaces=namespaces) # used for equality checks and setting of a space combinator S = ' ' - def append(seq, val, typ=None, token=None): # noqa: C901 - """ - appends to seq - - namespace_prefix, IDENT will be combined to a tuple - (prefix, name) where prefix might be None, the empty string - or a prefix. - - Saved are also: - - specificity definition: style, id, class/att, type - - element: the element this Selector is for - """ - context = new.context[-1] - if token: - line, col = token[2], token[3] - else: - line, col = None, None - - if typ == '_PREFIX': - # SPECIAL TYPE: save prefix for combination with next - new._PREFIX = val[:-1] - # handle next time - return - - if new._PREFIX is not None: - # as saved from before and reset to None - prefix, new._PREFIX = new._PREFIX, None - elif typ == 'universal' and '|' in val: - # val == *|* or prefix|* - prefix, val = val.split('|') - else: - prefix = None - - # namespace - if (typ.endswith('-selector') or typ == 'universal') and not ( - 'attribute-selector' == typ and not prefix - ): - # att **IS NOT** in default ns - if prefix == '*': - # *|name: in ANY_NS - namespaceURI = cssutils._ANYNS - elif prefix is None: - # e or *: default namespace with prefix u'' - # or local-name() - namespaceURI = namespaces.get('', None) - elif prefix == '': - # |name or |*: in no (or the empty) namespace - namespaceURI = '' - else: - # explicit namespace prefix - # does not raise KeyError, see _SimpleNamespaces - namespaceURI = namespaces[prefix] - - if namespaceURI is None: - new.wellformed = False - self._log.error( - 'Selector: No namespaceURI found ' 'for prefix %r' % prefix, - token=token, - error=xml.dom.NamespaceErr, - ) - return - - # val is now (namespaceprefix, name) tuple - val = (namespaceURI, val) - - # specificity - if not context or context == 'negation': - if 'id' == typ: - new.specificity[1] += 1 - elif 'class' == typ or '[' == val: - new.specificity[2] += 1 - elif typ in ( - 'type-selector', - 'negation-type-selector', - 'pseudo-element', - ): - new.specificity[3] += 1 - if not context and typ in ('type-selector', 'universal'): - # define element - new.element = val - - seq.append(val, typ, line=line, col=col) - # expected constants simple_selector_sequence = ( 'type_selector universal HASH class ' 'attrib pseudo negation ' @@ -360,7 +365,7 @@ def append(seq, val, typ=None, token=None): # noqa: C901 def _COMMENT(expected, seq, token, tokenizer=None): "special implementation for comment token" - append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) + new.append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) return expected def _S(expected, seq, token, tokenizer=None): @@ -369,11 +374,11 @@ def _S(expected, seq, token, tokenizer=None): if context.startswith('pseudo-'): if seq and seq[-1].value not in '+-': # e.g. x:func(a + b) - append(seq, S, 'S', token=token) + new.append(seq, S, 'S', token=token) return expected elif context != 'attrib' and 'combinator' in expected: - append(seq, S, 'descendant', token=token) + new.append(seq, S, 'descendant', token=token) return simple_selector_sequence + combinator else: @@ -384,7 +389,7 @@ def _universal(expected, seq, token, tokenizer=None): context = new.context[-1] val = self._tokenvalue(token) if 'universal' in expected: - append(seq, val, 'universal', token=token) + new.append(seq, val, 'universal', token=token) if 'negation' == context: return negationend @@ -403,11 +408,11 @@ def _namespace_prefix(expected, seq, token, tokenizer=None): val = self._tokenvalue(token) if 'attrib' == context and 'prefix' in expected: # [PREFIX|att] - append(seq, val, '_PREFIX', token=token) + new.append(seq, val, '_PREFIX', token=token) return attname2 elif 'type_selector' in expected: # PREFIX|* - append(seq, val, '_PREFIX', token=token) + new.append(seq, val, '_PREFIX', token=token) return element_name else: new.wellformed = False @@ -430,7 +435,7 @@ def _pseudo(expected, seq, token, tokenizer=None): if val in (':first-line', ':first-letter', ':before', ':after'): # always pseudo-element ??? typ = 'pseudo-element' - append(seq, val, typ, token=token) + new.append(seq, val, typ, token=token) if val.endswith('('): # function @@ -455,7 +460,7 @@ def _expression(expected, seq, token, tokenizer=None): context = new.context[-1] val, typ = self._tokenvalue(token), self._type(token) if context.startswith('pseudo-'): - append(seq, val, typ, token=token) + new.append(seq, val, typ, token=token) return expression else: new.wellformed = False @@ -470,7 +475,7 @@ def _attcombinator(expected, seq, token, tokenizer=None): val, typ = self._tokenvalue(token), self._type(token) if 'attrib' == context and 'combinator' in expected: # combinator in attrib - append(seq, val, typ.lower(), token=token) + new.append(seq, val, typ.lower(), token=token) return attvalue else: new.wellformed = False @@ -485,13 +490,13 @@ def _string(expected, seq, token, tokenizer=None): # context: attrib if 'attrib' == context and 'value' in expected: # attrib: [...=VALUE] - append(seq, val, typ, token=token) + new.append(seq, val, typ, token=token) return attend # context: pseudo elif context.startswith('pseudo-'): # :func(...) - append(seq, val, typ, token=token) + new.append(seq, val, typ, token=token) return expression else: @@ -507,29 +512,29 @@ def _ident(expected, seq, token, tokenizer=None): # context: attrib if 'attrib' == context and 'attribute' in expected: # attrib: [...|ATT...] - append(seq, val, 'attribute-selector', token=token) + new.append(seq, val, 'attribute-selector', token=token) return attcombinator elif 'attrib' == context and 'value' in expected: # attrib: [...=VALUE] - append(seq, val, 'attribute-value', token=token) + new.append(seq, val, 'attribute-value', token=token) return attend # context: negation elif 'negation' == context: # negation: (prefix|IDENT) - append(seq, val, 'negation-type-selector', token=token) + new.append(seq, val, 'negation-type-selector', token=token) return negationend # context: pseudo elif context.startswith('pseudo-'): # :func(...) - append(seq, val, typ, token=token) + new.append(seq, val, typ, token=token) return expression elif 'type_selector' in expected or element_name == expected: # element name after ns or complete type_selector - append(seq, val, 'type-selector', token=token) + new.append(seq, val, 'type-selector', token=token) return simple_selector_sequence2 + combinator else: @@ -542,7 +547,7 @@ def _class(expected, seq, token, tokenizer=None): context = new.context[-1] val = self._tokenvalue(token) if 'class' in expected: - append(seq, val, 'class', token=token) + new.append(seq, val, 'class', token=token) if 'negation' == context: return negationend @@ -559,7 +564,7 @@ def _hash(expected, seq, token, tokenizer=None): context = new.context[-1] val = self._tokenvalue(token) if 'HASH' in expected: - append(seq, val, 'id', token=token) + new.append(seq, val, 'id', token=token) if 'negation' == context: return negationend @@ -579,7 +584,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 # context: attrib if ']' == val and 'attrib' == context and ']' in expected: # end of attrib - append(seq, val, 'attribute-end', token=token) + new.append(seq, val, 'attribute-end', token=token) context = new.context.pop() # attrib is done context = new.context[-1] if 'negation' == context: @@ -589,13 +594,13 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 elif '=' == val and 'attrib' == context and 'combinator' in expected: # combinator in attrib - append(seq, val, 'equals', token=token) + new.append(seq, val, 'equals', token=token) return attvalue # context: negation elif ')' == val and 'negation' == context and ')' in expected: # not(negation_arg)" - append(seq, val, 'negation-end', token=token) + new.append(seq, val, 'negation-end', token=token) new.context.pop() # negation is done context = new.context[-1] return simple_selector_sequence + combinator @@ -607,14 +612,14 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 if val == '+' and seq and seq[-1].value == S: seq.replace(-1, val, _names[val]) else: - append(seq, val, _names[val], token=token) + new.append(seq, val, _names[val], token=token) return expression elif ( ')' == val and context.startswith('pseudo-') and expression == expected ): # :func(expression)" - append(seq, val, 'function-end', token=token) + new.append(seq, val, 'function-end', token=token) new.context.pop() # pseudo is done if 'pseudo-element' == context: return combinator @@ -624,7 +629,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 # context: ROOT elif '[' == val and 'attrib' in expected: # start of [attrib] - append(seq, val, 'attribute-start', token=token) + new.append(seq, val, 'attribute-start', token=token) new.context.append('attrib') return attname @@ -638,7 +643,7 @@ def _char(expected, seq, token, tokenizer=None): # noqa: C901 if seq and seq[-1].value == S: seq.replace(-1, val, _names[val]) else: - append(seq, val, _names[val], token=token) + new.append(seq, val, _names[val], token=token) return simple_selector_sequence elif ',' == val: @@ -661,7 +666,7 @@ def _negation(expected, seq, token, tokenizer=None): val = self._tokenvalue(token, normalize=True) if 'negation' in expected: new.context.append('negation') - append(seq, val, 'negation-start', token=token) + new.append(seq, val, 'negation-start', token=token) return negation_arg else: new.wellformed = False From cd435f724a45fc994aeea1dad1e4bad38242733a Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 14 May 2024 15:36:32 -0400 Subject: [PATCH 6/7] Moved productions into the 'New' class (and constants into their own class). --- cssutils/css/selector.py | 751 ++++++++++++++++++++------------------- 1 file changed, 382 insertions(+), 369 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index 090fca50..c5298448 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -19,12 +19,41 @@ from cssutils.util import _SimpleNamespaces +class Constants: + "expected constants" + + # used for equality checks and setting of a space combinator + S = ' ' + + simple_selector_sequence = ( + 'type_selector universal HASH class ' 'attrib pseudo negation ' + ) + simple_selector_sequence2 = 'HASH class attrib pseudo negation ' + + element_name = 'element_name' + + negation_arg = 'type_selector universal HASH class attrib pseudo' + negationend = ')' + + attname = 'prefix attribute' + attname2 = 'attribute' + attcombinator = 'combinator ]' # optional + attvalue = 'value' # optional + attend = ']' + + expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' + expression = expressionstart + ' )' + + combinator = ' combinator' + + @dataclasses.dataclass class New(cssutils.util._BaseClass): """ Derives from _BaseClass to provide self._log. """ + selector: Selector namespaces: dict[str, str] context: list[str] = dataclasses.field(default_factory=lambda: ['']) "stack of: 'attrib', 'negation', 'pseudo'" @@ -117,6 +146,354 @@ def append(self, seq, val, typ=None, token=None): # noqa: C901 seq.append(val, typ, line=line, col=col) + def _COMMENT(self, expected, seq, token, tokenizer=None): + "special implementation for comment token" + self.append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) + return expected + + def _S(self, expected, seq, token, tokenizer=None): + # S + context = self.context[-1] + if context.startswith('pseudo-'): + if seq and seq[-1].value not in '+-': + # e.g. x:func(a + b) + self.append(seq, Constants.S, 'S', token=token) + return expected + + elif context != 'attrib' and 'combinator' in expected: + self.append(seq, Constants.S, 'descendant', token=token) + return Constants.simple_selector_sequence + Constants.combinator + + else: + return expected + + def _universal(self, expected, seq, token, tokenizer=None): + # *|* or prefix|* + context = self.context[-1] + val = self.selector._tokenvalue(token) + if 'universal' in expected: + self.append(seq, val, 'universal', token=token) + + if 'negation' == context: + return Constants.negationend + else: + return Constants.simple_selector_sequence2 + Constants.combinator + + else: + self.wellformed = False + self._log.error('Selector: Unexpected universal.', token=token) + return expected + + def _namespace_prefix(self, expected, seq, token, tokenizer=None): + # prefix| => element_name + # or prefix| => attribute_name if attrib + context = self.context[-1] + val = self.selector._tokenvalue(token) + if 'attrib' == context and 'prefix' in expected: + # [PREFIX|att] + self.append(seq, val, '_PREFIX', token=token) + return Constants.attname2 + elif 'type_selector' in expected: + # PREFIX|* + self.append(seq, val, '_PREFIX', token=token) + return Constants.element_name + else: + self.wellformed = False + self._log.error('Selector: Unexpected namespace prefix.', token=token) + return expected + + def _pseudo(self, expected, seq, token, tokenizer=None): + # pseudo-class or pseudo-element :a ::a :a( ::a( + """ + /* '::' starts a pseudo-element, ':' a pseudo-class */ + /* Exceptions: :first-line, :first-letter, :before and + :after. */ + /* Note that pseudo-elements are restricted to one per selector + and */ + /* occur only in the last simple_selector_sequence. */ + """ + context = self.context[-1] + val, typ = ( + self.selector._tokenvalue(token, normalize=True), + self.selector._type(token), + ) + if 'pseudo' in expected: + if val in (':first-line', ':first-letter', ':before', ':after'): + # always pseudo-element ??? + typ = 'pseudo-element' + self.append(seq, val, typ, token=token) + + if val.endswith('('): + # function + # "pseudo-" "class" or "element" + self.context.append(typ) + return Constants.expressionstart + elif 'negation' == context: + return Constants.negationend + elif 'pseudo-element' == typ: + # only one per element, check at ) also! + return Constants.combinator + else: + return Constants.simple_selector_sequence2 + Constants.combinator + + else: + self.wellformed = False + self._log.error('Selector: Unexpected start of pseudo.', token=token) + return expected + + def _expression(self, expected, seq, token, tokenizer=None): + # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ + context = self.context[-1] + val, typ = self.selector._tokenvalue(token), self.selector._type(token) + if context.startswith('pseudo-'): + self.append(seq, val, typ, token=token) + return Constants.expression + else: + self.wellformed = False + self._log.error('Selector: Unexpected %s.' % typ, token=token) + return expected + + def _attcombinator(self, expected, seq, token, tokenizer=None): + # context: attrib + # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | + # DASHMATCH + context = self.context[-1] + val, typ = self.selector._tokenvalue(token), self.selector._type(token) + if 'attrib' == context and 'combinator' in expected: + # combinator in attrib + self.append(seq, val, typ.lower(), token=token) + return Constants.attvalue + else: + self.wellformed = False + self._log.error('Selector: Unexpected %s.' % typ, token=token) + return expected + + def _string(self, expected, seq, token, tokenizer=None): + # identifier + context = self.context[-1] + typ, val = self.selector._type(token), self.selector._stringtokenvalue(token) + + # context: attrib + if 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + self.append(seq, val, typ, token=token) + return Constants.attend + + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) + self.append(seq, val, typ, token=token) + return Constants.expression + + else: + self.wellformed = False + self._log.error('Selector: Unexpected STRING.', token=token) + return expected + + def _ident(self, expected, seq, token, tokenizer=None): + # identifier + context = self.context[-1] + val, typ = self.selector._tokenvalue(token), self.selector._type(token) + + # context: attrib + if 'attrib' == context and 'attribute' in expected: + # attrib: [...|ATT...] + self.append(seq, val, 'attribute-selector', token=token) + return Constants.attcombinator + + elif 'attrib' == context and 'value' in expected: + # attrib: [...=VALUE] + self.append(seq, val, 'attribute-value', token=token) + return Constants.attend + + # context: negation + elif 'negation' == context: + # negation: (prefix|IDENT) + self.append(seq, val, 'negation-type-selector', token=token) + return Constants.negationend + + # context: pseudo + elif context.startswith('pseudo-'): + # :func(...) + self.append(seq, val, typ, token=token) + return Constants.expression + + elif 'type_selector' in expected or Constants.element_name == expected: + # element name after ns or complete type_selector + self.append(seq, val, 'type-selector', token=token) + return Constants.simple_selector_sequence2 + Constants.combinator + + else: + self.wellformed = False + self._log.error('Selector: Unexpected IDENT.', token=token) + return expected + + def _class(self, expected, seq, token, tokenizer=None): + # .IDENT + context = self.context[-1] + val = self.selector._tokenvalue(token) + if 'class' in expected: + self.append(seq, val, 'class', token=token) + + if 'negation' == context: + return Constants.negationend + else: + return Constants.simple_selector_sequence2 + Constants.combinator + + else: + self.wellformed = False + self._log.error('Selector: Unexpected class.', token=token) + return expected + + def _hash(self, expected, seq, token, tokenizer=None): + # #IDENT + context = self.context[-1] + val = self.selector._tokenvalue(token) + if 'HASH' in expected: + self.append(seq, val, 'id', token=token) + + if 'negation' == context: + return Constants.negationend + else: + return Constants.simple_selector_sequence2 + Constants.combinator + + else: + self.wellformed = False + self._log.error('Selector: Unexpected HASH.', token=token) + return expected + + def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 + # + > ~ ) [ ] + - + context = self.context[-1] + val = self.selector._tokenvalue(token) + + # context: attrib + if ']' == val and 'attrib' == context and ']' in expected: + # end of attrib + self.append(seq, val, 'attribute-end', token=token) + context = self.context.pop() # attrib is done + context = self.context[-1] + if 'negation' == context: + return Constants.negationend + else: + return Constants.simple_selector_sequence2 + Constants.combinator + + elif '=' == val and 'attrib' == context and 'combinator' in expected: + # combinator in attrib + self.append(seq, val, 'equals', token=token) + return Constants.attvalue + + # context: negation + elif ')' == val and 'negation' == context and ')' in expected: + # not(negation_arg)" + self.append(seq, val, 'negation-end', token=token) + self.context.pop() # negation is done + context = self.context[-1] + return Constants.simple_selector_sequence + Constants.combinator + + # context: pseudo (at least one expression) + elif val in '+-' and context.startswith('pseudo-'): + # :func(+ -)" + _names = {'+': 'plus', '-': 'minus'} + if val == '+' and seq and seq[-1].value == Constants.S: + seq.replace(-1, val, _names[val]) + else: + self.append(seq, val, _names[val], token=token) + return Constants.expression + + elif ( + ')' == val + and context.startswith('pseudo-') + and Constants.expression == expected + ): + # :func(expression)" + self.append(seq, val, 'function-end', token=token) + self.context.pop() # pseudo is done + if 'pseudo-element' == context: + return Constants.combinator + else: + return Constants.simple_selector_sequence + Constants.combinator + + # context: ROOT + elif '[' == val and 'attrib' in expected: + # start of [attrib] + self.append(seq, val, 'attribute-start', token=token) + self.context.append('attrib') + return Constants.attname + + elif val in '+>~' and 'combinator' in expected: + # no other combinator except S may be following + _names = { + '>': 'child', + '+': 'adjacent-sibling', + '~': 'following-sibling', + } + if seq and seq[-1].value == Constants.S: + seq.replace(-1, val, _names[val]) + else: + self.append(seq, val, _names[val], token=token) + return Constants.simple_selector_sequence + + elif ',' == val: + # not a selectorlist + self.wellformed = False + self._log.error( + 'Selector: Single selector only.', + error=xml.dom.InvalidModificationErr, + token=token, + ) + return expected + + else: + self.wellformed = False + self._log.error('Selector: Unexpected CHAR.', token=token) + return expected + + def _negation(self, expected, seq, token, tokenizer=None): + # not( + val = self.selector._tokenvalue(token, normalize=True) + if 'negation' in expected: + self.context.append('negation') + self.append(seq, val, 'negation-start', token=token) + return Constants.negation_arg + else: + self.wellformed = False + self._log.error('Selector: Unexpected negation.', token=token) + return expected + + def _atkeyword(self, expected, seq, token, tokenizer=None): + "invalidates selector" + self.wellformed = False + self._log.error('Selector: Unexpected ATKEYWORD.', token=token) + return expected + + @property + def productions(self): + return { + 'CHAR': self._char, + 'class': self._class, + 'HASH': self._hash, + 'STRING': self._string, + 'IDENT': self._ident, + 'namespace_prefix': self._namespace_prefix, + 'negation': self._negation, + 'pseudo-class': self._pseudo, + 'pseudo-element': self._pseudo, + 'universal': self._universal, + # pseudo + 'NUMBER': self._expression, + 'DIMENSION': self._expression, + # attribute + 'PREFIXMATCH': self._attcombinator, + 'SUFFIXMATCH': self._attcombinator, + 'SUBSTRINGMATCH': self._attcombinator, + 'DASHMATCH': self._attcombinator, + 'INCLUDES': self._attcombinator, + 'S': self._S, + 'COMMENT': self._COMMENT, + 'ATKEYWORD': self._atkeyword, + } + class Selector(cssutils.util.Base2): """ @@ -302,7 +679,7 @@ def _getSelectorText(self): """Return serialized format.""" return cssutils.ser.do_css_Selector(self) - def _setSelectorText(self, selectorText): # noqa: C901 + def _setSelectorText(self, selectorText): """ :param selectorText: parsable string or a tuple of (selectorText, dict-of-namespaces). @@ -336,380 +713,16 @@ def _setSelectorText(self, selectorText): # noqa: C901 tokenizer = self._prepare_tokens(tokenizer) - new = New(namespaces=namespaces) - - # used for equality checks and setting of a space combinator - S = ' ' - - # expected constants - simple_selector_sequence = ( - 'type_selector universal HASH class ' 'attrib pseudo negation ' - ) - simple_selector_sequence2 = 'HASH class attrib pseudo negation ' - - element_name = 'element_name' - - negation_arg = 'type_selector universal HASH class attrib pseudo' - negationend = ')' - - attname = 'prefix attribute' - attname2 = 'attribute' - attcombinator = 'combinator ]' # optional - attvalue = 'value' # optional - attend = ']' - - expressionstart = 'PLUS - DIMENSION NUMBER STRING IDENT' - expression = expressionstart + ' )' - - combinator = ' combinator' - - def _COMMENT(expected, seq, token, tokenizer=None): - "special implementation for comment token" - new.append(seq, cssutils.css.CSSComment([token]), 'COMMENT', token=token) - return expected - - def _S(expected, seq, token, tokenizer=None): - # S - context = new.context[-1] - if context.startswith('pseudo-'): - if seq and seq[-1].value not in '+-': - # e.g. x:func(a + b) - new.append(seq, S, 'S', token=token) - return expected - - elif context != 'attrib' and 'combinator' in expected: - new.append(seq, S, 'descendant', token=token) - return simple_selector_sequence + combinator - - else: - return expected - - def _universal(expected, seq, token, tokenizer=None): - # *|* or prefix|* - context = new.context[-1] - val = self._tokenvalue(token) - if 'universal' in expected: - new.append(seq, val, 'universal', token=token) - - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator - - else: - new.wellformed = False - self._log.error('Selector: Unexpected universal.', token=token) - return expected - - def _namespace_prefix(expected, seq, token, tokenizer=None): - # prefix| => element_name - # or prefix| => attribute_name if attrib - context = new.context[-1] - val = self._tokenvalue(token) - if 'attrib' == context and 'prefix' in expected: - # [PREFIX|att] - new.append(seq, val, '_PREFIX', token=token) - return attname2 - elif 'type_selector' in expected: - # PREFIX|* - new.append(seq, val, '_PREFIX', token=token) - return element_name - else: - new.wellformed = False - self._log.error('Selector: Unexpected namespace prefix.', token=token) - return expected - - def _pseudo(expected, seq, token, tokenizer=None): - # pseudo-class or pseudo-element :a ::a :a( ::a( - """ - /* '::' starts a pseudo-element, ':' a pseudo-class */ - /* Exceptions: :first-line, :first-letter, :before and - :after. */ - /* Note that pseudo-elements are restricted to one per selector - and */ - /* occur only in the last simple_selector_sequence. */ - """ - context = new.context[-1] - val, typ = self._tokenvalue(token, normalize=True), self._type(token) - if 'pseudo' in expected: - if val in (':first-line', ':first-letter', ':before', ':after'): - # always pseudo-element ??? - typ = 'pseudo-element' - new.append(seq, val, typ, token=token) - - if val.endswith('('): - # function - # "pseudo-" "class" or "element" - new.context.append(typ) - return expressionstart - elif 'negation' == context: - return negationend - elif 'pseudo-element' == typ: - # only one per element, check at ) also! - return combinator - else: - return simple_selector_sequence2 + combinator - - else: - new.wellformed = False - self._log.error('Selector: Unexpected start of pseudo.', token=token) - return expected - - def _expression(expected, seq, token, tokenizer=None): - # [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+ - context = new.context[-1] - val, typ = self._tokenvalue(token), self._type(token) - if context.startswith('pseudo-'): - new.append(seq, val, typ, token=token) - return expression - else: - new.wellformed = False - self._log.error('Selector: Unexpected %s.' % typ, token=token) - return expected - - def _attcombinator(expected, seq, token, tokenizer=None): - # context: attrib - # PREFIXMATCH | SUFFIXMATCH | SUBSTRINGMATCH | INCLUDES | - # DASHMATCH - context = new.context[-1] - val, typ = self._tokenvalue(token), self._type(token) - if 'attrib' == context and 'combinator' in expected: - # combinator in attrib - new.append(seq, val, typ.lower(), token=token) - return attvalue - else: - new.wellformed = False - self._log.error('Selector: Unexpected %s.' % typ, token=token) - return expected - - def _string(expected, seq, token, tokenizer=None): - # identifier - context = new.context[-1] - typ, val = self._type(token), self._stringtokenvalue(token) - - # context: attrib - if 'attrib' == context and 'value' in expected: - # attrib: [...=VALUE] - new.append(seq, val, typ, token=token) - return attend - - # context: pseudo - elif context.startswith('pseudo-'): - # :func(...) - new.append(seq, val, typ, token=token) - return expression - - else: - new.wellformed = False - self._log.error('Selector: Unexpected STRING.', token=token) - return expected - - def _ident(expected, seq, token, tokenizer=None): - # identifier - context = new.context[-1] - val, typ = self._tokenvalue(token), self._type(token) - - # context: attrib - if 'attrib' == context and 'attribute' in expected: - # attrib: [...|ATT...] - new.append(seq, val, 'attribute-selector', token=token) - return attcombinator - - elif 'attrib' == context and 'value' in expected: - # attrib: [...=VALUE] - new.append(seq, val, 'attribute-value', token=token) - return attend - - # context: negation - elif 'negation' == context: - # negation: (prefix|IDENT) - new.append(seq, val, 'negation-type-selector', token=token) - return negationend - - # context: pseudo - elif context.startswith('pseudo-'): - # :func(...) - new.append(seq, val, typ, token=token) - return expression - - elif 'type_selector' in expected or element_name == expected: - # element name after ns or complete type_selector - new.append(seq, val, 'type-selector', token=token) - return simple_selector_sequence2 + combinator - - else: - new.wellformed = False - self._log.error('Selector: Unexpected IDENT.', token=token) - return expected - - def _class(expected, seq, token, tokenizer=None): - # .IDENT - context = new.context[-1] - val = self._tokenvalue(token) - if 'class' in expected: - new.append(seq, val, 'class', token=token) - - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator - - else: - new.wellformed = False - self._log.error('Selector: Unexpected class.', token=token) - return expected - - def _hash(expected, seq, token, tokenizer=None): - # #IDENT - context = new.context[-1] - val = self._tokenvalue(token) - if 'HASH' in expected: - new.append(seq, val, 'id', token=token) - - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator - - else: - new.wellformed = False - self._log.error('Selector: Unexpected HASH.', token=token) - return expected - - def _char(expected, seq, token, tokenizer=None): # noqa: C901 - # + > ~ ) [ ] + - - context = new.context[-1] - val = self._tokenvalue(token) - - # context: attrib - if ']' == val and 'attrib' == context and ']' in expected: - # end of attrib - new.append(seq, val, 'attribute-end', token=token) - context = new.context.pop() # attrib is done - context = new.context[-1] - if 'negation' == context: - return negationend - else: - return simple_selector_sequence2 + combinator - - elif '=' == val and 'attrib' == context and 'combinator' in expected: - # combinator in attrib - new.append(seq, val, 'equals', token=token) - return attvalue - - # context: negation - elif ')' == val and 'negation' == context and ')' in expected: - # not(negation_arg)" - new.append(seq, val, 'negation-end', token=token) - new.context.pop() # negation is done - context = new.context[-1] - return simple_selector_sequence + combinator - - # context: pseudo (at least one expression) - elif val in '+-' and context.startswith('pseudo-'): - # :func(+ -)" - _names = {'+': 'plus', '-': 'minus'} - if val == '+' and seq and seq[-1].value == S: - seq.replace(-1, val, _names[val]) - else: - new.append(seq, val, _names[val], token=token) - return expression - - elif ( - ')' == val and context.startswith('pseudo-') and expression == expected - ): - # :func(expression)" - new.append(seq, val, 'function-end', token=token) - new.context.pop() # pseudo is done - if 'pseudo-element' == context: - return combinator - else: - return simple_selector_sequence + combinator - - # context: ROOT - elif '[' == val and 'attrib' in expected: - # start of [attrib] - new.append(seq, val, 'attribute-start', token=token) - new.context.append('attrib') - return attname - - elif val in '+>~' and 'combinator' in expected: - # no other combinator except S may be following - _names = { - '>': 'child', - '+': 'adjacent-sibling', - '~': 'following-sibling', - } - if seq and seq[-1].value == S: - seq.replace(-1, val, _names[val]) - else: - new.append(seq, val, _names[val], token=token) - return simple_selector_sequence - - elif ',' == val: - # not a selectorlist - new.wellformed = False - self._log.error( - 'Selector: Single selector only.', - error=xml.dom.InvalidModificationErr, - token=token, - ) - return expected - - else: - new.wellformed = False - self._log.error('Selector: Unexpected CHAR.', token=token) - return expected - - def _negation(expected, seq, token, tokenizer=None): - # not( - val = self._tokenvalue(token, normalize=True) - if 'negation' in expected: - new.context.append('negation') - new.append(seq, val, 'negation-start', token=token) - return negation_arg - else: - new.wellformed = False - self._log.error('Selector: Unexpected negation.', token=token) - return expected - - def _atkeyword(expected, seq, token, tokenizer=None): - "invalidates selector" - new.wellformed = False - self._log.error('Selector: Unexpected ATKEYWORD.', token=token) - return expected + new = New(selector=self, namespaces=namespaces) # expected: only|not or mediatype, mediatype, feature, and newseq = self._tempSeq() wellformed, expected = self._parse( - expected=simple_selector_sequence, + expected=Constants.simple_selector_sequence, seq=newseq, tokenizer=tokenizer, - productions={ - 'CHAR': _char, - 'class': _class, - 'HASH': _hash, - 'STRING': _string, - 'IDENT': _ident, - 'namespace_prefix': _namespace_prefix, - 'negation': _negation, - 'pseudo-class': _pseudo, - 'pseudo-element': _pseudo, - 'universal': _universal, - # pseudo - 'NUMBER': _expression, - 'DIMENSION': _expression, - # attribute - 'PREFIXMATCH': _attcombinator, - 'SUFFIXMATCH': _attcombinator, - 'SUBSTRINGMATCH': _attcombinator, - 'DASHMATCH': _attcombinator, - 'INCLUDES': _attcombinator, - 'S': _S, - 'COMMENT': _COMMENT, - 'ATKEYWORD': _atkeyword, - }, + productions=new.productions, ) wellformed = wellformed and new.wellformed @@ -727,7 +740,7 @@ def _atkeyword(expected, seq, token, tokenizer=None): 'Selector: No element name found: %s' % self._valuestr(selectorText) ) - if expected == simple_selector_sequence and newseq: + if expected == Constants.simple_selector_sequence and newseq: wellformed = False self._log.error( 'Selector: Cannot end with combinator: %s' From bde08c88da00f8fceec2203ec31859479596f6c2 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 14 May 2024 15:40:27 -0400 Subject: [PATCH 7/7] Remove 'else' clauses; none of the 'if' clauses fall through. --- cssutils/css/selector.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cssutils/css/selector.py b/cssutils/css/selector.py index c5298448..054bc5c4 100644 --- a/cssutils/css/selector.py +++ b/cssutils/css/selector.py @@ -378,13 +378,13 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 else: return Constants.simple_selector_sequence2 + Constants.combinator - elif '=' == val and 'attrib' == context and 'combinator' in expected: + if '=' == val and 'attrib' == context and 'combinator' in expected: # combinator in attrib self.append(seq, val, 'equals', token=token) return Constants.attvalue # context: negation - elif ')' == val and 'negation' == context and ')' in expected: + if ')' == val and 'negation' == context and ')' in expected: # not(negation_arg)" self.append(seq, val, 'negation-end', token=token) self.context.pop() # negation is done @@ -392,7 +392,7 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 return Constants.simple_selector_sequence + Constants.combinator # context: pseudo (at least one expression) - elif val in '+-' and context.startswith('pseudo-'): + if val in '+-' and context.startswith('pseudo-'): # :func(+ -)" _names = {'+': 'plus', '-': 'minus'} if val == '+' and seq and seq[-1].value == Constants.S: @@ -401,7 +401,7 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 self.append(seq, val, _names[val], token=token) return Constants.expression - elif ( + if ( ')' == val and context.startswith('pseudo-') and Constants.expression == expected @@ -415,13 +415,13 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 return Constants.simple_selector_sequence + Constants.combinator # context: ROOT - elif '[' == val and 'attrib' in expected: + if '[' == val and 'attrib' in expected: # start of [attrib] self.append(seq, val, 'attribute-start', token=token) self.context.append('attrib') return Constants.attname - elif val in '+>~' and 'combinator' in expected: + if val in '+>~' and 'combinator' in expected: # no other combinator except S may be following _names = { '>': 'child', @@ -434,7 +434,7 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 self.append(seq, val, _names[val], token=token) return Constants.simple_selector_sequence - elif ',' == val: + if ',' == val: # not a selectorlist self.wellformed = False self._log.error( @@ -444,10 +444,9 @@ def _char(self, expected, seq, token, tokenizer=None): # noqa: C901 ) return expected - else: - self.wellformed = False - self._log.error('Selector: Unexpected CHAR.', token=token) - return expected + self.wellformed = False + self._log.error('Selector: Unexpected CHAR.', token=token) + return expected def _negation(self, expected, seq, token, tokenizer=None): # not(