diff --git a/prometheus_client/exposition.py b/prometheus_client/exposition.py index 4bcf1c70..7427cf93 100644 --- a/prometheus_client/exposition.py +++ b/prometheus_client/exposition.py @@ -20,6 +20,7 @@ from .openmetrics import exposition as openmetrics from .registry import CollectorRegistry, REGISTRY from .utils import floatToGoString +from .validation import _is_valid_legacy_metric_name __all__ = ( 'CONTENT_TYPE_LATEST', @@ -247,19 +248,26 @@ class TmpServer(ThreadingWSGIServer): def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes: """Returns the metrics from the registry in latest text format as a string.""" - def sample_line(line): - if line.labels: - labelstr = '{{{0}}}'.format(','.join( + def sample_line(samples): + if samples.labels: + labelstr = '{0}'.format(','.join( ['{}="{}"'.format( - k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')) - for k, v in sorted(line.labels.items())])) + openmetrics.escape_label_name(k), openmetrics._escape(v)) + for k, v in sorted(samples.labels.items())])) else: labelstr = '' timestamp = '' - if line.timestamp is not None: + if samples.timestamp is not None: # Convert to milliseconds. - timestamp = f' {int(float(line.timestamp) * 1000):d}' - return f'{line.name}{labelstr} {floatToGoString(line.value)}{timestamp}\n' + timestamp = f' {int(float(samples.timestamp) * 1000):d}' + if _is_valid_legacy_metric_name(samples.name): + if labelstr: + labelstr = '{{{0}}}'.format(labelstr) + return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n' + maybe_comma = '' + if labelstr: + maybe_comma = ',' + return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n' output = [] for metric in registry.collect(): @@ -282,8 +290,8 @@ def sample_line(line): mtype = 'untyped' output.append('# HELP {} {}\n'.format( - mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n'))) - output.append(f'# TYPE {mname} {mtype}\n') + openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n'))) + output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n') om_samples: Dict[str, List[str]] = {} for s in metric.samples: @@ -299,9 +307,9 @@ def sample_line(line): raise for suffix, lines in sorted(om_samples.items()): - output.append('# HELP {}{} {}\n'.format(metric.name, suffix, - metric.documentation.replace('\\', r'\\').replace('\n', r'\n'))) - output.append(f'# TYPE {metric.name}{suffix} gauge\n') + output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix), + metric.documentation.replace('\\', r'\\').replace('\n', r'\n'))) + output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n') output.extend(lines) return ''.join(output).encode('utf-8') diff --git a/prometheus_client/metrics.py b/prometheus_client/metrics.py index cceaafda..46175860 100644 --- a/prometheus_client/metrics.py +++ b/prometheus_client/metrics.py @@ -10,19 +10,21 @@ from . 
import values # retain this import style for testability from .context_managers import ExceptionCounter, InprogressTracker, Timer -from .metrics_core import ( - Metric, METRIC_LABEL_NAME_RE, METRIC_NAME_RE, - RESERVED_METRIC_LABEL_NAME_RE, -) +from .metrics_core import Metric from .registry import Collector, CollectorRegistry, REGISTRY from .samples import Exemplar, Sample from .utils import floatToGoString, INF +from .validation import ( + _validate_exemplar, _validate_labelnames, _validate_metric_name, +) T = TypeVar('T', bound='MetricWrapperBase') F = TypeVar("F", bound=Callable[..., Any]) def _build_full_name(metric_type, name, namespace, subsystem, unit): + if not name: + raise ValueError('Metric name should not be empty') full_name = '' if namespace: full_name += namespace + '_' @@ -38,31 +40,6 @@ def _build_full_name(metric_type, name, namespace, subsystem, unit): return full_name -def _validate_labelname(l): - if not METRIC_LABEL_NAME_RE.match(l): - raise ValueError('Invalid label metric name: ' + l) - if RESERVED_METRIC_LABEL_NAME_RE.match(l): - raise ValueError('Reserved label metric name: ' + l) - - -def _validate_labelnames(cls, labelnames): - labelnames = tuple(labelnames) - for l in labelnames: - _validate_labelname(l) - if l in cls._reserved_labelnames: - raise ValueError('Reserved label metric name: ' + l) - return labelnames - - -def _validate_exemplar(exemplar): - runes = 0 - for k, v in exemplar.items(): - _validate_labelname(k) - runes += len(k) - runes += len(v) - if runes > 128: - raise ValueError('Exemplar labels have %d UTF-8 characters, exceeding the limit of 128') - def _get_use_created() -> bool: return os.environ.get("PROMETHEUS_DISABLE_CREATED_SERIES", 'False').lower() not in ('true', '1', 't') @@ -139,8 +116,7 @@ def __init__(self: T, self._documentation = documentation self._unit = unit - if not METRIC_NAME_RE.match(self._name): - raise ValueError('Invalid metric name: ' + self._name) + _validate_metric_name(self._name) if self._is_parent(): # Prepare the fields needed for child metrics. 
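As context for the metrics.py hunks above, here is a minimal usage sketch (illustrative only, not part of the patch) of what the swapped-in _validate_metric_name check permits, using the enable/disable toggles defined in the new prometheus_client/validation.py later in this diff:

from prometheus_client import CollectorRegistry, Counter
from prometheus_client.validation import (
    disable_legacy_validation, enable_legacy_validation,
)

registry = CollectorRegistry()

disable_legacy_validation()
# With UTF-8 validation (the default), dotted names are accepted by the constructors.
Counter('requests.by.page', 'Requests per page', registry=registry)

enable_legacy_validation()
# Under legacy validation the same style of name is rejected with ValueError.
try:
    Counter('errors.by.page', 'Errors per page', registry=registry)
except ValueError:
    pass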
diff --git a/prometheus_client/metrics_core.py b/prometheus_client/metrics_core.py index b09cea04..27d1712d 100644 --- a/prometheus_client/metrics_core.py +++ b/prometheus_client/metrics_core.py @@ -1,15 +1,12 @@ -import re from typing import Dict, List, Optional, Sequence, Tuple, Union from .samples import Exemplar, NativeHistogram, Sample, Timestamp +from .validation import _validate_metric_name METRIC_TYPES = ( 'counter', 'gauge', 'summary', 'histogram', 'gaugehistogram', 'unknown', 'info', 'stateset', ) -METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$') -METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') -RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$') class Metric: @@ -24,8 +21,7 @@ class Metric: def __init__(self, name: str, documentation: str, typ: str, unit: str = ''): if unit and not name.endswith("_" + unit): name += "_" + unit - if not METRIC_NAME_RE.match(name): - raise ValueError('Invalid metric name: ' + name) + _validate_metric_name(name) self.name: str = name self.documentation: str = documentation self.unit: str = unit diff --git a/prometheus_client/openmetrics/exposition.py b/prometheus_client/openmetrics/exposition.py index 1959847b..84600605 100644 --- a/prometheus_client/openmetrics/exposition.py +++ b/prometheus_client/openmetrics/exposition.py @@ -2,6 +2,9 @@ from ..utils import floatToGoString +from ..validation import ( + _is_valid_legacy_labelname, _is_valid_legacy_metric_name, +) CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8' """Content type of the latest OpenMetrics text format""" @@ -24,18 +27,27 @@ def generate_latest(registry): try: mname = metric.name output.append('# HELP {} {}\n'.format( - mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))) - output.append(f'# TYPE {mname} {metric.type}\n') + escape_metric_name(mname), _escape(metric.documentation))) + output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n') if metric.unit: - output.append(f'# UNIT {mname} {metric.unit}\n') + output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n') for s in metric.samples: - if s.labels: - labelstr = '{{{0}}}'.format(','.join( - ['{}="{}"'.format( - k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')) - for k, v in sorted(s.labels.items())])) + if not _is_valid_legacy_metric_name(s.name): + labelstr = escape_metric_name(s.name) + if s.labels: + labelstr += ', ' else: labelstr = '' + + if s.labels: + items = sorted(s.labels.items()) + labelstr += ','.join( + ['{}="{}"'.format( + escape_label_name(k), _escape(v)) + for k, v in items]) + if labelstr: + labelstr = "{" + labelstr + "}" + if s.exemplar: if not _is_valid_exemplar_metric(metric, s): raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter") @@ -59,16 +71,47 @@ def generate_latest(registry): timestamp = '' if s.timestamp is not None: timestamp = f' {s.timestamp}' - output.append('{}{} {}{}{}\n'.format( - s.name, - labelstr, - floatToGoString(s.value), - timestamp, - exemplarstr, - )) + if _is_valid_legacy_metric_name(s.name): + output.append('{}{} {}{}{}\n'.format( + s.name, + labelstr, + floatToGoString(s.value), + timestamp, + exemplarstr, + )) + else: + output.append('{} {}{}{}\n'.format( + labelstr, + floatToGoString(s.value), + timestamp, + exemplarstr, + )) except Exception as exception: exception.args = (exception.args or ('',)) + (metric,) raise output.append('# EOF\n') return ''.join(output).encode('utf-8') + + +def 
escape_metric_name(s: str) -> str: + """Escapes the metric name and puts it in quotes iff the name does not + conform to the legacy Prometheus character set. + """ + if _is_valid_legacy_metric_name(s): + return s + return '"{}"'.format(_escape(s)) + + +def escape_label_name(s: str) -> str: + """Escapes the label name and puts it in quotes iff the name does not + conform to the legacy Prometheus character set. + """ + if _is_valid_legacy_labelname(s): + return s + return '"{}"'.format(_escape(s)) + + +def _escape(s: str) -> str: + """Performs backslash escaping on backslash, newline, and double-quote characters.""" + return s.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"') diff --git a/prometheus_client/openmetrics/parser.py b/prometheus_client/openmetrics/parser.py index 39a44dc2..1d270915 100644 --- a/prometheus_client/openmetrics/parser.py +++ b/prometheus_client/openmetrics/parser.py @@ -5,9 +5,14 @@ import math import re -from ..metrics_core import Metric, METRIC_LABEL_NAME_RE +from ..metrics_core import Metric +from ..parser import ( + _last_unquoted_char, _next_unquoted_char, _parse_value, _split_quoted, + _unquote_unescape, parse_labels, +) from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp from ..utils import floatToGoString +from ..validation import _is_valid_legacy_metric_name, _validate_metric_name def text_string_to_metric_families(text): @@ -73,16 +78,6 @@ def _unescape_help(text): return ''.join(result) -def _parse_value(value): - value = ''.join(value) - if value != value.strip() or '_' in value: - raise ValueError(f"Invalid value: {value!r}") - try: - return int(value) - except ValueError: - return float(value) - - def _parse_timestamp(timestamp): timestamp = ''.join(timestamp) if not timestamp: @@ -113,165 +108,31 @@ def _is_character_escaped(s, charpos): return num_bslashes % 2 == 1 -def _parse_labels_with_state_machine(text): - # The { has already been parsed. 
- state = 'startoflabelname' - labelname = [] - labelvalue = [] - labels = {} - labels_len = 0 - - for char in text: - if state == 'startoflabelname': - if char == '}': - state = 'endoflabels' - else: - state = 'labelname' - labelname.append(char) - elif state == 'labelname': - if char == '=': - state = 'labelvaluequote' - else: - labelname.append(char) - elif state == 'labelvaluequote': - if char == '"': - state = 'labelvalue' - else: - raise ValueError("Invalid line: " + text) - elif state == 'labelvalue': - if char == '\\': - state = 'labelvalueslash' - elif char == '"': - ln = ''.join(labelname) - if not METRIC_LABEL_NAME_RE.match(ln): - raise ValueError("Invalid line, bad label name: " + text) - if ln in labels: - raise ValueError("Invalid line, duplicate label name: " + text) - labels[ln] = ''.join(labelvalue) - labelname = [] - labelvalue = [] - state = 'endoflabelvalue' - else: - labelvalue.append(char) - elif state == 'endoflabelvalue': - if char == ',': - state = 'labelname' - elif char == '}': - state = 'endoflabels' - else: - raise ValueError("Invalid line: " + text) - elif state == 'labelvalueslash': - state = 'labelvalue' - if char == '\\': - labelvalue.append('\\') - elif char == 'n': - labelvalue.append('\n') - elif char == '"': - labelvalue.append('"') - else: - labelvalue.append('\\' + char) - elif state == 'endoflabels': - if char == ' ': - break - else: - raise ValueError("Invalid line: " + text) - labels_len += 1 - return labels, labels_len - - -def _parse_labels(text): - labels = {} - - # Raise error if we don't have valid labels - if text and "=" not in text: - raise ValueError - - # Copy original labels - sub_labels = text - try: - # Process one label at a time - while sub_labels: - # The label name is before the equal - value_start = sub_labels.index("=") - label_name = sub_labels[:value_start] - sub_labels = sub_labels[value_start + 1:] - - # Check for missing quotes - if not sub_labels or sub_labels[0] != '"': - raise ValueError - - # The first quote is guaranteed to be after the equal - value_substr = sub_labels[1:] - - # Check for extra commas - if not label_name or label_name[0] == ',': - raise ValueError - if not value_substr or value_substr[-1] == ',': - raise ValueError - - # Find the last unescaped quote - i = 0 - while i < len(value_substr): - i = value_substr.index('"', i) - if not _is_character_escaped(value_substr[:i], i): - break - i += 1 - - # The label value is between the first and last quote - quote_end = i + 1 - label_value = sub_labels[1:quote_end] - # Replace escaping if needed - if "\\" in label_value: - label_value = _replace_escaping(label_value) - if not METRIC_LABEL_NAME_RE.match(label_name): - raise ValueError("invalid line, bad label name: " + text) - if label_name in labels: - raise ValueError("invalid line, duplicate label name: " + text) - labels[label_name] = label_value - - # Remove the processed label from the sub-slice for next iteration - sub_labels = sub_labels[quote_end + 1:] - if sub_labels.startswith(","): - next_comma = 1 - else: - next_comma = 0 - sub_labels = sub_labels[next_comma:] - - # Check for missing commas - if sub_labels and next_comma == 0: - raise ValueError - - return labels - - except ValueError: - raise ValueError("Invalid labels: " + text) - - def _parse_sample(text): separator = " # " # Detect the labels in the text - label_start = text.find("{") + label_start = _next_unquoted_char(text, '{') if label_start == -1 or separator in text[:label_start]: # We don't have labels, but there could be an exemplar. 
- name_end = text.index(" ") + name_end = _next_unquoted_char(text, ' ') name = text[:name_end] + if not _is_valid_legacy_metric_name(name): + raise ValueError("invalid metric name:" + text) # Parse the remaining text after the name remaining_text = text[name_end + 1:] value, timestamp, exemplar = _parse_remaining_text(remaining_text) return Sample(name, {}, value, timestamp, exemplar) - # The name is before the labels name = text[:label_start] - if separator not in text: - # Line doesn't contain an exemplar - # We can use `rindex` to find `label_end` - label_end = text.rindex("}") - label = text[label_start + 1:label_end] - labels = _parse_labels(label) - else: - # Line potentially contains an exemplar - # Fallback to parsing labels with a state machine - labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:]) - label_end = labels_len + len(name) + label_end = _next_unquoted_char(text, '}') + labels = parse_labels(text[label_start + 1:label_end], True) + if not name: + # Name might be in the labels + if '__name__' not in labels: + raise ValueError + name = labels['__name__'] + del labels['__name__'] + elif '__name__' in labels: + raise ValueError("metric name specified more than once") # Parsing labels succeeded, continue parsing the remaining text remaining_text = text[label_end + 2:] value, timestamp, exemplar = _parse_remaining_text(remaining_text) @@ -294,7 +155,12 @@ def _parse_remaining_text(text): text = split_text[1] it = iter(text) + in_quotes = False for char in it: + if char == '"': + in_quotes = not in_quotes + if in_quotes: + continue if state == 'timestamp': if char == '#' and not timestamp: state = 'exemplarspace' @@ -314,8 +180,9 @@ def _parse_remaining_text(text): raise ValueError("Invalid line: " + text) elif state == 'exemplarstartoflabels': if char == '{': - label_start, label_end = text.index("{"), text.rindex("}") - exemplar_labels = _parse_labels(text[label_start + 1:label_end]) + label_start = _next_unquoted_char(text, '{') + label_end = _last_unquoted_char(text, '}') + exemplar_labels = parse_labels(text[label_start + 1:label_end], True) state = 'exemplarparsedlabels' else: raise ValueError("Invalid line: " + text) @@ -365,35 +232,77 @@ def _parse_remaining_text(text): def _parse_nh_sample(text, suffixes): - labels_start = text.find("{") - # check if it's a native histogram with labels - re_nh_without_labels = re.compile(r'^[^{} ]+ {[^{}]+}$') - re_nh_with_labels = re.compile(r'[^{} ]+{[^{}]+} {[^{}]+}$') - if re_nh_with_labels.match(text): - nh_value_start = text.rindex("{") - labels_end = nh_value_start - 2 + """Determines if the line has a native histogram sample, and parses it if so.""" + labels_start = _next_unquoted_char(text, '{') + labels_end = -1 + + # Finding a native histogram sample requires careful parsing of + # possibly-quoted text, which can appear in metric names, label names, and + # values. + # + # First, we need to determine if there are metric labels. Find the space + # between the metric definition and the rest of the line. Look for unquoted + # space or {. + i = 0 + has_metric_labels = False + i = _next_unquoted_char(text, ' {') + if i == -1: + return + + # If the first unquoted char was a {, then that is the metric labels (which + # could contain a UTF-8 metric name). + if text[i] == '{': + has_metric_labels = True + # Consume the labels -- jump ahead to the close bracket. 
+ labels_end = i = _next_unquoted_char(text, '}', i) + if labels_end == -1: + raise ValueError + + # If there is no subsequent unquoted {, then it's definitely not a nh. + nh_value_start = _next_unquoted_char(text, '{', i + 1) + if nh_value_start == -1: + return + + # Edge case: if there is an unquoted # between the metric definition and the {, + # then this is actually an exemplar + exemplar = _next_unquoted_char(text, '#', i + 1) + if exemplar != -1 and exemplar < nh_value_start: + return + + nh_value_end = _next_unquoted_char(text, '}', nh_value_start) + if nh_value_end == -1: + raise ValueError + + if has_metric_labels: labelstext = text[labels_start + 1:labels_end] - labels = _parse_labels(labelstext) + labels = parse_labels(labelstext, True) name_end = labels_start name = text[:name_end] if name.endswith(suffixes): - raise ValueError("the sample name of a native histogram with labels should have no suffixes", name) + raise ValueError("the sample name of a native histogram with labels should have no suffixes", name) + if not name: + # Name might be in the labels + if '__name__' not in labels: + raise ValueError + name = labels['__name__'] + del labels['__name__'] + # Edge case: the only "label" is the name definition. + if not labels: + labels = None + nh_value = text[nh_value_start:] nat_hist_value = _parse_nh_struct(nh_value) return Sample(name, labels, None, None, None, nat_hist_value) # check if it's a native histogram - if re_nh_without_labels.match(text): - nh_value_start = labels_start + else: nh_value = text[nh_value_start:] name_end = nh_value_start - 1 name = text[:name_end] if name.endswith(suffixes): raise ValueError("the sample name of a native histogram should have no suffixes", name) + # Not possible for UTF-8 name here, that would have been caught as having a labelset. 
nat_hist_value = _parse_nh_struct(nh_value) return Sample(name, None, None, None, None, nat_hist_value) - else: - # it's not a native histogram - return def _parse_nh_struct(text): @@ -576,6 +485,7 @@ def build_metric(name, documentation, typ, unit, samples): raise ValueError("Units not allowed for this metric type: " + name) if typ in ['histogram', 'gaugehistogram']: _check_histogram(samples, name) + _validate_metric_name(name) metric = Metric(name, documentation, typ, unit) # TODO: check labelvalues are valid utf8 metric.samples = samples @@ -596,16 +506,19 @@ def build_metric(name, documentation, typ, unit, samples): if line == '# EOF': eof = True elif line.startswith('#'): - parts = line.split(' ', 3) + parts = _split_quoted(line, ' ', 3) if len(parts) < 4: raise ValueError("Invalid line: " + line) - if parts[2] == name and samples: + candidate_name, quoted = _unquote_unescape(parts[2]) + if not quoted and not _is_valid_legacy_metric_name(candidate_name): + raise ValueError + if candidate_name == name and samples: raise ValueError("Received metadata after samples: " + line) - if parts[2] != name: + if candidate_name != name: if name is not None: yield build_metric(name, documentation, typ, unit, samples) # New metric - name = parts[2] + name = candidate_name unit = None typ = None documentation = None @@ -614,7 +527,7 @@ def build_metric(name, documentation, typ, unit, samples): group_timestamp = None group_timestamp_samples = set() samples = [] - allowed_names = [parts[2]] + allowed_names = [candidate_name] if parts[1] == 'HELP': if documentation is not None: @@ -649,7 +562,10 @@ def build_metric(name, documentation, typ, unit, samples): if name is not None: yield build_metric(name, documentation, typ, unit, samples) # Start an unknown metric. 
- name = sample.name + candidate_name, quoted = _unquote_unescape(sample.name) + if not quoted and not _is_valid_legacy_metric_name(candidate_name): + raise ValueError + name = candidate_name documentation = None unit = None typ = 'unknown' diff --git a/prometheus_client/parser.py b/prometheus_client/parser.py index dc8e30df..92d66723 100644 --- a/prometheus_client/parser.py +++ b/prometheus_client/parser.py @@ -1,9 +1,13 @@ import io as StringIO import re +import string from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple from .metrics_core import Metric from .samples import Sample +from .validation import ( + _is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name, +) def text_string_to_metric_families(text: str) -> Iterable[Metric]: @@ -45,54 +49,169 @@ def _is_character_escaped(s: str, charpos: int) -> bool: return num_bslashes % 2 == 1 -def _parse_labels(labels_string: str) -> Dict[str, str]: +def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]: labels: Dict[str, str] = {} - # Return if we don't have valid labels - if "=" not in labels_string: - return labels - - escaping = False - if "\\" in labels_string: - escaping = True # Copy original labels - sub_labels = labels_string + sub_labels = labels_string.strip() + if openmetrics and sub_labels and sub_labels[0] == ',': + raise ValueError("leading comma: " + labels_string) try: # Process one label at a time while sub_labels: - # The label name is before the equal - value_start = sub_labels.index("=") - label_name = sub_labels[:value_start] - sub_labels = sub_labels[value_start + 1:].lstrip() - # Find the first quote after the equal - quote_start = sub_labels.index('"') + 1 - value_substr = sub_labels[quote_start:] - - # Find the last unescaped quote - i = 0 - while i < len(value_substr): - i = value_substr.index('"', i) - if not _is_character_escaped(value_substr, i): + # The label name is before the equal, or if there's no equal, that's the + # metric name. + + term, sub_labels = _next_term(sub_labels, openmetrics) + if not term: + if openmetrics: + raise ValueError("empty term in line: " + labels_string) + continue + + quoted_name = False + operator_pos = _next_unquoted_char(term, '=') + if operator_pos == -1: + quoted_name = True + label_name = "__name__" + else: + value_start = _next_unquoted_char(term, '=') + label_name, quoted_name = _unquote_unescape(term[:value_start]) + term = term[value_start + 1:] + + if not quoted_name and not _is_valid_legacy_metric_name(label_name): + raise ValueError("unquoted UTF-8 metric name") + + # Check for missing quotes + term = term.strip() + if not term or term[0] != '"': + raise ValueError + + # The first quote is guaranteed to be after the equal. + # Find the last unescaped quote. 
+ i = 1 + while i < len(term): + i = term.index('"', i) + if not _is_character_escaped(term[:i], i): break i += 1 # The label value is between the first and last quote quote_end = i + 1 - label_value = sub_labels[quote_start:quote_end] - # Replace escaping if needed - if escaping: - label_value = _replace_escaping(label_value) - labels[label_name.strip()] = label_value - - # Remove the processed label from the sub-slice for next iteration - sub_labels = sub_labels[quote_end + 1:] - next_comma = sub_labels.find(",") + 1 - sub_labels = sub_labels[next_comma:].lstrip() - + if quote_end != len(term): + raise ValueError("unexpected text after quote: " + labels_string) + label_value, _ = _unquote_unescape(term[:quote_end]) + if label_name == '__name__': + _validate_metric_name(label_name) + else: + _validate_labelname(label_name) + if label_name in labels: + raise ValueError("invalid line, duplicate label name: " + labels_string) + labels[label_name] = label_value return labels - except ValueError: - raise ValueError("Invalid labels: %s" % labels_string) + raise ValueError("Invalid labels: " + labels_string) + + +def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]: + """Extract the next comma-separated label term from the text. + + Returns the stripped term and the stripped remainder of the string, + including the comma. + + Raises ValueError if the term is empty and we're in openmetrics mode. + """ + + # There may be a leading comma, which is fine here. + if text[0] == ',': + text = text[1:] + if not text: + return "", "" + if text[0] == ',': + raise ValueError("multiple commas") + splitpos = _next_unquoted_char(text, ',}') + if splitpos == -1: + splitpos = len(text) + term = text[:splitpos] + if not term and openmetrics: + raise ValueError("empty term:", term) + + sublabels = text[splitpos:] + return term.strip(), sublabels.strip() + + +def _next_unquoted_char(text: str, chs: str, startidx: int = 0) -> int: + """Return position of next unquoted character in tuple, or -1 if not found. + + It is always assumed that the first character being checked is not already + inside quotes. + """ + i = startidx + in_quotes = False + if chs is None: + chs = string.whitespace + while i < len(text): + if text[i] == '"' and not _is_character_escaped(text, i): + in_quotes = not in_quotes + if not in_quotes: + if text[i] in chs: + return i + i += 1 + return -1 + + +def _last_unquoted_char(text: str, chs: str) -> int: + """Return position of last unquoted character in list, or -1 if not found.""" + i = len(text) - 1 + in_quotes = False + if chs is None: + chs = string.whitespace + while i > 0: + if text[i] == '"' and not _is_character_escaped(text, i): + in_quotes = not in_quotes + + if not in_quotes: + if text[i] in chs: + return i + i -= 1 + return -1 + + +def _split_quoted(text, separator, maxsplit=0): + """Splits on split_ch similarly to strings.split, skipping separators if + they are inside quotes. 
+ """ + + tokens = [''] + x = 0 + while x < len(text): + split_pos = _next_unquoted_char(text, separator, x) + if split_pos == -1: + tokens[-1] = text[x:] + x = len(text) + continue + if maxsplit > 0 and len(tokens) > maxsplit: + tokens[-1] = text[x:] + break + tokens[-1] = text[x:split_pos] + x = split_pos + 1 + tokens.append('') + return tokens + + +def _unquote_unescape(text): + """Returns the string, and true if it was quoted.""" + if not text: + return text, False + quoted = False + text = text.strip() + if text[0] == '"': + if len(text) == 1 or text[-1] != '"': + raise ValueError("missing close quote") + text = text[1:-1] + quoted = True + if "\\" in text: + text = _replace_escaping(text) + return text, quoted # If we have multiple values only consider the first @@ -104,34 +223,50 @@ def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]: values = [value.strip() for value in s.split(separator) if value.strip()] if not values: return float(s), None - value = float(values[0]) - timestamp = (float(values[-1]) / 1000) if len(values) > 1 else None + value = _parse_value(values[0]) + timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None return value, timestamp -def _parse_sample(text: str) -> Sample: - # Detect the labels in the text +def _parse_value(value): + value = ''.join(value) + if value != value.strip() or '_' in value: + raise ValueError(f"Invalid value: {value!r}") try: - label_start, label_end = text.index("{"), text.rindex("}") - # The name is before the labels - name = text[:label_start].strip() - # We ignore the starting curly brace - label = text[label_start + 1:label_end] - # The value is after the label end (ignoring curly brace) - value, timestamp = _parse_value_and_timestamp(text[label_end + 1:]) - return Sample(name, _parse_labels(label), value, timestamp) - - # We don't have labels + return int(value) except ValueError: - # Detect what separator is used - separator = " " - if separator not in text: - separator = "\t" - name_end = text.index(separator) - name = text[:name_end] - # The value is after the name - value, timestamp = _parse_value_and_timestamp(text[name_end:]) + return float(value) + + +def _parse_sample(text): + separator = " # " + # Detect the labels in the text + label_start = _next_unquoted_char(text, '{') + if label_start == -1 or separator in text[:label_start]: + # We don't have labels, but there could be an exemplar. 
+ name_end = _next_unquoted_char(text, ' \t') + name = text[:name_end].strip() + if not _is_valid_legacy_metric_name(name): + raise ValueError("invalid metric name:" + text) + # Parse the remaining text after the name + remaining_text = text[name_end + 1:] + value, timestamp = _parse_value_and_timestamp(remaining_text) return Sample(name, {}, value, timestamp) + name = text[:label_start].strip() + label_end = _next_unquoted_char(text, '}') + labels = parse_labels(text[label_start + 1:label_end], False) + if not name: + # Name might be in the labels + if '__name__' not in labels: + raise ValueError + name = labels['__name__'] + del labels['__name__'] + elif '__name__' in labels: + raise ValueError("metric name specified more than once") + # Parsing labels succeeded, continue parsing the remaining text + remaining_text = text[label_end + 1:] + value, timestamp = _parse_value_and_timestamp(remaining_text) + return Sample(name, labels, value, timestamp) def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]: @@ -168,28 +303,35 @@ def build_metric(name: str, documentation: str, typ: str, samples: List[Sample]) line = line.strip() if line.startswith('#'): - parts = line.split(None, 3) + parts = _split_quoted(line, None, 3) if len(parts) < 2: continue + candidate_name, quoted = '', False + if len(parts) > 2: + candidate_name, quoted = _unquote_unescape(parts[2]) + if not quoted and not _is_valid_legacy_metric_name(candidate_name): + raise ValueError if parts[1] == 'HELP': - if parts[2] != name: + if candidate_name != name: if name != '': yield build_metric(name, documentation, typ, samples) # New metric - name = parts[2] + name = candidate_name typ = 'untyped' samples = [] - allowed_names = [parts[2]] + allowed_names = [candidate_name] if len(parts) == 4: documentation = _replace_help_escaping(parts[3]) else: documentation = '' elif parts[1] == 'TYPE': - if parts[2] != name: + if len(parts) < 4: + raise ValueError + if candidate_name != name: if name != '': yield build_metric(name, documentation, typ, samples) # New metric - name = parts[2] + name = candidate_name documentation = '' samples = [] typ = parts[3] diff --git a/prometheus_client/validation.py b/prometheus_client/validation.py new file mode 100644 index 00000000..bf19fc75 --- /dev/null +++ b/prometheus_client/validation.py @@ -0,0 +1,123 @@ +import os +import re + +METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$') +METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') +RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$') + + +def _init_legacy_validation() -> bool: + """Retrieve name validation setting from environment.""" + return os.environ.get("PROMETHEUS_LEGACY_NAME_VALIDATION", 'False').lower() in ('true', '1', 't') + + +_legacy_validation = _init_legacy_validation() + + +def get_legacy_validation() -> bool: + """Return the current status of the legacy validation setting.""" + global _legacy_validation + return _legacy_validation + + +def disable_legacy_validation(): + """Disable legacy name validation, instead allowing all UTF8 characters.""" + global _legacy_validation + _legacy_validation = False + + +def enable_legacy_validation(): + """Enable legacy name validation instead of allowing all UTF8 characters.""" + global _legacy_validation + _legacy_validation = True + + +def _validate_metric_name(name: str) -> None: + """Raises ValueError if the provided name is not a valid metric name. + + This check uses the global legacy validation setting to determine the validation scheme. 
+ """ + if not name: + raise ValueError("metric name cannot be empty") + global _legacy_validation + if _legacy_validation: + if not METRIC_NAME_RE.match(name): + raise ValueError("invalid metric name " + name) + try: + name.encode('utf-8') + except UnicodeDecodeError: + raise ValueError("invalid metric name " + name) + + +def _is_valid_legacy_metric_name(name: str) -> bool: + """Returns true if the provided metric name conforms to the legacy validation scheme.""" + return METRIC_NAME_RE.match(name) is not None + + +def _validate_metric_label_name_token(tok: str) -> None: + """Raises ValueError if a parsed label name token is invalid. + + UTF-8 names must be quoted. + """ + if not tok: + raise ValueError("invalid label name token " + tok) + global _legacy_validation + quoted = tok[0] == '"' and tok[-1] == '"' + if not quoted or _legacy_validation: + if not METRIC_LABEL_NAME_RE.match(tok): + raise ValueError("invalid label name token " + tok) + return + try: + tok.encode('utf-8') + except UnicodeDecodeError: + raise ValueError("invalid label name token " + tok) + + +def _validate_labelname(l): + """Raises ValueError if the provided name is not a valid label name. + + This check uses the global legacy validation setting to determine the validation scheme. + """ + if get_legacy_validation(): + if not METRIC_LABEL_NAME_RE.match(l): + raise ValueError('Invalid label metric name: ' + l) + if RESERVED_METRIC_LABEL_NAME_RE.match(l): + raise ValueError('Reserved label metric name: ' + l) + else: + try: + l.encode('utf-8') + except UnicodeDecodeError: + raise ValueError('Invalid label metric name: ' + l) + if RESERVED_METRIC_LABEL_NAME_RE.match(l): + raise ValueError('Reserved label metric name: ' + l) + + +def _is_valid_legacy_labelname(l: str) -> bool: + """Returns true if the provided label name conforms to the legacy validation scheme.""" + if METRIC_LABEL_NAME_RE.match(l) is None: + return False + return RESERVED_METRIC_LABEL_NAME_RE.match(l) is None + + +def _validate_labelnames(cls, labelnames): + """Raises ValueError if any of the provided names is not a valid label name. + + This check uses the global legacy validation setting to determine the validation scheme. 
+ """ + labelnames = tuple(labelnames) + for l in labelnames: + _validate_labelname(l) + if l in cls._reserved_labelnames: + raise ValueError('Reserved label methe fric name: ' + l) + return labelnames + + +def _validate_exemplar(exemplar): + """Raises ValueError if the exemplar is invalid.""" + runes = 0 + for k, v in exemplar.items(): + _validate_labelname(k) + runes += len(k) + runes += len(v) + if runes > 128: + raise ValueError('Exemplar labels have %d UTF-8 characters, exceeding the limit of 128') diff --git a/tests/openmetrics/test_exposition.py b/tests/openmetrics/test_exposition.py index 28a90838..124e55e9 100644 --- a/tests/openmetrics/test_exposition.py +++ b/tests/openmetrics/test_exposition.py @@ -33,6 +33,12 @@ def test_counter(self): c.inc() self.assertEqual(b'# HELP cc A counter\n# TYPE cc counter\ncc_total 1.0\ncc_created 123.456\n# EOF\n', generate_latest(self.registry)) + + def test_counter_utf8(self): + c = Counter('cc.with.dots', 'A counter', registry=self.registry) + c.inc() + self.assertEqual(b'# HELP "cc.with.dots" A counter\n# TYPE "cc.with.dots" counter\n{"cc.with.dots_total"} 1.0\n{"cc.with.dots_created"} 123.456\n# EOF\n', + generate_latest(self.registry)) def test_counter_total(self): c = Counter('cc_total', 'A counter', registry=self.registry) diff --git a/tests/openmetrics/test_parser.py b/tests/openmetrics/test_parser.py index dc5e9916..019929e6 100644 --- a/tests/openmetrics/test_parser.py +++ b/tests/openmetrics/test_parser.py @@ -29,6 +29,24 @@ def test_uint64_counter(self): """) self.assertEqual([CounterMetricFamily("a", "help", value=9223372036854775808)], list(families)) + def test_utf8_counter(self): + families = text_string_to_metric_families("""# TYPE "my.counter" counter +# HELP "my.counter" help +{"my.counter_total"} 1 +# EOF +""") + self.assertEqual([CounterMetricFamily("my.counter", "help", value=1)], list(families)) + + def test_complex_name_counter(self): + families = text_string_to_metric_families("""# TYPE "my.counter{} # = \\" \\n" counter +# HELP "my.counter{} # = \\" \\n" help +{"my.counter{} # = \\" \\n_total", "awful. }}{{ # HELP EOF name"="\\n yikes } \\" value"} 1 +# EOF +""") + metric = CounterMetricFamily("my.counter{} # = \" \n", "help", labels={'awful. }}{{ # HELP EOF name': '\n yikes } " value'}) + metric.add_sample("my.counter{} # = \" \n_total", {'awful. 
}}{{ # HELP EOF name': '\n yikes } " value'}, 1) + self.assertEqual([metric], list(families)) + def test_simple_gauge(self): families = text_string_to_metric_families("""# TYPE a gauge # HELP a help @@ -128,6 +146,18 @@ def test_simple_histogram_float_values(self): self.assertEqual([HistogramMetricFamily("a", "help", sum_value=2, buckets=[("1.0", 0.0), ("+Inf", 3.0)])], list(families)) + def test_utf8_histogram_float_values(self): + families = text_string_to_metric_families("""# TYPE "a.b" histogram +# HELP "a.b" help +{"a.b_bucket", le="1.0"} 0.0 +{"a.b_bucket", le="+Inf"} 3.0 +{"a.b_count"} 3.0 +{"a.b_sum"} 2.0 +# EOF +""") + self.assertEqual([HistogramMetricFamily("a.b", "help", sum_value=2, buckets=[("1.0", 0.0), ("+Inf", 3.0)])], + list(families)) + def test_histogram_noncanonical(self): families = text_string_to_metric_families("""# TYPE a histogram # HELP a help @@ -175,7 +205,7 @@ def test_histogram_exemplars(self): Exemplar({"a": "2345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"}, 4, Timestamp(123, 0))) self.assertEqual([hfm], list(families)) - + def test_native_histogram(self): families = text_string_to_metric_families("""# TYPE nativehistogram histogram # HELP nativehistogram Is a basic example of a native histogram @@ -183,11 +213,35 @@ def test_native_histogram(self): # EOF """) families = list(families) - + hfm = HistogramMetricFamily("nativehistogram", "Is a basic example of a native histogram") hfm.add_sample("nativehistogram", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) self.assertEqual([hfm], families) + def test_native_histogram_utf8(self): + families = text_string_to_metric_families("""# TYPE "native{histogram" histogram +# HELP "native{histogram" Is a basic example of a native histogram +{"native{histogram"} {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2],negative_spans:[0:2,1:2],positive_deltas:[2,1,-3,3],negative_deltas:[2,1,-2,3]} +# EOF +""") + families = list(families) + + hfm = HistogramMetricFamily("native{histogram", "Is a basic example of a native histogram") + hfm.add_sample("native{histogram", None, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) + self.assertEqual([hfm], families) + + def test_native_histogram_utf8_stress(self): + families = text_string_to_metric_families("""# TYPE "native{histogram" histogram +# HELP "native{histogram" Is a basic example of a native histogram +{"native{histogram", "xx{} # {}"=" EOF # {}}}"} {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2],negative_spans:[0:2,1:2],positive_deltas:[2,1,-3,3],negative_deltas:[2,1,-2,3]} +# EOF +""") + families = list(families) + + hfm = HistogramMetricFamily("native{histogram", "Is a basic example of a native histogram") + hfm.add_sample("native{histogram", {'xx{} # {}': ' EOF # {}}}'}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) + self.assertEqual([hfm], families) + def test_native_histogram_with_labels(self): families = text_string_to_metric_families("""# TYPE hist_w_labels histogram # HELP hist_w_labels Is a basic example of a native histogram with labels @@ -195,11 +249,23 @@ def 
test_native_histogram_with_labels(self): # EOF """) families = list(families) - + hfm = HistogramMetricFamily("hist_w_labels", "Is a basic example of a native histogram with labels") hfm.add_sample("hist_w_labels", {"foo": "bar", "baz": "qux"}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) self.assertEqual([hfm], families) + def test_native_histogram_with_labels_utf8(self): + families = text_string_to_metric_families("""# TYPE "hist.w.labels" histogram +# HELP "hist.w.labels" Is a basic example of a native histogram with labels +{"hist.w.labels", foo="bar",baz="qux"} {count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,positive_spans:[0:2,1:2],negative_spans:[0:2,1:2],positive_deltas:[2,1,-3,3],negative_deltas:[2,1,-2,3]} +# EOF +""") + families = list(families) + + hfm = HistogramMetricFamily("hist.w.labels", "Is a basic example of a native histogram with labels") + hfm.add_sample("hist.w.labels", {"foo": "bar", "baz": "qux"}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) + self.assertEqual([hfm], families) + def test_native_histogram_with_classic_histogram(self): families = text_string_to_metric_families("""# TYPE hist_w_classic histogram # HELP hist_w_classic Is a basic example of a native histogram coexisting with a classic histogram @@ -211,7 +277,7 @@ def test_native_histogram_with_classic_histogram(self): # EOF """) families = list(families) - + hfm = HistogramMetricFamily("hist_w_classic", "Is a basic example of a native histogram coexisting with a classic histogram") hfm.add_sample("hist_w_classic", {"foo": "bar"}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) hfm.add_sample("hist_w_classic_bucket", {"foo": "bar", "le": "0.001"}, 4.0, None, None, None) @@ -219,7 +285,7 @@ def test_native_histogram_with_classic_histogram(self): hfm.add_sample("hist_w_classic_count", {"foo": "bar"}, 24.0, None, None, None) hfm.add_sample("hist_w_classic_sum", {"foo": "bar"}, 100.0, None, None, None) self.assertEqual([hfm], families) - + def test_native_plus_classic_histogram_two_labelsets(self): families = text_string_to_metric_families("""# TYPE hist_w_classic_two_sets histogram # HELP hist_w_classic_two_sets Is an example of a native histogram plus a classic histogram with two label sets @@ -236,7 +302,7 @@ def test_native_plus_classic_histogram_two_labelsets(self): # EOF """) families = list(families) - + hfm = HistogramMetricFamily("hist_w_classic_two_sets", "Is an example of a native histogram plus a classic histogram with two label sets") hfm.add_sample("hist_w_classic_two_sets", {"foo": "bar"}, None, None, None, NativeHistogram(24, 100, 0, 0.001, 4, (BucketSpan(0, 2), BucketSpan(1, 2)), (BucketSpan(0, 2), BucketSpan(1, 2)), (2, 1, -3, 3), (2, 1, -2, 3))) hfm.add_sample("hist_w_classic_two_sets_bucket", {"foo": "bar", "le": "0.001"}, 4.0, None, None, None) @@ -299,6 +365,16 @@ def test_counter_exemplars(self): cfm.add_sample("a_total", {}, 0.0, Timestamp(123, 0), Exemplar({"a": "b"}, 0.5)) self.assertEqual([cfm], list(families)) + def test_counter_exemplars_utf8(self): + families = text_string_to_metric_families("""# TYPE "a.b" counter +# HELP "a.b" help +{"a.b_total"} 0 123 # {"c{}d"="b"} 0.5 +# EOF +""") + cfm = CounterMetricFamily("a.b", 
"help") + cfm.add_sample("a.b_total", {}, 0.0, Timestamp(123, 0), Exemplar({"c{}d": "b"}, 0.5)) + self.assertEqual([cfm], list(families)) + def test_counter_exemplars_empty_brackets(self): families = text_string_to_metric_families("""# TYPE a counter # HELP a help @@ -495,10 +571,10 @@ def test_help_escaping(self): def test_escaping(self): families = text_string_to_metric_families("""# TYPE a counter # HELP a he\\n\\\\l\\tp -a_total{foo="b\\"a\\nr"} 1 -a_total{foo="b\\\\a\\z"} 2 -a_total{foo="b\\"a\\nr # "} 3 -a_total{foo="b\\\\a\\z # "} 4 +{"a_total", foo="b\\"a\\nr"} 1 +{"a_total", foo="b\\\\a\\z"} 2 +{"a_total", foo="b\\"a\\nr # "} 3 +{"a_total", foo="b\\\\a\\z # "} 4 # EOF """) metric_family = CounterMetricFamily("a", "he\n\\l\\tp", labels=["foo"]) @@ -565,66 +641,6 @@ def test_exemplars_with_hash_in_label_values(self): hfm.add_sample("a_bucket", {"le": "+Inf", "foo": "bar # "}, 3.0, None, Exemplar({"a": "d", "foo": "bar # bar"}, 4)) self.assertEqual([hfm], list(families)) - def test_fallback_to_state_machine_label_parsing(self): - from unittest.mock import patch - - from prometheus_client.openmetrics.parser import _parse_sample - - parse_sample_function = "prometheus_client.openmetrics.parser._parse_sample" - parse_labels_function = "prometheus_client.openmetrics.parser._parse_labels" - parse_remaining_function = "prometheus_client.openmetrics.parser._parse_remaining_text" - state_machine_function = "prometheus_client.openmetrics.parser._parse_labels_with_state_machine" - - parse_sample_return_value = Sample("a_total", {"foo": "foo # bar"}, 1) - with patch(parse_sample_function, return_value=parse_sample_return_value) as mock: - families = text_string_to_metric_families("""# TYPE a counter -# HELP a help -a_total{foo="foo # bar"} 1 -# EOF -""") - a = CounterMetricFamily("a", "help", labels=["foo"]) - a.add_metric(["foo # bar"], 1) - self.assertEqual([a], list(families)) - mock.assert_called_once_with('a_total{foo="foo # bar"} 1') - - # First fallback case - state_machine_return_values = [{"foo": "foo # bar"}, len('foo="foo # bar"}')] - parse_remaining_values = [1, None, None] - with patch(parse_labels_function) as mock1: - with patch(state_machine_function, return_value=state_machine_return_values) as mock2: - with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3: - sample = _parse_sample('a_total{foo="foo # bar"} 1') - s = Sample("a_total", {"foo": "foo # bar"}, 1) - self.assertEqual(s, sample) - mock1.assert_not_called() - mock2.assert_called_once_with('foo="foo # bar"} 1') - mock3.assert_called_once_with('1') - - # Second fallback case - state_machine_return_values = [{"le": "1.0"}, len('le="1.0"}')] - parse_remaining_values = [0.0, Timestamp(123, 0), Exemplar({"a": "b"}, 0.5)] - with patch(parse_labels_function) as mock1: - with patch(state_machine_function, return_value=state_machine_return_values) as mock2: - with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3: - sample = _parse_sample('a_bucket{le="1.0"} 0 123 # {a="b"} 0.5') - s = Sample("a_bucket", {"le": "1.0"}, 0.0, Timestamp(123, 0), Exemplar({"a": "b"}, 0.5)) - self.assertEqual(s, sample) - mock1.assert_not_called() - mock2.assert_called_once_with('le="1.0"} 0 123 # {a="b"} 0.5') - mock3.assert_called_once_with('0 123 # {a="b"} 0.5') - - # No need to fallback case - parse_labels_return_values = {"foo": "foo#bar"} - parse_remaining_values = [1, None, None] - with patch(parse_labels_function, return_value=parse_labels_return_values) as mock1: - with 
patch(state_machine_function) as mock2: - with patch(parse_remaining_function, return_value=parse_remaining_values) as mock3: - sample = _parse_sample('a_total{foo="foo#bar"} 1') - s = Sample("a_total", {"foo": "foo#bar"}, 1) - self.assertEqual(s, sample) - mock1.assert_called_once_with('foo="foo#bar"') - mock2.assert_not_called() - mock3.assert_called_once_with('1') def test_roundtrip(self): text = """# HELP go_gc_duration_seconds A summary of the GC invocation durations. @@ -710,8 +726,10 @@ def test_invalid_input(self): ('a{a=1} 1\n# EOF\n'), ('a{a="1} 1\n# EOF\n'), ('a{a=\'1\'} 1\n# EOF\n'), + ('"a" 1\n# EOF\n'), # Missing equal or label value. ('a{a} 1\n# EOF\n'), + ('a{"a} 1\n# EOF\n'), ('a{a"value"} 1\n# EOF\n'), ('a{a""} 1\n# EOF\n'), ('a{a=} 1\n# EOF\n'), @@ -897,6 +915,10 @@ def test_invalid_input(self): ('# TYPE a counter\n# TYPE a counter\n# EOF\n'), ('# TYPE a info\n# TYPE a counter\n# EOF\n'), ('# TYPE a_created gauge\n# TYPE a counter\n# EOF\n'), + # Bad native histograms. + ('# TYPE nh histogram\nnh {count:24\n# EOF\n'), + ('# TYPE nh histogram\nnh{} # {count:24\n# EOF\n'), + ]: with self.assertRaises(ValueError, msg=case): list(text_string_to_metric_families(case)) diff --git a/tests/test_core.py b/tests/test_core.py index 056d8e58..4e99ca33 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -14,6 +14,9 @@ ) from prometheus_client.decorator import getargspec from prometheus_client.metrics import _get_use_created +from prometheus_client.validation import ( + disable_legacy_validation, enable_legacy_validation, +) def is_locked(lock): @@ -114,8 +117,12 @@ def test_inc_not_observable(self): assert_not_observable(counter.inc) def test_exemplar_invalid_label_name(self): + enable_legacy_validation() self.assertRaises(ValueError, self.counter.inc, exemplar={':o)': 'smile'}) self.assertRaises(ValueError, self.counter.inc, exemplar={'1': 'number'}) + disable_legacy_validation() + self.counter.inc(exemplar={':o)': 'smile'}) + self.counter.inc(exemplar={'1': 'number'}) def test_exemplar_unicode(self): # 128 characters should not raise, even using characters larger than 1 byte. @@ -510,10 +517,16 @@ def test_block_decorator_with_label(self): self.assertEqual(1, value('hl_count', {'l': 'a'})) self.assertEqual(1, value('hl_bucket', {'le': '+Inf', 'l': 'a'})) - def test_exemplar_invalid_label_name(self): + def test_exemplar_invalid_legacy_label_name(self): + enable_legacy_validation() self.assertRaises(ValueError, self.histogram.observe, 3.0, exemplar={':o)': 'smile'}) self.assertRaises(ValueError, self.histogram.observe, 3.0, exemplar={'1': 'number'}) + def test_exemplar_invalid_label_name(self): + disable_legacy_validation() + self.histogram.observe(3.0, exemplar={':o)': 'smile'}) + self.histogram.observe(3.0, exemplar={'1': 'number'}) + def test_exemplar_too_long(self): # 129 characters in total should fail. 
self.assertRaises(ValueError, self.histogram.observe, 1.0, exemplar={ @@ -654,7 +667,8 @@ def test_labels_by_kwarg(self): self.assertRaises(ValueError, self.two_labels.labels) self.assertRaises(ValueError, self.two_labels.labels, {'a': 'x'}, b='y') - def test_invalid_names_raise(self): + def test_invalid_legacy_names_raise(self): + enable_legacy_validation() self.assertRaises(ValueError, Counter, '', 'help') self.assertRaises(ValueError, Counter, '^', 'help') self.assertRaises(ValueError, Counter, '', 'help', namespace='&') @@ -664,6 +678,14 @@ def test_invalid_names_raise(self): self.assertRaises(ValueError, Counter, 'c_total', '', labelnames=['__reserved']) self.assertRaises(ValueError, Summary, 'c_total', '', labelnames=['quantile']) + def test_invalid_names_raise(self): + disable_legacy_validation() + self.assertRaises(ValueError, Counter, '', 'help') + self.assertRaises(ValueError, Counter, '', 'help', namespace='&') + self.assertRaises(ValueError, Counter, '', 'help', subsystem='(') + self.assertRaises(ValueError, Counter, 'c_total', '', labelnames=['__reserved']) + self.assertRaises(ValueError, Summary, 'c_total', '', labelnames=['quantile']) + def test_empty_labels_list(self): Histogram('h', 'help', [], registry=self.registry) self.assertEqual(0, self.registry.get_sample_value('h_sum')) @@ -714,6 +736,10 @@ def test_counter(self): self.custom_collector(CounterMetricFamily('c_total', 'help', value=1)) self.assertEqual(1, self.registry.get_sample_value('c_total', {})) + def test_counter_utf8(self): + self.custom_collector(CounterMetricFamily('my.metric', 'help', value=1)) + self.assertEqual(1, self.registry.get_sample_value('my.metric_total', {})) + def test_counter_total(self): self.custom_collector(CounterMetricFamily('c_total', 'help', value=1)) self.assertEqual(1, self.registry.get_sample_value('c_total', {})) diff --git a/tests/test_exposition.py b/tests/test_exposition.py index 54bdaa98..2a3f08cb 100644 --- a/tests/test_exposition.py +++ b/tests/test_exposition.py @@ -46,6 +46,17 @@ def test_counter(self): # HELP cc_created A counter # TYPE cc_created gauge cc_created 123.456 +""", generate_latest(self.registry)) + + def test_counter_utf8(self): + c = Counter('utf8.cc', 'A counter', registry=self.registry) + c.inc() + self.assertEqual(b"""# HELP "utf8.cc_total" A counter +# TYPE "utf8.cc_total" counter +{"utf8.cc_total"} 1.0 +# HELP "utf8.cc_created" A counter +# TYPE "utf8.cc_created" gauge +{"utf8.cc_created"} 123.456 """, generate_latest(self.registry)) def test_counter_name_unit_append(self): diff --git a/tests/test_parser.py b/tests/test_parser.py index 61b3c8ae..10a2fc90 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -25,6 +25,22 @@ def test_simple_counter(self): """) self.assertEqualMetrics([CounterMetricFamily("a", "help", value=1)], list(families)) + def test_utf8_counter(self): + families = text_string_to_metric_families("""# TYPE "a.b" counter +# HELP "a.b" help +{"a.b"} 1 +""") + self.assertEqualMetrics([CounterMetricFamily("a.b", "help", value=1)], list(families)) + + def test_complex_name_counter(self): + families = text_string_to_metric_families("""# TYPE "my.counter{} # = \\" \\n" counter +# HELP "my.counter{} # = \\" \\n" help +{"my.counter{} # = \\" \\n", "awful. }}{{ # HELP EOF name"="\\n yikes } \\" value"} 1 +""") + metric = CounterMetricFamily("my.counter{} # = \" \n", "help", labels={'awful. }}{{ # HELP EOF name': '\n yikes } " value'}) + metric.add_sample("my.counter{} # = \" \n_total", {'awful. 
}}{{ # HELP EOF name': '\n yikes } " value'}, 1) + self.assertEqual([metric], list(families)) + def test_simple_gauge(self): families = text_string_to_metric_families("""# TYPE a gauge # HELP a help @@ -322,6 +338,15 @@ def test_roundtrip(self): prometheus_local_storage_chunk_ops_total{type="pin"} 32662.0 prometheus_local_storage_chunk_ops_total{type="transcode"} 980180.0 prometheus_local_storage_chunk_ops_total{type="unpin"} 32662.0 +# HELP "my.utf8.metric.#{}=" A fancy metric with dots. +# TYPE "my.utf8.metric.#{}=" summary +{"my.utf8.metric.#{}=",quantile="0"} 0.013300656000000001 +{"my.utf8.metric.#{}=",quantile="0.25"} 0.013638736 +{"my.utf8.metric.#{}=",quantile="0.5"} 0.013759906 +{"my.utf8.metric.#{}=",quantile="0.75"} 0.013962066 +{"my.utf8.metric.#{}=",quantile="1"} 0.021383540000000003 +{"my.utf8.metric.#{}=_sum"} 56.12904785 +{"my.utf8.metric.#{}=_count"} 7476.0 """ families = list(text_string_to_metric_families(text))
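A minimal sketch (illustrative only, not part of the diff) of the quoting helpers and validation toggle exercised by the tests above, assuming the public functions added in prometheus_client/openmetrics/exposition.py and prometheus_client/validation.py:

from prometheus_client.openmetrics.exposition import escape_label_name, escape_metric_name
from prometheus_client.validation import (
    disable_legacy_validation, enable_legacy_validation, get_legacy_validation,
)

# Legacy-conforming names pass through untouched; UTF-8 names are escaped and quoted.
assert escape_metric_name('http_requests_total') == 'http_requests_total'
assert escape_metric_name('my.utf8.metric') == '"my.utf8.metric"'
assert escape_label_name('label.with.dots') == '"label.with.dots"'

# The module-level toggle switches between legacy and UTF-8 name validation.
enable_legacy_validation()
assert get_legacy_validation() is True
disable_legacy_validation()
assert get_legacy_validation() is False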