Skip to content

Commit

Permalink
Support UTF-8 in metric creation, parsing, and exposition (#1070)
Browse files Browse the repository at this point in the history
part of #1013

Signed-off-by: Owen Williams <owen.williams@grafana.com>
  • Loading branch information
ywwg authored Dec 2, 2024
1 parent c89624f commit 33e6828
Show file tree
Hide file tree
Showing 12 changed files with 675 additions and 381 deletions.
34 changes: 21 additions & 13 deletions prometheus_client/exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .openmetrics import exposition as openmetrics
from .registry import CollectorRegistry, REGISTRY
from .utils import floatToGoString
from .validation import _is_valid_legacy_metric_name

__all__ = (
'CONTENT_TYPE_LATEST',
Expand Down Expand Up @@ -247,19 +248,26 @@ class TmpServer(ThreadingWSGIServer):
def generate_latest(registry: CollectorRegistry = REGISTRY) -> bytes:
"""Returns the metrics from the registry in latest text format as a string."""

def sample_line(line):
if line.labels:
labelstr = '{{{0}}}'.format(','.join(
def sample_line(samples):
if samples.labels:
labelstr = '{0}'.format(','.join(
['{}="{}"'.format(
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(line.labels.items())]))
openmetrics.escape_label_name(k), openmetrics._escape(v))
for k, v in sorted(samples.labels.items())]))
else:
labelstr = ''
timestamp = ''
if line.timestamp is not None:
if samples.timestamp is not None:
# Convert to milliseconds.
timestamp = f' {int(float(line.timestamp) * 1000):d}'
return f'{line.name}{labelstr} {floatToGoString(line.value)}{timestamp}\n'
timestamp = f' {int(float(samples.timestamp) * 1000):d}'
if _is_valid_legacy_metric_name(samples.name):
if labelstr:
labelstr = '{{{0}}}'.format(labelstr)
return f'{samples.name}{labelstr} {floatToGoString(samples.value)}{timestamp}\n'
maybe_comma = ''
if labelstr:
maybe_comma = ','
return f'{{{openmetrics.escape_metric_name(samples.name)}{maybe_comma}{labelstr}}} {floatToGoString(samples.value)}{timestamp}\n'

output = []
for metric in registry.collect():
Expand All @@ -282,8 +290,8 @@ def sample_line(line):
mtype = 'untyped'

output.append('# HELP {} {}\n'.format(
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {mname} {mtype}\n')
openmetrics.escape_metric_name(mname), metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(mname)} {mtype}\n')

om_samples: Dict[str, List[str]] = {}
for s in metric.samples:
Expand All @@ -299,9 +307,9 @@ def sample_line(line):
raise

for suffix, lines in sorted(om_samples.items()):
output.append('# HELP {}{} {}\n'.format(metric.name, suffix,
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {metric.name}{suffix} gauge\n')
output.append('# HELP {} {}\n'.format(openmetrics.escape_metric_name(metric.name + suffix),
metric.documentation.replace('\\', r'\\').replace('\n', r'\n')))
output.append(f'# TYPE {openmetrics.escape_metric_name(metric.name + suffix)} gauge\n')
output.extend(lines)
return ''.join(output).encode('utf-8')

Expand Down
38 changes: 7 additions & 31 deletions prometheus_client/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,21 @@

from . import values # retain this import style for testability
from .context_managers import ExceptionCounter, InprogressTracker, Timer
from .metrics_core import (
Metric, METRIC_LABEL_NAME_RE, METRIC_NAME_RE,
RESERVED_METRIC_LABEL_NAME_RE,
)
from .metrics_core import Metric
from .registry import Collector, CollectorRegistry, REGISTRY
from .samples import Exemplar, Sample
from .utils import floatToGoString, INF
from .validation import (
_validate_exemplar, _validate_labelnames, _validate_metric_name,
)

# Type variable bound to MetricWrapperBase; used to annotate ``self`` in
# MetricWrapperBase.__init__.
T = TypeVar('T', bound='MetricWrapperBase')
# Type variable bound to any callable.
# NOTE(review): presumably used so decorators keep the wrapped callable's type — confirm at use sites.
F = TypeVar("F", bound=Callable[..., Any])


def _build_full_name(metric_type, name, namespace, subsystem, unit):
if not name:
raise ValueError('Metric name should not be empty')
full_name = ''
if namespace:
full_name += namespace + '_'
Expand All @@ -38,31 +40,6 @@ def _build_full_name(metric_type, name, namespace, subsystem, unit):
return full_name


def _validate_labelname(l):
    """Raise ValueError unless ``l`` is an acceptable label name.

    The name must match ``METRIC_LABEL_NAME_RE`` and must not match
    ``RESERVED_METRIC_LABEL_NAME_RE``.
    """
    if METRIC_LABEL_NAME_RE.match(l) is None:
        raise ValueError('Invalid label metric name: ' + l)
    if RESERVED_METRIC_LABEL_NAME_RE.match(l) is not None:
        raise ValueError('Reserved label metric name: ' + l)


def _validate_labelnames(cls, labelnames):
    """Validate every label name and return them as a tuple.

    Each name is checked with ``_validate_labelname`` and must additionally
    not appear in ``cls._reserved_labelnames``.

    Raises:
        ValueError: if any name is invalid or reserved.
    """
    names = tuple(labelnames)
    for name in names:
        _validate_labelname(name)
        # Per-class reserved names are checked after the generic validation.
        if name in cls._reserved_labelnames:
            raise ValueError('Reserved label metric name: ' + name)
    return names


def _validate_exemplar(exemplar):
    """Check an exemplar's label names and combined label size.

    Every key of ``exemplar`` is checked with ``_validate_labelname``. The
    lengths of all keys and values are summed, and the total must not exceed
    128 characters.

    Raises:
        ValueError: if a label name is rejected, or if the combined length of
            all names and values is greater than 128.
    """
    runes = 0
    for k, v in exemplar.items():
        _validate_labelname(k)
        runes += len(k) + len(v)
    if runes > 128:
        # The count must be interpolated explicitly; without the `% runes`
        # the message would contain a literal '%d'.
        raise ValueError(
            'Exemplar labels have %d UTF-8 characters, exceeding the limit of 128' % runes)


def _get_use_created() -> bool:
    """Return False when PROMETHEUS_DISABLE_CREATED_SERIES is set to a true value.

    The environment variable is compared case-insensitively against
    'true', '1' and 't'. Any other value, or an unset variable, yields True.
    """
    setting = os.environ.get("PROMETHEUS_DISABLE_CREATED_SERIES", 'False')
    disabled = setting.lower() in ('true', '1', 't')
    return not disabled
Expand Down Expand Up @@ -139,8 +116,7 @@ def __init__(self: T,
self._documentation = documentation
self._unit = unit

if not METRIC_NAME_RE.match(self._name):
raise ValueError('Invalid metric name: ' + self._name)
_validate_metric_name(self._name)

if self._is_parent():
# Prepare the fields needed for child metrics.
Expand Down
8 changes: 2 additions & 6 deletions prometheus_client/metrics_core.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import re
from typing import Dict, List, Optional, Sequence, Tuple, Union

from .samples import Exemplar, NativeHistogram, Sample, Timestamp
from .validation import _validate_metric_name

# Names of the metric types listed by this module.
# NOTE(review): presumably the set Metric.__init__ validates `typ` against — confirm in the full file.
METRIC_TYPES = (
'counter', 'gauge', 'summary', 'histogram',
'gaugehistogram', 'unknown', 'info', 'stateset',
)
# Metric names: ASCII letters, digits, '_' and ':'; the first character may not be a digit.
METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:][a-zA-Z0-9_:]*$')
# Label names: ASCII letters, digits and '_' (no ':'); the first character may not be a digit.
METRIC_LABEL_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
# Matches any name beginning with a double underscore.
RESERVED_METRIC_LABEL_NAME_RE = re.compile(r'^__.*$')


class Metric:
Expand All @@ -24,8 +21,7 @@ class Metric:
def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
if unit and not name.endswith("_" + unit):
name += "_" + unit
if not METRIC_NAME_RE.match(name):
raise ValueError('Invalid metric name: ' + name)
_validate_metric_name(name)
self.name: str = name
self.documentation: str = documentation
self.unit: str = unit
Expand Down
73 changes: 58 additions & 15 deletions prometheus_client/openmetrics/exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@


from ..utils import floatToGoString
from ..validation import (
_is_valid_legacy_labelname, _is_valid_legacy_metric_name,
)

# Media type string: OpenMetrics text, version 1.0.0, UTF-8 charset.
CONTENT_TYPE_LATEST = 'application/openmetrics-text; version=1.0.0; charset=utf-8'
"""Content type of the latest OpenMetrics text format"""
Expand All @@ -24,18 +27,27 @@ def generate_latest(registry):
try:
mname = metric.name
output.append('# HELP {} {}\n'.format(
mname, metric.documentation.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"')))
output.append(f'# TYPE {mname} {metric.type}\n')
escape_metric_name(mname), _escape(metric.documentation)))
output.append(f'# TYPE {escape_metric_name(mname)} {metric.type}\n')
if metric.unit:
output.append(f'# UNIT {mname} {metric.unit}\n')
output.append(f'# UNIT {escape_metric_name(mname)} {metric.unit}\n')
for s in metric.samples:
if s.labels:
labelstr = '{{{0}}}'.format(','.join(
['{}="{}"'.format(
k, v.replace('\\', r'\\').replace('\n', r'\n').replace('"', r'\"'))
for k, v in sorted(s.labels.items())]))
if not _is_valid_legacy_metric_name(s.name):
labelstr = escape_metric_name(s.name)
if s.labels:
labelstr += ', '
else:
labelstr = ''

if s.labels:
items = sorted(s.labels.items())
labelstr += ','.join(
['{}="{}"'.format(
escape_label_name(k), _escape(v))
for k, v in items])
if labelstr:
labelstr = "{" + labelstr + "}"

if s.exemplar:
if not _is_valid_exemplar_metric(metric, s):
raise ValueError(f"Metric {metric.name} has exemplars, but is not a histogram bucket or counter")
Expand All @@ -59,16 +71,47 @@ def generate_latest(registry):
timestamp = ''
if s.timestamp is not None:
timestamp = f' {s.timestamp}'
output.append('{}{} {}{}{}\n'.format(
s.name,
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
if _is_valid_legacy_metric_name(s.name):
output.append('{}{} {}{}{}\n'.format(
s.name,
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
else:
output.append('{} {}{}{}\n'.format(
labelstr,
floatToGoString(s.value),
timestamp,
exemplarstr,
))
except Exception as exception:
exception.args = (exception.args or ('',)) + (metric,)
raise

output.append('# EOF\n')
return ''.join(output).encode('utf-8')


def escape_metric_name(s: str) -> str:
    """Return the metric name in the form used for exposition.

    A name accepted by ``_is_valid_legacy_metric_name`` is returned unchanged.
    Any other name is backslash-escaped with ``_escape`` and wrapped in
    double quotes.
    """
    if not _is_valid_legacy_metric_name(s):
        return f'"{_escape(s)}"'
    return s


def escape_label_name(s: str) -> str:
    """Return the label name in the form used for exposition.

    A name accepted by ``_is_valid_legacy_labelname`` is returned unchanged.
    Any other name is backslash-escaped with ``_escape`` and wrapped in
    double quotes.
    """
    if not _is_valid_legacy_labelname(s):
        return f'"{_escape(s)}"'
    return s


def _escape(s: str) -> str:
    """Backslash-escape backslashes, newlines and double quotes in ``s``."""
    escaped = s
    # Backslash goes first so the backslashes added by the later
    # replacements are not escaped a second time.
    for raw, replacement in (('\\', r'\\'), ('\n', r'\n'), ('"', r'\"')):
        escaped = escaped.replace(raw, replacement)
    return escaped
Loading

0 comments on commit 33e6828

Please sign in to comment.