33from __future__ import unicode_literals
44
55import math
6+ import re
67
78from ..metrics_core import Metric , METRIC_LABEL_NAME_RE
89from ..samples import Exemplar , Sample , Timestamp
@@ -24,6 +25,24 @@ def text_string_to_metric_families(text):
2425 yield metric_family
2526
2627
28+ ESCAPE_SEQUENCES = {
29+ '\\ \\ ' : '\\ ' ,
30+ '\\ n' : '\n ' ,
31+ '\\ "' : '"' ,
32+ }
33+
34+
35+ def _replace_escape_sequence (match ):
36+ return ESCAPE_SEQUENCES [match .group (0 )]
37+
38+
39+ ESCAPING_RE = re .compile (r'\\[\\n"]' )
40+
41+
42+ def _replace_escaping (s ):
43+ return ESCAPING_RE .sub (_replace_escape_sequence , s )
44+
45+
2746def _unescape_help (text ):
2847 result = []
2948 slash = False
@@ -83,14 +102,23 @@ def _parse_timestamp(timestamp):
83102 return ts
84103
85104
86- def _parse_labels (it , text ):
105+ def _is_character_escaped (s , charpos ):
106+ num_bslashes = 0
107+ while (charpos > num_bslashes and
108+ s [charpos - 1 - num_bslashes ] == '\\ ' ):
109+ num_bslashes += 1
110+ return num_bslashes % 2 == 1
111+
112+
113+ def _parse_labels_with_state_machine (text ):
87114 # The { has already been parsed.
88115 state = 'startoflabelname'
89116 labelname = []
90117 labelvalue = []
91118 labels = {}
119+ labels_len = 0
92120
93- for char in it :
121+ for char in text :
94122 if state == 'startoflabelname' :
95123 if char == '}' :
96124 state = 'endoflabels'
@@ -141,37 +169,123 @@ def _parse_labels(it, text):
141169 break
142170 else :
143171 raise ValueError ("Invalid line: " + text )
144- return labels
172+ labels_len += 1
173+ return labels , labels_len
174+
175+
176+ def _parse_labels (text ):
177+ labels = {}
178+
179+ # Raise error if we don't have valid labels
180+ if text and "=" not in text :
181+ raise ValueError
182+
183+ # Copy original labels
184+ sub_labels = text
185+ try :
186+ # Process one label at a time
187+ while sub_labels :
188+ # The label name is before the equal
189+ value_start = sub_labels .index ("=" )
190+ label_name = sub_labels [:value_start ]
191+ sub_labels = sub_labels [value_start + 1 :]
192+
193+ # Check for missing quotes
194+ if not sub_labels or sub_labels [0 ] != '"' :
195+ raise ValueError
196+
197+ # The first quote is guaranteed to be after the equal
198+ value_substr = sub_labels [1 :]
199+
200+ # Check for extra commas
201+ if not label_name or label_name [0 ] == ',' :
202+ raise ValueError
203+ if not value_substr or value_substr [- 1 ] == ',' :
204+ raise ValueError
205+
206+ # Find the last unescaped quote
207+ i = 0
208+ while i < len (value_substr ):
209+ i = value_substr .index ('"' , i )
210+ if not _is_character_escaped (value_substr [:i ], i ):
211+ break
212+ i += 1
213+
214+ # The label value is inbetween the first and last quote
215+ quote_end = i + 1
216+ label_value = sub_labels [1 :quote_end ]
217+ # Replace escaping if needed
218+ if "\\ " in label_value :
219+ label_value = _replace_escaping (label_value )
220+ labels [label_name ] = label_value
221+
222+ # Remove the processed label from the sub-slice for next iteration
223+ sub_labels = sub_labels [quote_end + 1 :]
224+ if sub_labels .startswith ("," ):
225+ next_comma = 1
226+ else :
227+ next_comma = 0
228+ sub_labels = sub_labels [next_comma :]
229+
230+ # Check for missing commas
231+ if sub_labels and next_comma == 0 :
232+ raise ValueError
233+
234+ return labels
235+
236+ except ValueError :
237+ raise ValueError ("Invalid labels: " + text )
145238
146239
def _parse_sample(text):
    """Parse a single sample line into a ``Sample``.

    The line is split into the metric name, an optional ``{...}`` label
    section and the remaining text, which is handed to
    ``_parse_remaining_text`` to obtain the value, timestamp and exemplar.

    Raises ValueError when the line is malformed.
    """
    separator = " # "
    # Detect the labels in the text
    label_start = text.find("{")
    if label_start == -1 or separator in text[:label_start]:
        # We don't have sample labels. A "{" that only appears after the
        # exemplar separator belongs to the exemplar, not to the sample.
        name_end = text.find(" ")
        if name_end == -1:
            raise ValueError("Invalid line: " + text)
        name = text[:name_end]
        # Parse the remaining text after the name
        remaining_text = text[name_end + 1:]
        value, timestamp, exemplar = _parse_remaining_text(remaining_text)
        return Sample(name, {}, value, timestamp, exemplar)

    # The name is before the labels
    name = text[:label_start]
    if separator not in text:
        # Line doesn't contain an exemplar, so the last "}" closes the labels
        label_end = text.rfind("}")
        if label_end < label_start:
            # No closing brace after the opening one
            raise ValueError("Invalid line: " + text)
        labels = _parse_labels(text[label_start + 1:label_end])
    else:
        # Line potentially contains an exemplar
        # Fallback to parsing labels with a state machine
        labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:])
        label_end = labels_len + len(name)
    # Parsing labels succeeded, continue parsing the remaining text
    remaining_text = text[label_end + 2:]
    value, timestamp, exemplar = _parse_remaining_text(remaining_text)
    return Sample(name, labels, value, timestamp, exemplar)
269+
270+
271+ def _parse_remaining_text (text ):
272+ split_text = text .split (" " , 1 )
273+ val = _parse_value (split_text [0 ])
274+ if len (split_text ) == 1 :
275+ # We don't have timestamp or exemplar
276+ return val , None , None
277+
150278 timestamp = []
151- labels = {}
152279 exemplar_value = []
153280 exemplar_timestamp = []
154281 exemplar_labels = None
155282
156- state = 'name'
283+ state = 'timestamp'
284+ text = split_text [1 ]
157285
158286 it = iter (text )
159287 for char in it :
160- if state == 'name' :
161- if char == '{' :
162- labels = _parse_labels (it , text )
163- # Space has already been parsed.
164- state = 'value'
165- elif char == ' ' :
166- state = 'value'
167- else :
168- name .append (char )
169- elif state == 'value' :
170- if char == ' ' :
171- state = 'timestamp'
172- else :
173- value .append (char )
174- elif state == 'timestamp' :
288+ if state == 'timestamp' :
175289 if char == '#' and not timestamp :
176290 state = 'exemplarspace'
177291 elif char == ' ' :
@@ -190,13 +304,23 @@ def _parse_sample(text):
190304 raise ValueError ("Invalid line: " + text )
191305 elif state == 'exemplarstartoflabels' :
192306 if char == '{' :
193- exemplar_labels = _parse_labels (it , text )
194- # Space has already been parsed.
307+ label_start , label_end = text .index ("{" ), text .rindex ("}" )
308+ exemplar_labels = _parse_labels (text [label_start + 1 :label_end ])
309+ state = 'exemplarparsedlabels'
310+ else :
311+ raise ValueError ("Invalid line: " + text )
312+ elif state == 'exemplarparsedlabels' :
313+ if char == '}' :
314+ state = 'exemplarvaluespace'
315+ elif state == 'exemplarvaluespace' :
316+ if char == ' ' :
195317 state = 'exemplarvalue'
196318 else :
197319 raise ValueError ("Invalid line: " + text )
198320 elif state == 'exemplarvalue' :
199- if char == ' ' :
321+ if char == ' ' and not exemplar_value :
322+ raise ValueError ("Invalid line: " + text )
323+ elif char == ' ' :
200324 state = 'exemplartimestamp'
201325 else :
202326 exemplar_value .append (char )
@@ -212,13 +336,9 @@ def _parse_sample(text):
212336 raise ValueError ("Invalid line: " + text )
213337
214338 # Incomplete exemplar.
215- if state in ['exemplarhash' , 'exemplarspace' , 'exemplarstartoflabels' ]:
339+ if state in ['exemplarhash' , 'exemplarspace' , 'exemplarstartoflabels' , 'exemplarparsedlabels' ]:
216340 raise ValueError ("Invalid line: " + text )
217341
218- if not value :
219- raise ValueError ("Invalid line: " + text )
220- value = '' .join (value )
221- val = _parse_value (value )
222342 ts = _parse_timestamp (timestamp )
223343 exemplar = None
224344 if exemplar_labels is not None :
@@ -231,7 +351,7 @@ def _parse_sample(text):
231351 _parse_timestamp (exemplar_timestamp ),
232352 )
233353
234- return Sample ( '' . join ( name ), labels , val , ts , exemplar )
354+ return val , ts , exemplar
235355
236356
237357def _group_for_sample (sample , name , typ ):
0 commit comments