#!/usr/bin/env python
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parses a markdown file, extracts documentation for UMA metrics from the doc,
and writes that into histograms.xml file.

The syntax for the markdown this script processes is as follows:
  . The first line for each UMA metric should be: '## [metric name]'.
  . The following lines should include the additional information about the
    metric, in a markdown list, in '[name]: [value]' format. For example:

    * units: pixels
    * owners: first@chromium.org, second@example.com

  . The description, and explanation, of the metric should be after an empty
    line after the list of attributes.
  . Each UMA metric section should end with a line '---'. If there are non-UMA
    sections at the beginning of the doc, then the first UMA section should be
    preceded by a '---' line.

A complete example:

=== sample.md
# A sample markdown document.
This is a sample markdown. It has some documentation for UMA metrics too.

# Motivation
The purpose of this sample doc is to be a guide for writing such docs.

---
## ExampleMetric.First
* units: smiles
* owners: firstowner@chromium.org, second@example.org
* os: windows, mac
* added: 2018-03-01
* expires: 2023-01-01

ExampleMetric.First measures the first example.

---
## ExampleMetric.Second
* units: happiness

This measures the second example.
"""
import datetime
import os
import re
import sys
import time
import xml.dom.minidom
sys.path.append(os.path.join(os.path.dirname(__file__), 'common'))
import path_util
sys.path.append(os.path.join(os.path.dirname(__file__), 'histograms'))
import pretty_print
# Attribute names that may appear in the '* [name]: [value]' list of a metric
# section. Any other name is rejected while the markdown file is parsed.
SupportedTags = [
  'added',
  'expires',
  'enum',
  'os',
  'owners',
  'tags',
  'units',
]
def IsTagKnown(tag):
  """Returns True if |tag| is one of the attribute names in SupportedTags."""
  is_known = tag in SupportedTags
  return is_known
def IsTagValid(tag, value):
  """Returns whether |value| is an acceptable value for the known tag |tag|.

  The 'added' and 'expires' tags accept either a milestone (an 'M' followed by
  two or three digits, e.g. 'M65') or a calendar date written as 'YYYY-MM-DD'.
  Every other known tag accepts any value.

  Args:
    tag: Name of the attribute. Must satisfy IsTagKnown().
    value: The string given for the attribute.

  Returns:
    True if the value is valid for the tag, False otherwise.
  """
  assert IsTagKnown(tag)
  if tag not in ('added', 'expires'):
    return True
  if re.match('^M[0-9]{2,3}$', value):
    return True
  date = re.match('^([0-9]{4})-([0-9]{2})-([0-9]{2})$', value)
  if not date:
    return False
  year, month, day = [int(part) for part in date.groups()]
  try:
    datetime.date(year, month, day)
  except ValueError:
    # Well-formed but impossible date (e.g. '2018-02-30'): report it as an
    # invalid value instead of letting the ValueError escape to the caller.
    return False
  return True
class Trace:
  """Context manager that reports a step, and how long it took, on stdout.

  Entering the block writes '<msg> ...' without a trailing newline and flushes
  stdout. Leaving the block, whether normally or through an exception,
  completes the line with ' Done (<seconds> sec)'.
  """

  def __init__(self, msg):
    """Remembers |msg|; the timer only starts when the block is entered."""
    self.msg_ = msg
    self.start_ = None

  def __enter__(self):
    self.start_ = time.time()
    # One-element tuple so that a message which is itself a tuple is formatted
    # as a single value.
    sys.stdout.write('%s ...' % (self.msg_,))
    sys.stdout.flush()
    # Return the instance so that 'with Trace(...) as t' binds the tracer
    # instead of None.
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    # Implicitly returns None, so an exception raised inside the block still
    # propagates after the elapsed time has been printed.
    sys.stdout.write(' Done (%.3f sec)\n' % (time.time() - self.start_))
def _Require(condition, message):
  """Raises AssertionError(message) unless |condition| is truthy.

  Used instead of a bare 'assert' statement so that the validation of the
  markdown file is not stripped when Python runs with -O, while the exception
  type stays AssertionError.
  """
  if not condition:
    raise AssertionError(message)


def GetMetricsFromMdFile(mdfile):
  """Returns an array of metrics parsed from the markdown file. See the top of
  the file for documentation on the format of the markdown file.

  Args:
    mdfile: Path of the markdown file to read.

  Returns:
    A list of dicts, one per metric section, in file order. Each dict maps
    'name' to the header text after the leading '#'s, 'summary' to the lines
    following the first blank line (joined with newlines), and every attribute
    name found in the section to its value.

  Raises:
    AssertionError: If an attribute line is malformed, names an unknown tag or
      carries an invalid value, or if a metric has no summary, no 'owners', or
      neither 'enum' nor 'units'.
  """
  with open(mdfile) as f:
    raw_md = f.read()

  header_pattern = re.compile(r'^##+ ')
  tag_pattern = re.compile(r'^\* ([^:]*): (.*)$')
  metrics = []
  for section in re.split('\n---+\n', raw_md):
    lines = section.strip().split('\n')
    # The first line should have the header, containing the name of the metric.
    header_match = header_pattern.match(lines[0])
    if not header_match:
      # Empty or non-UMA section: skip it, but keep scanning so that metrics
      # in later sections are not silently dropped.
      continue
    name = lines[0][header_match.end():]
    metric = {'name': name}

    # Index of the first summary line. It stays at len(lines) when no blank
    # line terminates the attribute list, i.e. when there is no summary.
    summary_start = len(lines)
    for index, line in enumerate(lines[1:], 1):
      if not line.strip():
        summary_start = index + 1
        break
      match = tag_pattern.match(line)
      _Require(match,
               'Malformed attribute line "%s" for "%s".' % (line, name))
      tag, value = match.groups()
      _Require(IsTagKnown(tag), 'Unknown tag: "%s".' % tag)
      _Require(IsTagValid(tag, value),
               'Invalid value "%s" for tag "%s".' % (value, tag))
      metric[tag] = value

    _Require(summary_start < len(lines), 'No summary found for "%s"' % name)
    metric['summary'] = '\n'.join(lines[summary_start:])
    _Require('owners' in metric, 'Must have owners for "%s"' % name)
    _Require('enum' in metric or 'units' in metric,
             'Metric "%s" must have a unit listed in "enum" or "units".' % name)
    metrics.append(metric)
  return metrics
def CreateNode(tree, tag, text):
  """Returns a new <tag> element of |tree| whose only child is |text|.

  The element is created through |tree| but is not attached to it; the caller
  decides where to append it.
  """
  element = tree.createElement(tag)
  text_node = tree.createTextNode(text)
  element.appendChild(text_node)
  return element
def _CreateHistogramNode(tree, metric):
  """Builds the <histogram> element describing a single parsed metric.

  Args:
    tree: The DOM document used to create the new nodes.
    metric: A metric dict as returned by GetMetricsFromMdFile(). 'name',
      'owners' and 'summary' are always read; 'units' or 'enum' is read when
      present. Other attributes of the metric are not written out.

  Returns:
    The new <histogram> element, not yet attached to |tree|.
  """
  node = tree.createElement('histogram')
  node.setAttribute('name', metric['name'])
  # 'units' wins when a metric lists both 'units' and 'enum'.
  if 'units' in metric:
    node.setAttribute('units', metric['units'])
  elif 'enum' in metric:
    node.setAttribute('enum', metric['enum'])
  for owner in metric['owners'].split(','):
    # Owners are written as 'a@x.org, b@y.org'; drop the blanks around each
    # address (and empty entries from stray commas) so that the <owner> text
    # is exactly the address.
    owner = owner.strip()
    if owner:
      node.appendChild(CreateNode(tree, 'owner', owner))
  node.appendChild(CreateNode(tree, 'summary', metric['summary']))
  return node


def main():
  """Adds the metrics documented in a markdown file to histograms.xml.

  Expects exactly one command line argument, the path of the markdown file.
  Reads the file named by path_util.GetHistogramsFile(), appends one
  <histogram> element per parsed metric to its single <histograms> element,
  and writes the output of pretty_print.PrettyPrintHistogramsTree() back to
  the same path. Exits with status 1 on a usage error or when the xml does not
  contain exactly one <histograms> element.
  """
  if len(sys.argv) != 2:
    sys.stderr.write('Usage: %s <path-to-md-file>\n' % (sys.argv[0]))
    sys.exit(1)
  md_path = sys.argv[1]

  with Trace('Reading histograms.xml'):
    xml_path = path_util.GetHistogramsFile()
    with open(xml_path, 'rb') as f:
      raw_xml = f.read()

  with Trace('Parsing xml'):
    tree = xml.dom.minidom.parseString(raw_xml)
    histograms_list = tree.getElementsByTagName('histograms')
    if histograms_list.length != 1:
      sys.stderr.write('histograms.xml should have exactly one "histograms" '
                       'section.\n')
      sys.exit(1)
    histograms = histograms_list[0]

  with Trace('Parsing md file %s' % (md_path)):
    metrics = GetMetricsFromMdFile(md_path)

  with Trace('Adding parsed metrics'):
    for metric in metrics:
      # TODO(sad): This always appends the metric to the list. This should
      # also update if there is an already existing metric, instead of adding a
      # new one.
      histograms.appendChild(_CreateHistogramNode(tree, metric))

  with Trace('Pretty printing into histograms.xml'):
    new_xml = pretty_print.PrettyPrintHistogramsTree(tree)
    with open(xml_path, 'wb') as f:
      f.write(new_xml)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
  main()