-
Notifications
You must be signed in to change notification settings - Fork 29.7k
/
test-disasm-regex-helper.h
318 lines (279 loc) Β· 11.6 KB
/
test-disasm-regex-helper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_CCTEST_DISASM_REGEX_HELPER_H_
#define V8_CCTEST_DISASM_REGEX_HELPER_H_
#include <algorithm>
#include <iostream>
#include <map>
#include <regex>  // NOLINT(build/c++11)
#include <string>
#include <vector>

#include "src/base/logging.h"
#include "src/base/macros.h"
namespace v8 {
namespace internal {
// This class provides methods for regular expression matching with an extra
// feature of user defined named capture groups which are alive across
// regex search calls.
//
// The main use case for the class is to test multiple-line assembly
// output with an ability to express dataflow or dependencies by allowing single
// definition / multiple use symbols. When processing output lines and trying to
// match them against the set of patterns a user can define a named group - a
// symbol - and a regex for matching it. If the regex with the definitions is
// matched then whenever this symbol appears again (no redefinitions though) in
// the following patterns the parser will replace the symbol reference in the
// pattern by an actual literal value matched during processing symbol
// definition. This effectively checks that all of the output lines have
// the same literal for the described symbol. To track the symbols this class
// implements a simple single-definition symbol table.
//
// Example: Let's consider a case when we want to test that the assembly
// output consists of two instructions - a load and a store; we also want
// to check that the loaded value is used as store value for the store,
// like here:
//
// ldr x3, [x4]
// str x3, [x5]
//
// Using special syntax for symbol definitions and uses one could write the
// following regex making sure that the load register is used by the store:
//
// 'ldr <<NamedReg:x[0-9]+>>, \[x[0-9]+\]'
// 'str <<NamedReg>>, \[x[0-9]+\]'
//
// See 'ProcessPattern' for more details.
class RegexParser {
 public:
  RegexParser()
      // Regex to parse symbol references: definitions or uses.
      // <<SymbolName[:'def regex']>>
      // Capture group 1 is the symbol name, the optional capture group 2 is
      // the definition regex (see SymbolMatchIndex).
      : symbol_ref_regex_("<<([a-zA-Z_][a-zA-Z0-9_]*)(?::(.*?))?>>") {}

  // Status codes used for return values and error diagnostics.
  enum class Status {
    // The line matched the pattern; symbol definitions were committed.
    kSuccess = 0,
    // The processed pattern was not found in the line.
    kNotMatched,
    // The pattern is malformed, e.g. it contains capture groups other than
    // those generated for symbol references.
    kWrongPattern,
    // The pattern uses a symbol which is not in the symbol table.
    kDefNotFound,
    // The pattern defines a symbol which is already referenced earlier in the
    // same pattern or which is already in the symbol table.
    kRedefinition,
  };

  // This class holds info on a symbol definition.
  class SymbolInfo {
   public:
    explicit SymbolInfo(const std::string& matched_value)
        : matched_value_(matched_value) {}

    // Returns an actual matched value for the symbol.
    const std::string& matched_value() const { return matched_value_; }

   private:
    std::string matched_value_;
  };

  // This class holds temporary info on a symbol while processing an input
  // line: the symbol name and whether the reference is a definition or a use.
  class SymbolVectorElem {
   public:
    SymbolVectorElem(bool is_def, const std::string& symbol_name)
        : is_def_(is_def), symbol_name_(symbol_name) {}

    bool is_def() const { return is_def_; }
    const std::string& symbol_name() const { return symbol_name_; }

   private:
    bool is_def_;
    std::string symbol_name_;
  };

  using SymbolMap = std::map<std::string, SymbolInfo>;
  using MatchVector = std::vector<SymbolVectorElem>;

  // Tries to match (actually search, similar to std::regex_search) the line
  // against the pattern (possibly containing symbols references) and if
  // matched commits symbols definitions from the pattern to the symbol table.
  //
  // Returns: status of the matching attempt.
  //
  // Important: the format of pattern regexs is based on std::ECMAScript syntax
  // (http://www.cplusplus.com/reference/regex/ECMAScript/) with a few extra
  // restrictions:
  //   * no backreference (or submatch) groups
  //     - when a group (e.g. "(a|b)+") is needed use a passive group
  //       (e.g. "(?:a|b)+").
  //   * special syntax for symbol definitions: <<Name:regex>>
  //     - 'Name' must be c-style variable name ([a-zA-Z_][a-zA-Z0-9_]*).
  //     - 'regex' - is a regex for the actual literal expected in the symbol
  //       definition line. It must not contain any symbol references.
  //   * special syntax for symbol uses <<Name>>
  //     - a use is replaced by the literal matched by the definition; regex
  //       syntax characters inside that literal are escaped, so the use
  //       matches exactly the same text.
  //
  // Semantical restrictions on symbols references:
  //   * symbols mustn't be referenced before they are defined.
  //     - a pattern R1 which uses symbol 'A' mustn't be processed if a pattern
  //       R2 with the symbol 'A' definition hasn't been yet matched (R1!=R2).
  //     - A pattern mustn't define a symbol and use it inside the same regex.
  //   * symbols mustn't be redefined.
  //     - if a line has been matched against a pattern R1 with symbol 'A'
  //       then other patterns mustn't define symbol 'A'.
  //   * symbols definitions are only committed and registered if the whole
  //     pattern is successfully matched.
  //
  // Notes:
  //   * A pattern may contain uses of the same or different symbols and
  //     definitions of different symbols however if a symbol is defined in the
  //     pattern it can't be used in the same pattern.
  //
  // Pattern example: "<<A:[0-9]+>> <<B>>, <<B>> <<C:[a-z]+>>" (assuming 'B' is
  // defined and matched).
  Status ProcessPattern(const std::string& line, const std::string& pattern) {
    // Processed pattern which is going to be used for std::regex_search; symbol
    // references are replaced accordingly to the reference type - def or use.
    std::string final_pattern;
    // A vector of records for symbols references in the pattern. The format is
    // {is_definition, symbol_name}.
    MatchVector symbols_refs;
    Status status =
        ParseSymbolsInPattern(pattern, &final_pattern, &symbols_refs);
    if (status != Status::kSuccess) {
      return status;
    }

    std::smatch match;
    if (!std::regex_search(line, match, std::regex(final_pattern))) {
      return Status::kNotMatched;
    }

    // This checks that no backreference groups were used in the pattern except
    // for those added by ParseSymbolsInPattern (match[0] is the whole match,
    // hence the "- 1").
    if (symbols_refs.size() != (match.size() - 1)) {
      return Status::kWrongPattern;
    }

    status = CheckSymbolsMatchedValues(symbols_refs, match);
    if (status != Status::kSuccess) {
      return status;
    }

    CommitSymbolsDefinitions(symbols_refs, match);
    return Status::kSuccess;
  }

  // Returns whether a symbol is defined in the symbol table.
  bool IsSymbolDefined(const std::string& symbol_name) const {
    return map_.find(symbol_name) != std::end(map_);
  }

  // Returns the matched value for a symbol. The symbol must be defined.
  std::string GetSymbolMatchedValue(const std::string& symbol_name) const {
    const auto symbol_map_iter = map_.find(symbol_name);
    DCHECK(symbol_map_iter != std::end(map_));
    return symbol_map_iter->second.matched_value();
  }

  // Prints the symbol table.
  void PrintSymbols(std::ostream& os) const {
    os << "Printing symbol table..." << std::endl;
    for (const auto& t : map_) {
      const std::string& sym_name = t.first;
      const SymbolInfo& sym_info = t.second;
      os << "<<" << sym_name << ">>: \"" << sym_info.matched_value() << "\""
         << std::endl;
    }
  }

 protected:
  // Fixed layout for the symbol reference match.
  enum SymbolMatchIndex {
    kFullSubmatch = 0,
    kName = 1,
    kDefRegex = 2,
    kSize = kDefRegex + 1,
  };

  // Returns a copy of |literal| in which every ECMAScript regex syntax
  // character (one of ^ $ \ . * + ? ( ) [ ] { } and |) is prefixed with a
  // backslash, so that the result used as a pattern matches exactly |literal|.
  static std::string EscapeRegexLiteral(const std::string& literal) {
    std::string escaped;
    escaped.reserve(literal.size());
    for (const char c : literal) {
      switch (c) {
        case '^':
        case '$':
        case '\\':
        case '.':
        case '*':
        case '+':
        case '?':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '|':
          escaped.push_back('\\');
          break;
        default:
          break;
      }
      escaped.push_back(c);
    }
    return escaped;
  }

  // Processes a symbol reference: for definitions it adds the symbol regex, for
  // uses it adds actual literal from a previously matched definition. Also
  // fills the symbol references vector.
  //
  // Either way the appended text is wrapped into a capture group so that each
  // symbol reference corresponds to exactly one submatch.
  //
  // Returns kRedefinition for a definition of a symbol already referenced in
  // |symbols_refs|, kDefNotFound for a use of a symbol missing from the symbol
  // table and kSuccess otherwise.
  Status ProcessSymbol(const std::smatch& match, MatchVector* symbols_refs,
                       std::string* new_pattern) const {
    // A reference with an empty definition regex is treated as a use.
    const bool is_def = match[SymbolMatchIndex::kDefRegex].length() != 0;
    const std::string symbol_name = match[SymbolMatchIndex::kName].str();

    if (is_def) {
      // Make sure the symbol isn't already referenced in this pattern.
      const auto symbol_iter =
          std::find_if(symbols_refs->begin(), symbols_refs->end(),
                       [&symbol_name](const SymbolVectorElem& ref) -> bool {
                         return ref.symbol_name() == symbol_name;
                       });
      if (symbol_iter != std::end(*symbols_refs)) {
        return Status::kRedefinition;
      }

      symbols_refs->emplace_back(true, symbol_name);
      new_pattern->append("(");
      new_pattern->append(match[SymbolMatchIndex::kDefRegex].str());
      new_pattern->append(")");
      return Status::kSuccess;
    }

    const auto symbol_map_iter = map_.find(symbol_name);
    if (symbol_map_iter == std::end(map_)) {
      return Status::kDefNotFound;
    }

    // The stored value is a literal taken from an earlier line, not a regex:
    // escape it so that characters such as '.', '[' or '(' match themselves
    // and can't introduce extra capture groups.
    const SymbolInfo& sym_info = symbol_map_iter->second;
    new_pattern->append("(");
    new_pattern->append(EscapeRegexLiteral(sym_info.matched_value()));
    new_pattern->append(")");
    symbols_refs->emplace_back(false, symbol_name);
    return Status::kSuccess;
  }

  // Parses the input pattern regex, processes symbols defs and uses inside
  // it, fills a raw pattern used for std::regex_search.
  //
  // Text between symbol references is copied to |raw_pattern| unchanged.
  // Returns the first non-success status reported by ProcessSymbol, if any.
  Status ParseSymbolsInPattern(const std::string& pattern,
                               std::string* raw_pattern,
                               MatchVector* symbols_refs) const {
    std::string::const_iterator low = pattern.cbegin();
    const std::string::const_iterator high = pattern.cend();
    std::smatch match;

    while (low != high) {
      // Search for a symbol reference.
      if (!std::regex_search(low, high, match, symbol_ref_regex_)) {
        // No more references: the rest of the pattern is taken as is.
        raw_pattern->append(low, high);
        break;
      }

      if (match.size() != SymbolMatchIndex::kSize) {
        return Status::kWrongPattern;
      }

      // Copy the text preceding the reference, then the reference itself.
      raw_pattern->append(match.prefix().str());

      const Status status = ProcessSymbol(match, symbols_refs, raw_pattern);
      if (status != Status::kSuccess) {
        return status;
      }

      // Continue right after the processed reference.
      low = match[SymbolMatchIndex::kFullSubmatch].second;
    }

    return Status::kSuccess;
  }

  // Checks that there are no symbol redefinitions and the symbols uses matched
  // literal values are equal to corresponding matched definitions.
  Status CheckSymbolsMatchedValues(const MatchVector& symbols_refs,
                                   const std::smatch& match) const {
    // There is a one-to-one correspondence between matched subexpressions and
    // symbols references in the vector (by construction).
    for (size_t vec_pos = 0, size = symbols_refs.size(); vec_pos < size;
         vec_pos++) {
      const SymbolVectorElem& elem = symbols_refs[vec_pos];
      const auto map_iter = map_.find(elem.symbol_name());
      if (elem.is_def()) {
        if (map_iter != std::end(map_)) {
          return Status::kRedefinition;
        }
      } else {
        DCHECK(map_iter != std::end(map_));
        // We replaced use with matched definition value literal.
        DCHECK_EQ(map_iter->second.matched_value().compare(match[vec_pos + 1]),
                  0);
      }
    }
    return Status::kSuccess;
  }

  // Commits symbols definitions and their matched values to the symbol table.
  // Uses are skipped; the submatch for the reference at position i of
  // |groups_vector| is match[i + 1].
  void CommitSymbolsDefinitions(const MatchVector& groups_vector,
                                const std::smatch& match) {
    for (size_t vec_pos = 0, size = groups_vector.size(); vec_pos < size;
         vec_pos++) {
      const size_t match_pos = vec_pos + 1;
      const SymbolVectorElem& elem = groups_vector[vec_pos];
      if (elem.is_def()) {
        auto emplace_res =
            map_.emplace(elem.symbol_name(), SymbolInfo(match[match_pos]));
        USE(emplace_res);  // Silence warning about unused variable.
        DCHECK(emplace_res.second == true);
      }
    }
  }

  // Regex recognizing symbol references inside patterns.
  const std::regex symbol_ref_regex_;
  // Symbol table: symbol name -> info on its committed definition.
  SymbolMap map_;
};
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and the parameters, this presumably checks
// the disassembly of the function called |function_name| against the regex
// patterns in |patterns_array| and returns whether the check passed - confirm
// against the definition.
bool CheckDisassemblyRegexPatterns(
const char* function_name, const std::vector<std::string>& patterns_array);
} // namespace internal
} // namespace v8
#endif // V8_CCTEST_DISASM_REGEX_HELPER_H_