Skip to content

Commit

Permalink
Parser: support missing space between timestamp and unit.
Browse files Browse the repository at this point in the history
Issue #13 reports that at least one Tarmac producer starts its lines
with things like "271ns", where we've always seen "271 ns" before.

To implement this I had to take a Token structure that had already
been lexed, containing the whole string "271ns", and break it up into
two smaller Tokens covering sub-ranges of the input string, via a new
Token::split() method.
  • Loading branch information
statham-arm committed Dec 15, 2023
1 parent cfaff1e commit 7dc6f32
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 9 deletions.
3 changes: 3 additions & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,9 @@ where the fields are as follows:
No processing performed by Tarmac Trace Utilities attaches any
semantic significance to the choice of unit.

The space between *timestamp* and *unit* is usually present, but some
Tarmac producers omit it (for example ``271ns`` instead of ``271 ns``).

*type*
This is a mandatory word that indicates what type of event is
described by this trace line. The types recognized by Tarmac Trace
Expand Down
47 changes: 38 additions & 9 deletions lib/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

using std::back_inserter;
Expand All @@ -40,6 +41,7 @@ using std::end;
using std::endl;
using std::max;
using std::ostringstream;
using std::pair;
using std::set;
using std::string;
using std::vector;
Expand Down Expand Up @@ -148,6 +150,16 @@ struct Token {
{
return !(*this == rhs);
}

// Break this token's text in two at character offset `pos`, returning
// a pair of tokens built from the substrings [0, pos) and [pos, end).
// The recorded positions are set so that the first half ends, and the
// second half begins, at startpos + pos, and together they cover the
// same range as the original token. Only valid on word tokens.
pair<Token, Token> split(size_t pos) const {
    assert(isword());

    Token head(s.substr(0, pos));
    head.setpos(startpos, startpos + pos);

    Token tail(s.substr(pos));
    tail.setpos(startpos + pos, endpos);

    return pair<Token, Token>(head, tail);
}
};

class TarmacLineParserImpl {
Expand All @@ -162,6 +174,8 @@ class TarmacLineParserImpl {
set<string> unrecognised_tarmac_events_reported;
ParseReceiver *receiver;

// Unit words recognised as following a leading timestamp; consulted
// when parsing the start of each trace line.
static set<string> known_timestamp_units;

inline bool iswordchr(char c)
{
return isalnum((unsigned char)c) || c == '_' || c == '-' || c == '.' ||
Expand Down Expand Up @@ -281,24 +295,35 @@ class TarmacLineParserImpl {
Token tok = lex();

// Tarmac lines often, but not always, start with a timestamp.
Time time;
// If they don't, we default to the previous timestamp.
Time time = last_timestamp;
if (tok.isdecimal()) {
time = tok.decimalvalue();
highlight(tok, HL_TIMESTAMP);
tok = lex();

if (tok == "clk" || tok == "ns" || tok == "cs" || tok == "cyc" ||
tok == "tic" || tok == "ps") {
// Any of these is something we recognise as a unit of
// time, so skip over it.
if (tok.isword() && (known_timestamp_units.find(tok.s) !=
known_timestamp_units.end()))
tok = lex();
}

last_timestamp = time;
} else {
// If no timestamp, that means the event is simultaneous
// with the previous one.
time = last_timestamp;
// Another possibility is that the timestamp and its unit
// are smushed together in a single token, with no
// intervening space.
if (tok.isword()) {
size_t end_of_digits = tok.s.find_first_not_of(
Token::decimal_digits);
if (end_of_digits > 0 && end_of_digits != string::npos &&
(known_timestamp_units.find(tok.s.substr(end_of_digits)) !=
known_timestamp_units.end())) {
auto pair = tok.split(end_of_digits);
time = pair.first.decimalvalue();
highlight(pair.first, HL_TIMESTAMP);
last_timestamp = time;
tok = lex();
}
}
}

// Now we can have a trace source identifier (cpu or other component)
Expand Down Expand Up @@ -1079,3 +1104,7 @@ TarmacLineParser::TarmacLineParser(ParseParams params, ParseReceiver &rec)
TarmacLineParser::~TarmacLineParser() { delete pImpl; }

void TarmacLineParser::parse(const string &s) const { pImpl->parse(s); }

// Words accepted as the unit of a leading timestamp, whether they
// appear as a separate token ("271 ns") or attached directly to the
// digits ("271ns"). The parser just skips over a recognised unit; the
// choice of unit does not alter the parsed time value.
set<string> TarmacLineParserImpl::known_timestamp_units = {
"clk", "ns", "cs", "cyc", "tic", "ps",
};
2 changes: 2 additions & 0 deletions tests/parsertest.ref
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ Parse warning: unsupported system operation 'AT'
* MemoryEvent time=3039 read=false known=true addr=2002fa00 size=4 contents=beefbeef
--- Tarmac line: 3037 cyc MNR4O__I 00022ae4 f7ffffcb
* TextOnlyEvent time=3037 type="MNR4O__I" text="MNR4O__I 00022ae4 f7ffffcb"
--- Tarmac line: 271ns R r13 20001fff (MSP)
* RegisterEvent time=271 reg=r13 offset=0 bytes=20:00:1f:ff
--- Tarmac line: 1234567 cs E dummy header line to reset timestamp for next two lines
* TextOnlyEvent time=1234567 type="E" text="dummy header line to reset timestamp for next two lines"
--- Tarmac line: LD 000000007ff80fe0 ........ 44444444 ........ 2222..11 S:007ff80fe0 nGnRnE OSH
Expand Down
8 changes: 8 additions & 0 deletions tests/parsertest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ Tarmac Text Rev 3t
3039 cyc MNW4___D 2002fa00 efbeefbe
3037 cyc MNR4O__I 00022ae4 f7ffffcb

# ----------------------------------------------------------------------

# Trace lines seen in the output of the Tarmac RTL module from
# Cortex-M0-AT510-r0p0-00rel0 (RTL development kit). These don't have
# a space between the timestamp and the unit ("271ns", not "271 ns").

271ns R r13 20001fff (MSP)

# ----------------------------------------------------------------------
# Manually written tests

Expand Down

0 comments on commit 7dc6f32

Please sign in to comment.