Skip to content

Commit 0cd4ffe

Browse files
committed
document xml decoder, fix pipeline edge cases
1 parent 99158d5 commit 0cd4ffe

File tree

2 files changed

+153
-105
lines changed

2 files changed

+153
-105
lines changed

xml/XMLdecoder.v

Lines changed: 119 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,57 @@
22
//////////////////////////////////////////////////////////////////////////////////
33
// Company:
44
// Engineer: Chris Shucksmith
5-
// Module: Scanning XML decoder, like a physical sax parser
5+
// Module: High speed serial XML classifier, like a physical sax parser
6+
//
7+
// XML messages are delivered byte at a time. Internally we buffer four bytes from
8+
// the input so that the comment/data streams can be separated. The input stream
9+
// can be paused for an arbitrary period by lowering 'valid' which holds the pipeline
10+
// and state machines in position.
11+
//
12+
// If eop is asserted, any 'valid' left in the pipeline is flushed irrespective of
13+
// the valid signal. For the purposes of look ahead, bytes beyond the eop are defined
14+
// as null so that processing can complete. One cycle after the last valid byte is
15+
// flushed, parser state is reset.
16+
//
17+
// As data bytes clock out of the 4 byte pipeline, several control signals are present
18+
// to qualify where the data is a) valid, b) in a comment, tag or data c) position
19+
// within the current document (tags visited at each nesting depth). Bytes within tags
20+
// are further classified into name, key and value elements.
21+
//
22+
// Transitions between tag nesting depths occur after presenting the '>' tag close
23+
// character, at which point tagDepth, depthPush, depthPop and depth stack s0..s7 are updated
24+
// for the cycle following a tag-close character '>'.
25+
//
26+
// After configuration an eop event is internally generated which ensures the nesting
27+
// stack is clear if it is synthesised to block ram.
28+
//
29+
// XML doctypes are treated as a special case of self-closing tag which does not
30+
// adjust the tag depth.
31+
//
32+
// This module can scan XML content at 1gb+ line speed on most mid range FPGAs
33+
//
634
//////////////////////////////////////////////////////////////////////////////////
735
module XMLDecoder(
836
input CLOCK,
9-
input reset,
37+
1038
input inValid,
39+
input inEop,
1140
input [7:0] in,
12-
input newMsg,
1341

14-
output outValid,
15-
output [7:0] out,
16-
output outNewMsg,
42+
output reg outValid = 0,
43+
output reg outEop = 0,
44+
output reg [7:0] out = 0,
45+
1746
output isData,
1847
output isTag,
19-
output isTagName,
20-
output isTagKey,
21-
output isTagValue,
22-
output isComment,
48+
output reg isTagName = 0,
49+
output reg isTagKey = 0,
50+
output reg isTagValue = 0,
51+
output reg isComment = 0,
2352

24-
output [3:0] tagDepth,
25-
output depthPush,
26-
output depthPop,
53+
output reg [3:0] tagDepth = 0,
54+
output reg depthPush = 0,
55+
output reg depthPop = 0,
2756

2857
output [7:0] s0,
2958
output [7:0] s1,
@@ -35,46 +64,41 @@ module XMLDecoder(
3564
output [7:0] s7
3665
);
3766

38-
reg rnnn = 1;
39-
reg rnn = 0;
40-
reg rn = 0;
41-
reg r = 0;
67+
reg eopn = 1;
68+
reg eopnn = 1;
69+
reg eopnnn = 1;
4270

4371
reg vnnn = 0;
4472
reg vnn = 0;
4573
reg vn = 0;
46-
reg v = 0;
74+
//g v = 0;
4775
reg vp = 0;
4876

4977
reg [7:0] snnn = 0;
5078
reg [7:0] snn = 0;
5179
reg [7:0] sn = 0;
52-
reg [7:0] s = 0;
80+
//g [7:0] out = 0;
5381
reg [7:0] sp = 0;
5482
reg [7:0] spp = 0;
5583
reg [7:0] sppp = 0;
5684

57-
reg _isTagName = 0;
58-
reg _isTagKey = 0;
59-
reg _isTagValue = 0;
60-
reg _isClosingTag = 0;
61-
reg _isSelfClosingTag = 0;
62-
reg _isComment = 0;
85+
reg isClosingTag = 0;
86+
reg isSelfClosingTag = 0;
87+
88+
reg intag = 0; // state of the input stream, after stripping comments: 1=tag 0=data
89+
6390

64-
reg intag = 0;
65-
reg [3:0] tagdepth = 0;
6691
reg [7:0] tagno [0:7]; // tag position at depth N.. if large enough will instantiate a block ram
67-
reg _isDepthPush = 0;
68-
reg _isDepthPop = 0;
6992

7093
// XML comment is <!--
7194
wire onTagStartNext = sn == "<" && !(snn=="!" && snnn=="-"); // '<' but not <!-
72-
wire onTagClose = (s == ">"); // '>'
73-
wire onSelfCloseTag = (s =="/" || s == "?") && sn == ">"; // consider XML doctype self-closing
95+
wire onTagClose = (out == ">"); // '>'
96+
wire onSelfCloseTag = (out =="/" || out == "?") && sn == ">"; // consider XML doctype self-closing
7497
wire onCloseThenData = onTagClose && sn != "<";
7598

76-
wire _isOpeningTag = intag && !_isSelfClosingTag && !_isClosingTag;
77-
wire _isData = outValid && !intag && !_isComment;
99+
wire isOpeningTag = intag && !isSelfClosingTag && !isClosingTag;
100+
wire _isData = outValid && !intag && !isComment;
101+
wire _isTag = intag && !isComment;
78102

79103
// export the stack depth positions
80104
assign s0 = tagno[0];
@@ -90,94 +114,92 @@ module XMLDecoder(
90114
initial $readmemh("xml/stack_zeros.txt", tagno);
91115

92116
always @(posedge CLOCK) begin
93-
if (inValid || vnn || vn || v) begin
117+
// pipeline the input so that we can see ahead by 4 'valid' characters to separate comment/data streams
118+
// If eop is set, continue to flush the pipeline irrespective of 'valid' being de-asserted
119+
120+
// extend (ripple) eop
121+
eopnnn <= inEop;
122+
eopnn <= eopnnn;
123+
eopn <= eopnn;
124+
outEop <= eopn;
125+
126+
if (inValid || eopn || eopnn || eopnnn || inEop) begin
127+
94128
// ripple for valid signal
95-
vnnn <= inValid;
96-
vnn <= vnnn;
97-
vn <= vnn;
98-
v <= vn;
99-
vp <= v;
129+
vnnn <= inValid;
130+
vnn <= vnnn;
131+
vn <= vnn;
132+
outValid <= vn;
133+
vp <= outValid;
100134
// ripple for data look ahead/behind
101-
snnn <= in;
102-
snn <= snnn;
103-
sn <= snn;
104-
s <= sn;
105-
sp <= s;
106-
spp <= sp;
107-
sppp<= spp;
108-
// ripple for newMsg
109-
rnnn <= newMsg;
110-
rnn <= rnnn;
111-
rn <= rnn;
112-
r <= rn;
135+
snnn <= in;
136+
snn <= snnn;
137+
sn <= snn;
138+
out <= sn;
139+
sp <= out;
140+
spp <= sp;
141+
sppp <= spp;
142+
end else begin
143+
outValid <= 0;
113144
end
114-
if (reset || rn) begin
115-
tagdepth <= 0;
116-
tagno[0] <= 0;
117-
tagno[1] <= 0;
118-
tagno[2] <= 0;
119-
tagno[3] <= 0;
120-
tagno[4] <= 0;
121-
tagno[5] <= 0;
122-
tagno[6] <= 0;
123-
intag <= 0;
124-
_isClosingTag <= 0;
125-
_isSelfClosingTag <= 0;
126-
_isTagName <= 0;
127-
_isTagKey <= 0;
128-
_isTagValue <= 0;
129-
_isComment <= 0;
130-
_isDepthPush <= 0;
131-
_isDepthPop <= 0;
132-
end else if (vn || v) begin
133-
// handle comments, look ahead to start, look behind to end
134-
_isComment <= ( (sn == "<" && snn == "!" && snnn == "-" && in == "-") || _isComment )
135-
&& !(s == ">" && sp == "-" && spp == "-");
145+
146+
if (outEop) begin // pipeline totally flushed, reset state
147+
tagDepth <= 0;
148+
tagno[0] <= 0;
149+
tagno[1] <= 0;
150+
tagno[2] <= 0;
151+
tagno[3] <= 0;
152+
tagno[4] <= 0;
153+
tagno[5] <= 0;
154+
tagno[6] <= 0;
155+
intag <= 0;
156+
isClosingTag <= 0;
157+
isSelfClosingTag <= 0;
158+
isTagName <= 0;
159+
isTagKey <= 0;
160+
isTagValue <= 0;
161+
isComment <= 0;
162+
depthPush <= 0;
163+
depthPop <= 0;
164+
end else if (vn || eopn || eopnn || eopnnn) begin // pipeline fully loaded with data or eop
165+
// handle comments, look ahead to start, look behind to end, one cycle ahead of data/tag state
166+
isComment <= (( (sn == "<" && !eopn) && (snn == "!" && !eopnn) && (snnn == "-" && !eopnnn) && (in == "-" && !inEop)) || isComment )
167+
&& !(out == ">" && sp == "-" && spp == "-");
136168

137169
// if we are not in a comment, stream is either a tag or data
138-
if (!_isComment) begin
170+
if (!isComment) begin
139171
intag <= (intag || onTagStartNext) && !(onTagClose && !onTagStartNext);
140-
_isTagName <= ((s == "<" && sn != "/") || (s=="/" && sp == "<") || _isTagName)
172+
isTagName <= ((out == "<" && sn != "/") || (out=="/" && sp == "<") || isTagName)
141173
&& !(sn == " " || sn == ">");
142174

143175
// for tag key/value logic, enable the alternator when intag & !isTagName
144176
// <tagname key=value key=value key=value>
145-
_isTagKey <= (intag && s==" " || _isTagKey) && sn!="=";
146-
_isTagValue <= (intag && s=="=" || _isTagValue) && !(sn == " " || sn == ">");
177+
isTagKey <= (intag && out==" " || isTagKey) && sn!="=";
178+
isTagValue <= (intag && out=="=" || isTagValue) && !(sn == " " || sn == ">");
147179

148-
_isClosingTag <= (s != ">" && _isClosingTag) || (s == "<" && sn == "/");
149-
_isSelfClosingTag <= (_isSelfClosingTag || onSelfCloseTag) && !onTagStartNext;
180+
isClosingTag <= (out != ">" && isClosingTag) || (out == "<" && sn == "/");
181+
isSelfClosingTag <= (isSelfClosingTag || onSelfCloseTag) && !onTagStartNext;
150182

151183
// a tag is either opening, closing or self-closing. At the end of the tag
152184
// (onTagClose) we adjust and flag changes to depth based on the three possibilities.
153-
tagdepth <= tagdepth + (onTagClose && _isOpeningTag)
154-
- (onTagClose && _isClosingTag);
185+
tagDepth <= tagDepth + (onTagClose && isOpeningTag)
186+
- (onTagClose && isClosingTag);
155187

156-
_isDepthPush <= onTagClose && _isOpeningTag;
157-
_isDepthPop <= onTagClose && _isClosingTag;
188+
depthPush <= onTagClose && isOpeningTag;
189+
depthPop <= onTagClose && isClosingTag;
158190

159191
if (onTagClose) begin
160-
if (_isClosingTag || _isSelfClosingTag) begin
161-
tagno[tagdepth] <= tagno[tagdepth] + 1;
162-
tagno[tagdepth+1] <= 0;
192+
if (isClosingTag || isSelfClosingTag) begin
193+
tagno[tagDepth] <= tagno[tagDepth] + 1;
194+
tagno[tagDepth+1] <= 0;
163195
end
164196
end
165197
end
166198
end
167199
end
168200

169-
assign outNewMsg = r;
170-
assign out = s;
171-
assign outValid = v;
172-
assign isComment = _isComment;
173-
assign isTag = intag && !_isComment;
201+
assign isTag = _isTag;
174202
assign isData = _isData;
175-
assign tagDepth = tagdepth;
176-
assign isTagKey = _isTagKey;
177-
assign isTagValue = _isTagValue;
178-
assign isTagName = _isTagName;
179-
assign depthPush = _isDepthPush;
180-
assign depthPop = _isDepthPop;
181203

182204
endmodule
183205

xml/XMLdecoder_test.v

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ module XMLdecoder_test;
1515
reg svalid = 0;
1616
reg [7:0] stream;
1717
reg reset = 0;
18-
reg newMsg = 1;
18+
reg eop = 0;
19+
1920
// Outputs
2021
wire [7:0] out;
2122
wire outValid;
@@ -44,7 +45,7 @@ module XMLdecoder_test;
4445
.in(stream),
4546
.inValid(svalid),
4647
.reset(reset),
47-
.newMsg(newMsg),
48+
.inEop(eop),
4849

4950
.out(out),
5051
.outValid(outValid),
@@ -70,7 +71,6 @@ module XMLdecoder_test;
7071
always #5 CLOCK = ~CLOCK;
7172
integer file;
7273
integer r;
73-
integer eof;
7474
integer i;
7575
integer overrun;
7676
reg [7:0] outNoNL;
@@ -94,26 +94,28 @@ module XMLdecoder_test;
9494
end
9595

9696
$display("Reading XML");
97-
$display(" i in | out dp + - ! t d n k v stack 0 1 2 3 4 5 6 7");
97+
$display(" ! comment, d data, t tag, n tagname, k tagkey, v tagvalue");
98+
$display(" i in | out dp + - ! d t n k v stack 0 1 2 3 4 5 6 7");
9899

99100
// Wait 100 ns for global reset to finish
100101
#100;
102+
eop <= $feof(file) != 0;
101103

102104
// stimulus
103-
while (overrun > 0) begin
105+
while (~eop || outValid) begin
106+
@(posedge CLOCK)
104107
stream <= $fgetc(file);
105-
newMsg <= 0;
106108
svalid <= $feof(file) == 0;
109+
eop <= $feof(file) != 0;
107110
i <= i+1;
108111
if ( !svalid ) begin
109112
overrun <= overrun - 1;
110113
end
111114
outNoNL = (out == 10) ? "." : out;
112115
$display(" %4d %b %x | %b %x %s %02d %b %b %b %b %b %b %b %b %1d %1d %1d %1d %1d %1d %1d %1d ",
113116
i, svalid, stream,
114-
outValid, out, outNoNL, tagDepth, depthPush, depthPop, isComment, isTag, isData, isTagName, isTagKey, isTagValue,
117+
outValid, out, outNoNL, tagDepth, depthPush, depthPop, isComment, isData, isTag, isTagName, isTagKey, isTagValue,
115118
s0, s1, s2, s3, s4, s5, s6, s7);
116-
#10;
117119

118120
end
119121

@@ -127,6 +129,30 @@ module XMLdecoder_test;
127129
$finish_and_return(1);
128130
end
129131

132+
@(posedge CLOCK)
133+
$display(" %4d %b %x | %b %x %s %02d %b %b %b %b %b %b %b %b %1d %1d %1d %1d %1d %1d %1d %1d ",
134+
i, svalid, stream,
135+
outValid, out, outNoNL, tagDepth, depthPush, depthPop, isComment, isData, isTag, isTagName, isTagKey, isTagValue,
136+
s0, s1, s2, s3, s4, s5, s6, s7);
137+
138+
@(posedge CLOCK)
139+
$display(" %4d %b %x | %b %x %s %02d %b %b %b %b %b %b %b %b %1d %1d %1d %1d %1d %1d %1d %1d ",
140+
i, svalid, stream,
141+
outValid, out, outNoNL, tagDepth, depthPush, depthPop, isComment, isData, isTag, isTagName, isTagKey, isTagValue,
142+
s0, s1, s2, s3, s4, s5, s6, s7);
143+
144+
if (tagDepth != 0) begin
145+
$display("depth did not reset");
146+
$finish_and_return(1);
147+
end
148+
if (s0 != 0) begin
149+
$display("stack did not reset");
150+
$finish_and_return(1);
151+
end
152+
153+
154+
#100;
155+
130156
$finish;
131157

132158
end

0 commit comments

Comments
 (0)