Skip to content

Commit fa5754e

Browse files
committed
Merge PR #35, Stream Parser Support - pt3
2 parents 5f8a389 + db13d6a commit fa5754e

File tree

1 file changed

+83
-68
lines changed

1 file changed

+83
-68
lines changed

parse.go

Lines changed: 83 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -17,134 +17,149 @@ func LoadURL(url string) (*Node, error) {
1717
return nil, err
1818
}
1919
defer resp.Body.Close()
20-
return parse(resp.Body)
20+
return Parse(resp.Body)
2121
}
2222

23-
func parse(r io.Reader) (*Node, error) {
24-
var (
25-
decoder = xml.NewDecoder(r)
26-
doc = &Node{Type: DocumentNode}
27-
space2prefix = make(map[string]string)
28-
level = 0
29-
)
23+
type parser struct {
24+
decoder *xml.Decoder
25+
doc *Node
26+
space2prefix map[string]string
27+
level int
28+
prev *Node
29+
}
30+
31+
func createParser(r io.Reader) *parser {
32+
p := &parser{
33+
decoder: xml.NewDecoder(r),
34+
doc: &Node{Type: DocumentNode},
35+
space2prefix: make(map[string]string),
36+
level: 0,
37+
}
3038
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
31-
space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
32-
decoder.CharsetReader = charset.NewReaderLabel
33-
prev := doc
39+
p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
40+
p.decoder.CharsetReader = charset.NewReaderLabel
41+
p.prev = p.doc
42+
return p
43+
}
44+
45+
func (p *parser) parse() (*Node, error) {
3446
for {
35-
tok, err := decoder.Token()
36-
switch {
37-
case err == io.EOF:
38-
goto quit
39-
case err != nil:
47+
tok, err := p.decoder.Token()
48+
if err != nil {
4049
return nil, err
4150
}
4251

4352
switch tok := tok.(type) {
4453
case xml.StartElement:
45-
if level == 0 {
54+
if p.level == 0 {
4655
// missing XML declaration
4756
node := &Node{Type: DeclarationNode, Data: "xml", level: 1}
48-
addChild(prev, node)
49-
level = 1
50-
prev = node
57+
addChild(p.prev, node)
58+
p.level = 1
59+
p.prev = node
5160
}
5261
// https://www.w3.org/TR/xml-names/#scoping-defaulting
5362
for _, att := range tok.Attr {
5463
if att.Name.Local == "xmlns" {
55-
space2prefix[att.Value] = ""
64+
p.space2prefix[att.Value] = ""
5665
} else if att.Name.Space == "xmlns" {
57-
space2prefix[att.Value] = att.Name.Local
66+
p.space2prefix[att.Value] = att.Name.Local
5867
}
5968
}
6069

6170
if tok.Name.Space != "" {
62-
if _, found := space2prefix[tok.Name.Space]; !found {
71+
if _, found := p.space2prefix[tok.Name.Space]; !found {
6372
return nil, errors.New("xmlquery: invalid XML document, namespace is missing")
6473
}
6574
}
6675

6776
for i := 0; i < len(tok.Attr); i++ {
6877
att := &tok.Attr[i]
69-
if prefix, ok := space2prefix[att.Name.Space]; ok {
78+
if prefix, ok := p.space2prefix[att.Name.Space]; ok {
7079
att.Name.Space = prefix
7180
}
7281
}
7382

7483
node := &Node{
7584
Type: ElementNode,
7685
Data: tok.Name.Local,
77-
Prefix: space2prefix[tok.Name.Space],
86+
Prefix: p.space2prefix[tok.Name.Space],
7887
NamespaceURI: tok.Name.Space,
7988
Attr: tok.Attr,
80-
level: level,
89+
level: p.level,
8190
}
82-
//fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level))
83-
if level == prev.level {
84-
addSibling(prev, node)
85-
} else if level > prev.level {
86-
addChild(prev, node)
87-
} else if level < prev.level {
88-
for i := prev.level - level; i > 1; i-- {
89-
prev = prev.Parent
91+
//fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, node.level))
92+
if p.level == p.prev.level {
93+
addSibling(p.prev, node)
94+
} else if p.level > p.prev.level {
95+
addChild(p.prev, node)
96+
} else if p.level < p.prev.level {
97+
for i := p.prev.level - p.level; i > 1; i-- {
98+
p.prev = p.prev.Parent
9099
}
91-
addSibling(prev.Parent, node)
100+
addSibling(p.prev.Parent, node)
92101
}
93-
prev = node
94-
level++
102+
p.prev = node
103+
p.level++
95104
case xml.EndElement:
96-
level--
105+
p.level--
97106
case xml.CharData:
98-
node := &Node{Type: CharDataNode, Data: string(tok), level: level}
99-
if level == prev.level {
100-
addSibling(prev, node)
101-
} else if level > prev.level {
102-
addChild(prev, node)
103-
} else if level < prev.level {
104-
for i := prev.level - level; i > 1; i-- {
105-
prev = prev.Parent
107+
node := &Node{Type: CharDataNode, Data: string(tok), level: p.level}
108+
if p.level == p.prev.level {
109+
addSibling(p.prev, node)
110+
} else if p.level > p.prev.level {
111+
addChild(p.prev, node)
112+
} else if p.level < p.prev.level {
113+
for i := p.prev.level - p.level; i > 1; i-- {
114+
p.prev = p.prev.Parent
106115
}
107-
addSibling(prev.Parent, node)
116+
addSibling(p.prev.Parent, node)
108117
}
109118
case xml.Comment:
110-
node := &Node{Type: CommentNode, Data: string(tok), level: level}
111-
if level == prev.level {
112-
addSibling(prev, node)
113-
} else if level > prev.level {
114-
addChild(prev, node)
115-
} else if level < prev.level {
116-
for i := prev.level - level; i > 1; i-- {
117-
prev = prev.Parent
119+
node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
120+
if p.level == p.prev.level {
121+
addSibling(p.prev, node)
122+
} else if p.level > p.prev.level {
123+
addChild(p.prev, node)
124+
} else if p.level < p.prev.level {
125+
for i := p.prev.level - p.level; i > 1; i-- {
126+
p.prev = p.prev.Parent
118127
}
119-
addSibling(prev.Parent, node)
128+
addSibling(p.prev.Parent, node)
120129
}
121130
case xml.ProcInst: // Processing Instruction
122-
if prev.Type != DeclarationNode {
123-
level++
131+
if p.prev.Type != DeclarationNode {
132+
p.level++
124133
}
125-
node := &Node{Type: DeclarationNode, Data: tok.Target, level: level}
134+
node := &Node{Type: DeclarationNode, Data: tok.Target, level: p.level}
126135
pairs := strings.Split(string(tok.Inst), " ")
127136
for _, pair := range pairs {
128137
pair = strings.TrimSpace(pair)
129138
if i := strings.Index(pair, "="); i > 0 {
130139
addAttr(node, pair[:i], strings.Trim(pair[i+1:], `"`))
131140
}
132141
}
133-
if level == prev.level {
134-
addSibling(prev, node)
135-
} else if level > prev.level {
136-
addChild(prev, node)
142+
if p.level == p.prev.level {
143+
addSibling(p.prev, node)
144+
} else if p.level > p.prev.level {
145+
addChild(p.prev, node)
137146
}
138-
prev = node
147+
p.prev = node
139148
case xml.Directive:
140149
}
141-
142150
}
143-
quit:
144-
return doc, nil
145151
}
146152

147153
// Parse returns the parse tree for the XML from the given Reader.
148154
func Parse(r io.Reader) (*Node, error) {
149-
return parse(r)
155+
p := createParser(r)
156+
for {
157+
_, err := p.parse()
158+
if err == io.EOF {
159+
return p.doc, nil
160+
}
161+
if err != nil {
162+
return nil, err
163+
}
164+
}
150165
}

0 commit comments

Comments
 (0)