@@ -17,134 +17,149 @@ func LoadURL(url string) (*Node, error) {
17
17
return nil , err
18
18
}
19
19
defer resp .Body .Close ()
20
- return parse (resp .Body )
20
+ return Parse (resp .Body )
21
21
}
22
22
23
- func parse (r io.Reader ) (* Node , error ) {
24
- var (
25
- decoder = xml .NewDecoder (r )
26
- doc = & Node {Type : DocumentNode }
27
- space2prefix = make (map [string ]string )
28
- level = 0
29
- )
23
+ type parser struct {
24
+ decoder * xml.Decoder
25
+ doc * Node
26
+ space2prefix map [string ]string
27
+ level int
28
+ prev * Node
29
+ }
30
+
31
+ func createParser (r io.Reader ) * parser {
32
+ p := & parser {
33
+ decoder : xml .NewDecoder (r ),
34
+ doc : & Node {Type : DocumentNode },
35
+ space2prefix : make (map [string ]string ),
36
+ level : 0 ,
37
+ }
30
38
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
31
- space2prefix ["http://www.w3.org/XML/1998/namespace" ] = "xml"
32
- decoder .CharsetReader = charset .NewReaderLabel
33
- prev := doc
39
+ p .space2prefix ["http://www.w3.org/XML/1998/namespace" ] = "xml"
40
+ p .decoder .CharsetReader = charset .NewReaderLabel
41
+ p .prev = p .doc
42
+ return p
43
+ }
44
+
45
+ func (p * parser ) parse () (* Node , error ) {
34
46
for {
35
- tok , err := decoder .Token ()
36
- switch {
37
- case err == io .EOF :
38
- goto quit
39
- case err != nil :
47
+ tok , err := p .decoder .Token ()
48
+ if err != nil {
40
49
return nil , err
41
50
}
42
51
43
52
switch tok := tok .(type ) {
44
53
case xml.StartElement :
45
- if level == 0 {
54
+ if p . level == 0 {
46
55
// missing XML declaration
47
56
node := & Node {Type : DeclarationNode , Data : "xml" , level : 1 }
48
- addChild (prev , node )
49
- level = 1
50
- prev = node
57
+ addChild (p . prev , node )
58
+ p . level = 1
59
+ p . prev = node
51
60
}
52
61
// https://www.w3.org/TR/xml-names/#scoping-defaulting
53
62
for _ , att := range tok .Attr {
54
63
if att .Name .Local == "xmlns" {
55
- space2prefix [att .Value ] = ""
64
+ p . space2prefix [att .Value ] = ""
56
65
} else if att .Name .Space == "xmlns" {
57
- space2prefix [att .Value ] = att .Name .Local
66
+ p . space2prefix [att .Value ] = att .Name .Local
58
67
}
59
68
}
60
69
61
70
if tok .Name .Space != "" {
62
- if _ , found := space2prefix [tok .Name .Space ]; ! found {
71
+ if _ , found := p . space2prefix [tok .Name .Space ]; ! found {
63
72
return nil , errors .New ("xmlquery: invalid XML document, namespace is missing" )
64
73
}
65
74
}
66
75
67
76
for i := 0 ; i < len (tok .Attr ); i ++ {
68
77
att := & tok .Attr [i ]
69
- if prefix , ok := space2prefix [att .Name .Space ]; ok {
78
+ if prefix , ok := p . space2prefix [att .Name .Space ]; ok {
70
79
att .Name .Space = prefix
71
80
}
72
81
}
73
82
74
83
node := & Node {
75
84
Type : ElementNode ,
76
85
Data : tok .Name .Local ,
77
- Prefix : space2prefix [tok .Name .Space ],
86
+ Prefix : p . space2prefix [tok .Name .Space ],
78
87
NamespaceURI : tok .Name .Space ,
79
88
Attr : tok .Attr ,
80
- level : level ,
89
+ level : p . level ,
81
90
}
82
- //fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, level))
83
- if level == prev .level {
84
- addSibling (prev , node )
85
- } else if level > prev .level {
86
- addChild (prev , node )
87
- } else if level < prev .level {
88
- for i := prev .level - level ; i > 1 ; i -- {
89
- prev = prev .Parent
91
+ //fmt.Println(fmt.Sprintf("start > %s : %d", node.Data, node. level))
92
+ if p . level == p . prev .level {
93
+ addSibling (p . prev , node )
94
+ } else if p . level > p . prev .level {
95
+ addChild (p . prev , node )
96
+ } else if p . level < p . prev .level {
97
+ for i := p . prev .level - p . level ; i > 1 ; i -- {
98
+ p . prev = p . prev .Parent
90
99
}
91
- addSibling (prev .Parent , node )
100
+ addSibling (p . prev .Parent , node )
92
101
}
93
- prev = node
94
- level ++
102
+ p . prev = node
103
+ p . level ++
95
104
case xml.EndElement :
96
- level --
105
+ p . level --
97
106
case xml.CharData :
98
- node := & Node {Type : CharDataNode , Data : string (tok ), level : level }
99
- if level == prev .level {
100
- addSibling (prev , node )
101
- } else if level > prev .level {
102
- addChild (prev , node )
103
- } else if level < prev .level {
104
- for i := prev .level - level ; i > 1 ; i -- {
105
- prev = prev .Parent
107
+ node := & Node {Type : CharDataNode , Data : string (tok ), level : p . level }
108
+ if p . level == p . prev .level {
109
+ addSibling (p . prev , node )
110
+ } else if p . level > p . prev .level {
111
+ addChild (p . prev , node )
112
+ } else if p . level < p . prev .level {
113
+ for i := p . prev .level - p . level ; i > 1 ; i -- {
114
+ p . prev = p . prev .Parent
106
115
}
107
- addSibling (prev .Parent , node )
116
+ addSibling (p . prev .Parent , node )
108
117
}
109
118
case xml.Comment :
110
- node := & Node {Type : CommentNode , Data : string (tok ), level : level }
111
- if level == prev .level {
112
- addSibling (prev , node )
113
- } else if level > prev .level {
114
- addChild (prev , node )
115
- } else if level < prev .level {
116
- for i := prev .level - level ; i > 1 ; i -- {
117
- prev = prev .Parent
119
+ node := & Node {Type : CommentNode , Data : string (tok ), level : p . level }
120
+ if p . level == p . prev .level {
121
+ addSibling (p . prev , node )
122
+ } else if p . level > p . prev .level {
123
+ addChild (p . prev , node )
124
+ } else if p . level < p . prev .level {
125
+ for i := p . prev .level - p . level ; i > 1 ; i -- {
126
+ p . prev = p . prev .Parent
118
127
}
119
- addSibling (prev .Parent , node )
128
+ addSibling (p . prev .Parent , node )
120
129
}
121
130
case xml.ProcInst : // Processing Instruction
122
- if prev .Type != DeclarationNode {
123
- level ++
131
+ if p . prev .Type != DeclarationNode {
132
+ p . level ++
124
133
}
125
- node := & Node {Type : DeclarationNode , Data : tok .Target , level : level }
134
+ node := & Node {Type : DeclarationNode , Data : tok .Target , level : p . level }
126
135
pairs := strings .Split (string (tok .Inst ), " " )
127
136
for _ , pair := range pairs {
128
137
pair = strings .TrimSpace (pair )
129
138
if i := strings .Index (pair , "=" ); i > 0 {
130
139
addAttr (node , pair [:i ], strings .Trim (pair [i + 1 :], `"` ))
131
140
}
132
141
}
133
- if level == prev .level {
134
- addSibling (prev , node )
135
- } else if level > prev .level {
136
- addChild (prev , node )
142
+ if p . level == p . prev .level {
143
+ addSibling (p . prev , node )
144
+ } else if p . level > p . prev .level {
145
+ addChild (p . prev , node )
137
146
}
138
- prev = node
147
+ p . prev = node
139
148
case xml.Directive :
140
149
}
141
-
142
150
}
143
- quit:
144
- return doc , nil
145
151
}
146
152
147
153
// Parse returns the parse tree for the XML from the given Reader.
148
154
func Parse (r io.Reader ) (* Node , error ) {
149
- return parse (r )
155
+ p := createParser (r )
156
+ for {
157
+ _ , err := p .parse ()
158
+ if err == io .EOF {
159
+ return p .doc , nil
160
+ }
161
+ if err != nil {
162
+ return nil , err
163
+ }
164
+ }
150
165
}
0 commit comments