@@ -33,15 +33,6 @@ const AttribNode = (name, value) => {
3333 } )
3434}
3535
36- // Frame factory for stack items in parseXML
37- function Frame ( node , scopingElement ) {
38- return {
39- node,
40- scopingElement,
41- children : node . value . children
42- }
43- }
44-
4536const parseXML = ( lexer ) => {
4637 /*
4738 How does the grammar look?
@@ -57,93 +48,89 @@ const parseXML = (lexer) => {
5748 | AttributeName: String
5849 | AttributeValue: String
5950 */
60- const rootNode = Node ( ROOT , { children : [ ] } )
61- const stack = [
62- Frame ( rootNode , Token ( ROOT , 'ROOT' ) )
63- ]
64- let currentFrame = stack [ stack . length - 1 ]
51+ const rootNode = Node ( ROOT , {
52+ children : parseExpr ( lexer , Token ( ROOT , 'ROOT' ) )
53+ } )
54+ return rootNode
55+ }
56+
57+ const parseExpr = ( lexer , scopingElement ) => {
58+ const children = [ ]
6559 while ( lexer . hasNext ( ) ) {
6660 const lexem = lexer . next ( )
6761 switch ( lexem . type ) {
68- case TOKEN_TYPE . OPEN_BRACKET :
69- handleOpenBracket ( lexer , stack )
70- currentFrame = stack [ stack . length - 1 ]
71- break
72- case TOKEN_TYPE . CLOSE_ELEMENT :
73- handleCloseElement ( lexem , stack )
74- currentFrame = stack [ stack . length - 1 ]
62+ case TOKEN_TYPE . OPEN_BRACKET : {
63+ const elementLexem = lexer . next ( )
64+ const [ elementAttributes , currentToken ] =
65+ parseElementAttributes ( lexer )
66+ let elementChildren = [ ]
67+ if ( currentToken . type !== TOKEN_TYPE . CLOSE_ELEMENT ) {
68+ elementChildren = parseExpr ( lexer , elementLexem )
69+ }
70+ if (
71+ elementChildren &&
72+ elementChildren . length > 0 &&
73+ elementChildren [ 0 ] . type === TOKEN_TYPE . CONTENT
74+ ) {
75+ elementChildren = reduceChildrenElements ( elementChildren )
76+ }
77+ children . push (
78+ ElementNode (
79+ elementLexem . value ,
80+ elementAttributes ,
81+ elementChildren
82+ )
83+ )
7584 break
76- case TOKEN_TYPE . CONTENT :
77- handleContent ( lexem , currentFrame )
85+ }
86+ case TOKEN_TYPE . CLOSE_ELEMENT : {
87+ if ( lexem . value === scopingElement . value ) return children
7888 break
79- case TOKEN_TYPE . EOF :
89+ }
90+ case TOKEN_TYPE . CONTENT : {
91+ children . push ( ContentNode ( lexem . value ) )
8092 break
81- default :
93+ }
94+ case TOKEN_TYPE . EOF : {
95+ return children
96+ }
97+ default : {
8298 throw new Error (
83- `Unknown Lexem type: ${ lexem . type } "${ lexem . value } , scoping element: ${ currentFrame . scopingElement . value } "`
99+ `Unknown Lexem type: ${ lexem . type } "${ lexem . value } , scoping element: ${ scopingElement . value } "`
84100 )
101+ }
85102 }
86103 }
87- return rootNode
104+ return children
88105}
89106
90- function handleOpenBracket ( lexer , stack ) {
91- const currentFrame = stack [ stack . length - 1 ]
92- const elementLexem = lexer . next ( )
93- let attribs = [ ]
107+ const parseElementAttributes = ( lexer ) => {
108+ const attribs = [ ]
94109 let currentToken = lexer . peek ( )
95- const areAttributesExpected = lexer . hasNext ( ) &&
96- ( currentToken && currentToken . type !== TOKEN_TYPE . CLOSE_BRACKET ) &&
97- ( currentToken && currentToken . type !== TOKEN_TYPE . CLOSE_ELEMENT )
98- if ( areAttributesExpected ) {
99- [ attribs , currentToken ] = collectAttributes ( lexer )
110+ if (
111+ ! lexer . hasNext ( ) ||
112+ ( currentToken && currentToken . type === TOKEN_TYPE . CLOSE_BRACKET ) ||
113+ ( currentToken && currentToken . type === TOKEN_TYPE . CLOSE_ELEMENT )
114+ ) {
115+ return [ attribs , currentToken ]
100116 }
101- const elementNode = ElementNode ( elementLexem . value , attribs , [ ] )
102- currentFrame . children . push ( elementNode )
103- if ( currentToken && currentToken . type === TOKEN_TYPE . CLOSE_ELEMENT ) {
104- return
105- }
106- stack . push ( Frame ( elementNode , elementLexem ) )
107- }
108-
109- function collectAttributes ( lexer ) {
110- const attribs = [ ]
111- let currentToken = lexer . next ( )
117+ currentToken = lexer . next ( )
112118 while (
113119 lexer . hasNext ( ) &&
114120 currentToken &&
115121 currentToken . type !== TOKEN_TYPE . CLOSE_BRACKET &&
116122 currentToken . type !== TOKEN_TYPE . CLOSE_ELEMENT
117123 ) {
118124 const attribName = currentToken
119- lexer . next ( ) // assignment token
125+ lexer . next ( ) //assignment token
120126 const attribValue = lexer . next ( )
121- attribs . push ( AttribNode ( attribName . value , attribValue . value ) )
127+ const attributeNode = AttribNode ( attribName . value , attribValue . value )
128+ attribs . push ( attributeNode )
122129 currentToken = lexer . next ( )
123130 }
124131 return [ attribs , currentToken ]
125132}
126133
127- function handleCloseElement ( lexem , stack ) {
128- const currentFrame = stack [ stack . length - 1 ]
129- if ( lexem . value === currentFrame . scopingElement . value ) {
130- let children = currentFrame . children
131- if (
132- children &&
133- children . length > 0 &&
134- children [ 0 ] . type === TOKEN_TYPE . CONTENT
135- ) {
136- children = reduceChildrenElements ( children )
137- currentFrame . node . value . children = children
138- }
139- stack . pop ( )
140- }
141- }
142-
143- function handleContent ( lexem , currentFrame ) {
144- currentFrame . children . push ( ContentNode ( lexem . value ) )
145- }
146-
147134function reduceChildrenElements ( elementChildren ) {
148135 let reduced = [ ] ,
149136 buffer = ''
@@ -167,7 +154,7 @@ function reduceChildrenElements(elementChildren) {
167154
168155function transpile ( xmlAsString , astConverter ) {
169156 const lexer = createLexer ( xmlAsString )
170- const ast = parseXML ( lexer )
157+ const ast = parseXML ( lexer , xmlAsString )
171158 if ( astConverter ) {
172159 return astConverter . convert ( ast )
173160 }
0 commit comments