-
Notifications
You must be signed in to change notification settings - Fork 3
/
HTMLLexicalParser.js
103 lines (94 loc) · 2.37 KB
/
HTMLLexicalParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/**
 * Small character-driven HTML tokenizer built as a state machine.
 *
 * Every state is a prototype method that receives one character and returns
 * the method that must handle the next character. Tokens are plain objects
 * of the form `{ type, value }` where `type` is one of:
 *
 *   - 'text'     : raw text between tags, e.g. `hello`
 *   - 'startTag' : `<` plus the lower-cased tag name, e.g. `<div`
 *   - 'attr'     : one raw attribute, e.g. `class="a b"`
 *   - 'endTag'   : a complete lower-cased closing tag, e.g. `</div>`
 *
 * Comments, doctypes and raw-text elements get no special treatment, and the
 * `/` of a self-closing tag (`<br/>`) is consumed without producing a token.
 *
 * @param {string} htmlString - Markup to tokenize.
 * @param {function({type: string, value: string}): void} [tokenHandler] -
 *   Optional callback invoked with each token as soon as it is produced.
 */
function HTMLLexicalParser(htmlString, tokenHandler) {
  // Characters of the token currently being assembled.
  this.token = [];
  // Every token emitted so far, in emission order.
  this.tokens = [];
  // Quote character of the attribute value being read, or null outside one.
  this.quote = null;
  this.htmlString = htmlString;
  this.tokenHandler = tokenHandler;
}

/**
 * Initial state, also re-entered after every completed tag.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.start = function (c) {
  if (c === '<') {
    this.token.push(c);
    return this.tagState;
  }
  return this.textState(c);
};

/**
 * Accumulates text until the next `<`, then emits it as a 'text' token.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.textState = function (c) {
  if (c === '<') {
    this.flushToken('text');
    // Hand the `<` back to the start state so it opens a tag.
    return this.start(c);
  }
  this.token.push(c);
  return this.textState;
};

/**
 * State right after `<`: decides between an opening and a closing tag.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.tagState = function (c) {
  if (c === '/') {
    this.token.push(c);
    return this.endTagState;
  }
  // Delegate instead of pushing `c` here so the first character of the tag
  // name is lower-cased exactly like the remaining ones.
  return this.startTagState(c);
};

/**
 * Reads the name of an opening tag.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.startTagState = function (c) {
  if (c === '>') {
    this.flushToken('startTag');
    return this.start;
  }
  if (c === '/') {
    this.flushToken('startTag');
    return this.selfClosingState;
  }
  if (this.isWhitespace(c)) {
    this.flushToken('startTag');
    return this.attrState;
  }
  // Any other character (letters, digits, `-`, ...) belongs to the name.
  this.token.push(c.toLowerCase());
  return this.startTagState;
};

/**
 * Reads attributes inside an opening tag, outside of any quoted value.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.attrState = function (c) {
  if (c === '>') {
    this.flushToken('attr');
    return this.start;
  }
  if (this.isWhitespace(c)) {
    this.flushToken('attr');
    return this.attrState;
  }
  if (c === '/') {
    // Could be the self-closing marker or part of an unquoted value;
    // the next character decides.
    return this.selfClosingState;
  }
  // A quote only opens a quoted value when it directly follows `=`.
  const opensQuotedValue =
    (c === '"' || c === "'") && this.token[this.token.length - 1] === '=';
  this.token.push(c);
  if (opensQuotedValue) {
    this.quote = c;
    return this.attrValueState;
  }
  return this.attrState;
};

/**
 * Reads a quoted attribute value verbatim (spaces and `>` included) until
 * the matching closing quote.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.attrValueState = function (c) {
  this.token.push(c);
  if (c === this.quote) {
    this.quote = null;
    return this.attrState;
  }
  return this.attrValueState;
};

/**
 * State after a `/` seen inside an opening tag.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.selfClosingState = function (c) {
  if (c === '>') {
    // `/>` ends the tag; emit whatever attribute was still pending.
    this.flushToken('attr');
    return this.start;
  }
  // The slash was ordinary content (e.g. `href=/path`): put it back.
  this.token.push('/');
  return this.attrState(c);
};

/**
 * Reads a closing tag and emits it, `>` included, as an 'endTag' token.
 *
 * @param {string} c - Current character.
 * @returns {Function} State handling the next character.
 */
HTMLLexicalParser.prototype.endTagState = function (c) {
  if (c === '>') {
    this.token.push(c);
    this.flushToken('endTag');
    return this.start;
  }
  // Whitespace such as in `</div >` is dropped; everything else is name.
  if (!this.isWhitespace(c)) {
    this.token.push(c.toLowerCase());
  }
  return this.endTagState;
};

/**
 * Tells whether `c` is an HTML whitespace character.
 *
 * @param {string} c - Single character.
 * @returns {boolean} True for space, tab, line feed, carriage return, form feed.
 */
HTMLLexicalParser.prototype.isWhitespace = function (c) {
  return ' \t\n\r\f'.includes(c);
};

/**
 * Emits the buffered characters as one token of the given type and clears
 * the buffer. An empty buffer produces no token.
 *
 * @param {string} type - Token type to emit.
 */
HTMLLexicalParser.prototype.flushToken = function (type) {
  const value = this.token.join('');
  this.token = [];
  if (value !== '') {
    this.emitToken(type, value);
  }
};

/**
 * Records a token and forwards it to the optional handler.
 *
 * @param {string} type - Token type.
 * @param {string} value - Token text.
 */
HTMLLexicalParser.prototype.emitToken = function (type, value) {
  const res = {
    type,
    value,
  };
  this.tokens.push(res);
  // Streaming: hand the token to the consumer as soon as it exists.
  if (this.tokenHandler) {
    this.tokenHandler(res);
  }
};

/**
 * Runs the state machine over `htmlString`. Tokens are appended to
 * `this.tokens` and streamed to `tokenHandler`. Text pending at the end of
 * the input is emitted; an unterminated tag is discarded. A null or
 * undefined `htmlString` is treated as empty input.
 */
HTMLLexicalParser.prototype.parse = function () {
  const input = this.htmlString == null ? '' : String(this.htmlString);
  let state = this.start;
  // Iterating the string directly walks code points, so characters outside
  // the BMP are never split into lone surrogates.
  for (const c of input) {
    state = state.call(this, c);
  }
  if (state === this.textState) {
    this.flushToken('text');
  }
  // Drop leftovers of an unterminated tag so a later run starts clean.
  this.token = [];
  this.quote = null;
};

/**
 * @returns {Array<{type: string, value: string}>} All tokens emitted so far.
 */
HTMLLexicalParser.prototype.getOutPut = function () {
  return this.tokens;
};