// Element with a closing tag, anchored at the start of the input.
// Group 1 is the tag name, group 3 is everything up to the LAST matching
// closing tag (the body is greedy, so nested same-name elements over-match).
var rfullTag = /^<([^\s>\/=.$<]+)(\s+[^=\s]+(?:=(?:"[^"]*"|'[^']*'|[^>\s]+))?)*\s*>([\s\S]*)<\/\1>/
// Element that consists of an opening tag only (void elements), with an
// optional trailing "/".
var rvoidTag = /^<([^\s>\/=.$<]+)(\s+([^=\s]+)(?:=("[^"]*"|'[^']*'|[^\s>]+))?)*\s*\/?>/
// Regex source for "attributes + closing >"; it is appended to "<" + tagName
// to build the per-tag opening-tag matcher.
// Every backslash must be doubled inside the string literal: a single "\s"
// collapses to the letter "s", which made unquoted values such as
// class=less fail to match.
var openStr = "(?:\\s+[^=\\s]+(?:=(?:\"[^\"]*\"|'[^']*'|[^>\\s]+))?)*\\s*>"
// Leading run of characters up to the next "<".
var rtext = /^[^<]+/
// HTML comment; group 1 is the text between the delimiters.
var rcomment = /^<!--([\w\W]*?)-->/
// Attribute list of an opening tag, through its closing ">" (global).
var rallAttrs = /(\s+[^\s>\/\/=]+(?:=(?:("|')(?:\\\2|\\?(?!\2)[\w\W])*\2|[^\s'">=]+))?)*\s*\/?>/g
// Virtual node constructors, loaded from the sibling vdom module.
var vdom = require("../vdom/index")
var VText = vdom.VText
var VComment = vdom.VComment
var VElement = vdom.VElement
// Unused draft of a matcher for a whole <script> element, kept for reference:
// /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi
// Tag names with raw-text content. The pattern is unanchored, so it matches
// any tag name that merely contains one of these words.
var rnocontent = /textarea|template|script|style/
var tagCache = {}// caches the per-tag opening-tag and closing-tag regexes
//=== === === === Build the virtual DOM tree === === === === =
/**
 * Parse an HTML string into a flat list of virtual nodes (text, comment and
 * element nodes). Element content is handed to VElement as an innerHTML
 * string; this function does not descend into it.
 *
 * Quoted strings are first replaced by same-length runs of "1" so that a ">"
 * or "<" inside an attribute value cannot confuse the tag regexes. All
 * matching is done on that masked copy, but every value given to a node is
 * cut from the original input at the same offsets, so attribute values and
 * quoted text survive intact.
 *
 * @param {string} aaa - HTML source.
 * @param {boolean} [force] - parse even when avalon.config.rbind does not
 *        match the source.
 * @returns {Array} virtual nodes in document order; an empty array when
 *        `force` is falsy and the source does not match avalon.config.rbind.
 */
function createVirtual(aaa, force) {
    var nodes = []
    if (!force && !avalon.config.rbind.test(aaa)) {
        return nodes
    }
    // Same-length masked copy: index i in `text` lines up with offset + i in `aaa`.
    var text = aaa.replace(rstring, function (quoted) {
        return new Array(quoted.length + 1).join("1")
    })
    var offset = 0
    while (true) {
        var matchText = ""
        var raw = ""
        var node = false
        var tagName, attrs
        var match = text.match(rtext)
        if (match) {// try a text node
            matchText = match[0]
            raw = aaa.slice(offset, offset + matchText.length)
            node = new VText(raw)
            console.log("文本节点", raw, "|")
        }
        if (!node) {// try a comment node
            match = text.match(rcomment)
            if (match) {
                matchText = match[0]
                raw = aaa.slice(offset, offset + matchText.length)
                node = new VComment(raw.slice(4, -3)) // drop "<!--" and "-->"
            }
        }
        if (!node) {// try an element that has a closing tag
            match = text.match(rfullTag)
            if (match) {
                var greedy = match[0] // greedy outerHTML, may run past the real end
                tagName = match[1]
                var escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
                var ropen = tagCache[tagName + "open"] ||
                        (tagCache[tagName + "open"] = new RegExp("<" + escaped + openStr, "g"))
                var rclose = tagCache[tagName + "close"] ||
                        (tagCache[tagName + "close"] = new RegExp("<\/" + escaped + ">", "g"))
                // Record where every same-name opening/closing tag starts.
                // Positions are kept as numbers: zero-padded 3-digit strings
                // sorted lexically go wrong once the markup passes offset 999.
                var marks = []
                /* jshint ignore:start */
                greedy.replace(rclose, function (tag, at) {
                    marks.push({at: at, open: false})
                    return new Array(tag.length + 1).join("1")
                }).replace(ropen, function (tag, at) {
                    marks.push({at: at, open: true})
                    return new Array(tag.length + 1).join("1")
                })
                /* jshint ignore:end */
                marks.sort(function (a, b) {
                    return a.at - b.at
                })
                var depth = 0
                var end = -1
                for (var i = 0, n = marks.length; i < n; i++) {
                    depth += marks[i].open ? 1 : -1
                    if (depth < 0) {
                        break
                    }
                    if (depth === 0) {
                        // "</" + tagName + ">" is tagName.length + 3 characters
                        end = marks[i].at + tagName.length + 3
                        break
                    }
                }
                // With no balancing closing tag, fall through and let the
                // opening tag be handled as a void element.
                if (end !== -1) {
                    matchText = greedy.slice(0, end) // the real outerHTML
                    raw = aaa.slice(offset, offset + matchText.length)
                    attrs = matchText.match(rallAttrs)[0] // attribute list plus ">"
                    var headEnd = tagName.length + 1 + attrs.length
                    node = new VElement(tagName,
                            raw.slice(tagName.length + 1, headEnd - 1),
                            raw.slice(headEnd, (tagName.length + 3) * -1))
                }
            }
        }
        if (!node) {// try a void / self-closing element
            match = text.match(rvoidTag)
            if (match) {
                matchText = match[0]
                tagName = match[1].toLowerCase()
                raw = aaa.slice(offset, offset + matchText.length)
                attrs = raw.slice(tagName.length + 1).replace(/\/?>$/, "")
                node = new VElement(tagName, attrs, "")
                node.isVoidTag = true
            }
        }
        if (!node) {
            break // nothing recognisable is left
        }
        nodes.push(node)
        text = text.slice(matchText.length)
        offset += matchText.length
    }
    return nodes
}
// Export the parser and also expose it as avalon.createVirtual.
module.exports = avalon.createVirtual = createVirtual
// Version from the 24th: mask out every string literal first, and streamline all the regexes.
/**
 * Return the text of the block comment written inside a function's body,
 * which lets multi-line text be embedded in source code.
 *
 * Everything up to and including the opening delimiter (plus an optional
 * "!" and one whitespace character) is removed from the function's source
 * text, and so is everything from the closing delimiter to the end.
 *
 * @param {Function} fn - function whose body holds a single block comment.
 * @returns {string} the comment's contents.
 */
function heredoc(fn) {
    return fn.toString().
            replace(/^[^\/]+\/\*!?\s?/, '').
            replace(/\*\/[^\/]+$/, '')
}
// Leading run of characters up to the next "<".
var rtext = /^[^<]+/
// HTML comment; group 1 is the text between the delimiters.
var rcomment = /^<!--([\w\W]*?)-->/
// The "&amp;" entity, replaced by "&" when attribute values are decoded.
// (Matching a bare "&" and replacing it with "&" changed nothing.)
var ramp = /&amp;/g
// A single- or double-quoted string literal, backslash escapes included.
var rstring = /(["'])(\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/g
// Cache of the per-tag opening-tag and closing-tag regexes.
var tagCache = {}
// Regex source for "attributes + closing >", appended to "<" + tagName.
// Attribute values need no quote handling here because quoted strings have
// already been swapped for placeholders.
var openStr = "(?:\\s+[^=\\s]+(?:\\=[^>\\s]+)?)*\\s*>"
// Element with a closing tag; the body is greedy, so it runs to the LAST
// closing tag of the same name.
var rfullTag = /^<([^\s>\/=.$<]+)(?:\s+[^=\s]+(?:=[^>\s]+)?)*\s*>(?:[\s\S]*)<\/\1>/
// Element that consists of an opening tag only (void elements or
// self-contained tags). Group 1 is the tag name, group 2 the raw attributes.
var rvoidTag = /^<([^\s>\/=.$<]+)\s*([^>]*?)\/?>/
// Placeholder registry: quoted strings are swapped for short "??<n>" keys
// while the markup is being matched, then swapped back.
var maps = {}
var number = 1
/**
 * Store a string and return the placeholder key that now stands for it.
 * @param {string} a - text to hide.
 * @returns {string} a unique key of the form "??<n>".
 */
function dig(a) {
    var key = "??" + number++
    maps[key] = a
    return key
}
// Matches every placeholder key produced by dig().
var rfill = /\?\?\d+/g
/**
 * Swap a placeholder key back for the string it stands for. The entry is
 * removed once it has been used.
 *
 * A key with no stored entry (already used, or text that only looks like a
 * placeholder) is returned unchanged. Returning undefined would make
 * String.prototype.replace insert the text "undefined".
 *
 * @param {string} a - placeholder key.
 * @returns {string} the stored string, or `a` itself when nothing is stored.
 */
function fill(a) {
    if (!Object.prototype.hasOwnProperty.call(maps, a)) {
        return a
    }
    var val = maps[a]
    delete maps[a]
    return val
}
/**
 * Append every element of `other` to `target`, in place.
 *
 * Elements are pushed one at a time: spreading a very large array through
 * Function.prototype.apply can exceed the engine's argument limit.
 *
 * @param {Array} target - array that receives the elements.
 * @param {Array} other - array (or array-like) to append; null/undefined
 *        appends nothing.
 */
function pushArray(target, other) {
    if (other == null) {
        return
    }
    for (var i = 0, n = other.length; i < n; i++) {
        target.push(other[i])
    }
}
// Unused draft of a matcher for a whole <script> element, kept for reference:
// /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi
// Tag names whose content createVirtual keeps as raw text (skipContent)
// instead of parsing it into children. The pattern is unanchored, so it
// matches any tag name that merely contains one of these words.
var rnocontent = /textarea|template|script|style/
function createVirtual(str, recursive) {
var text = recursive == true ? str.replace(rstring, dig) : str
var nodes = []
do {
var matchText = ""
var match = text.match(rtext)
var node = false
var attrs = []
if (match) {//尝试匹配文本
matchText = match[0]
node = {
type: "#text",
nodeValue: matchText.replace(rfill, fill)
if (!node) {//尝试匹配注释
match = text.match(rcomment)
if (match) {
matchText = match[0]
node = {
type: "#comment",
nodeValue: matchText.replace(rfill, fill)
if (!node) {//尝试匹配拥有闭标签的元素节点
match = text.match(rfullTag)
if (match) {
matchText = match[0]//贪婪匹配 outerHTML,可能匹配过多
var type = match[1].toLowerCase()//nodeName
var opens = []
var closes = []
var ropen = tagCache[type + "open"] ||
(tagCache[type + "open"] = new RegExp("<" + type + openStr, "g"))
var rclose = tagCache[type + "close"] ||
(tagCache[type + "close"] = new RegExp("<\/" + type + ">", "g"))
/* jshint ignore:start */
matchText.replace(ropen, function (_, b) {
opens.push(("0000" + b + "<").slice(-4))//取得所有开标签的位置
return new Array(_.length + 1).join("1")
}).replace(rclose, function (_, b) {
closes.push(("0000" + b + ">").slice(-4))//取得所有闭标签的位置
/* jshint ignore:end */
var pos = opens.concat(closes).sort()
var gtlt = pos.join("").replace(/\d+/g, "")
var k = 0, last = 0
for (var i = 0, n = gtlt.length; i < n; i++) {
var c = gtlt.charAt(i)
if (c === "<") {
k += 1
} else {
k -= 1
if (k === 0) {
last = i
var findex = parseFloat(pos[last]) + type.length + 3 // (</>为三个字符)
matchText = matchText.slice(0, findex) //取得正确的outerHTML
match = matchText.match(rvoidTag) //抽取所有属性
if (match[2]) {
attrs = parseAttrs(match[2])
var template = matchText.slice(match[0].length,
(type.length + 3) * -1) //抽取innerHTML
var innerHTML = template.replace(rfill, fill)
node = {
type: type,
props: attrs,
template: innerHTML,
children: []
if (node.props["ms-skip"]) {
node.skipContent = true
} else if (type === "option" || type === "xmp") {
type: "text",
template: innerHTML
} else if (rnocontent.test(type)) {
node.skipContent = true
} else {//script, noscript, template, textarea
pushArray(node.children, createVirtual(template, true))
if (!node) {
match = text.match(rvoidTag)
if (match) {//尝试匹配自闭合标签及注释节点
matchText = match[0]
type = match[1]
if (match[2]) {
attrs = parseAttrs(match[2])
node = {
type: type,
props: attrs,
template: "",
children: [],
isVoidTag: true
if (node) {
text = text.slice(matchText.length)
} else {
} while (1);
return nodes
/**
 * Turn the raw attribute portion of an opening tag into a list of
 * {name, value} pairs.
 *
 * Quoted values are expected to arrive as "??<n>" placeholders and are
 * restored through fill(). A restored value that is wrapped in matching
 * quotes has them removed and its &quot; / &amp; entities decoded. An
 * attribute without "=" gets the empty string as its value.
 *
 * @param {string} str - attribute text, e.g. 'id=??1 disabled'.
 * @returns {Array<{name: string, value: string}>} attributes in source order.
 */
function parseAttrs(str) {
    var attrs = []
    // Drop the whitespace around "=" so each attribute is a single token.
    var tokens = str.replace(/\s*=\s*/g, "=").match(/\S+/g) || []
    for (var i = 0, n = tokens.length; i < n; i++) {
        var attr = tokens[i]
        // Split on the FIRST "=" only, so an unquoted value such as
        // href=a?b=c keeps its own "=".
        var eq = attr.indexOf("=")
        if (eq === -1) {
            attrs.push({
                name: attr,
                value: ""
            })
            continue
        }
        var value = attr.slice(eq + 1).replace(rfill, fill)
        // Check the wrapping quotes directly. Calling test() on the shared
        // global string regex carries lastIndex from one call to the next,
        // which left later quoted values with their quotes still on.
        var quote = value.charAt(0)
        if (value.length > 1 && (quote === '"' || quote === "'") &&
                value.charAt(value.length - 1) === quote) {
            // Decode &amp; last so "&amp;quot;" yields the text "&quot;".
            value = value.slice(1, -1).
                    replace(/&quot;/g, '"').
                    replace(/&amp;/g, "&")
        }
        attrs.push({
            name: attr.slice(0, eq),
            value: value
        })
    }
    return attrs
}
// Demo input. The markup sits inside a block comment so that heredoc() can
// lift it out of the function's source text; without the comment delimiters
// the HTML is parsed as JavaScript and the file does not load.
var str = heredoc(function () {
    /*
<div ms-data-number="number"
>点我</div><div id=aaa><div>1111<b></b></div></div><div>222</div>
<br /><hr id=eee >
*/
})
var nodes = createVirtual(str)