diff --git a/Sources/Attribute.swift b/Sources/Attribute.swift
index f95c3d9..df48a0f 100644
--- a/Sources/Attribute.swift
+++ b/Sources/Attribute.swift
@@ -19,7 +19,9 @@ open class Attribute {
"selected", "sortable", "truespeed", "typemustmatch"
])
+ @usableFromInline
var key: [UInt8]
+ @usableFromInline
var value: [UInt8]
public init(key: [UInt8], value: [UInt8]) throws {
@@ -86,15 +88,16 @@ open class Attribute {
*/
public func html() -> String {
let accum = StringBuilder()
- html(accum: accum, out: (Document("")).outputSettings())
+ html(accum: accum, out: (Document([])).outputSettings())
return accum.toString()
}
-
+
+ /// Serialises this attribute into `accum`: the key first, then — unless
+ /// `shouldCollapseAttribute(out:)` reports that the value should be omitted —
+ /// the escaped value wrapped between `UTF8Arrays.attributeEqualsQuoteMark`
+ /// and `UTF8Arrays.quoteMark`.
+ /// - Parameters:
+ ///   - accum: Builder that receives the serialised bytes.
+ ///   - out: Output settings consulted for collapsing and escaping.
+ @inlinable
public func html(accum: StringBuilder, out: OutputSettings ) {
accum.append(key)
if (!shouldCollapseAttribute(out: out)) {
accum.append(UTF8Arrays.attributeEqualsQuoteMark)
- Entities.escape(accum, Array(value), out, true, false, false)
+ // Escape directly into the builder's byte buffer; the `true` argument is `inAttribute`.
+ Entities.escape(&accum.buffer, value, out, true, false, false)
accum.append(UTF8Arrays.quoteMark)
}
}
diff --git a/Sources/Attributes.swift b/Sources/Attributes.swift
index 80d8f6a..4df3f04 100644
--- a/Sources/Attributes.swift
+++ b/Sources/Attributes.swift
@@ -22,15 +22,17 @@ import Foundation
*
*/
open class Attributes: NSCopying {
-
public static var dataPrefix: [UInt8] = "data-".utf8Array
// Stored by lowercased key, but key case is checked against the copy inside
// the Attribute on retrieval.
+ @usableFromInline
lazy var attributes: [Attribute] = []
internal var lowercasedKeysCache: [[UInt8]]? = nil
- public init() {}
+ /// Creates an empty attribute collection.
+ public init() {
+ // Reserve room for 16 attributes up front.
+ // NOTE(review): `attributes` is declared `lazy`; touching it here initialises it
+ // for every instance, so the `lazy` modifier no longer defers anything — confirm intended.
+ attributes.reserveCapacity(16)
+ }
@usableFromInline
internal func updateLowercasedKeysCache() {
@@ -81,11 +83,13 @@ open class Attributes: NSCopying {
@param key attribute key
@param value attribute value
*/
+ @inlinable
open func put(_ key: [UInt8], _ value: [UInt8]) throws {
let attr = try Attribute(key: key, value: value)
put(attribute: attr)
}
+ @inlinable
open func put(_ key: String, _ value: String) throws {
return try put(key.utf8Array, value.utf8Array)
}
@@ -95,6 +99,7 @@ open class Attributes: NSCopying {
@param key attribute key
@param value attribute value
*/
+ @inlinable
open func put(_ key: [UInt8], _ value: Bool) throws {
if (value) {
try put(attribute: BooleanAttribute(key: key))
@@ -107,6 +112,7 @@ open class Attributes: NSCopying {
Set a new attribute, or replace an existing one by (case-sensitive) key.
@param attribute attribute
*/
+ @inlinable
open func put(attribute: Attribute) {
let key = attribute.getKeyUTF8()
if let ix = attributes.firstIndex(where: { $0.getKeyUTF8() == key }) {
@@ -229,13 +235,14 @@ open class Attributes: NSCopying {
*/
open func html()throws -> String {
let accum = StringBuilder()
- try html(accum: accum, out: Document("").outputSettings()) // output settings a bit funky, but this html() seldom used
+ try html(accum: accum, out: Document([]).outputSettings()) // output settings a bit funky, but this html() seldom used
return accum.toString()
}
+ @inlinable
public func html(accum: StringBuilder, out: OutputSettings ) throws {
for attr in attributes {
- accum.append(" ")
+ accum.append(UTF8Arrays.whitespace)
attr.html(accum: accum, out: out)
}
}
diff --git a/Sources/BooleanAttribute.swift b/Sources/BooleanAttribute.swift
index 1a5b834..cf73564 100644
--- a/Sources/BooleanAttribute.swift
+++ b/Sources/BooleanAttribute.swift
@@ -16,6 +16,7 @@ open class BooleanAttribute: Attribute {
* Create a new boolean attribute from unencoded (raw) key.
* @param key attribute key
*/
+ @usableFromInline
init(key: [UInt8]) throws {
try super.init(key: key, value: [])
}
diff --git a/Sources/Document.swift b/Sources/Document.swift
index fb798ca..7021ba4 100644
--- a/Sources/Document.swift
+++ b/Sources/Document.swift
@@ -421,8 +421,10 @@ public class OutputSettings: NSCopying {
*/
public enum Syntax {case html, xml}
- private var _escapeMode: Entities.EscapeMode = Entities.EscapeMode.base
- private var _encoder: String.Encoding = String.Encoding.utf8 // Charset.forName("UTF-8")
+ @usableFromInline
+ internal var _escapeMode: Entities.EscapeMode = Entities.EscapeMode.base
+ @usableFromInline
+ internal var _encoder: String.Encoding = String.Encoding.utf8 // Charset.forName("UTF-8")
private var _prettyPrint: Bool = true
private var _outline: Bool = false
private var _indentAmount: UInt = 1
@@ -438,6 +440,7 @@ public class OutputSettings: NSCopying {
* The default escape mode is base
.
* @return the document's current escape mode
*/
+ @inlinable
public func escapeMode() -> Entities.EscapeMode {
return _escapeMode
}
@@ -449,6 +452,7 @@ public class OutputSettings: NSCopying {
* @return the document's output settings, for chaining
*/
@discardableResult
+ @inlinable
public func escapeMode(_ escapeMode: Entities.EscapeMode) -> OutputSettings {
self._escapeMode = escapeMode
return self
@@ -462,9 +466,11 @@ public class OutputSettings: NSCopying {
* input charset. Otherwise, it defaults to UTF-8.
* @return the document's current charset.
*/
+ @inlinable
public func encoder() -> String.Encoding {
return _encoder
}
+ @inlinable
public func charset() -> String.Encoding {
return _encoder
}
diff --git a/Sources/Elements.swift b/Sources/Elements.swift
index 0f08521..00593fc 100644
--- a/Sources/Elements.swift
+++ b/Sources/Elements.swift
@@ -190,7 +190,7 @@ open class Elements: NSCopying {
let sb: StringBuilder = StringBuilder()
for element: Element in this {
if !sb.isEmpty {
- sb.append(" ")
+ sb.append(UTF8Arrays.whitespace)
}
sb.append(try element.text(trimAndNormaliseWhitespace: trimAndNormaliseWhitespace))
}
diff --git a/Sources/Entities.swift b/Sources/Entities.swift
index 110e636..028cc9c 100644
--- a/Sources/Entities.swift
+++ b/Sources/Entities.swift
@@ -204,162 +204,95 @@ public class Entities {
}
+ /// Escapes `string` for output under the given settings and returns the result as a new `String`.
+ /// Calls the byte-level `escape` with `inAttribute`, `normaliseWhite` and `stripLeadingWhite` all `false`.
public static func escape(_ string: String, _ out: OutputSettings) -> String {
- let accum = StringBuilder()//string.characters.count * 2
- escape(accum, string.utf8Array, out, false, false, false)
- // try {
- //
- // } catch (IOException e) {
- // throw new SerializationException(e) // doesn't happen
- // }
- return accum.toString()
+ let source = string.utf8Array
+ // Pre-size the output to twice the input's UTF-8 length.
+ var escaped: [UInt8] = []
+ escaped.reserveCapacity(source.count * 2)
+ escape(&escaped, source, out, false, false, false)
+ return String(decoding: escaped, as: UTF8.self)
+ }
+
+ /// Returns how many bytes the UTF-8 sequence introduced by `byte` occupies.
+ ///
+ /// Only a well-formed lead byte yields a multi-byte length (`110xxxxx` → 2,
+ /// `1110xxxx` → 3, `11110xxx` → 4). ASCII, stray continuation bytes
+ /// (`0x80...0xBF`) and invalid lead bytes (`0xF8...0xFF`) yield 1, so a caller
+ /// stepping through a buffer consumes a malformed byte on its own instead of
+ /// taking the following bytes (which may need escaping) along with it.
+ /// - Parameter byte: First byte of the sequence.
+ /// - Returns: A length in `1...4`.
+ @inline(__always)
+ internal static func utf8CharLength(for byte: UInt8) -> Int {
+     switch byte {
+     case 0xC0...0xDF: return 2
+     case 0xE0...0xEF: return 3
+     case 0xF0...0xF7: return 4
+     default: return 1 // ASCII, continuation byte, or invalid lead byte
+     }
}
// this method is ugly, and does a lot. but other breakups cause rescanning and stringbuilder generations
+ @usableFromInline
static func escape(
- _ accum: StringBuilder,
+ _ accum: inout [UInt8],
_ string: [UInt8],
_ out: OutputSettings,
_ inAttribute: Bool,
_ normaliseWhite: Bool,
_ stripLeadingWhite: Bool
) {
- var lastWasWhite = false
- var reachedNonWhite = false
- let escapeMode: EscapeMode = out.escapeMode()
- let encoder: String.Encoding = out.encoder()
-
- var i = 0
- while i < string.count {
- let byte = string[i]
-
- if normaliseWhite && byte.isWhitespace {
+ let escapeMode = out.escapeMode()
+ let encoder = out.encoder()
+ var i = 0, n = string.count
+ var lastWasWhite = false, reachedNonWhite = false
+ while i < n {
+ let b = string[i]
+ if normaliseWhite && b.isWhitespace {
var j = i
- // Skip all consecutive whitespace
- while j < string.count && string[j].isWhitespace {
- j += 1
- }
- // If leading or consecutive whitespace should be skipped
+ while j < n && string[j].isWhitespace { j += 1 }
if (!reachedNonWhite && stripLeadingWhite) || lastWasWhite {
- i = j
- continue
+ i = j; continue
}
- accum.append(spaceString) // Append one space (normalize)
- lastWasWhite = true
- i = j
- continue
+ accum.append(0x20)
+ lastWasWhite = true; i = j; continue
}
lastWasWhite = false
reachedNonWhite = true
-
- if byte < 0x80 {
- // Single-byte ASCII character
- switch byte {
- case 0x26: // '&'
- accum.append(ampEntityUTF8)
- case 0xA0: // Non-breaking space
- if escapeMode != .xhtml {
- accum.append(nbspEntityUTF8)
- } else {
- accum.append(xa0EntityUTF8)
- }
- case 0x3C: // '<'
- if !inAttribute || escapeMode == .xhtml {
- accum.append(ltEntityUTF8)
- } else {
- accum.append(byte)
- }
- case 0x3E: // '>'
- if !inAttribute {
- accum.append(gtEntityUTF8)
- } else {
- accum.append(byte)
- }
- case 0x22: // '"'
- if inAttribute {
- accum.append(quotEntityUTF8)
- } else {
- accum.append(byte)
- }
+ if b < 0x80 {
+ switch b {
+ case 0x26: accum.append(contentsOf: ampEntityUTF8)
+ case 0xA0: accum.append(contentsOf: escapeMode == .xhtml ? xa0EntityUTF8 : nbspEntityUTF8)
+ case 0x3C:
+ if !inAttribute || escapeMode == .xhtml { accum.append(contentsOf: ltEntityUTF8) } else { accum.append(b) }
+ case 0x3E:
+ if !inAttribute { accum.append(contentsOf: gtEntityUTF8) } else { accum.append(b) }
+ case 0x22:
+ if inAttribute { accum.append(contentsOf: quotEntityUTF8) } else { accum.append(b) }
default:
- if encoder == .ascii || encoder == .utf8 || encoder == .utf16 || canEncode(byte: byte, encoder: encoder) {
- accum.append(byte)
+ if encoder == .ascii || encoder == .utf8 || encoder == .utf16 || canEncode(byte: b, encoder: encoder) {
+ accum.append(b)
} else {
- appendEncoded(accum: accum, escapeMode: escapeMode, bytes: [byte])
+ appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: [b])
}
}
i += 1
} else {
- // Multi-byte UTF-8 character
- var charBytes: [UInt8] = []
- var remainingBytes = 0
-
- if byte & 0xE0 == 0xC0 {
- // Two-byte character
- remainingBytes = 1
- } else if byte & 0xF0 == 0xE0 {
- // Three-byte character
- remainingBytes = 2
- } else if byte & 0xF8 == 0xF0 {
- // Four-byte character
- remainingBytes = 3
+ let len = utf8CharLength(for: b)
+ let end = i + len <= n ? i + len : n
+ let charBytes = Array(string[i.. 0, i + 1 < string.count {
- i += 1
- let nextByte = string[i]
- if nextByte & 0xC0 == 0x80 {
- charBytes.append(nextByte)
- remainingBytes -= 1
- } else {
- // Invalid UTF-8 sequence
- appendEncoded(accum: accum, escapeMode: escapeMode, bytes: [byte])
- break
- }
- }
-
- if remainingBytes == 0 {
- // Successfully collected a valid multi-byte character
- if canEncode(bytes: charBytes, encoder: encoder) {
- accum.append(charBytes)
- } else {
- appendEncoded(accum: accum, escapeMode: escapeMode, bytes: charBytes)
- }
+ appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: charBytes)
}
- i += 1
+ i += len
}
}
}
-
+
+ /// Appends an entity reference for the character encoded by `bytes` to `accum`.
+ ///
+ /// If `escapeMode.nameForCodepoint(bytes)` yields a name, `&name;` is written.
+ /// Otherwise `&#x…;` is written, built from `String.toHexString` of the first
+ /// Unicode scalar decoded from `bytes`; when `bytes` does not decode as UTF-8,
+ /// the `String.toHexString` rendering of each raw byte is written instead.
+ /// - Parameters:
+ ///   - accum: Byte buffer the entity is appended to.
+ ///   - escapeMode: Escape mode used for the named-entity lookup.
+ ///   - bytes: UTF-8 bytes of the single character to encode.
@inlinable
- internal static func appendEncoded(accum: StringBuilder, escapeMode: EscapeMode, bytes: [UInt8]) {
+ internal static func appendEncoded(accum: inout [UInt8], escapeMode: EscapeMode, bytes: [UInt8]) {
if let name = escapeMode.nameForCodepoint(bytes) {
- // Append named entity (e.g., "&")
accum.append(0x26) // '&'
- accum.append(name)
+ // Named entity: '&' + name + ';'
+ accum.append(contentsOf: name)
accum.append(0x3B) // ';'
} else {
- // Convert bytes into a UnicodeScalar
guard let scalar = String(bytes: bytes, encoding: .utf8)?.unicodeScalars.first else {
- // Fallback for invalid encoding
- accum.append([0x26, 0x23, 0x78]) // ''
- for byte in bytes {
- accum.append(String.toHexString(n: Int(byte)))
- }
- accum.append(0x3B) // ';'
+ // Fallback when the bytes are not valid UTF-8: emit the raw bytes as hex.
+ accum.append(contentsOf: [0x26, 0x23, 0x78]) // '&#x'
+ for b in bytes { accum.append(contentsOf: String.toHexString(n: Int(b)).utf8Array) }
+ accum.append(0x3B) // ';'
return
}
-
- // Append numeric entity for the scalar
- accum.append([0x26, 0x23, 0x78]) // ''
- accum.append(String.toHexString(n: Int(scalar.value)))
- accum.append(0x3B) // ';'
+ // Numeric entity for the decoded scalar: '&#x' + hex + ';'
+ accum.append(contentsOf: [0x26, 0x23, 0x78]) // '&#x'
+ accum.append(contentsOf: String.toHexString(n: Int(scalar.value)).utf8Array)
+ accum.append(0x3B) // ';'
}
}
diff --git a/Sources/Node.swift b/Sources/Node.swift
index bc5a785..00e4015 100644
--- a/Sources/Node.swift
+++ b/Sources/Node.swift
@@ -38,7 +38,6 @@ open class Node: Equatable, Hashable {
@usableFromInline
internal var normalizedTagNameIndex: [[UInt8]: [Weak]]? = nil
-// internal lazy var normalizedTagNameIndex: [[UInt8]: [Weak]] = [:]
@usableFromInline
internal var isQueryIndexDirty: Bool = false
@@ -313,10 +312,12 @@ open class Node: Equatable, Hashable {
* Get the number of child nodes that this node holds.
* @return the number of child nodes that this node holds.
*/
+ @inline(__always)
public func childNodeSize() -> Int {
return childNodes.count
}
+ @inline(__always)
public func hasChildNodes() -> Bool {
return !childNodes.isEmpty
}
@@ -706,7 +707,7 @@ open class Node: Equatable, Hashable {
// if this node has no document (or parent), retrieve the default output settings
func getOutputSettings() -> OutputSettings {
- return ownerDocument() != nil ? ownerDocument()!.outputSettings() : (Document("")).outputSettings()
+ // Look the owner document up once; without one, use the settings of a fresh empty document.
+ return ownerDocument()?.outputSettings() ?? Document([]).outputSettings()
}
/**
@@ -909,7 +910,10 @@ internal extension Node {
func rebuildQueryIndexesForAllTags() {
var newIndex: [[UInt8]: [Weak]] = [:]
var queue: [Node] = [self]
- queue.reserveCapacity(childNodeSize())
+
+ let childNodeCount = childNodeSize()
+ newIndex.reserveCapacity(childNodeCount * 4)
+ queue.reserveCapacity(childNodeCount)
var index = 0
while index < queue.count {
diff --git a/Sources/String.swift b/Sources/String.swift
index 5533d40..72418b7 100644
--- a/Sources/String.swift
+++ b/Sources/String.swift
@@ -12,6 +12,7 @@ extension UInt8 {
/// Checks if the byte represents a whitespace character:
/// Space (0x20), Tab (0x09), Newline (0x0A), Carriage Return (0x0D),
/// Form Feed (0x0C), or Vertical Tab (0x0B).
+ @inline(__always)
var isWhitespace: Bool {
switch self {
case 0x20, // Space
@@ -177,22 +178,27 @@ extension ArraySlice: Comparable where Element == UInt8 {
}
extension String {
+ @inline(__always)
public var utf8Array: [UInt8] {
return Array(self.utf8)
}
+ @inline(__always)
var utf8ArraySlice: ArraySlice {
return ArraySlice(self.utf8)
}
+ @inline(__always)
func equals(_ string: [UInt8]?) -> Bool {
return self.utf8Array == string
}
+ @inline(__always)
subscript (i: Int) -> Character {
return self[self.index(self.startIndex, offsetBy: i)]
}
+ @inline(__always)
subscript (i: Int) -> String {
return String(self[i] as Character)
}
diff --git a/Sources/StringUtil.swift b/Sources/StringUtil.swift
index 61bad64..daf4aa1 100644
--- a/Sources/StringUtil.swift
+++ b/Sources/StringUtil.swift
@@ -186,7 +186,7 @@ open class StringUtil {
if ((stripLeading && !reachedNonWhite) || lastWasWhite) {
continue
}
- accum.append(" ")
+ accum.append(UTF8Arrays.whitespace)
lastWasWhite = true
} else {
accum.append(c)
diff --git a/Sources/TextNode.swift b/Sources/TextNode.swift
index 9e4decd..91ce38e 100644
--- a/Sources/TextNode.swift
+++ b/Sources/TextNode.swift
@@ -136,7 +136,7 @@ open class TextNode: Node {
let par: Element? = parent() as? Element
let normaliseWhite = out.prettyPrint() && par != nil && !Element.preserveWhitespace(par!)
- Entities.escape(accum, getWholeTextUTF8(), out, false, normaliseWhite, false)
+ Entities.escape(&accum.buffer, getWholeTextUTF8(), out, false, normaliseWhite, false)
}
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
diff --git a/Sources/Token.swift b/Sources/Token.swift
index f1a138a..4078c25 100644
--- a/Sources/Token.swift
+++ b/Sources/Token.swift
@@ -136,7 +136,8 @@ open class Token {
_pendingAttributeValueS = nil
}
- func finaliseTag() throws {
+ @inlinable
+ func finaliseTag() throws {
// finalises for emit
if (_pendingAttributeName != nil) {
// todo: check if attribute name exists; if so, drop and error
@@ -144,11 +145,13 @@ open class Token {
}
}
+ @inlinable
func name() throws -> [UInt8] { // preserves case, for input into Tag.valueOf (which may drop case)
try Validate.isFalse(val: _tagName == nil || _tagName!.isEmpty)
return _tagName!
}
+ @inline(__always)
func normalName() -> [UInt8]? { // loses case, used in tree building for working out where in tree it should go
return _normalName
}
@@ -160,10 +163,12 @@ open class Token {
return self
}
+ @inline(__always)
func isSelfClosing() -> Bool {
return _selfClosing
}
+ @inline(__always)
func getAttributes() -> Attributes {
return _attributes
}
diff --git a/Sources/Tokeniser.swift b/Sources/Tokeniser.swift
index 48cd087..9a2760c 100644
--- a/Sources/Tokeniser.swift
+++ b/Sources/Tokeniser.swift
@@ -209,9 +209,14 @@ final class Tokeniser {
@discardableResult
@inlinable
func createTagPending(_ start: Bool) -> Token.Tag {
- let token: Token.Tag = start ? Token.StartTag() : Token.EndTag()
- tagPending = token
- return token
+ // Reuse the tokeniser's `startPending` / `endPending` token rather than
+ // allocating a new one: pick it, reset it, and make it the pending tag.
+ let recycled: Token.Tag = start ? startPending : endPending
+ recycled.reset()
+ tagPending = recycled
+ return recycled
}
@inlinable
diff --git a/Sources/UTF8Arrays.swift b/Sources/UTF8Arrays.swift
index 5b4e979..3c38ba3 100644
--- a/Sources/UTF8Arrays.swift
+++ b/Sources/UTF8Arrays.swift
@@ -36,6 +36,7 @@ public enum UTF8Arrays {
public static let rt = "rt".utf8Array
public static let rtc = "rtc".utf8Array
public static let page = "page".utf8Array
+ public static let class_ = "class".utf8Array
public static let table = "table".utf8Array
public static let tbody = "tbody".utf8Array
public static let th = "th".utf8Array
@@ -119,6 +120,7 @@ public enum UTF8ArraySlices {
public static let rt = UTF8Arrays.rt[...]
public static let rtc = UTF8Arrays.rtc[...]
public static let page = UTF8Arrays.page[...]
+ public static let class_ = UTF8Arrays.class_[...]
public static let table = UTF8Arrays.table[...]
public static let tbody = UTF8Arrays.tbody[...]
public static let th = UTF8Arrays.th[...]