-
Notifications
You must be signed in to change notification settings - Fork 163
/
Copy pathQueryable.swift
362 lines (285 loc) · 10.8 KB
/
Queryable.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
// Queryable.swift
// Copyright (c) 2015 Ce Zheng
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
import Foundation
import libxml2
/**
* The `Queryable` protocol is adopted by `XMLDocument`, `HTMLDocument` and `XMLElement`, denoting that they can search for elements using XPath or CSS selectors.
*
* Each lookup comes in a "collection" flavour (`xpath`, `tryXPath`, `css`) and a "first match" flavour (`firstChild(xpath:)`, `firstChild(css:)`); `eval(xpath:)` is for expressions whose result is read as a scalar value through `XPathFunctionResult`.
*/
public protocol Queryable {
/**
Returns the results for an XPath selector.
This is the non-throwing variant; use `tryXPath(_:)` when the caller needs the evaluation error.
- parameter xpath: XPath selector string.
- returns: An enumerable collection of results.
*/
func xpath(_ xpath: String) -> NodeSet
/**
Returns the results for an XPath selector, reporting evaluation failures by throwing.
- parameter xpath: XPath selector string.
- returns: An enumerable collection of results.
- Throws: last registered XMLError, most likely libXMLError with code and message.
*/
func tryXPath(_ xpath: String) throws -> NodeSet
/**
Returns the first element matching an XPath selector, or `nil` if there are no results.
- parameter xpath: The XPath selector.
- returns: The first matching element, or `nil`.
*/
func firstChild(xpath: String) -> XMLElement?
/**
Returns the results for a CSS selector.
- parameter css: The CSS selector string.
- returns: An enumerable collection of results.
*/
func css(_ css: String) -> NodeSet
/**
Returns the first element matching a CSS selector, or `nil` if there are no results.
- parameter css: The CSS selector.
- returns: The first matching element, or `nil`.
*/
func firstChild(css: String) -> XMLElement?
/**
Returns the result of evaluating an XPath expression that contains an XPath function.
- parameter xpath: The XPath query string.
- returns: The evaluation result, or `nil` when no result is available.
*/
func eval(xpath: String) -> XPathFunctionResult?
}
/// Holds the object produced by evaluating an XPath expression and exposes
/// its boolean, numeric and string fields as Swift values.
///
/// The wrapped libxml2 object is released with `xmlXPathFreeObject` when this
/// instance is deallocated.
open class XPathFunctionResult {
  /// `true` when the wrapped object's `boolval` field is non-zero. Read once, then cached.
  open fileprivate(set) lazy var boolValue: Bool = {
    let rawFlag = self.cXPath.pointee.boolval
    return rawFlag != 0
  }()

  /// The wrapped object's `floatval` field. Read once, then cached.
  open fileprivate(set) lazy var doubleValue: Double = {
    let rawNumber = self.cXPath.pointee.floatval
    return rawNumber
  }()

  /// The wrapped object's `stringval` field converted to a Swift string,
  /// or the empty string when the conversion yields `nil`. Read once, then cached.
  open fileprivate(set) lazy var stringValue: String = {
    guard let text = ^-^self.cXPath.pointee.stringval else {
      return ""
    }
    return text
  }()

  /// The underlying libxml2 XPath object; freed in `deinit`.
  fileprivate let cXPath: xmlXPathObjectPtr

  /// Wraps an XPath object pointer.
  /// - parameter cXPath: The pointer to wrap. Passing `nil` makes the initializer fail.
  internal init?(cXPath: xmlXPathObjectPtr?) {
    if let pointer = cXPath {
      self.cXPath = pointer
    } else {
      return nil
    }
  }

  deinit {
    xmlXPathFreeObject(cXPath)
  }
}
extension XMLDocument: Queryable {
  /**
  Returns the results for an XPath selector, evaluated from the document's root element.
  - parameter xpath: XPath selector string.
  - returns: An enumerable collection of results; the empty set when the document has no root element.
  */
  public func xpath(_ xpath: String) -> NodeSet {
    // Bind `root` once instead of nil-checking and then force-unwrapping it.
    guard let rootNode = root else {
      return XPathNodeSet.emptySet
    }
    return rootNode.xpath(xpath)
  }

  /**
  Returns the results for an XPath selector, evaluated from the document's root element,
  propagating any evaluation error.
  - parameter xpath: XPath selector string.
  - returns: An enumerable collection of results; the empty set when the document has no root element.
  - Throws: last registered XMLError, most likely libXMLError with code and message.
  */
  public func tryXPath(_ xpath: String) throws -> NodeSet {
    guard let rootNode = root else {
      return XPathNodeSet.emptySet
    }
    return try rootNode.tryXPath(xpath)
  }

  /**
  Returns the first element matching an XPath selector, or `nil` if there are no results.
  - parameter xpath: The XPath selector.
  - returns: The first matching element; `nil` when nothing matches or the document has no root element.
  */
  public func firstChild(xpath: String) -> XMLElement? {
    return root?.firstChild(xpath: xpath)
  }

  /**
  Returns the results for a CSS selector, evaluated from the document's root element.
  - parameter css: The CSS selector string.
  - returns: An enumerable collection of results; the empty set when the document has no root element.
  */
  public func css(_ css: String) -> NodeSet {
    // Same shape as `xpath(_:)`: a single optional binding, no force unwrap.
    guard let rootNode = root else {
      return XPathNodeSet.emptySet
    }
    return rootNode.css(css)
  }

  /**
  Returns the first element matching a CSS selector, or `nil` if there are no results.
  - parameter css: The CSS selector.
  - returns: The first matching element; `nil` when nothing matches or the document has no root element.
  */
  public func firstChild(css: String) -> XMLElement? {
    return root?.firstChild(css: css)
  }

  /**
  Returns the result of evaluating an XPath expression that contains an XPath function.
  - parameter xpath: The XPath query string.
  - returns: The evaluation result; `nil` when the root element is missing or its `eval(xpath:)` returns `nil`.
  */
  public func eval(xpath: String) -> XPathFunctionResult? {
    return root?.eval(xpath: xpath)
  }
}
extension XMLElement: Queryable {
  /**
  Returns the results for an XPath selector, using this element as the context node.
  Evaluation errors are discarded; use `tryXPath(_:)` to receive them.
  - parameter xpath: XPath selector string.
  - returns: An enumerable collection of results; the empty set when evaluation fails.
  */
  public func xpath(_ xpath: String) -> NodeSet {
    guard let cXPath = try? self.cXPath(xpathString: xpath) else {
      return XPathNodeSet.emptySet
    }
    return XPathNodeSet(cXPath: cXPath, document: document)
  }

  /**
  Returns the results for an XPath selector, using this element as the context node,
  and propagates evaluation errors to the caller.
  - parameter xpath: XPath selector string.
  - returns: An enumerable collection of results.
  - Throws: last registered XMLError, most likely libXMLError with code and message.
  */
  public func tryXPath(_ xpath: String) throws -> NodeSet {
    return XPathNodeSet(cXPath: try self.cXPath(xpathString: xpath), document: document)
  }

  /**
  Returns the first element matching an XPath selector, or `nil` if there are no results.
  - parameter xpath: The XPath selector.
  - returns: The first matching element, or `nil`.
  */
  public func firstChild(xpath: String) -> XMLElement? {
    return self.xpath(xpath).first
  }

  /**
  Returns the results for a CSS selector. The selector is translated with
  `XPath(fromCSS:)` and then evaluated like any other XPath query.
  - parameter css: The CSS selector string.
  - returns: An enumerable collection of results.
  */
  public func css(_ css: String) -> NodeSet {
    return xpath(XPath(fromCSS: css))
  }

  /**
  Returns the first element matching a CSS selector, or `nil` if there are no results.
  - parameter css: The CSS selector.
  - returns: The first matching element, or `nil`.
  */
  public func firstChild(css: String) -> XMLElement? {
    return self.css(css).first
  }

  /**
  Returns the result of evaluating an XPath expression that contains an XPath function.
  - parameter xpath: The XPath query string.
  - returns: The evaluation result, or `nil` when evaluation fails.
  */
  public func eval(xpath: String) -> XPathFunctionResult? {
    guard let evaluated = try? self.cXPath(xpathString: xpath) else {
      return nil
    }
    return XPathFunctionResult(cXPath: evaluated)
  }

  /**
  Evaluates an XPath expression with this element as the context node.

  Namespace prefixes are registered on the evaluation context in two passes:
  first the prefixed declarations found on this element and its ancestors, then
  the entries of `document.namespaces`, which therefore take precedence.

  The context is freed before returning; the resulting object is handed back
  to the caller without being freed here.

  - parameter xpathString: The XPath expression to evaluate.
  - returns: The libxml2 object produced by the evaluation.
  - Throws: `XMLError.lastError`, falling back to `.xpathError(code: 1207)`
    (presumably libxml2's `XML_XPATH_EXPR_ERROR` - confirm against xmlerror.h),
    when the context cannot be created or the expression cannot be evaluated.
  */
  fileprivate func cXPath(xpathString: String) throws -> xmlXPathObjectPtr {
    guard let context = xmlXPathNewContext(cNode.pointee.doc) else {
      throw XMLError.lastError(defaultError: .xpathError(code: 1207))
    }
    // Scheduled immediately so that every exit path below releases the context.
    defer {
      xmlXPathFreeContext(context)
    }

    // Exposes a Swift string as a NUL-terminated xmlChar buffer for the duration of `handler`.
    func withXMLChar(_ string: String, _ handler: (UnsafePointer<xmlChar>) -> Void) {
      string.utf8CString
        .map { xmlChar(bitPattern: $0) }
        .withUnsafeBufferPointer {
          handler($0.baseAddress!)
        }
    }

    context.pointee.node = cNode

    // Collect this element and its ancestors, stopping before the node that has
    // no parent (the same set of nodes the declarations are read from).
    var scopeChain: [xmlNodePtr] = []
    var node = cNode
    while node.pointee.parent != nil {
      scopeChain.append(node)
      node = node.pointee.parent
    }

    // Registers namespace prefixes declared in the document.
    // `xmlXPathRegisterNs` replaces an existing binding for the same prefix, so
    // the chain is walked outermost-first: a declaration closer to the context
    // node then shadows an ancestor's declaration, as XML namespace scoping requires.
    for scopeNode in scopeChain.reversed() {
      var curNs = scopeNode.pointee.nsDef
      while let ns = curNs {
        if let prefix = ns.pointee.prefix {
          xmlXPathRegisterNs(context, prefix, ns.pointee.href)
        }
        curNs = ns.pointee.next
      }
    }

    // Registers additional namespace prefixes.
    for (prefix, uri) in document.namespaces {
      withXMLChar(prefix) { prefix in
        withXMLChar(uri) { uri in
          xmlXPathRegisterNs(context, prefix, uri)
        }
      }
    }

    guard let xmlXPath = xmlXPathEvalExpression(xpathString, context) else {
      throw XMLError.lastError(defaultError: .xpathError(code: 1207))
    }
    return xmlXPath
  }
}
/// Pre-compiled regular expressions used to take a single CSS simple selector apart.
/// The patterns are constants, so a compilation failure is a programming error (`try!`).
private enum RegexConstants {
  /// Captures the identifier that follows `#`.
  static let idRegex = try! NSRegularExpression(pattern: "\\#([\\w-_]+)", options: [])
  /// Captures one class name that follows `.`; the name ends at the next `.`, `#`, `[` or `]`.
  static let classRegex = try! NSRegularExpression(pattern: "\\.([^\\.#\\[\\]]+)", options: [])
  /// Captures the text between `[` and `]`.
  static let attributeRegex = try! NSRegularExpression(pattern: "\\[([^\\[\\]]+)\\]", options: [])
}

/// Returns the text of capture group `group` of `match` inside `string`,
/// or `nil` when the group is absent or did not participate in the match.
private func capturedText(_ group: Int, of match: NSTextCheckingResult, in string: String) -> String? {
  guard match.numberOfRanges > group,
        let range = Range(match.range(at: group), in: string) else {
    return nil
  }
  return String(string[range])
}

/// Translates one whitespace-free CSS simple selector (`div`, `a.cls#id[attr]`, `.cls`)
/// into an XPath node test followed by its predicates.
private func xpathStep(forSimpleSelector token: String) -> String {
  guard let symbolRange = token.rangeOfCharacter(from: CharacterSet(charactersIn: "#.[]")) else {
    // Plain element name or `*`: usable as a node test as-is.
    return token
  }

  // Everything before the first selector symbol is the element name. When it is
  // missing, `*` is emitted exactly once so the predicates have a node test.
  let elementName = String(token[..<symbolRange.lowerBound])
  var step = elementName.isEmpty ? "*" : elementName

  let tokenRange = NSRange(location: 0, length: token.utf16.count)
  let attributeMatches = RegexConstants.attributeRegex.matches(in: token, options: [], range: tokenRange)

  // Id and class selectors are looked up with the attribute selectors removed, so
  // a `.` or `#` inside an attribute value is not mistaken for a class or an id.
  let bareToken = RegexConstants.attributeRegex.stringByReplacingMatches(
    in: token, options: [], range: tokenRange, withTemplate: "")
  let bareRange = NSRange(location: 0, length: bareToken.utf16.count)

  if let match = RegexConstants.idRegex.firstMatch(in: bareToken, options: [], range: bareRange),
     let identifier = capturedText(1, of: match, in: bareToken) {
    step += "[@id = '\(identifier)']"
  }
  for match in RegexConstants.classRegex.matches(in: bareToken, options: [], range: bareRange) {
    if let className = capturedText(1, of: match, in: bareToken) {
      step += "[contains(concat(' ',normalize-space(@class),' '),' \(className) ')]"
    }
  }
  for match in attributeMatches {
    if let attribute = capturedText(1, of: match, in: token) {
      step += "[@\(attribute)]"
    }
  }
  return step
}

/// Converts a CSS selector into an XPath expression relative to the context node.
///
/// Comma-separated selectors become alternatives joined with ` | `. Inside one
/// selector the whitespace-separated tokens are either combinators (`>`, `+`, `~`)
/// or simple selectors; two simple selectors with no combinator between them are
/// joined with the descendant axis. Empty expressions and empty tokens (from
/// repeated or surrounding whitespace, including newlines) are ignored.
///
/// - parameter css: The CSS selector string.
/// - returns: The XPath expression, or the empty string when `css` holds no selector.
internal func XPath(fromCSS css: String) -> String {
  var xpathExpressions = [String]()

  for expression in css.components(separatedBy: ",") {
    let tokens = expression
      .components(separatedBy: CharacterSet.whitespacesAndNewlines)
      .filter { !$0.isEmpty }
    if tokens.isEmpty {
      continue
    }

    // "./" joined with "/" yields the leading ".//" (any descendant of the context node).
    var xpathComponents = ["./"]
    // Axis requested by the most recent combinator; `nil` when none is pending.
    var prefix: String? = nil

    for (idx, token) in tokens.enumerated() {
      switch token {
      case ">": prefix = ""
      case "+": prefix = "following-sibling::*[1]/self::"
      case "~": prefix = "following-sibling::"
      case "*" where idx != 0 && prefix == nil:
        // Bare descendant wildcard: "/*" joined with "/" gives "//*".
        xpathComponents.append("/*")
      default:
        // A pending combinator wins; otherwise every token after the first is a descendant.
        let axis = prefix ?? (idx == 0 ? "" : "descendant::")
        prefix = nil
        xpathComponents.append(axis + xpathStep(forSimpleSelector: token))
      }
    }
    xpathExpressions.append(xpathComponents.joined(separator: "/"))
  }
  return xpathExpressions.joined(separator: " | ")
}