Skip to content

Commit 9ab9744

Browse files
committed
Reduce change
1 parent d522fbd commit 9ab9744

File tree

5 files changed

+436
-466
lines changed

5 files changed

+436
-466
lines changed

Sources/SKCore/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ add_library(SKCore STATIC
1313
LanguageServer.swift
1414
MainFilesProvider.swift
1515
PathPrefixMapping.swift
16-
SplitShellCommand.swift
1716
Toolchain.swift
1817
ToolchainRegistry.swift
1918
XCToolchainPlist.swift)

Sources/SKCore/CompilationDatabase.swift

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,3 +239,336 @@ extension CompilationDatabase.Command: Codable {
239239
try container.encodeIfPresent(output, forKey: .output)
240240
}
241241
}
242+
243+
/// Splits a Unix shell-escaped command line into its unescaped arguments.
///
/// The parser walks the UTF-8 view of `content` one code unit at a time:
///  - Only the ASCII space separates arguments.
///  - Outside quotes and inside double quotes, a backslash is dropped and the code unit after it is taken literally.
///  - Inside single quotes every code unit up to the closing quote is taken literally.
///  - A quote that is never closed extends to the end of the input.
fileprivate struct UnixCommandParser {
  private static let space = UInt8(ascii: " ")
  private static let doubleQuote = UInt8(ascii: "\"")
  private static let singleQuote = UInt8(ascii: "\'")
  private static let backslash = UInt8(ascii: "\\")

  /// The command line being split.
  var content: Substring
  /// Cursor into `content.utf8`, pointing at the next unread code unit.
  var i: Substring.UTF8View.Index
  /// The arguments completed so far.
  var result: [String] = []

  /// The code unit under the cursor. Must not be read once `done` is `true`.
  var ch: UInt8 { content.utf8[i] }
  /// Whether the cursor has reached the end of `content`.
  var done: Bool { i == content.endIndex }

  init(_ string: Substring) {
    content = string
    i = string.utf8.startIndex
  }

  /// Moves the cursor forward by one code unit.
  mutating func next() {
    i = content.utf8.index(after: i)
  }

  /// Moves the cursor forward by one code unit, asserting (debug builds only) that the unit stepped over is `c`.
  mutating func next(expect c: UInt8) {
    assert(c == ch)
    next()
  }

  /// Consumes all of `content` and returns the split arguments.
  mutating func parse() -> [String] {
    while !done {
      if ch == Self.space {
        // Separator between arguments; nothing to record.
        next()
      } else {
        parseString()
      }
    }
    return result
  }

  /// Parses one argument, which may be a concatenation of plain, single-quoted and double-quoted pieces, and appends
  /// it to `result`. Stops at an unquoted space or at the end of the input.
  mutating func parseString() {
    var argument = ""
    while !done, ch != Self.space {
      switch ch {
      case Self.doubleQuote: parseDoubleQuotedString(into: &argument)
      case Self.singleQuote: parseSingleQuotedString(into: &argument)
      default: parsePlainString(into: &argument)
      }
    }
    result.append(argument)
  }

  /// Parses a `"`-delimited piece starting at the opening quote and appends its unescaped contents to `str`.
  mutating func parseDoubleQuotedString(into str: inout String) {
    next(expect: Self.doubleQuote)
    var segmentStart = i
    while !done {
      let unit = ch
      if unit == Self.doubleQuote {
        // Closing quote: flush the pending segment and step past the quote.
        str += content[segmentStart..<i]
        next()
        return
      }
      if unit == Self.backslash {
        // Flush what precedes the backslash, then drop the backslash itself.
        str += content[segmentStart..<i]
        next()
        segmentStart = i
        // A trailing backslash has nothing left to escape.
        if done { break }
      }
      // Step over an ordinary unit, or over the unit that was just escaped so that it becomes part of the next
      // segment without being interpreted.
      next()
    }
    // Unterminated quote: keep whatever was collected.
    str += content[segmentStart..<i]
  }

  /// Parses a `'`-delimited piece starting at the opening quote and appends its contents verbatim to `str`.
  mutating func parseSingleQuotedString(into str: inout String) {
    next(expect: Self.singleQuote)
    let segmentStart = i
    while !done, ch != Self.singleQuote {
      next()
    }
    str += content[segmentStart..<i]
    // Step past the closing quote if there is one.
    if !done {
      next()
    }
  }

  /// Parses an unquoted piece and appends its unescaped contents to `str`. Stops, without consuming it, at a space or
  /// at either kind of quote.
  mutating func parsePlainString(into str: inout String) {
    var segmentStart = i
    while !done {
      let unit = ch
      if unit == Self.doubleQuote || unit == Self.singleQuote || unit == Self.space {
        break
      }
      if unit == Self.backslash {
        // Flush what precedes the backslash, then drop the backslash itself.
        str += content[segmentStart..<i]
        next()
        segmentStart = i
        // A trailing backslash has nothing left to escape.
        if done { break }
      }
      next()
    }
    str += content[segmentStart..<i]
  }
}
345+
346+
/// Splits a shell-escaped command line into its arguments and removes the escaping.
///
/// Arguments are separated by spaces. A backslash outside quotes or inside double quotes makes the following
/// character literal; inside single quotes everything, including backslashes, is taken verbatim.
///
/// Examples (right-hand side written as Swift string literals):
///
/// ```
/// abc def     -> ["abc", "def"]
/// abc\ def    -> ["abc def"]
/// abc"\""def  -> ["abc\"def"]
/// abc'\"'def  -> ["abc\\\"def"]
/// ```
///
/// See clang's `unescapeCommandLine()`.
///
/// - Parameter cmd: The complete command line.
/// - Returns: The unescaped arguments in the order they appear in `cmd`.
public func splitShellEscapedCommand(_ cmd: String) -> [String] {
  var splitter = UnixCommandParser(Substring(cmd))
  let arguments = splitter.parse()
  return arguments
}
362+
363+
// MARK: - Windows
364+
365+
/// Character classification used when splitting Windows command lines.
fileprivate extension Character {
  /// Whether the character is a space or a horizontal tab.
  ///
  /// This is deliberately narrower than a general Unicode whitespace test: only these two characters separate
  /// arguments.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether the character is a space, a horizontal tab or NUL.
  var isWhitespaceOrNull: Bool {
    return self.isWhitespace || self == "\0"
  }

  /// Whether the character needs special treatment while scanning an argument, i.e. it cannot simply be copied into
  /// the argument being built.
  ///
  /// Special characters are the argument separators (space, tab, NUL), the double quote and, outside the command
  /// name, the backslash. NUL is included so that this check agrees with `isWhitespaceOrNull`: a NUL terminates an
  /// argument instead of being copied into it.
  ///
  /// - Parameter inCommandName: `true` while scanning the leading command name, where `\` is an ordinary character.
  /// - Returns: `true` if the character is special in the given mode.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    if isWhitespaceOrNull {
      return true
    }
    if self == #"""# {
      return true
    }
    if !inCommandName && self == #"\"# {
      return true
    }
    return false
  }
}
392+
393+
/// Splits a Windows command line into its arguments, undoing quote and backslash escaping.
fileprivate struct WindowsCommandParser {
  /// The complete command line handed to the parser.
  private let content: String

  /// `true` while the leading command name is being read. In that mode a `\` is an ordinary character and never
  /// escapes a quote.
  private var parsingCommandName: Bool

  /// Position in `content` of the character that will be examined next.
  private var currentCharacterIndex: String.UTF8View.Index

  /// Arguments completed so far.
  private var result: [String] = []

  /// The character at `currentCharacterIndex`, or `nil` once all of `content` has been read.
  private var currentCharacter: Character? {
    if currentCharacterIndex < content.endIndex {
      return content[currentCharacterIndex]
    }
    return nil
  }

  /// The character following `currentCharacter`, or `nil` if there is none.
  ///
  /// Must only be read while `currentCharacter` is non-`nil`.
  private var peek: Character? {
    let following = content.index(after: currentCharacterIndex)
    guard following < content.endIndex else {
      return nil
    }
    return content[following]
  }

  init(_ string: String, initialCommandName: Bool) {
    content = string
    currentCharacterIndex = string.startIndex
    parsingCommandName = initialCommandName
  }

  /// Designated entry point: splits the whole command line and returns its arguments.
  mutating func parse() -> [String] {
    while let character = currentCharacter {
      if character.isWhitespaceOrNull {
        // Separators between arguments carry no content.
        consume()
      } else {
        result.append(parseSingleArgument())
      }
    }
    return result
  }

  /// Returns the current character and advances past it. Traps if the input is exhausted.
  @discardableResult
  private mutating func consume() -> Character {
    guard let character = currentCharacter else {
      preconditionFailure("Nothing to consume")
    }
    currentCharacterIndex = content.index(after: currentCharacterIndex)
    return character
  }

  /// Advances past the current character, asserting (debug builds only) that it is `expectedCharacter`.
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    consume()
  }

  /// Parses one argument and returns it with all escaping undone (e.g. `\"` is returned as `"`).
  ///
  /// On return the parser is positioned at the separator that ended the argument, or at the end of the input.
  /// Reaching a separator also ends command-name mode.
  mutating func parseSingleArgument() -> String {
    var argument = ""
    while let character = currentCharacter {
      guard character.isWindowsSpecialChar(inCommandName: parsingCommandName) else {
        // Ordinary character: copy it through.
        argument.append(consume())
        continue
      }

      switch character {
      case _ where character.isWhitespaceOrNull:
        // End of this argument. Whatever follows is no longer the command name.
        parsingCommandName = false
        return argument
      case "\"":
        argument += parseQuoted()
      case #"\"#:
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        argument += parseBackslash()
      default:
        preconditionFailure("unexpected special character")
      }
    }
    return argument
  }

  /// Parses a quoted string, starting at its opening `"`, and returns the contents without the surrounding quotes.
  ///
  /// If the closing quote is missing, everything up to the end of the input is returned.
  mutating func parseQuoted() -> String {
    // Discard the opening quote. It's not part of the unescaped text.
    consume(expect: "\"")

    var unquoted = ""
    while let character = currentCharacter {
      if character == "\"" {
        guard peek == "\"" else {
          // This is the closing quote. Discard it and return.
          consume(expect: "\"")
          return unquoted
        }
        // Two adjacent quotes inside a quoted string stand for one literal quote. For example
        //   `" a "" b "`
        // represents the string
        //   ` a " b `
        consume(expect: "\"")
        consume(expect: "\"")
        unquoted += "\""
      } else if character == "\\" && !parsingCommandName {
        unquoted += parseBackslash()
      } else {
        unquoted.append(consume())
      }
    }
    return unquoted
  }

  /// Consumes a run of backslashes, plus the double quote after it if that quote is escaped, and returns the text the
  /// run stands for.
  ///
  /// Backslashes both separate path components and escape double quotes on Windows, so their meaning depends on
  /// what follows the run:
  ///
  /// * Even number of backslashes followed by `"`: one backslash is produced per pair, and the quote is left
  ///   unconsumed so that the caller interprets it as the start or end of a quoted string.
  /// * Odd number of backslashes followed by `"`: one backslash is produced per pair, the remaining backslash
  ///   escapes the quote, and a literal `"` is produced. The quote is consumed.
  /// * Anything else after the run: the backslashes are produced literally.
  mutating func parseBackslash() -> String {
    let runEnd = content[currentCharacterIndex...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let runLength = content.distance(from: currentCharacterIndex, to: runEnd)
    let quoteFollows = runEnd != content.endIndex && content[runEnd] == "\""

    guard quoteFollows else {
      // The run is not followed by a quote. Take the backslashes literally.
      currentCharacterIndex = runEnd
      return String(repeating: "\\", count: runLength)
    }

    var unescaped = String(repeating: "\\", count: runLength / 2)
    if runLength.isMultiple(of: 2) {
      // Every backslash is paired up. Leave the quote for the caller.
      currentCharacterIndex = runEnd
    } else {
      // The unpaired last backslash escapes the quote.
      unescaped += "\""
      currentCharacterIndex = content.index(after: runEnd)
    }
    return unescaped
  }
}
564+
565+
/// Splits a Windows command line into its arguments and removes the escaping.
///
/// The input may be a full command line that begins with the executable's pathname. That leading pathname needs
/// different handling from the arguments after it: when CreateProcess or cmd.exe scans the pathname, `\` does not
/// escape the quote character, whereas when libc scans the rest of the command line, it does.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Pass `true` if `cmd` starts with the executable pathname, so that its first argument is
///     parsed without treating `\` as an escape character.
/// - Returns: The unescaped arguments in the order they appear in `cmd`.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var splitter = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = splitter.parse()
  return arguments
}

0 commit comments

Comments
 (0)