@@ -239,3 +239,336 @@ extension CompilationDatabase.Command: Codable {
239
239
try container. encodeIfPresent ( output, forKey: . output)
240
240
}
241
241
}
242
+
243
/// A small hand-written tokenizer for shell-escaped (Unix-style) command lines.
///
/// The parser walks the UTF-8 bytes of `content`. Arguments are separated by ASCII spaces; double quotes,
/// single quotes and backslash escapes are resolved while the argument text is accumulated.
fileprivate struct UnixCommandParser {
  /// The command line that is being split.
  var content: Substring
  /// Position of the byte that is currently being examined.
  var i: Substring.UTF8View.Index
  /// The arguments that have been produced so far.
  var result: [String] = []

  /// The byte at the current position. Must only be read while `done` is `false`.
  var ch: UInt8 { content.utf8[i] }
  /// Whether the current position has reached the end of `content`.
  var done: Bool { i == content.endIndex }

  init(_ string: Substring) {
    content = string
    i = string.utf8.startIndex
  }

  /// Advance to the next byte.
  mutating func next() {
    let following = content.utf8.index(after: i)
    i = following
  }

  /// Advance to the next byte, asserting (debug builds only) that the current byte is `c`.
  mutating func next(expect c: UInt8) {
    assert(ch == c)
    next()
  }

  /// Split the whole input and return the collected arguments.
  mutating func parse() -> [String] {
    let space = UInt8(ascii: " ")
    while !done {
      if ch == space {
        // Spaces between arguments are skipped.
        next()
      } else {
        parseString()
      }
    }
    return result
  }

  /// Parse one argument, which may be a concatenation of plain, double-quoted and single-quoted pieces, and
  /// append it to `result`.
  mutating func parseString() {
    var argument = ""
    while !done {
      let byte = ch
      if byte == UInt8(ascii: " ") {
        // An unquoted space terminates the argument.
        break
      } else if byte == UInt8(ascii: "\"") {
        parseDoubleQuotedString(into: &argument)
      } else if byte == UInt8(ascii: "'") {
        parseSingleQuotedString(into: &argument)
      } else {
        parsePlainString(into: &argument)
      }
    }
    result.append(argument)
  }

  /// Parse a `"…"` section starting at the opening quote and append its unescaped contents to `str`.
  ///
  /// Inside double quotes a backslash is dropped and the byte following it is taken literally. If the closing
  /// quote is missing, everything up to the end of the input is appended.
  mutating func parseDoubleQuotedString(into str: inout String) {
    next(expect: UInt8(ascii: "\""))
    var segmentStart = i
    while !done {
      let byte = ch
      if byte == UInt8(ascii: "\"") {
        // Closing quote: flush the pending segment and step over the quote.
        str += content[segmentStart..<i]
        next()
        return
      }
      if byte == UInt8(ascii: "\\") {
        // Flush what we have, drop the backslash and start a new segment at the escaped byte.
        str += content[segmentStart..<i]
        next()
        segmentStart = i
        if done {
          // Trailing backslash: nothing left to escape.
          break
        }
      }
      // Step over a regular byte, or over the byte that was just escaped.
      next()
    }
    str += content[segmentStart..<i]
  }

  /// Parse a `'…'` section starting at the opening quote and append its contents verbatim to `str`.
  ///
  /// No escape processing happens inside single quotes. If the closing quote is missing, everything up to the
  /// end of the input is appended.
  mutating func parseSingleQuotedString(into str: inout String) {
    let quote = UInt8(ascii: "'")
    next(expect: quote)
    let bodyStart = i
    while !done, ch != quote {
      next()
    }
    str += content[bodyStart..<i]
    if !done {
      // Step over the closing quote.
      next()
    }
  }

  /// Parse an unquoted run of bytes and append it to `str`.
  ///
  /// The run ends at the first unescaped space or quote character, which is left unconsumed. A backslash is
  /// dropped and the byte following it is taken literally.
  mutating func parsePlainString(into str: inout String) {
    var segmentStart = i
    scan: while !done {
      switch ch {
      case UInt8(ascii: "\""), UInt8(ascii: "'"), UInt8(ascii: " "):
        break scan
      case UInt8(ascii: "\\"):
        str += content[segmentStart..<i]
        next()
        segmentStart = i
        if !done {
          // Step over the escaped byte so that it is never interpreted as a delimiter.
          next()
        }
      default:
        next()
      }
    }
    str += content[segmentStart..<i]
  }
}
345
+
346
/// Split and unescape a shell-escaped command line invocation.
///
/// The command is tokenized by `UnixCommandParser`; the returned array contains one entry per argument, in
/// order of appearance.
///
/// Examples (results are written as Swift string literals):
///
/// ```
/// abc def    -> ["abc", "def"]
/// abc\ def   -> ["abc def"]
/// abc"\""def -> ["abc\"def"]
/// abc'\"'def -> ["abc\\\"def"]
/// ```
///
/// See clang's `unescapeCommandLine()`.
///
/// - Parameter cmd: The complete shell-escaped command line.
/// - Returns: The unescaped arguments.
public func splitShellEscapedCommand(_ cmd: String) -> [String] {
  var unixParser = UnixCommandParser(Substring(cmd))
  let arguments = unixParser.parse()
  return arguments
}
362
+
363
+ // MARK: - Windows
364
+
365
fileprivate extension Character {
  /// Whether this character is a space or a horizontal tab.
  ///
  /// Note that this is deliberately narrower than the standard library's notion of whitespace: newlines are
  /// not included.
  var isWhitespace: Bool {
    switch self {
    case " ", "\t":
      return true
    default:
      return false
    }
  }

  /// Whether this character is a space, a horizontal tab or a NUL character.
  var isWhitespaceOrNull: Bool {
    switch self {
    case " ", "\t", "\0":
      return true
    default:
      return false
    }
  }

  /// Whether this character needs special treatment when splitting a Windows command line.
  ///
  /// Special characters are:
  ///  * whitespace and NUL, which terminate an argument,
  ///  * the double quote, which starts or ends a quoted section,
  ///  * the backslash, unless `inCommandName` is `true`.
  ///
  /// NUL is included so that it terminates an argument in the same way it is treated as a separator between
  /// arguments; previously an embedded NUL was copied into the argument text. This mirrors LLVM's
  /// `isWindowsSpecialChar`.
  ///
  /// - Parameter inCommandName: Whether the initial command name is being parsed. In that mode a backslash
  ///   is an ordinary character.
  func isWindowsSpecialChar(inCommandName: Bool) -> Bool {
    switch self {
    case " ", "\t", "\0", "\"":
      return true
    case "\\":
      return !inCommandName
    default:
      return false
    }
  }
}
392
+
393
/// Splits a Windows-style command line into its arguments, resolving quoting and backslash escapes.
fileprivate struct WindowsCommandParser {
  /// The complete command line that is being split.
  private let content: String

  /// `true` while the initial command name is being parsed. In this mode `\` is an ordinary character and
  /// does not escape a following quote.
  private var parsingCommandName: Bool

  /// Position in `content` of the character that is currently being looked at.
  private var currentCharacterIndex: String.UTF8View.Index

  /// The arguments that have been produced so far.
  private var result: [String] = []

  /// The character at the current position, or `nil` once the end of `content` has been reached.
  private var currentCharacter: Character? {
    currentCharacterIndex < content.endIndex ? content[currentCharacterIndex] : nil
  }

  /// The character following `currentCharacter`, or `nil` if `currentCharacter` is the last one.
  ///
  /// Must only be read while `currentCharacter` is not `nil`.
  private var peek: Character? {
    let following = content.index(after: currentCharacterIndex)
    guard following < content.endIndex else {
      return nil
    }
    return content[following]
  }

  init(_ string: String, initialCommandName: Bool) {
    content = string
    currentCharacterIndex = string.startIndex
    parsingCommandName = initialCommandName
  }

  /// Designated entry point: split the entire command line and return its arguments.
  mutating func parse() -> [String] {
    while let character = currentCharacter {
      guard character.isWhitespaceOrNull else {
        result.append(parseSingleArgument())
        continue
      }
      // Skip the separator between two arguments.
      _ = consume()
    }
    return result
  }

  /// Return the current character and advance past it. Traps if the end of the input has been reached.
  private mutating func consume() -> Character {
    guard let consumed = currentCharacter else {
      preconditionFailure("Nothing to consume")
    }
    currentCharacterIndex = content.index(after: currentCharacterIndex)
    return consumed
  }

  /// Advance past the current character, asserting (debug builds only) that it is `expectedCharacter`.
  private mutating func consume(expect expectedCharacter: Character) {
    assert(currentCharacter == expectedCharacter)
    _ = consume()
  }

  /// Parse one argument and return it with all escaping resolved (e.g. `\"` is returned as `"`).
  ///
  /// When this returns, the parser is positioned at the character that follows the argument.
  mutating func parseSingleArgument() -> String {
    var argument = ""
    while let character = currentCharacter {
      guard character.isWindowsSpecialChar(inCommandName: parsingCommandName) else {
        // Ordinary characters are copied verbatim.
        argument.append(consume())
        continue
      }
      switch character {
      case _ where character.isWhitespaceOrNull:
        // The argument is complete. Whatever follows is no longer the command name.
        parsingCommandName = false
        return argument
      case "\"":
        argument += parseQuoted()
      case #"\"#:
        assert(!parsingCommandName, "else we'd have treated it as a normal char")
        argument += parseBackslash()
      default:
        preconditionFailure("unexpected special character")
      }
    }
    return argument
  }

  /// Parse a quoted section. The parser must be positioned at the opening `"`.
  ///
  /// Returns the contents of the section without the surrounding quotes. If the closing quote is missing,
  /// everything up to the end of the input is returned.
  mutating func parseQuoted() -> String {
    // The opening quote is not part of the unescaped text.
    consume(expect: "\"")

    var contents = ""
    while let character = currentCharacter {
      if character == "\"" {
        guard peek == "\"" else {
          // Closing quote: drop it and finish the section.
          consume(expect: "\"")
          return contents
        }
        // Two adjacent quotes inside a quoted section stand for a single literal quote, so
        // `" a "" b "` represents ` a " b `.
        consume(expect: "\"")
        consume(expect: "\"")
        contents += "\""
      } else if character == "\\" && !parsingCommandName {
        contents += parseBackslash()
      } else {
        contents.append(consume())
      }
    }
    return contents
  }

  /// Consume a run of backslashes, plus the double quote following it if that quote is escaped, and return
  /// the text the run stands for.
  ///
  /// Backslashes both separate path components and escape double quotes, which leads to these rules:
  ///
  /// * An even number of backslashes followed by a double quote yields one backslash per pair. The quote is
  ///   left unconsumed so that the caller interprets it as the start or end of a quoted section.
  /// * An odd number of backslashes followed by a double quote yields one backslash per pair plus a literal
  ///   double quote. The quote is consumed.
  /// * Otherwise the backslashes are taken literally.
  mutating func parseBackslash() -> String {
    let runStart = currentCharacterIndex
    let runEnd = content[runStart...].firstIndex(where: { $0 != "\\" }) ?? content.endIndex
    let backslashCount = content.distance(from: runStart, to: runEnd)

    guard runEnd != content.endIndex, content[runEnd] == "\"" else {
      // No quote after the run: every backslash is literal.
      currentCharacterIndex = runEnd
      return String(repeating: "\\", count: backslashCount)
    }

    var unescaped = String(repeating: "\\", count: backslashCount / 2)
    if backslashCount.isMultiple(of: 2) {
      // All backslashes pair up; the quote is handled by the caller.
      currentCharacterIndex = runEnd
    } else {
      // The unpaired last backslash escapes the quote.
      unescaped += "\""
      currentCharacterIndex = content.index(after: runEnd)
    }
    return unescaped
  }
}
564
+
565
/// Split and unescape a Windows command line invocation.
///
/// Sometimes, this function will be handling a full command line including an executable pathname at the
/// start. In that situation, the initial pathname needs different handling from the following arguments,
/// because when CreateProcess or cmd.exe scans the pathname, it doesn't treat `\` as escaping the quote
/// character, whereas when libc scans the rest of the command line, it does.
///
/// - Parameters:
///   - cmd: The command line to split.
///   - initialCommandName: Whether `cmd` starts with the executable pathname, which is then parsed without
///     treating `\` as an escape character.
/// - Returns: The unescaped arguments, in order of appearance.
public func splitWindowsCommandLine(_ cmd: String, initialCommandName: Bool) -> [String] {
  var windowsParser = WindowsCommandParser(cmd, initialCommandName: initialCommandName)
  let arguments = windowsParser.parse()
  return arguments
}
0 commit comments