Skip to content

[RFC 9651] Add support for Display String type to RawStructuredFieldValues #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Sources/RawStructuredFieldValues/ASCII.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ let asciiSlash = UInt8(ascii: "/")
let asciiPeriod = UInt8(ascii: ".")
let asciiComma = UInt8(ascii: ",")
// Endpoints of the ASCII letter ranges declared below.
let asciiCapitalA = UInt8(ascii: "A")
let asciiCapitalF = UInt8(ascii: "F")
let asciiCapitalZ = UInt8(ascii: "Z")
let asciiLowerA = UInt8(ascii: "a")
let asciiLowerF = UInt8(ascii: "f")
let asciiLowerZ = UInt8(ascii: "z")
// The complete uppercase and lowercase ASCII alphabets.
let asciiCapitals = asciiCapitalA...asciiCapitalZ
let asciiLowercases = asciiLowerA...asciiLowerZ
// The letter digits of hexadecimal notation (A-F and a-f); the decimal digits 0-9 are not part of these ranges.
let asciiHexCapitals = asciiCapitalA...asciiCapitalF
let asciiHexLowercases = asciiLowerA...asciiLowerF
5 changes: 5 additions & 0 deletions Sources/RawStructuredFieldValues/ComponentTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ extension BareItem {

case .date:
throw StructuredHeaderError.invalidItem
case .displayString:
throw StructuredHeaderError.invalidItem
}
}
}
Expand Down Expand Up @@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {

/// A date item.
case date(Int)

/// A display string item.
case displayString(String)
}

extension RFC9651BareItem: ExpressibleByBooleanLiteral {
Expand Down
2 changes: 2 additions & 0 deletions Sources/RawStructuredFieldValues/Errors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
case invalidBoolean
case invalidToken
case invalidDate
case invalidDisplayString
case invalidList
case invalidDictionary
case missingKey
Expand All @@ -53,6 +54,7 @@ extension StructuredHeaderError {
public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
public static let invalidToken = StructuredHeaderError(.invalidToken)
public static let invalidDate = StructuredHeaderError(.invalidDate)
public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
public static let invalidList = StructuredHeaderError(.invalidList)
public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
public static let missingKey = StructuredHeaderError(.missingKey)
Expand Down
116 changes: 116 additions & 0 deletions Sources/RawStructuredFieldValues/FieldParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
return try self._parseAToken()
case asciiAt:
return try self._parseADate()
case asciiPercent:
return try self._parseADisplayString()
default:
throw StructuredHeaderError.invalidItem
}
Expand Down Expand Up @@ -491,6 +493,84 @@ extension StructuredFieldValueParser {
return try self._parseAnIntegerOrDecimal(isDate: true)
}

/// Parses a Display String whose leading `%` is the next byte of `underlyingData`.
///
/// The accepted shape is `%"` followed by content bytes and a closing `"`. Content bytes are
/// copied verbatim, except that `%` introduces a two-character escape which is decoded to a single
/// byte via `EncodedHex`. Once the closing quote is reached, the collected bytes must form valid
/// UTF-8.
///
/// - Returns: A `.displayString` item holding the decoded text.
/// - Throws: `StructuredHeaderError.invalidDisplayString` when the opening quote is missing, a byte
///   in `0x00...0x1F` or at/above `0x7F` appears unescaped, an escape is truncated or fails to
///   decode, the collected bytes are not valid UTF-8, or the input ends before the closing quote.
private mutating func _parseADisplayString() throws -> RFC9651BareItem {
    assert(self.underlyingData.first == asciiPercent)
    self.underlyingData.consumeFirst()

    // The `%` sigil must be followed immediately by the opening DQUOTE.
    guard self.underlyingData.first == asciiDquote else {
        throw StructuredHeaderError.invalidDisplayString
    }
    self.underlyingData.consumeFirst()

    var decodedBytes: [UInt8] = []

    while let current = self.underlyingData.first {
        self.underlyingData.consumeFirst()

        switch current {
        case 0x00...0x1F, 0x7F...:
            // Control bytes and anything outside printable ASCII may only appear escaped.
            throw StructuredHeaderError.invalidDisplayString

        case asciiPercent:
            // An escape needs exactly two more bytes after the `%`.
            guard self.underlyingData.count >= 2 else {
                throw StructuredHeaderError.invalidDisplayString
            }

            let escape = EncodedHex(self.underlyingData.prefix(2))
            self.underlyingData = self.underlyingData.dropFirst(2)

            guard let decodedOctet = escape.decode() else {
                throw StructuredHeaderError.invalidDisplayString
            }
            decodedBytes.append(decodedOctet)

        case asciiDquote:
            // Closing quote: validate the collected bytes as UTF-8 and finish.
            #if compiler(>=6.0)
            if #available(macOS 15.0, iOS 18.0, tvOS 18.0, watchOS 11.0, *) {
                guard let text = String(validating: decodedBytes, as: UTF8.self) else {
                    throw StructuredHeaderError.invalidDisplayString
                }
                return .displayString(text)
            } else {
                return try _decodeDisplayString(byteArray: &decodedBytes)
            }
            #else
            return try _decodeDisplayString(byteArray: &decodedBytes)
            #endif

        default:
            decodedBytes.append(current)
        }
    }

    // The input ran out before a closing DQUOTE was seen.
    throw StructuredHeaderError.invalidDisplayString
}

/// Converts the percent-decoded bytes of a Display String into a `.displayString` item in
/// environments where `String(validating:as:)` is unavailable.
///
/// The bytes are checked with the standard library's strict UTF-8 decoder instead of
/// `String(validatingUTF8:)`. That initializer reads a null-terminated C string, so a payload
/// containing an escaped NUL (`%00`) would be silently cut off at that byte and this path would
/// return a different value than the `String(validating:as:)` path for the same input.
///
/// - Parameter byteArray: The bytes collected between the quotes, with escapes already decoded.
///   It remains `inout` so existing call sites keep compiling; it is not modified.
/// - Returns: A `.displayString` item containing every decoded byte, including any embedded NUL.
/// - Throws: `StructuredHeaderError.invalidDisplayString` if the bytes are not well-formed UTF-8.
private func _decodeDisplayString(byteArray: inout [UInt8]) throws -> RFC9651BareItem {
    var decoder = UTF8()
    var remaining = byteArray.makeIterator()

    // Walk the whole buffer once purely to validate it; any ill-formed sequence is a parse failure.
    validation: while true {
        switch decoder.decode(&remaining) {
        case .scalarValue:
            continue validation
        case .emptyInput:
            break validation
        case .error:
            throw StructuredHeaderError.invalidDisplayString
        }
    }

    // The buffer is known to be valid UTF-8, so this conversion cannot insert replacement characters.
    return .displayString(String(decoding: byteArray, as: UTF8.self))
}

private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> {
var parameters = OrderedMap<Key, RFC9651BareItem>()

Expand Down Expand Up @@ -643,3 +723,39 @@ extension StrippingStringEscapesCollection.Index: Comparable {
lhs._baseIndex < rhs._baseIndex
}
}

/// Two ASCII bytes that are expected, but not guaranteed, to spell one octet as a pair of
/// hexadecimal digits (high digit first).
struct EncodedHex {
    /// The byte holding the high-order hex digit.
    private(set) var firstChar: UInt8
    /// The byte holding the low-order hex digit.
    private(set) var secondChar: UInt8

    /// Wraps the two bytes of `bytes`.
    ///
    /// - Precondition: `bytes` contains exactly two elements.
    init<Bytes: RandomAccessCollection>(_ bytes: Bytes) where Bytes.Element == UInt8 {
        precondition(bytes.count == 2)
        let head = bytes.startIndex
        self.firstChar = bytes[head]
        self.secondChar = bytes[bytes.index(after: head)]
    }

    /// Returns the octet spelled by the two stored characters.
    ///
    /// The result is `nil` when either character is not one of `0`-`9` or `a`-`f`; uppercase
    /// hex digits are not accepted.
    fileprivate func decode() -> UInt8? {
        guard let highNibble = self.htoi(self.firstChar) else { return nil }
        guard let lowNibble = self.htoi(self.secondChar) else { return nil }

        // Both values are below 16, so the shifted high nibble and the low nibble never overlap.
        return (highNibble << 4) | lowNibble
    }

    /// Maps a single ASCII hex digit (`0`-`9`, `a`-`f`) to its numeric value, or `nil` for any
    /// other byte.
    private func htoi(_ asciiChar: UInt8) -> UInt8? {
        if (asciiZero...asciiNine).contains(asciiChar) {
            return asciiChar - asciiZero
        }
        if (asciiLowerA...asciiLowerF).contains(asciiChar) {
            return asciiChar - asciiLowerA + 10
        }
        return nil
    }
}
38 changes: 38 additions & 0 deletions Sources/RawStructuredFieldValues/FieldSerializer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,29 @@ extension StructuredFieldValueSerializer {
}

self.data.append(contentsOf: String(date, radix: 10).utf8)
case .displayString(let displayString):
let bytes = displayString.utf8

self.data.append(asciiPercent)
self.data.append(asciiDquote)

for byte in bytes {
if byte == asciiPercent
|| byte == asciiDquote
|| (0x00...0x1F).contains(byte)
|| (0x7F...).contains(byte)
{
self.data.append(asciiPercent)

let encodedByte = UInt8.encodeToHex(byte)
self.data.append(encodedByte.firstChar)
self.data.append(encodedByte.secondChar)
} else {
self.data.append(byte)
}
}

self.data.append(asciiDquote)
}
}
}
Expand Down Expand Up @@ -245,3 +268,18 @@ extension String {
}
}
}

extension UInt8 {
    /// Spells `int` as two lowercase hexadecimal characters, wrapped in an `EncodedHex`
    /// (high digit first).
    fileprivate static func encodeToHex(_ int: Self) -> EncodedHex {
        let highNibble = int >> 4
        let lowNibble = int & 0x0F

        let highDigit = self.itoh(highNibble)
        let lowDigit = self.itoh(lowNibble)
        return EncodedHex([highDigit, lowDigit])
    }

    /// Returns the ASCII character for a single hex digit: `0`-`9` for values up to nine and
    /// lowercase letters starting at `a` above that. `encodeToHex` only passes values in `0...15`.
    private static func itoh(_ int: Self) -> Self {
        guard int > 9 else {
            return asciiZero + int
        }
        return asciiLowerA + int - 10
    }
}
2 changes: 2 additions & 0 deletions Sources/sh-parser/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ extension RFC9651BareItem {
return "decimal \(d)"
case .date(let date):
return "date \(date)"
case .displayString(let displayString):
return "display string \(displayString)"
}
}
}
Expand Down
18 changes: 18 additions & 0 deletions Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,24 @@ final class StructuredFieldParserTests: XCTestCase {

XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
case .string(let typeValue) = typeDictionary["value"]
else {
XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
return
}

XCTAssertEqual(
typeName,
"displaystring",
"\(fixtureName): Expected type displaystring, got type \(typeName)"
)
XCTAssertEqual(
typeValue,
baseDisplayString,
"\(fixtureName): Got \(baseDisplayString), expected \(typeValue)"
)
default:
XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ extension RFC9651BareItem {
case (.some(.string("date")), .some(.integer(let value))):
self = .date(value)

case (.some(.string("displaystring")), .some(.string(let value))):
self = .displayString(value)

default:
preconditionFailure("Unexpected type object \(typeObject)")
}
Expand Down
111 changes: 111 additions & 0 deletions Tests/TestFixtures/display-string.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
[
{
"name": "basic display string (ascii content)",
"raw": ["%\"foo bar\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
},
{
"name": "all printable ascii",
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
},
{
"name": "non-ascii display string (uppercase escaping)",
"raw": ["%\"f%C3%BC%C3%BC\""],
"canonical": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"must_fail": true
},
{
"name": "non-ascii display string (lowercase escaping)",
"raw": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "füü"}, {}]
},
{
"name": "tab in display string",
"raw": ["%\"\t\""],
"header_type": "item",
"must_fail": true
},
{
"name": "newline in display string",
"raw": ["%\"\n\""],
"header_type": "item",
"must_fail": true
},
{
"name": "single quoted display string",
"raw": ["%'foo'"],
"header_type": "item",
"must_fail": true
},
{
"name": "unquoted display string",
"raw": ["%foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string missing initial quote",
"raw": ["%foo\""],
"header_type": "item",
"must_fail": true
},
{
"name": "unbalanced display string",
"raw": ["%\"foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string quoting",
"raw": ["%\"foo %22bar%22 \\ baz\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
},
{
"name": "bad display string escaping",
"raw": ["%\"foo %a"],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 2-byte seq)",
"raw": ["%\"%c3%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid sequence id)",
"raw": ["%\"%a0%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid hex)",
"raw": ["%\"%g0%1w\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 3-byte seq)",
"raw": ["%\"%e2%28%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 4-byte seq)",
"raw": ["%\"%f0%28%8c%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "BOM in display string",
"raw": ["%\"BOM: %ef%bb%bf\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
}
]
Loading