Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Sources/RawStructuredFieldValues/ComponentTypes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ extension BareItem {

case .date:
throw StructuredHeaderError.invalidItem
case .displayString:
throw StructuredHeaderError.invalidItem
}
}
}
Expand Down Expand Up @@ -141,6 +143,9 @@ public enum RFC9651BareItem: Sendable {

/// A date item.
case date(Int)

/// A display string item.
case displayString(String)
}

extension RFC9651BareItem: ExpressibleByBooleanLiteral {
Expand Down
2 changes: 2 additions & 0 deletions Sources/RawStructuredFieldValues/Errors.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public struct StructuredHeaderError: Error, Sendable {
case invalidBoolean
case invalidToken
case invalidDate
case invalidDisplayString
case invalidList
case invalidDictionary
case missingKey
Expand All @@ -53,6 +54,7 @@ extension StructuredHeaderError {
public static let invalidBoolean = StructuredHeaderError(.invalidBoolean)
public static let invalidToken = StructuredHeaderError(.invalidToken)
public static let invalidDate = StructuredHeaderError(.invalidDate)
public static let invalidDisplayString = StructuredHeaderError(.invalidDisplayString)
public static let invalidList = StructuredHeaderError(.invalidList)
public static let invalidDictionary = StructuredHeaderError(.invalidDictionary)
public static let missingKey = StructuredHeaderError(.missingKey)
Expand Down
97 changes: 97 additions & 0 deletions Sources/RawStructuredFieldValues/FieldParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ extension StructuredFieldValueParser {
return try self._parseAToken()
case asciiAt:
return try self._parseADate()
case asciiPercent:
return try self._parseADisplayString()
default:
throw StructuredHeaderError.invalidItem
}
Expand Down Expand Up @@ -491,6 +493,68 @@ extension StructuredFieldValueParser {
return try self._parseAnIntegerOrDecimal(isDate: true)
}

private mutating func _parseADisplayString() throws -> RFC9651BareItem {
assert(self.underlyingData.first == asciiPercent)
self.underlyingData.consumeFirst()

guard self.underlyingData.first == asciiDquote else {
throw StructuredHeaderError.invalidDisplayString
}

self.underlyingData.consumeFirst()

var byteArray = [UInt8]()

while let char = self.underlyingData.first {
self.underlyingData.consumeFirst()

switch char {
case 0x00...0x1F, 0x7F...:
throw StructuredHeaderError.invalidDisplayString
case asciiPercent:
if self.underlyingData.count < 2 {
throw StructuredHeaderError.invalidDisplayString
}

let startIndex = self.underlyingData.startIndex
let secondIndex = self.underlyingData.index(after: startIndex)
let octetHex = self.underlyingData[...secondIndex]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can skip the index math and do let octetHex = self.underlyingData.prefix(2)


self.underlyingData = self.underlyingData.dropFirst(2)

guard
octetHex.allSatisfy({ asciiDigits.contains($0) || asciiLowercases.contains($0) }),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be somewhere I'd write a little micro-extension method for this purpose, as we know that there are exactly two values and so we don't strictly need a loop.

let octet = UInt8.decodeHex(octetHex)
else {
throw StructuredHeaderError.invalidDisplayString
}

byteArray.append(octet)
case asciiDquote:
let unicodeSequence = try byteArray.withUnsafeBytes {
try $0.withMemoryRebound(to: CChar.self) {
guard let baseAddress = $0.baseAddress else {
throw StructuredHeaderError.invalidDisplayString
}

return String(validatingUTF8: baseAddress)
}
}

guard let unicodeSequence else {
throw StructuredHeaderError.invalidDisplayString
}

return .displayString(unicodeSequence)
default:
byteArray.append(char)
}
}

// Fail parsing — reached the end of the string without finding a closing DQUOTE.
throw StructuredHeaderError.invalidDisplayString
}

private mutating func _parseParameters() throws -> OrderedMap<Key, RFC9651BareItem> {
var parameters = OrderedMap<Key, RFC9651BareItem>()

Expand Down Expand Up @@ -643,3 +707,36 @@ extension StrippingStringEscapesCollection.Index: Comparable {
lhs._baseIndex < rhs._baseIndex
}
}

extension UInt8 {
/// Converts a hex value given in UTF8 to base 10.
fileprivate static func decodeHex<Bytes: RandomAccessCollection>(_ bytes: Bytes) -> Self?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also excessively generic I think. We only ever accept a single data type, and it's always two bytes long. It might actually be useful to express that as its own type, e.g:

struct EncodedHex {
    var first: UInt8
    var second: UInt8

    init(_ slice: ArraySlice<UInt8>) {
        precondition(slice.count == 2)
        self.first = slice[slice.startIndex]
        self.second = slice[slice.index(after: slice.startIndex)]
    }
}

That lets us express the rest of this math in a much less branchy way, and with an avoidance of preconditions elsewhere.

where Bytes.Element == Self {
var result = Self(0)
var power = Self(bytes.count)

for byte in bytes {
power -= 1

guard let integer = Self.htoi(byte) else { return nil }
result += integer << (power * 4)
}

return result
}

/// Converts a hex character given in UTF8 to its integer value.
private static func htoi(_ value: Self) -> Self? {
let charA = Self(UnicodeScalar("a").value)
let char0 = Self(UnicodeScalar("0").value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer the UInt8(ascii:) initializer.


switch value {
case char0...char0 + 9:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are a touch nicer using named values at both ends of the range.

return value - char0
case charA...charA + 5:
return value - charA + 10
default:
return nil
}
}
}
20 changes: 20 additions & 0 deletions Sources/RawStructuredFieldValues/FieldSerializer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,26 @@ extension StructuredFieldValueSerializer {
}

self.data.append(contentsOf: String(date, radix: 10).utf8)
case .displayString(let displayString):
let bytes = displayString.utf8

self.data.append(asciiPercent)
self.data.append(asciiDquote)

for byte in bytes {
if byte == asciiPercent
|| byte == asciiDquote
|| (0x00...0x1F).contains(byte)
|| (0x7F...).contains(byte)
{
self.data.append(asciiPercent)
self.data.append(contentsOf: String(byte, radix: 16, uppercase: false).utf8)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can avoid constructing a String here, if we write the code out.

} else {
self.data.append(byte)
}
}

self.data.append(asciiDquote)
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions Sources/sh-parser/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ extension RFC9651BareItem {
return "decimal \(d)"
case .date(let date):
return "date \(date)"
case .displayString(let displayString):
return "display string \(displayString)"
}
}
}
Expand Down
16 changes: 16 additions & 0 deletions Tests/StructuredFieldValuesTests/StructuredFieldParserTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,22 @@ final class StructuredFieldParserTests: XCTestCase {

XCTAssertEqual(typeName, "date", "\(fixtureName): Expected type date, got type \(typeName)")
XCTAssertEqual(typeValue, baseDate, "\(fixtureName): Got \(baseDate), expected \(typeValue)")
case (.displayString(let baseDisplayString), .dictionary(let typeDictionary)):
guard typeDictionary.count == 2, case .string(let typeName) = typeDictionary["__type"],
case .string(let typeValue) = typeDictionary["value"]
else {
XCTFail("\(fixtureName): Unexpected type dict \(typeDictionary)")
return
}

XCTAssertEqual(
typeName,
"displaystring",
"\(fixtureName): Expected type displaystring, got type \(typeName)")
XCTAssertEqual(
typeValue,
baseDisplayString,
"\(fixtureName): Got \(baseDisplayString), expected \(typeValue)")
default:
XCTFail("\(fixtureName): Got \(bareItem), expected \(schema)")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ extension RFC9651BareItem {
case (.some(.string("date")), .some(.integer(let value))):
self = .date(value)

case (.some(.string("displaystring")), .some(.string(let value))):
self = .displayString(value)

default:
preconditionFailure("Unexpected type object \(typeObject)")
}
Expand Down
111 changes: 111 additions & 0 deletions Tests/TestFixtures/display-string.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
[
{
"name": "basic display string (ascii content)",
"raw": ["%\"foo bar\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo bar"}, {}]
},
{
"name": "all printable ascii",
"raw": ["%\" !%22#$%25&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"}, {}]
},
{
"name": "non-ascii display string (uppercase escaping)",
"raw": ["%\"f%C3%BC%C3%BC\""],
"canonical": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"must_fail": true
},
{
"name": "non-ascii display string (lowercase escaping)",
"raw": ["%\"f%c3%bc%c3%bc\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "füü"}, {}]
},
{
"name": "tab in display string",
"raw": ["%\"\t\""],
"header_type": "item",
"must_fail": true
},
{
"name": "newline in display string",
"raw": ["%\"\n\""],
"header_type": "item",
"must_fail": true
},
{
"name": "single quoted display string",
"raw": ["%'foo'"],
"header_type": "item",
"must_fail": true
},
{
"name": "unquoted display string",
"raw": ["%foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string missing initial quote",
"raw": ["%foo\""],
"header_type": "item",
"must_fail": true
},
{
"name": "unbalanced display string",
"raw": ["%\"foo"],
"header_type": "item",
"must_fail": true
},
{
"name": "display string quoting",
"raw": ["%\"foo %22bar%22 \\ baz\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "foo \"bar\" \\ baz"}, {}]
},
{
"name": "bad display string escaping",
"raw": ["%\"foo %a"],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 2-byte seq)",
"raw": ["%\"%c3%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid sequence id)",
"raw": ["%\"%a0%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid hex)",
"raw": ["%\"%g0%1w\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 3-byte seq)",
"raw": ["%\"%e2%28%a1\""],
"header_type": "item",
"must_fail": true
},
{
"name": "bad display string utf-8 (invalid 4-byte seq)",
"raw": ["%\"%f0%28%8c%28\""],
"header_type": "item",
"must_fail": true
},
{
"name": "BOM in display string",
"raw": ["%\"BOM: %ef%bb%bf\""],
"header_type": "item",
"expected": [{"__type": "displaystring", "value": "BOM: \uFEFF"}, {}]
}
]
Loading