Skip to content

More unicode properties #385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 16, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Add support for canonical combining class
  • Loading branch information
natecook1000 committed May 16, 2022
commit 150f21ab4f510b0daa095d1a90b022a6d43a6c7f
3 changes: 3 additions & 0 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,9 @@ extension AST.Atom.CharacterProperty {
/// Case mapping.
case mapping(MapKind, String)

/// Canonical Combining Class, as per UnicodeScalar.Properties.canonicalCombiningClass.
case ccc(Unicode.CanonicalCombiningClass)

/// Character age, as per UnicodeScalar.Properties.age.
case age(major: Int, minor: Int)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,11 @@ extension Source {
return .mapping(.uppercase, value)
case "stc", "simpletitlecasemapping":
return .mapping(.titlecase, value)
case "ccc", "canonicalcombiningclass":
guard let cccValue = UInt8(value), cccValue <= 254 else {
throw ParseError.invalidCCC(value)
}
return .ccc(.init(rawValue: cccValue))
default:
break
}
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/Parse/Diagnostics.swift
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ enum ParseError: Error, Hashable {
case invalidAge(String)
case invalidNumericValue(String)
case unrecognizedNumericType(String)
case expectedMapping(String)
case invalidCCC(String)

case expectedGroupSpecifier
case unbalancedEndOfGroup
Expand Down Expand Up @@ -197,8 +197,8 @@ extension ParseError: CustomStringConvertible {
return "invalid age format for '\(value)' - use '3.0' or 'V3_0' formats"
case .invalidNumericValue(let value):
return "invalid numeric value '\(value)'"
case .expectedMapping(let mapping):
return "missing value for '\(mapping)'"
case .invalidCCC(let value):
return "invalid canonical combining class '\(value)'"

// MARK: Semantic Errors

Expand Down
2 changes: 1 addition & 1 deletion Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ extension RegexValidator {
case .binary(let b, _):
try validateBinaryProperty(b, at: loc)
case .any, .assigned, .ascii, .generalCategory, .posix, .named, .script,
.scriptExtension, .age, .numericType, .numericValue, .mapping:
.scriptExtension, .age, .numericType, .numericValue, .mapping, .ccc:
break
case .pcreSpecial:
throw error(.unsupported("PCRE property"), at: loc)
Expand Down
3 changes: 3 additions & 0 deletions Sources/_StringProcessing/ConsumerInterface.swift
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,9 @@ extension AST.Atom.CharacterProperty {
case .numericType(let type):
return consume { $0.properties.numericType == type }

case .ccc(let ccc):
return consume { $0.properties.canonicalCombiningClass == ccc }

case .mapping(.lowercase, let value):
return consume { $0.properties.lowercaseMapping == value }

Expand Down
2 changes: 2 additions & 0 deletions Tests/RegexTests/ParseTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2475,6 +2475,8 @@ extension RegexTests {
diagnosticTest(#"\p{Numeric_Type=Nuemric}"#, .unrecognizedNumericType("Nuemric"))
diagnosticTest(#"\p{Simple_Lowercase_Mapping}"#, .unknownProperty(key: nil, value: "Simple_Lowercase_Mapping"))
diagnosticTest(#"\p{Simple_Lowercase_Mapping=}"#, .emptyProperty)
diagnosticTest(#"\p{ccc=255}"#, .invalidCCC("255"))
diagnosticTest(#"\p{ccc=Nada}"#, .invalidCCC("Nada"))
diagnosticTest(#"(?#"#, .expected(")"))
diagnosticTest(#"(?x"#, .expected(")"))

Expand Down
4 changes: 2 additions & 2 deletions Tests/RegexTests/UTS18Tests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -715,8 +715,8 @@ extension UTS18Tests {

// MARK: Normalization
// Canonical_Combining_Class
// XCTAssertTrue("abc".contains(regex(#"^\p{Canonical_Combining_Class}+$"#)))
// XCTAssertFalse("123".contains(regex(#"\p{Canonical_Combining_Class}"#)))
XCTAssertTrue("\u{0321}\u{0322}\u{1DD0}".contains(regex(#"^\p{Canonical_Combining_Class=202}+$"#)))
XCTAssertFalse("123".contains(regex(#"\p{Canonical_Combining_Class=202}"#)))

// Decomposition_Type
// XCTAssertTrue("abc".contains(regex(#"^\p{Decomposition_Type}+$"#)))
Expand Down