@@ -1159,3 +1159,142 @@ extension Unicode.Scalar.Properties {
1159
1159
return String ( _storage: storage)
1160
1160
}
1161
1161
}
1162
+
1163
extension Unicode {

  /// A scalar's classification under the Canonical Ordering Algorithm of the
  /// Unicode Standard.
  ///
  /// The ordering algorithm consults canonical combining classes to decide
  /// whether two sequences of combining marks are canonically equivalent,
  /// meaning that they are interpreted identically. Two such sequences are
  /// canonically equivalent when they become equal after their scalars are
  /// sorted in ascending order of combining class.
  ///
  /// Take the sequence `"\u{0041}\u{0301}\u{0316}"` (LATIN CAPITAL LETTER A,
  /// COMBINING ACUTE ACCENT, COMBINING GRAVE ACCENT BELOW) as an example. Its
  /// scalars have combining classes with the numeric values 0, 230, and 220,
  /// respectively, so ordering them by combining class produces
  /// `"\u{0041}\u{0316}\u{0301}"`. Strings that differ only in the order of
  /// those scalars therefore compare as equal:
  ///
  /// ```
  /// print("\u{0041}\u{0316}\u{0301}" == "\u{0041}\u{0301}\u{0316}")
  /// // Prints "true"
  /// ```
  ///
  /// Named and Unnamed Combining Classes
  /// ===================================
  ///
  /// The Unicode Standard defines canonical combining classes as integers in
  /// the range `0...254` and, for convenience, gives symbolic names to a
  /// subset of them.
  ///
  /// `CanonicalCombiningClass` conforms to `RawRepresentable` with `UInt8` as
  /// its raw value type. Use `init(rawValue:)` to create an instance from any
  /// numeric value; combining classes that have a symbolic name are also
  /// available as static members with the same name.
  ///
  /// ```
  /// print(Unicode.CanonicalCombiningClass(rawValue: 1) == .overlay)
  /// // Prints "true"
  /// ```
  public struct CanonicalCombiningClass:
    Comparable, Hashable, RawRepresentable
  {
    // MARK: Raw value

    /// The canonical combining class expressed as its raw integer value.
    public let rawValue: UInt8

    /// Creates a canonical combining class from a raw integer value.
    ///
    /// - Parameter rawValue: The raw integer value of the canonical combining
    ///   class.
    public init(rawValue: UInt8) {
      self.rawValue = rawValue
    }

    /// Returns a Boolean value indicating whether the first combining class
    /// has a smaller raw value than the second.
    ///
    /// - Parameters:
    ///   - lhs: A combining class to compare.
    ///   - rhs: Another combining class to compare.
    public static func < (
      lhs: CanonicalCombiningClass,
      rhs: CanonicalCombiningClass
    ) -> Bool {
      return lhs.rawValue < rhs.rawValue
    }

    // MARK: Fixed-position classes

    /// Base glyphs that occupy their own space and do not combine with others.
    public static let notReordered = CanonicalCombiningClass(rawValue: 0)

    /// Marks that overlay a base letter or symbol.
    public static let overlay = CanonicalCombiningClass(rawValue: 1)

    /// Diacritic nukta marks in Brahmi-derived scripts.
    public static let nukta = CanonicalCombiningClass(rawValue: 7)

    /// Combining marks that are attached to hiragana and katakana to indicate
    /// voicing changes.
    public static let kanaVoicing = CanonicalCombiningClass(rawValue: 8)

    /// Diacritic virama marks in Brahmi-derived scripts.
    public static let virama = CanonicalCombiningClass(rawValue: 9)

    // MARK: Attached marks

    /// Marks attached at the bottom left.
    public static let attachedBelowLeft = CanonicalCombiningClass(rawValue: 200)

    /// Marks attached directly below.
    public static let attachedBelow = CanonicalCombiningClass(rawValue: 202)

    /// Marks attached directly above.
    public static let attachedAbove = CanonicalCombiningClass(rawValue: 214)

    /// Marks attached at the top right.
    public static let attachedAboveRight =
      CanonicalCombiningClass(rawValue: 216)

    // MARK: Distinct marks

    /// Distinct marks at the bottom left.
    public static let belowLeft = CanonicalCombiningClass(rawValue: 218)

    /// Distinct marks directly below.
    public static let below = CanonicalCombiningClass(rawValue: 220)

    /// Distinct marks at the bottom right.
    public static let belowRight = CanonicalCombiningClass(rawValue: 222)

    /// Distinct marks to the left.
    public static let left = CanonicalCombiningClass(rawValue: 224)

    /// Distinct marks to the right.
    public static let right = CanonicalCombiningClass(rawValue: 226)

    /// Distinct marks at the top left.
    public static let aboveLeft = CanonicalCombiningClass(rawValue: 228)

    /// Distinct marks directly above.
    public static let above = CanonicalCombiningClass(rawValue: 230)

    /// Distinct marks at the top right.
    public static let aboveRight = CanonicalCombiningClass(rawValue: 232)

    /// Distinct marks subtending two bases.
    public static let doubleBelow = CanonicalCombiningClass(rawValue: 233)

    /// Distinct marks extending above two bases.
    public static let doubleAbove = CanonicalCombiningClass(rawValue: 234)

    /// Greek iota subscript only (U+0345 COMBINING GREEK YPOGEGRAMMENI).
    public static let iotaSubscript = CanonicalCombiningClass(rawValue: 240)
  }
}
1288
+
1289
extension Unicode.Scalar.Properties {

  /// The scalar's canonical combining class.
  ///
  /// The value of this property is that of the `Canonical_Combining_Class`
  /// property in the
  /// [Unicode Standard](http://www.unicode.org/versions/latest/).
  public var canonicalCombiningClass: Unicode.CanonicalCombiningClass {
    let classValue = __swift_stdlib_u_getIntPropertyValue(
      _value, __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS)
    return Unicode.CanonicalCombiningClass(rawValue: UInt8(classValue))
  }
}
0 commit comments