From f5d0b69b750369d535c3f200222132403b0d9bff Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 26 Mar 2024 18:30:13 +0100 Subject: [PATCH 01/36] syntax: accept `{,n}` as an equivalent to `{0,n}` Most regular expression engines don't accept the `{,n}` syntax, but some other do it (namely Python's `re` library). This introduces a new parser configuration option that enables the `{,n}` syntax. PR #1086 --- regex-syntax/src/ast/parse.rs | 69 +++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/regex-syntax/src/ast/parse.rs b/regex-syntax/src/ast/parse.rs index 1a3df56b5..0c2a35265 100644 --- a/regex-syntax/src/ast/parse.rs +++ b/regex-syntax/src/ast/parse.rs @@ -124,6 +124,7 @@ pub struct ParserBuilder { ignore_whitespace: bool, nest_limit: u32, octal: bool, + empty_min_range: bool, } impl Default for ParserBuilder { @@ -139,6 +140,7 @@ impl ParserBuilder { ignore_whitespace: false, nest_limit: 250, octal: false, + empty_min_range: false, } } @@ -149,6 +151,7 @@ impl ParserBuilder { capture_index: Cell::new(0), nest_limit: self.nest_limit, octal: self.octal, + empty_min_range: self.empty_min_range, initial_ignore_whitespace: self.ignore_whitespace, ignore_whitespace: Cell::new(self.ignore_whitespace), comments: RefCell::new(vec![]), @@ -221,6 +224,18 @@ impl ParserBuilder { self.ignore_whitespace = yes; self } + + /// Allow using `{,n}` as an equivalent to `{0,n}`. + /// + /// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`. + /// Most regular expression engines don't support the `{,n}` syntax, but + /// some others do it, namely Python's `re` library. + /// + /// This is disabled by default. + pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder { + self.empty_min_range = yes; + self + } } /// A regular expression parser. @@ -246,6 +261,9 @@ pub struct Parser { /// The initial setting for `ignore_whitespace` as provided by /// `ParserBuilder`. 
It is used when resetting the parser's state. initial_ignore_whitespace: bool, + /// Whether the parser supports `{,n}` repetitions as an equivalent to + /// `{0,n}.` + empty_min_range: bool, /// Whether whitespace should be ignored. When enabled, comments are /// also permitted. ignore_whitespace: Cell, @@ -1114,15 +1132,14 @@ impl<'s, P: Borrow> ParserI<'s, P> { self.parse_decimal(), ast::ErrorKind::DecimalEmpty, ast::ErrorKind::RepetitionCountDecimalEmpty, - )?; - let mut range = ast::RepetitionRange::Exactly(count_start); + ); if self.is_eof() { return Err(self.error( Span::new(start, self.pos()), ast::ErrorKind::RepetitionCountUnclosed, )); } - if self.char() == ',' { + let range = if self.char() == ',' { if !self.bump_and_bump_space() { return Err(self.error( Span::new(start, self.pos()), @@ -1130,16 +1147,33 @@ impl<'s, P: Borrow> ParserI<'s, P> { )); } if self.char() != '}' { + let count_start = match count_start { + Ok(c) => c, + Err(err) + if err.kind + == ast::ErrorKind::RepetitionCountDecimalEmpty => + { + if self.parser().empty_min_range { + 0 + } else { + return Err(err); + } + } + err => err?, + }; let count_end = specialize_err( self.parse_decimal(), ast::ErrorKind::DecimalEmpty, ast::ErrorKind::RepetitionCountDecimalEmpty, )?; - range = ast::RepetitionRange::Bounded(count_start, count_end); + ast::RepetitionRange::Bounded(count_start, count_end) } else { - range = ast::RepetitionRange::AtLeast(count_start); + ast::RepetitionRange::AtLeast(count_start?) } - } + } else { + ast::RepetitionRange::Exactly(count_start?) 
+ }; + if self.is_eof() || self.char() != '}' { return Err(self.error( Span::new(start, self.pos()), @@ -2459,6 +2493,11 @@ mod tests { ParserI::new(parser, pattern) } + fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> { + let parser = ParserBuilder::new().empty_min_range(true).build(); + ParserI::new(parser, pattern) + } + fn parser_nest_limit( pattern: &str, nest_limit: u32, @@ -3376,6 +3415,20 @@ bar ast: Box::new(lit('a', 0)), })) ); + assert_eq!( + parser_empty_min_range(r"a{,9}").parse(), + Ok(Ast::repetition(ast::Repetition { + span: span(0..5), + op: ast::RepetitionOp { + span: span(1..5), + kind: ast::RepetitionKind::Range( + ast::RepetitionRange::Bounded(0, 9) + ), + }, + greedy: true, + ast: Box::new(lit('a', 0)), + })) + ); assert_eq!( parser_ignore_whitespace(r"a{5,9} ?").parse(), Ok(Ast::repetition(ast::Repetition { @@ -4596,8 +4649,8 @@ bar assert_eq!( parser(r"\b{ ").parse().unwrap_err(), TestError { - span: span(4..4), - kind: ast::ErrorKind::RepetitionCountDecimalEmpty, + span: span(2..4), + kind: ast::ErrorKind::RepetitionCountUnclosed, } ); // In this case, we got some valid chars that makes it look like the From d895bd984537538240e175cc55bc010307210468 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 26 Mar 2024 13:30:26 -0400 Subject: [PATCH 02/36] regex-syntax-0.8.3 --- regex-syntax/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index c9ce87da7..1a25d1ce6 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.8.2" #:version +version = "0.8.3" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" From 66a3bca217881fe8eee9a5a898aea4ecb1eb5cf3 Mon Sep 17 00:00:00 2001 From: JohnEndson <165029498+JohnEndson@users.noreply.github.com> Date: Thu, 28 Mar 
2024 20:25:05 +0800 Subject: [PATCH 03/36] doc: remove repetitive words PR #1179 --- regex-automata/src/dfa/mod.rs | 2 +- regex-automata/src/meta/regex.rs | 4 ++-- regex-automata/src/nfa/thompson/range_trie.rs | 2 +- regex-automata/src/util/alphabet.rs | 2 +- regex-automata/src/util/captures.rs | 4 ++-- regex-automata/src/util/start.rs | 2 +- regex-cli/cmd/generate/fowler.rs | 2 +- regex-lite/src/lib.rs | 4 ++-- regex-syntax/src/ast/mod.rs | 2 +- src/regexset/bytes.rs | 2 +- src/regexset/string.rs | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/regex-automata/src/dfa/mod.rs b/regex-automata/src/dfa/mod.rs index fd58cac23..0e6a968e3 100644 --- a/regex-automata/src/dfa/mod.rs +++ b/regex-automata/src/dfa/mod.rs @@ -190,7 +190,7 @@ assert_eq!(matches, vec![ ``` Note that unlike dense DFAs, sparse DFAs have no alignment requirements. -Conversely, dense DFAs must be be aligned to the same alignment as a +Conversely, dense DFAs must be aligned to the same alignment as a [`StateID`](crate::util::primitives::StateID). # Support for `no_std` and `alloc`-only diff --git a/regex-automata/src/meta/regex.rs b/regex-automata/src/meta/regex.rs index a06d2bb48..8cfdecbec 100644 --- a/regex-automata/src/meta/regex.rs +++ b/regex-automata/src/meta/regex.rs @@ -1826,7 +1826,7 @@ impl Regex { /// /// The precise meaning of "accelerated" is specifically left unspecified, /// but the general meaning is that the search is a high likelihood of - /// running faster than than a character-at-a-time loop inside a standard + /// running faster than a character-at-a-time loop inside a standard /// regex engine. /// /// When a regex is accelerated, it is only a *probabilistic* claim. That @@ -2282,7 +2282,7 @@ impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {} /// /// Most of the regex engines in this crate require some kind of /// mutable state in order to execute a search. 
This mutable state is -/// explicitly separated from the the core regex object (such as a +/// explicitly separated from the core regex object (such as a /// [`thompson::NFA`](crate::nfa::thompson::NFA)) so that the read-only regex /// object can be shared across multiple threads simultaneously without any /// synchronization. Conversely, a `Cache` must either be duplicated if using diff --git a/regex-automata/src/nfa/thompson/range_trie.rs b/regex-automata/src/nfa/thompson/range_trie.rs index 49debda40..93cce1699 100644 --- a/regex-automata/src/nfa/thompson/range_trie.rs +++ b/regex-automata/src/nfa/thompson/range_trie.rs @@ -693,7 +693,7 @@ impl NextInsert { /// handle: /// /// 1. The part where the two ranges actually overlap. i.e., The intersection. -/// 2. The part of the existing range that is not in the the new range. +/// 2. The part of the existing range that is not in the new range. /// 3. The part of the new range that is not in the old range. /// /// (1) is guaranteed to always occur since all overlapping ranges have a diff --git a/regex-automata/src/util/alphabet.rs b/regex-automata/src/util/alphabet.rs index 22b5a7644..e0e4d2fc1 100644 --- a/regex-automata/src/util/alphabet.rs +++ b/regex-automata/src/util/alphabet.rs @@ -699,7 +699,7 @@ impl ByteClassSet { ByteClassSet(ByteSet::empty()) } - /// Indicate the the range of byte given (inclusive) can discriminate a + /// Indicate the range of byte given (inclusive) can discriminate a /// match between it and all other bytes outside of the range. pub(crate) fn set_range(&mut self, start: u8, end: u8) { debug_assert!(start <= end); diff --git a/regex-automata/src/util/captures.rs b/regex-automata/src/util/captures.rs index 05db6a993..93a0a8afa 100644 --- a/regex-automata/src/util/captures.rs +++ b/regex-automata/src/util/captures.rs @@ -1643,7 +1643,7 @@ impl GroupInfo { /// /// This also returns `None` for all inputs if these captures are empty /// (e.g., built from an empty [`GroupInfo`]). 
To check whether captures - /// are are present for a specific pattern, use [`GroupInfo::group_len`]. + /// are present for a specific pattern, use [`GroupInfo::group_len`]. /// /// # Example /// @@ -1695,7 +1695,7 @@ impl GroupInfo { /// /// This also returns `None` for all inputs if these captures are empty /// (e.g., built from an empty [`GroupInfo`]). To check whether captures - /// are are present for a specific pattern, use [`GroupInfo::group_len`]. + /// are present for a specific pattern, use [`GroupInfo::group_len`]. /// /// # Example /// diff --git a/regex-automata/src/util/start.rs b/regex-automata/src/util/start.rs index 27153780e..97988b44b 100644 --- a/regex-automata/src/util/start.rs +++ b/regex-automata/src/util/start.rs @@ -323,7 +323,7 @@ impl core::fmt::Debug for StartByteMap { /// Represents the six possible starting configurations of a DFA search. /// -/// The starting configuration is determined by inspecting the the beginning +/// The starting configuration is determined by inspecting the beginning /// of the haystack (up to 1 byte). Ultimately, this along with a pattern ID /// (if specified) and the type of search (anchored or not) is what selects the /// start state to use in a DFA. diff --git a/regex-cli/cmd/generate/fowler.rs b/regex-cli/cmd/generate/fowler.rs index c287f6f52..404c47721 100644 --- a/regex-cli/cmd/generate/fowler.rs +++ b/regex-cli/cmd/generate/fowler.rs @@ -178,7 +178,7 @@ impl TomlTest { // this trade off (to this extent anyway), so it really wants all // capturing groups... // - // So what we do here is is look for the number of groups in the + // So what we do here is look for the number of groups in the // pattern and then just pad out the capture matches with None // values to make the number of capture matches equal to what we // would expect from the pattern. 
(We actually parse the regex to diff --git a/regex-lite/src/lib.rs b/regex-lite/src/lib.rs index 9b394a480..0aca8221d 100644 --- a/regex-lite/src/lib.rs +++ b/regex-lite/src/lib.rs @@ -257,7 +257,7 @@ let dates: Vec<(&str, &str, &str)> = re.captures_iter(hay).map(|caps| { // regex matches, and in this context, we know we have a match. // // Note that we use `caps.name("y").unwrap().as_str()` instead of - // `&caps["y"]` because the the lifetime of the former is the same as the + // `&caps["y"]` because the lifetime of the former is the same as the // lifetime of `hay` above, but the lifetime of the latter is tied to the // lifetime of `caps` due to how the `Index` trait is defined. let year = caps.name("y").unwrap().as_str(); @@ -821,7 +821,7 @@ it, a longer haystack will take more time to search. * Very large regexes can searches to be quite slow due to increasing the size `m` in the worst case `O(m * n)` bound. This is especially true when they are combined with counted repetitions. While the regex size limit above will -protect you from the most egregious cases, the the default size limit still +protect you from the most egregious cases, the default size limit still permits pretty big regexes that can execute more slowly than one might expect. * While routines like [`Regex::find`] and [`Regex::captures`] guarantee worst case `O(m * n)` search time, routines like [`Regex::find_iter`] and diff --git a/regex-syntax/src/ast/mod.rs b/regex-syntax/src/ast/mod.rs index 6a77ee134..ce79a89ab 100644 --- a/regex-syntax/src/ast/mod.rs +++ b/regex-syntax/src/ast/mod.rs @@ -711,7 +711,7 @@ pub enum LiteralKind { /// The literal is written as an octal escape, e.g., `\141`. Octal, /// The literal is written as a hex code with a fixed number of digits - /// depending on the type of the escape, e.g., `\x61` or or `\u0061` or + /// depending on the type of the escape, e.g., `\x61` or `\u0061` or /// `\U00000061`. 
HexFixed(HexLiteralKind), /// The literal is written as a hex code with a bracketed number of diff --git a/src/regexset/bytes.rs b/src/regexset/bytes.rs index 1220a1466..2f46abc4d 100644 --- a/src/regexset/bytes.rs +++ b/src/regexset/bytes.rs @@ -355,7 +355,7 @@ impl RegexSet { ) -> bool { // This is pretty dumb. We should try to fix this, but the // regex-automata API doesn't provide a way to store matches in an - // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and + // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and // thus not public... But regex-capi currently uses it. We should // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet // is in regex-automata, not regex. So maybe we should just accept a diff --git a/src/regexset/string.rs b/src/regexset/string.rs index 2a3e7b802..5cb9b5608 100644 --- a/src/regexset/string.rs +++ b/src/regexset/string.rs @@ -351,7 +351,7 @@ impl RegexSet { ) -> bool { // This is pretty dumb. We should try to fix this, but the // regex-automata API doesn't provide a way to store matches in an - // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and + // arbitrary &mut [bool]. Thankfully, this API is doc(hidden) and // thus not public... But regex-capi currently uses it. We should // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet // is in regex-automata, not regex. 
So maybe we should just accept a From 4c565c8a636aa0e1e13dd801340ad6545fd256e9 Mon Sep 17 00:00:00 2001 From: careworry <167077904+careworry@users.noreply.github.com> Date: Thu, 18 Apr 2024 20:25:44 +0800 Subject: [PATCH 04/36] doc: fix typos PR #1182 --- regex-automata/src/dfa/dense.rs | 2 +- regex-automata/src/util/determinize/state.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs index 8e0f33c03..ed37d3b84 100644 --- a/regex-automata/src/dfa/dense.rs +++ b/regex-automata/src/dfa/dense.rs @@ -2498,7 +2498,7 @@ impl OwnedDFA { self.tt.set(from, byte, to); } - /// An an empty state (a state where all transitions lead to a dead state) + /// An empty state (a state where all transitions lead to a dead state) /// and return its identifier. The identifier returned is guaranteed to /// not point to any other existing state. /// diff --git a/regex-automata/src/util/determinize/state.rs b/regex-automata/src/util/determinize/state.rs index 8a8561a31..540d5d4d1 100644 --- a/regex-automata/src/util/determinize/state.rs +++ b/regex-automata/src/util/determinize/state.rs @@ -57,7 +57,7 @@ can only be used for adding NFA state IDs and recording some assertions. The expected flow here is to use the above builders to construct a candidate DFA state to check if it already exists. If it does, then there's no need to -freeze it into a `State`. It it doesn't exist, then `StateBuilderNFA::to_state` +freeze it into a `State`. If it doesn't exist, then `StateBuilderNFA::to_state` can be called to freeze the builder into an immutable `State`. In either case, `clear` should be called on the builder to turn it back into a `StateBuilderEmpty` that reuses the underlying memory. 
From b12a2761f91320bc8bf8246f88d2884a90034b5a Mon Sep 17 00:00:00 2001 From: Luca Bruno Date: Mon, 22 Apr 2024 16:21:41 +0200 Subject: [PATCH 05/36] syntax/utf8: avoid a spurious vector reallocation This reworks `Utf8Sequences` logic in order to avoid allocating a 0-sized vector and immediately reallocating it for the initial element. Directly create the populated vector instead. I was looking at the memory usage patterns of [rolldown] through heaptrack, and this spot showed up as a potentially-spurious temporary allocation. The consumer side is [here][consumer side]. I do not have a specific benchmark for this. [rolldown]: https://github.com/rolldown/rolldown [consumer side]: https://github.com/rolldown/rolldown/blob/ce36a195ed4e9ce7c446557cefff4750a2268e01/crates/rolldown/src/utils/extract_hash_pattern.rs#L12 --- regex-syntax/src/utf8.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/regex-syntax/src/utf8.rs b/regex-syntax/src/utf8.rs index e13b55abf..69d749451 100644 --- a/regex-syntax/src/utf8.rs +++ b/regex-syntax/src/utf8.rs @@ -302,9 +302,9 @@ impl Utf8Sequences { /// Create a new iterator over UTF-8 byte ranges for the scalar value range /// given. pub fn new(start: char, end: char) -> Self { - let mut it = Utf8Sequences { range_stack: vec![] }; - it.push(u32::from(start), u32::from(end)); - it + let range = + ScalarRange { start: u32::from(start), end: u32::from(end) }; + Utf8Sequences { range_stack: vec![range] } } /// reset resets the scalar value range. From 9c139f4fa5c64a89075749cd5e57148c8eea8c22 Mon Sep 17 00:00:00 2001 From: Nathan West Date: Mon, 6 May 2024 19:55:10 -0400 Subject: [PATCH 06/36] syntax: simplify `Hir::dot` constructors This also likely avoids a spurious alloc or two, although it assuredly doesn't matter in practice. 
--- regex-syntax/src/hir/mod.rs | 52 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/regex-syntax/src/hir/mod.rs b/regex-syntax/src/hir/mod.rs index ae3ba318e..5db784388 100644 --- a/regex-syntax/src/hir/mod.rs +++ b/regex-syntax/src/hir/mod.rs @@ -658,16 +658,12 @@ impl Hir { #[inline] pub fn dot(dot: Dot) -> Hir { match dot { - Dot::AnyChar => { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) - } - Dot::AnyByte => { - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\xFF')); - Hir::class(Class::Bytes(cls)) - } + Dot::AnyChar => Hir::class(Class::Unicode(ClassUnicode::new([ + ClassUnicodeRange::new('\0', '\u{10FFFF}'), + ]))), + Dot::AnyByte => Hir::class(Class::Bytes(ClassBytes::new([ + ClassBytesRange::new(b'\0', b'\xFF'), + ]))), Dot::AnyCharExcept(ch) => { let mut cls = ClassUnicode::new([ClassUnicodeRange::new(ch, ch)]); @@ -675,17 +671,17 @@ impl Hir { Hir::class(Class::Unicode(cls)) } Dot::AnyCharExceptLF => { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\x09')); - cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) + Hir::class(Class::Unicode(ClassUnicode::new([ + ClassUnicodeRange::new('\0', '\x09'), + ClassUnicodeRange::new('\x0B', '\u{10FFFF}'), + ]))) } Dot::AnyCharExceptCRLF => { - let mut cls = ClassUnicode::empty(); - cls.push(ClassUnicodeRange::new('\0', '\x09')); - cls.push(ClassUnicodeRange::new('\x0B', '\x0C')); - cls.push(ClassUnicodeRange::new('\x0E', '\u{10FFFF}')); - Hir::class(Class::Unicode(cls)) + Hir::class(Class::Unicode(ClassUnicode::new([ + ClassUnicodeRange::new('\0', '\x09'), + ClassUnicodeRange::new('\x0B', '\x0C'), + ClassUnicodeRange::new('\x0E', '\u{10FFFF}'), + ]))) } Dot::AnyByteExcept(byte) => { let mut cls = @@ -694,17 +690,17 @@ impl Hir { Hir::class(Class::Bytes(cls)) } Dot::AnyByteExceptLF => 
{ - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\x09')); - cls.push(ClassBytesRange::new(b'\x0B', b'\xFF')); - Hir::class(Class::Bytes(cls)) + Hir::class(Class::Bytes(ClassBytes::new([ + ClassBytesRange::new(b'\0', b'\x09'), + ClassBytesRange::new(b'\x0B', b'\xFF'), + ]))) } Dot::AnyByteExceptCRLF => { - let mut cls = ClassBytes::empty(); - cls.push(ClassBytesRange::new(b'\0', b'\x09')); - cls.push(ClassBytesRange::new(b'\x0B', b'\x0C')); - cls.push(ClassBytesRange::new(b'\x0E', b'\xFF')); - Hir::class(Class::Bytes(cls)) + Hir::class(Class::Bytes(ClassBytes::new([ + ClassBytesRange::new(b'\0', b'\x09'), + ClassBytesRange::new(b'\x0B', b'\x0C'), + ClassBytesRange::new(b'\x0E', b'\xFF'), + ]))) } } } From 023f1c9ac117cd5ef2e45119b61b94f85d109667 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Wed, 8 May 2024 11:08:11 -0400 Subject: [PATCH 07/36] lite: fix attribute warning about rustfmt I'm not sure why I wrote it like this originally? --- regex-lite/src/utf8.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/regex-lite/src/utf8.rs b/regex-lite/src/utf8.rs index 5f2a6a153..2730b602d 100644 --- a/regex-lite/src/utf8.rs +++ b/regex-lite/src/utf8.rs @@ -87,7 +87,7 @@ fn decode_step(state: &mut usize, cp: &mut u32, b: u8) { // Splits the space of all bytes into equivalence classes, such that // any byte in the same class can never discriminate between whether a // particular sequence is valid UTF-8 or not. 
- #[cfg_attr(rustfmt, rustfmt::skip)] + #[rustfmt::skip] const CLASSES: [u8; 256] = [ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -101,7 +101,7 @@ fn decode_step(state: &mut usize, cp: &mut u32, b: u8) { // A state machine taken from `bstr` which was in turn adapted from: // https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - #[cfg_attr(rustfmt, rustfmt::skip)] + #[rustfmt::skip] const STATES_FORWARD: &'static [u8] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 24, 36, 60, 96, 84, 0, 0, 0, 48, 72, From ddeb85eaa3bdf79d6306cc92a9d8bd89d839b5cd Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Wed, 8 May 2024 11:08:35 -0400 Subject: [PATCH 08/36] cli/deps: update memmap2 to 0.9 --- regex-cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-cli/Cargo.toml b/regex-cli/Cargo.toml index a107c09df..ac69c9ec4 100644 --- a/regex-cli/Cargo.toml +++ b/regex-cli/Cargo.toml @@ -27,7 +27,7 @@ anyhow = "1.0.28" bstr = { version = "1.4.0", default-features = false, features = ["std"] } lexopt = "0.3.0" log = { version = "0.4.17", features = ["std"] } -memmap2 = "0.5.10" +memmap2 = "0.9.4" regex = { version = "1.9.0", path = ".." 
} regex-automata = { version = "0.4.0", path = "../regex-automata", features = ["logging"] } regex-lite = { version = "0.1.0", path = "../regex-lite" } From ab4c8d1f210a2e1011a4408476b2c708e64dcede Mon Sep 17 00:00:00 2001 From: denzenin Date: Mon, 3 Jun 2024 00:30:07 +0100 Subject: [PATCH 09/36] doc: fix duplicate phrasing typo PR #1198 --- regex-lite/src/string.rs | 4 ++-- src/regex/bytes.rs | 4 ++-- src/regex/string.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/regex-lite/src/string.rs b/regex-lite/src/string.rs index 4e4de9068..5fe30ade3 100644 --- a/regex-lite/src/string.rs +++ b/regex-lite/src/string.rs @@ -1717,8 +1717,8 @@ impl<'h> Captures<'h> { /// /// This returns a tuple where the first element corresponds to the full /// substring of the haystack that matched the regex. The second element is - /// an array of substrings, with each corresponding to the to the substring - /// that matched for a particular capture group. + /// an array of substrings, with each corresponding to the substring that + /// matched for a particular capture group. /// /// # Panics /// diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs index 7b7aad574..3de4022a8 100644 --- a/src/regex/bytes.rs +++ b/src/regex/bytes.rs @@ -1711,8 +1711,8 @@ impl<'h> Captures<'h> { /// /// This returns a tuple where the first element corresponds to the full /// substring of the haystack that matched the regex. The second element is - /// an array of substrings, with each corresponding to the to the substring - /// that matched for a particular capture group. + /// an array of substrings, with each corresponding to the substring that + /// matched for a particular capture group. 
/// /// # Panics /// diff --git a/src/regex/string.rs b/src/regex/string.rs index dba94d46e..fab178a68 100644 --- a/src/regex/string.rs +++ b/src/regex/string.rs @@ -1716,8 +1716,8 @@ impl<'h> Captures<'h> { /// /// This returns a tuple where the first element corresponds to the full /// substring of the haystack that matched the regex. The second element is - /// an array of substrings, with each corresponding to the to the substring - /// that matched for a particular capture group. + /// an array of substrings, with each corresponding to the substring that + /// matched for a particular capture group. /// /// # Panics /// From 1f9f9ccd393fc5342aff6db5e3d47915e87a2554 Mon Sep 17 00:00:00 2001 From: Lee ByeongJun Date: Sun, 9 Jun 2024 20:29:34 +0900 Subject: [PATCH 10/36] bytes: escape invalid UTF-8 bytes in debug output for Match PR #1203 --- src/regex/bytes.rs | 102 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 91 insertions(+), 11 deletions(-) diff --git a/src/regex/bytes.rs b/src/regex/bytes.rs index 3de4022a8..39af6e71c 100644 --- a/src/regex/bytes.rs +++ b/src/regex/bytes.rs @@ -1555,18 +1555,13 @@ impl<'h> Match<'h> { impl<'h> core::fmt::Debug for Match<'h> { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + use regex_automata::util::escape::DebugHaystack; + let mut fmt = f.debug_struct("Match"); - fmt.field("start", &self.start).field("end", &self.end); - if let Ok(s) = core::str::from_utf8(self.as_bytes()) { - fmt.field("bytes", &s); - } else { - // FIXME: It would be nice if this could be printed as a string - // with invalid UTF-8 replaced with hex escapes. A alloc would - // probably okay if that makes it easier, but regex-automata does - // (at time of writing) have internal routines that do this. So - // maybe we should expose them. 
- fmt.field("bytes", &self.as_bytes()); - } + fmt.field("start", &self.start) + .field("end", &self.end) + .field("bytes", &DebugHaystack(&self.as_bytes())); + fmt.finish() } } @@ -2620,3 +2615,88 @@ fn no_expansion>(replacement: &T) -> Option> { None => Some(Cow::Borrowed(replacement)), } } + +#[cfg(test)] +mod tests { + use super::*; + use alloc::format; + + #[test] + fn test_match_properties() { + let haystack = b"Hello, world!"; + let m = Match::new(haystack, 7, 12); + + assert_eq!(m.start(), 7); + assert_eq!(m.end(), 12); + assert_eq!(m.is_empty(), false); + assert_eq!(m.len(), 5); + assert_eq!(m.as_bytes(), b"world"); + } + + #[test] + fn test_empty_match() { + let haystack = b""; + let m = Match::new(haystack, 0, 0); + + assert_eq!(m.is_empty(), true); + assert_eq!(m.len(), 0); + } + + #[test] + fn test_debug_output_valid_utf8() { + let haystack = b"Hello, world!"; + let m = Match::new(haystack, 7, 12); + let debug_str = format!("{:?}", m); + + assert_eq!( + debug_str, + r#"Match { start: 7, end: 12, bytes: "world" }"# + ); + } + + #[test] + fn test_debug_output_invalid_utf8() { + let haystack = b"Hello, \xFFworld!"; + let m = Match::new(haystack, 7, 13); + let debug_str = format!("{:?}", m); + + assert_eq!( + debug_str, + r#"Match { start: 7, end: 13, bytes: "\xffworld" }"# + ); + } + + #[test] + fn test_debug_output_various_unicode() { + let haystack = + "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!".as_bytes(); + let m = Match::new(haystack, 0, haystack.len()); + let debug_str = format!("{:?}", m); + + assert_eq!( + debug_str, + r#"Match { start: 0, end: 62, bytes: "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!" }"# + ); + } + + #[test] + fn test_debug_output_ascii_escape() { + let haystack = b"Hello,\tworld!\nThis is a \x1b[31mtest\x1b[0m."; + let m = Match::new(haystack, 0, haystack.len()); + let debug_str = format!("{:?}", m); + + assert_eq!( + debug_str, + r#"Match { start: 0, end: 38, bytes: "Hello,\tworld!\nThis is a \u{1b}[31mtest\u{1b}[0m." 
}"# + ); + } + + #[test] + fn test_debug_output_match_in_middle() { + let haystack = b"The quick brown fox jumps over the lazy dog."; + let m = Match::new(haystack, 16, 19); + let debug_str = format!("{:?}", m); + + assert_eq!(debug_str, r#"Match { start: 16, end: 19, bytes: "fox" }"#); + } +} From 1430b65baeebeb67b3335e26a71f251cce9964ef Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:32:22 -0400 Subject: [PATCH 11/36] changelog: 1.10.4 --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ffd961d7..4fc5b9197 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +1.10.4 (2024-06-09) +=================== +This is a new patch release with some minor fixes. + +Bug fixes: + +* [BUG #1203](https://github.com/rust-lang/regex/pull/1203): +Escape invalid UTF-8 when in the `Debug` impl of `regex::bytes::Match`. + + 1.10.3 (2024-01-21) =================== This is a new patch release that fixes the feature configuration of optional From 4757b5f01a7b9b6c8d89bd63b3d1500f7e0efa9e Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:33:32 -0400 Subject: [PATCH 12/36] regex-syntax-0.8.4 --- regex-syntax/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index 1a25d1ce6..3f213542b 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.8.3" #:version +version = "0.8.4" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" From 68c4f0b7b7f500e0ab3fbdd42c14f837c4ed1be4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:33:44 -0400 Subject: [PATCH 13/36] regex-automata-0.4.7 --- regex-automata/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 40a0ebfb9..97bfacfec 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-automata" -version = "0.4.6" #:version +version = "0.4.7" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] description = "Automata construction and matching using regular expressions." documentation = "https://docs.rs/regex-automata" From 377463bd8200c038e7997a550aa708e0c686d90f Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:40:20 -0400 Subject: [PATCH 14/36] changelog: 1.10.4 and 1.10.5 We had previously release regex 1.10.4 but omitted a changelog entry for it. So this adds it. --- CHANGELOG.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fc5b9197..586191d75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -1.10.4 (2024-06-09) +1.10.5 (2024-06-09) =================== This is a new patch release with some minor fixes. @@ -8,6 +8,17 @@ Bug fixes: Escape invalid UTF-8 when in the `Debug` impl of `regex::bytes::Match`. +1.10.4 (2024-03-22) +=================== +This is a new patch release with some minor fixes. + +* [BUG #1169](https://github.com/rust-lang/regex/issues/1169): +Fixes a bug with compiling a reverse NFA automaton in `regex-automata`. +* [BUG #1178](https://github.com/rust-lang/regex/pull/1178): +Clarifies that when `Cow::Borrowed` is returned from replace APIs, it is +equivalent to the input. 
+ + 1.10.3 (2024-01-21) =================== This is a new patch release that fixes the feature configuration of optional From 0718fc5acbe91b84322ef1bd2c32e189e4668254 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:40:47 -0400 Subject: [PATCH 15/36] 1.10.5 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 68ac658c6..4fe3be20a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.10.4" #:version +version = "1.10.5" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" readme = "README.md" From 1288b83af3d8b441efb264ed6651b0dfb9c2df78 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:41:05 -0400 Subject: [PATCH 16/36] regex-lite-0.1.6 --- regex-lite/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-lite/Cargo.toml b/regex-lite/Cargo.toml index 0ba53485b..396e90af8 100644 --- a/regex-lite/Cargo.toml +++ b/regex-lite/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-lite" -version = "0.1.5" #:version +version = "0.1.6" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-lite" From c2f9ca49a66c51d88b71e5b2248d86789765d2ce Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:44:08 -0400 Subject: [PATCH 17/36] regex-test: bump toml dependency --- regex-test/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-test/Cargo.toml b/regex-test/Cargo.toml index 8adeb5888..729cd1987 100644 --- a/regex-test/Cargo.toml +++ b/regex-test/Cargo.toml @@ -24,4 +24,4 @@ path = "lib.rs" anyhow = "1.0.27" bstr = { version = "1.3.0", default-features = false, features = ["std", "serde"] } serde = { version = "1.0.105", features = ["derive"] } -toml = { version = "0.7.3", default-features = false, features 
= ["parse"] } +toml = { version = "0.8.14", default-features = false, features = ["parse"] } From c4c76a19b80b981b249f2af1c7b66bcfb0ba1fa0 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:44:15 -0400 Subject: [PATCH 18/36] regex-test-0.1.1 --- regex-test/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-test/Cargo.toml b/regex-test/Cargo.toml index 729cd1987..1fadd5bca 100644 --- a/regex-test/Cargo.toml +++ b/regex-test/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-test" -version = "0.1.0" #:version +version = "0.1.1" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] description = """ Infrastructure for testing regexes. From 8856fe36ac7dc37989e6ffb26b5fc57189bae626 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 9 Jun 2024 07:44:44 -0400 Subject: [PATCH 19/36] regex-cli-0.2.1 --- regex-cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-cli/Cargo.toml b/regex-cli/Cargo.toml index ac69c9ec4..543732285 100644 --- a/regex-cli/Cargo.toml +++ b/regex-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-cli" -version = "0.2.0" #:version +version = "0.2.1" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] description = """ A command line tool for debugging, ad hoc benchmarking and generating regular From 2970d2940dfab8cb4bd8f7122ba8ee50d2e37f67 Mon Sep 17 00:00:00 2001 From: Niklas Mischkulnig <4586894+mischnic@users.noreply.github.com> Date: Fri, 2 Aug 2024 17:08:11 +0200 Subject: [PATCH 20/36] unstable: fit `Pattern` trait implementation This is an update from a change made to the trait: https://github.com/rust-lang/rust/pull/127481 There shouldn't be any behavior changes here. 
PR #1219 --- src/pattern.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pattern.rs b/src/pattern.rs index 2db04d8b3..5c4260e95 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -10,10 +10,10 @@ pub struct RegexSearcher<'r, 't> { next_match: Option<(usize, usize)>, } -impl<'r, 't> Pattern<'t> for &'r Regex { - type Searcher = RegexSearcher<'r, 't>; +impl<'r> Pattern for &'r Regex { + type Searcher<'t> = RegexSearcher<'r, 't>; - fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> { + fn into_searcher<'t>(self, haystack: &'t str) -> RegexSearcher<'r, 't> { RegexSearcher { haystack, it: self.find_iter(haystack), From 76f2d30d85eb70336b76d20bb6eb462de6391321 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 2 Aug 2024 11:10:01 -0400 Subject: [PATCH 21/36] changelog: 1.10.6 --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 586191d75..df7977c39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +1.10.6 (2024-08-02) +=================== +This is a new patch release with a fix for the `unstable` crate feature that +enables `std::str::Pattern` trait integration. + +Bug fixes: + +* [BUG #1219](https://github.com/rust-lang/regex/pull/1219): +Fix the `Pattern` trait implementation as a result of nightly API breakage. + + 1.10.5 (2024-06-09) =================== This is a new patch release with some minor fixes. 
From ab88aa5c6824ebe7c4b4c72fe5191681783b3a68 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 2 Aug 2024 11:10:04 -0400 Subject: [PATCH 22/36] 1.10.6 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 4fe3be20a..37696cf46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.10.5" #:version +version = "1.10.6" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" readme = "README.md" From 92efe4ad898e8454cdb7bf21fcf4bf61c498fef2 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 08:21:24 -0400 Subject: [PATCH 23/36] automata: add explicit lifetime annotation rustc seems to warn about this. And I would prefer writing the lifetime here anyway. That it wasn't was probably an oversight. --- regex-automata/src/dfa/onepass.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-automata/src/dfa/onepass.rs b/regex-automata/src/dfa/onepass.rs index e62bbd383..01e45309c 100644 --- a/regex-automata/src/dfa/onepass.rs +++ b/regex-automata/src/dfa/onepass.rs @@ -521,7 +521,7 @@ struct InternalBuilder<'a> { impl<'a> InternalBuilder<'a> { /// Create a new builder with an initial empty DFA. - fn new(config: Config, nfa: &'a NFA) -> InternalBuilder { + fn new(config: Config, nfa: &'a NFA) -> InternalBuilder<'a> { let classes = if !config.get_byte_classes() { // A one-pass DFA will always use the equivalence class map, but // enabling this option is useful for debugging. Namely, this will From d3d3ff7abe4d98972a2d2cde1202ce903ddd4e14 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 08:25:48 -0400 Subject: [PATCH 24/36] cli: remove some dead code It looks like rustc picks this up now but didn't before. 
--- regex-cli/cmd/find/half/mod.rs | 41 ++-------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) diff --git a/regex-cli/cmd/find/half/mod.rs b/regex-cli/cmd/find/half/mod.rs index a7239f234..4dd33bd3d 100644 --- a/regex-cli/cmd/find/half/mod.rs +++ b/regex-cli/cmd/find/half/mod.rs @@ -2,12 +2,12 @@ use std::io::{stdout, Write}; use { anyhow::Context, - lexopt::{Arg, Parser}, + lexopt::Parser, regex_automata::{HalfMatch, Input, MatchError, PatternID}, }; use crate::{ - args::{self, Configurable, Usage}, + args, util::{self, Table}, }; @@ -49,43 +49,6 @@ ENGINES: } } -#[derive(Debug, Default)] -struct Args { - overlapping: bool, -} - -impl Configurable for Args { - fn configure( - &mut self, - _: &mut Parser, - arg: &mut Arg, - ) -> anyhow::Result { - match *arg { - Arg::Long("overlapping") => { - self.overlapping = true; - } - _ => return Ok(false), - } - Ok(true) - } - - fn usage(&self) -> &[Usage] { - const USAGES: &[Usage] = &[Usage::new( - "--overlapping", - "Search for overlapping matches.", - r#" -This flag enables overlapping mode, where the regex engine will attempt to find -all possible matches reported by the underlying matcher. - -Generally this flag is used in conjunction with '--match-kind all'. If the -match semantics are not set to compile all possible matches in the underlying -automaton, then the results will likely be counter-intuitive. -"#, - )]; - USAGES - } -} - fn run_regex(p: &mut lexopt::Parser) -> anyhow::Result<()> { const USAGE: &'static str = "\ Executes a search for half matches using the top-level API regex engine. From b790aa5d2880ad4aeb1820fcb4bd6046f9c50dd6 Mon Sep 17 00:00:00 2001 From: Thayne McCombs Date: Sun, 29 Sep 2024 06:34:29 -0600 Subject: [PATCH 25/36] api: add SetMatches::matched_all This complements `matched_any` with a means to check if a set of patterns all matched the haystack. 
PR #1228 --- src/regexset/bytes.rs | 18 ++++++++++++++++++ src/regexset/string.rs | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/regexset/bytes.rs b/src/regexset/bytes.rs index 2f46abc4d..46f02fbbd 100644 --- a/src/regexset/bytes.rs +++ b/src/regexset/bytes.rs @@ -482,6 +482,24 @@ impl SetMatches { !self.0.is_empty() } + /// Whether all patterns in this set matched. + /// + /// # Example + /// + /// ``` + /// use regex::bytes::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"^foo", + /// r"[a-z]+\.com", + /// ]).unwrap(); + /// let matches = set.matches(b"foo.example.com"); + /// assert!(matches.matched_all()); + /// ``` + pub fn matched_all(&self) -> bool { + self.0.is_full() + } + /// Whether the regex at the given index matched. /// /// The index for a regex is determined by its insertion order upon the diff --git a/src/regexset/string.rs b/src/regexset/string.rs index 5cb9b5608..535a670c8 100644 --- a/src/regexset/string.rs +++ b/src/regexset/string.rs @@ -478,6 +478,24 @@ impl SetMatches { !self.0.is_empty() } + /// Whether all patterns in this set matched. + /// + /// # Example + /// + /// ``` + /// use regex::RegexSet; + /// + /// let set = RegexSet::new(&[ + /// r"^foo", + /// r"[a-z]+\.com", + /// ]).unwrap(); + /// let matches = set.matches("foo.example.com"); + /// assert!(matches.matched_all()); + /// ``` + pub fn matched_all(&self) -> bool { + self.0.is_full() + } + /// Whether the regex at the given index matched. /// /// The index for a regex is determined by its insertion order upon the From 7691e4913d1f0c7d15dc6e1f95cf07474f614bd9 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 08:45:12 -0400 Subject: [PATCH 26/36] cli: include \w, \s and \d in Unicode data table generation This was an oversight omission when porting the old generator shell script to regex-cli. This hasn't been an issue because I don't think we've generated data for a new release of Unicode with this new infrastructure yet. 
This was flagged by unit tests that failed because \d was no longer a subset of \w. --- regex-cli/cmd/generate/unicode.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/regex-cli/cmd/generate/unicode.rs b/regex-cli/cmd/generate/unicode.rs index 2c60bb872..05991a671 100644 --- a/regex-cli/cmd/generate/unicode.rs +++ b/regex-cli/cmd/generate/unicode.rs @@ -84,6 +84,23 @@ USAGE: gen(d.join("sentence_break.rs"), &["sentence-break", &ucd, "--chars"])?; gen(d.join("word_break.rs"), &["word-break", &ucd, "--chars"])?; + // These generate the \w, \d and \s Unicode-aware character classes for + // regex-syntax. \d and \s are technically part of the general category + // and boolean properties generated above. However, these are generated + // separately to make it possible to enable or disable them via Cargo + // features independently of whether all boolean properties or general + // categories are enabled or disabled. The crate ensures that only one copy + // is compiled. + gen(d.join("perl_word.rs"), &["perl-word", &ucd, "--chars"])?; + gen( + d.join("perl_decimal.rs"), + &["general-category", &ucd, "--chars", "--include", "decimalnumber"], + )?; + gen( + d.join("perl_space.rs"), + &["property-bool", &ucd, "--chars", "--include", "whitespace"], + )?; + // Data tables for regex-automata. 
let d = out .join("regex-automata") From 9239e7e2feb996e8d764dbda23614663a1882ece Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 08:29:29 -0400 Subject: [PATCH 27/36] data: update to UCD 16 --- regex-automata/src/nfa/thompson/compiler.rs | 8 +- .../src/util/unicode_data/perl_word.rs | 65 +- regex-syntax/src/hir/translate.rs | 21 + regex-syntax/src/unicode.rs | 2 + regex-syntax/src/unicode_tables/age.rs | 77 +- .../src/unicode_tables/case_folding_simple.rs | 66 +- .../src/unicode_tables/general_category.rs | 427 +++++-- .../unicode_tables/grapheme_cluster_break.rs | 104 +- .../src/unicode_tables/perl_decimal.rs | 13 +- regex-syntax/src/unicode_tables/perl_space.rs | 6 +- regex-syntax/src/unicode_tables/perl_word.rs | 65 +- .../src/unicode_tables/property_bool.rs | 1122 ++++++++++++++--- .../src/unicode_tables/property_names.rs | 23 +- .../src/unicode_tables/property_values.rs | 38 +- regex-syntax/src/unicode_tables/script.rs | 105 +- .../src/unicode_tables/script_extension.rs | 425 +++++-- .../src/unicode_tables/sentence_break.rs | 113 +- regex-syntax/src/unicode_tables/word_break.rs | 94 +- 18 files changed, 2149 insertions(+), 625 deletions(-) diff --git a/regex-automata/src/nfa/thompson/compiler.rs b/regex-automata/src/nfa/thompson/compiler.rs index 668bca87c..ced17719d 100644 --- a/regex-automata/src/nfa/thompson/compiler.rs +++ b/regex-automata/src/nfa/thompson/compiler.rs @@ -230,15 +230,15 @@ impl Config { /// # if cfg!(miri) { return Ok(()); } // miri takes too long /// use regex_automata::nfa::thompson::NFA; /// - /// // 300KB isn't enough! + /// // 400KB isn't enough! /// NFA::compiler() - /// .configure(NFA::config().nfa_size_limit(Some(300_000))) + /// .configure(NFA::config().nfa_size_limit(Some(400_000))) /// .build(r"\w{20}") /// .unwrap_err(); /// - /// // ... but 400KB probably is. + /// // ... but 500KB probably is. 
/// let nfa = NFA::compiler() - /// .configure(NFA::config().nfa_size_limit(Some(400_000))) + /// .configure(NFA::config().nfa_size_limit(Some(500_000))) /// .build(r"\w{20}")?; /// /// assert_eq!(nfa.pattern_len(), 1); diff --git a/regex-automata/src/util/unicode_data/perl_word.rs b/regex-automata/src/util/unicode_data/perl_word.rs index 74d62656f..21c8c0f9c 100644 --- a/regex-automata/src/util/unicode_data/perl_word.rs +++ b/regex-automata/src/util/unicode_data/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word tmp/ucd-15.0.0/ --chars +// ucd-generate perl-word ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.15 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + 
('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -618,7 +636,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -626,7 +644,9 @@ pub const PERL_WORD: &'static [(char, 
char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/hir/translate.rs b/regex-syntax/src/hir/translate.rs index 3749ce307..e8e5a8812 100644 --- a/regex-syntax/src/hir/translate.rs +++ b/regex-syntax/src/hir/translate.rs @@ -3143,10 +3143,31 @@ mod tests { #[cfg(feature = "unicode-script")] assert_eq!( t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), + // Class({ + // '·'..='·', + // '\u{300}'..='\u{301}', + // '\u{304}'..='\u{304}', + // '\u{306}'..='\u{306}', + // '\u{308}'..='\u{308}', + // '\u{313}'..='\u{313}', + // '\u{342}'..='\u{342}', + // '\u{345}'..='\u{345}', + // 'ʹ'..='ʹ', + // '\u{1dc0}'..='\u{1dc1}', + // '⁝'..='⁝', + // }) hir_uclass(&[ + ('·', '·'), + ('\u{0300}', '\u{0301}'), + ('\u{0304}', '\u{0304}'), + ('\u{0306}', '\u{0306}'), + ('\u{0308}', '\u{0308}'), + ('\u{0313}', '\u{0313}'), ('\u{0342}', '\u{0342}'), 
('\u{0345}', '\u{0345}'), + ('ʹ', 'ʹ'), ('\u{1DC0}', '\u{1DC1}'), + ('⁝', '⁝'), ]) ); assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')])); diff --git a/regex-syntax/src/unicode.rs b/regex-syntax/src/unicode.rs index 393a4c018..07f78194b 100644 --- a/regex-syntax/src/unicode.rs +++ b/regex-syntax/src/unicode.rs @@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result, Error> { ("V13_0", age::V13_0), ("V14_0", age::V14_0), ("V15_0", age::V15_0), + ("V15_1", age::V15_1), + ("V16_0", age::V16_0), ]; assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync"); diff --git a/regex-syntax/src/unicode_tables/age.rs b/regex-syntax/src/unicode_tables/age.rs index 71f4861e0..466510c9e 100644 --- a/regex-syntax/src/unicode_tables/age.rs +++ b/regex-syntax/src/unicode_tables/age.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate age ucd-15.0.0 --chars +// ucd-generate age ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V10_0", V10_0), @@ -14,6 +14,8 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("V13_0", V13_0), ("V14_0", V14_0), ("V15_0", V15_0), + ("V15_1", V15_1), + ("V16_0", V16_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1), @@ -238,7 +240,7 @@ pub const V13_0: &'static [(char, char)] = &[ ('𑥐', '𑥙'), ('𑾰', '𑾰'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𘫳', '𘳕'), ('𘴀', '𘴈'), ('🄍', '🄏'), @@ -277,7 +279,7 @@ pub const V14_0: &'static [(char, char)] = &[ ('ౝ', 'ౝ'), ('ೝ', 'ೝ'), ('ᜍ', 'ᜍ'), - ('᜕', '᜕'), + ('\u{1715}', '\u{1715}'), ('ᜟ', 'ᜟ'), ('\u{180f}', '\u{180f}'), ('\u{1ac1}', '\u{1ace}'), @@ -382,6 +384,59 @@ pub const V15_0: &'static [(char, char)] = &[ ('𱍐', '𲎯'), ]; +pub const V15_1: &'static [(char, char)] = + &[('⿼', '⿿'), ('㇯', '㇯'), ('𮯰', '𮹝')]; + +pub const V16_0: &'static [(char, char)] = &[ + ('\u{897}', '\u{897}'), + ('᭎', '᭏'), + ('᭿', '᭿'), + ('Ᲊ', 'ᲊ'), + ('␧', '␩'), + ('㇤', '㇥'), + ('Ɤ', 'ꟍ'), + ('Ꟛ', 'Ƛ'), + ('𐗀', '𐗳'), + ('𐵀', '𐵥'), + ('\u{10d69}', '𐶅'), + ('𐶎', '𐶏'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10efc}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), + ('𑛐', '𑛣'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), + ('\u{11f5a}', '\u{11f5a}'), + ('𓑠', '𔏺'), + ('𖄀', '𖄹'), + ('𖵀', '𖵹'), + ('𘳿', '𘳿'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), + ('𞗐', '𞗺'), + ('𞗿', '𞗿'), + ('🢲', '🢻'), + ('🣀', '🣁'), + ('🪉', '🪉'), + ('🪏', '🪏'), + ('🪾', '🪾'), + ('🫆', '🫆'), + ('🫜', '🫜'), + ('🫟', '🫟'), + ('🫩', '🫩'), + ('🯋', '🯯'), +]; + pub const V1_1: &'static [(char, char)] = &[ ('\0', 'ǵ'), ('Ǻ', 'ȗ'), @@ -530,8 +585,8 @@ pub const V1_1: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), 
('\u{cd5}', '\u{cd6}'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), @@ -1174,7 +1229,7 @@ pub const V5_1: &'static [(char, char)] = &[ ('ၚ', '႙'), ('႞', '႟'), ('ᢪ', 'ᢪ'), - ('\u{1b80}', '᮪'), + ('\u{1b80}', '\u{1baa}'), ('ᮮ', '᮹'), ('ᰀ', '\u{1c37}'), ('᰻', '᱉'), @@ -1212,7 +1267,7 @@ pub const V5_1: &'static [(char, char)] = &[ ('ꟻ', 'ꟿ'), ('ꢀ', '\u{a8c4}'), ('꣎', '꣙'), - ('꤀', '꥓'), + ('꤀', '\u{a953}'), ('꥟', '꥟'), ('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), @@ -1340,7 +1395,7 @@ pub const V6_0: &'static [(char, char)] = &[ ('ྌ', '\u{f8f}'), ('࿙', '࿚'), ('\u{135d}', '\u{135e}'), - ('ᯀ', '᯳'), + ('ᯀ', '\u{1bf3}'), ('᯼', '᯿'), ('\u{1dfc}', '\u{1dfc}'), ('ₕ', 'ₜ'), @@ -1606,7 +1661,7 @@ pub const V7_0: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133c}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), diff --git a/regex-syntax/src/unicode_tables/case_folding_simple.rs b/regex-syntax/src/unicode_tables/case_folding_simple.rs index 23f9364ce..07f6ff2f5 100644 --- a/regex-syntax/src/unicode_tables/case_folding_simple.rs +++ b/regex-syntax/src/unicode_tables/case_folding_simple.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate case-folding-simple ucd-15.0.0 --chars --all-pairs +// ucd-generate case-folding-simple ucd-16.0.0 --chars --all-pairs // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('A', &['a']), @@ -272,6 +272,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('Ƙ', &['ƙ']), ('ƙ', &['Ƙ']), ('ƚ', &['Ƚ']), + ('ƛ', &['Ƛ']), ('Ɯ', &['ɯ']), ('Ɲ', &['ɲ']), ('ƞ', &['Ƞ']), @@ -448,6 +449,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ɠ', &['Ɠ']), ('ɡ', &['Ɡ']), ('ɣ', &['Ɣ']), + ('ɤ', &['Ɤ']), ('ɥ', &['Ɥ']), ('ɦ', &['Ɦ']), ('ɨ', &['Ɨ']), @@ -490,6 +492,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('Ό', &['ό']), ('Ύ', &['ύ']), ('Ώ', &['ώ']), + ('ΐ', &['ΐ']), ('Α', &['α']), ('Β', &['β', 'ϐ']), ('Γ', &['γ']), @@ -520,6 +523,7 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('έ', &['Έ']), ('ή', &['Ή']), ('ί', &['Ί']), + ('ΰ', &['ΰ']), ('α', &['Α']), ('β', &['Β', 'ϐ']), ('γ', &['Γ']), @@ -1153,6 +1157,8 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ᲆ', &['Ъ', 'ъ']), ('ᲇ', &['Ѣ', 'ѣ']), ('ᲈ', &['Ꙋ', 'ꙋ']), + ('Ᲊ', &['ᲊ']), + ('ᲊ', &['Ᲊ']), ('Ა', &['ა']), ('Ბ', &['ბ']), ('Გ', &['გ']), @@ -1625,12 +1631,14 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ῌ', &['ῃ']), ('ῐ', &['Ῐ']), ('ῑ', &['Ῑ']), + ('ΐ', &['ΐ']), ('Ῐ', &['ῐ']), ('Ῑ', &['ῑ']), ('Ὶ', &['ὶ']), ('Ί', &['ί']), ('ῠ', &['Ῠ']), ('ῡ', &['Ῡ']), + ('ΰ', &['ΰ']), ('ῥ', &['Ῥ']), ('Ῠ', &['ῠ']), ('Ῡ', &['ῡ']), @@ -2224,12 +2232,18 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ꟈ', &['Ꟈ']), ('Ꟊ', &['ꟊ']), ('ꟊ', &['Ꟊ']), + ('Ɤ', &['ɤ']), + ('Ꟍ', &['ꟍ']), + ('ꟍ', &['Ꟍ']), ('Ꟑ', &['ꟑ']), ('ꟑ', &['Ꟑ']), ('Ꟗ', &['ꟗ']), ('ꟗ', &['Ꟗ']), ('Ꟙ', &['ꟙ']), ('ꟙ', &['Ꟙ']), + ('Ꟛ', &['ꟛ']), + ('ꟛ', &['Ꟛ']), + ('Ƛ', &['ƛ']), ('Ꟶ', &['ꟶ']), ('ꟶ', &['Ꟶ']), ('ꭓ', &['Ꭓ']), @@ -2313,6 +2327,8 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('ꮽ', &['Ꮽ']), ('ꮾ', &['Ꮾ']), ('ꮿ', &['Ꮿ']), + ('ſt', &['st']), + ('st', &['ſt']), ('A', &['a']), 
('B', &['b']), ('C', &['c']), @@ -2689,6 +2705,50 @@ pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[ ('𐳰', &['𐲰']), ('𐳱', &['𐲱']), ('𐳲', &['𐲲']), + ('𐵐', &['𐵰']), + ('𐵑', &['𐵱']), + ('𐵒', &['𐵲']), + ('𐵓', &['𐵳']), + ('𐵔', &['𐵴']), + ('𐵕', &['𐵵']), + ('𐵖', &['𐵶']), + ('𐵗', &['𐵷']), + ('𐵘', &['𐵸']), + ('𐵙', &['𐵹']), + ('𐵚', &['𐵺']), + ('𐵛', &['𐵻']), + ('𐵜', &['𐵼']), + ('𐵝', &['𐵽']), + ('𐵞', &['𐵾']), + ('𐵟', &['𐵿']), + ('𐵠', &['𐶀']), + ('𐵡', &['𐶁']), + ('𐵢', &['𐶂']), + ('𐵣', &['𐶃']), + ('𐵤', &['𐶄']), + ('𐵥', &['𐶅']), + ('𐵰', &['𐵐']), + ('𐵱', &['𐵑']), + ('𐵲', &['𐵒']), + ('𐵳', &['𐵓']), + ('𐵴', &['𐵔']), + ('𐵵', &['𐵕']), + ('𐵶', &['𐵖']), + ('𐵷', &['𐵗']), + ('𐵸', &['𐵘']), + ('𐵹', &['𐵙']), + ('𐵺', &['𐵚']), + ('𐵻', &['𐵛']), + ('𐵼', &['𐵜']), + ('𐵽', &['𐵝']), + ('𐵾', &['𐵞']), + ('𐵿', &['𐵟']), + ('𐶀', &['𐵠']), + ('𐶁', &['𐵡']), + ('𐶂', &['𐵢']), + ('𐶃', &['𐵣']), + ('𐶄', &['𐵤']), + ('𐶅', &['𐵥']), ('𑢠', &['𑣀']), ('𑢡', &['𑣁']), ('𑢢', &['𑣂']), diff --git a/regex-syntax/src/unicode_tables/general_category.rs b/regex-syntax/src/unicode_tables/general_category.rs index 8fc928912..6ff6b5384 100644 --- a/regex-syntax/src/unicode_tables/general_category.rs +++ b/regex-syntax/src/unicode_tables/general_category.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-15.0.0 --chars --exclude surrogate +// ucd-generate general-category ucd-16.0.0 --chars --exclude surrogate // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Cased_Letter", CASED_LETTER), @@ -76,7 +76,7 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᴀ', 'ᴫ'), @@ -128,10 +128,10 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('Ꜣ', 'ꝯ'), ('ꝱ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟊ'), + ('Ꞑ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('Ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -154,6 +154,8 @@ pub const CASED_LETTER: &'static [(char, char)] = &[ ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𝐀', '𝑔'), @@ -326,6 +328,7 @@ pub const DASH_PUNCTUATION: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ]; @@ -369,6 +372,7 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), @@ -378,20 +382,26 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; @@ -681,7 +691,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -765,10 +775,10 @@ pub const LETTER: &'static [(char, char)] = &[ ('ꚠ', 'ꛥ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -865,6 +875,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -901,8 +912,11 @@ pub const LETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', 
'𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -941,6 +955,13 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -975,6 +996,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -997,7 +1019,9 @@ pub const LETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -1006,6 +1030,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -1014,7 +1039,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -1067,6 +1092,8 @@ pub const LETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -1112,6 +1139,7 @@ pub const LETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -1410,6 +1438,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᴫ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶚ'), @@ -1740,11 +1769,13 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -1760,6 +1791,7 @@ pub const LOWERCASE_LETTER: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -1821,7 +1853,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{825}', 
'\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -1873,8 +1905,8 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -1917,8 +1949,8 @@ pub const MARK: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -1935,11 +1967,11 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -1967,9 +1999,9 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -1997,8 +2029,9 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), @@ -2013,7 +2046,7 @@ pub const MARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', 
'\u{111cf}'), ('𑈬', '\u{11237}'), @@ -2024,11 +2057,18 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -2070,20 +2110,22 @@ pub const MARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -2104,6 +2146,7 @@ pub const MARK: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2163,6 +2206,7 @@ pub const MATH_SYMBOL: &'static [(char, char)] = &[ ('~', '~'), ('¬', '¬'), ('←', '↓'), + ('𐶎', '𐶏'), ('𝛁', '𝛁'), ('𝛛', '𝛛'), ('𝛻', '𝛻'), @@ -2237,7 +2281,11 @@ pub const MODIFIER_LETTER: &'static [(char, char)] = &[ ('𐞀', '𐞅'), ('𐞇', '𐞰'), ('𐞲', '𐞺'), + ('𐵎', '𐵎'), + ('𐵯', '𐵯'), ('𖭀', '𖭃'), + ('𖵀', '𖵂'), + ('𖵫', '𖵬'), ('𖾓', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), @@ -2309,7 +2357,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), 
('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -2507,8 +2555,9 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -2539,6 +2588,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11340}', '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113ce}', '\u{113ce}'), + ('\u{113d0}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -2558,7 +2612,8 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), @@ -2597,8 +2652,11 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{11f36}', '\u{11f3a}'), ('\u{11f40}', '\u{11f40}'), ('\u{11f42}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), @@ -2628,6 +2686,7 @@ pub const NONSPACING_MARK: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0100}', '\u{e01ef}'), @@ -2727,6 +2786,7 @@ pub const NUMBER: &'static [(char, 
char)] = &[ ('𐮩', '𐮯'), ('𐳺', '𐳿'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𐹠', '𐹾'), ('𐼝', '𐼦'), ('𐽑', '𐽔'), @@ -2741,20 +2801,25 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜻'), ('𑣠', '𑣲'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱬'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), ('𑿀', '𑿔'), ('𒐀', '𒑮'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), + ('𖵰', '𖵹'), ('𖺀', '𖺖'), + ('𜳰', '𜳹'), ('𝋀', '𝋓'), ('𝋠', '𝋳'), ('𝍠', '𝍸'), @@ -2762,6 +2827,7 @@ pub const NUMBER: &'static [(char, char)] = &[ ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞣇', '𞣏'), ('𞥐', '𞥙'), ('𞱱', '𞲫'), @@ -2882,7 +2948,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{86f}'), - ('\u{88f}', '\u{897}'), + ('\u{88f}', '\u{896}'), ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), @@ -3076,12 +3142,11 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4f}'), - ('\u{1b7f}', '\u{1b7f}'), + ('\u{1b4d}', '\u{1b4d}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c89}', '\u{1c8f}'), + ('\u{1c8b}', '\u{1c8f}'), ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), @@ -3110,7 +3175,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), - ('\u{2427}', '\u{243f}'), + ('\u{242a}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), @@ -3133,22 +3198,21 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), - ('\u{2ffc}', '\u{2fff}'), ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}', '\u{3104}'), ('\u{3130}', '\u{3130}'), ('\u{318f}', '\u{318f}'), - ('\u{31e4}', '\u{31ef}'), + ('\u{31e6}', '\u{31ee}'), ('\u{321f}', 
'\u{321f}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7ce}', '\u{a7cf}'), ('\u{a7d2}', '\u{a7d2}'), ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7da}', '\u{a7f1}'), + ('\u{a7dd}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -3237,7 +3301,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{105a2}', '\u{105a2}'), ('\u{105b2}', '\u{105b2}'), ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105ff}'), + ('\u{105bd}', '\u{105bf}'), + ('\u{105f4}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{1077f}'), @@ -3280,11 +3345,15 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'), ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10e5f}'), + ('\u{10d3a}', '\u{10d3f}'), + ('\u{10d66}', '\u{10d68}'), + ('\u{10d86}', '\u{10d8d}'), + ('\u{10d90}', '\u{10e5f}'), ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10efc}'), + ('\u{10eb2}', '\u{10ec1}'), + ('\u{10ec5}', '\u{10efb}'), ('\u{10f28}', '\u{10f2f}'), ('\u{10f5a}', '\u{10f6f}'), ('\u{10f8a}', '\u{10faf}'), @@ -3324,7 +3393,18 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11358}', '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{113ff}'), + ('\u{11375}', '\u{1137f}'), + ('\u{1138a}', '\u{1138a}'), + ('\u{1138c}', '\u{1138d}'), + ('\u{1138f}', '\u{1138f}'), + ('\u{113b6}', '\u{113b6}'), + ('\u{113c1}', '\u{113c1}'), + ('\u{113c3}', '\u{113c4}'), + ('\u{113c6}', '\u{113c6}'), + ('\u{113cb}', '\u{113cb}'), + ('\u{113d6}', '\u{113d6}'), + ('\u{113d9}', '\u{113e0}'), + ('\u{113e3}', '\u{113ff}'), ('\u{1145c}', '\u{1145c}'), ('\u{11462}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'), @@ -3335,7 +3415,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), 
('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116ff}'), + ('\u{116ca}', '\u{116cf}'), + ('\u{116e4}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11747}', '\u{117ff}'), @@ -3355,7 +3436,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11a48}', '\u{11a4f}'), ('\u{11aa3}', '\u{11aaf}'), ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bff}'), + ('\u{11b0a}', '\u{11bbf}'), + ('\u{11be2}', '\u{11bef}'), + ('\u{11bfa}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -3379,7 +3462,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{11ef9}', '\u{11eff}'), ('\u{11f11}', '\u{11f11}'), ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5a}', '\u{11faf}'), + ('\u{11f5b}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), @@ -3388,8 +3471,10 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{12544}', '\u{12f8f}'), ('\u{12ff3}', '\u{12fff}'), ('\u{13430}', '\u{1343f}'), - ('\u{13456}', '\u{143ff}'), - ('\u{14647}', '\u{167ff}'), + ('\u{13456}', '\u{1345f}'), + ('\u{143fb}', '\u{143ff}'), + ('\u{14647}', '\u{160ff}'), + ('\u{1613a}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), @@ -3401,7 +3486,8 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16e3f}'), + ('\u{16b90}', '\u{16d3f}'), + ('\u{16d7a}', '\u{16e3f}'), ('\u{16e9b}', '\u{16eff}'), ('\u{16f4b}', '\u{16f4e}'), ('\u{16f88}', '\u{16f8e}'), @@ -3409,7 +3495,7 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{16fe5}', '\u{16fef}'), ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cff}'), + ('\u{18cd6}', '\u{18cfe}'), ('\u{18d09}', '\u{1afef}'), ('\u{1aff4}', '\u{1aff4}'), ('\u{1affc}', '\u{1affc}'), @@ -3424,7 +3510,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1bc7d}', '\u{1bc7f}'), 
('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca0}', '\u{1ceff}'), + ('\u{1bca0}', '\u{1cbff}'), + ('\u{1ccfa}', '\u{1ccff}'), + ('\u{1ceb4}', '\u{1ceff}'), ('\u{1cf2e}', '\u{1cf2f}'), ('\u{1cf47}', '\u{1cf4f}'), ('\u{1cfc4}', '\u{1cfff}'), @@ -3476,7 +3564,9 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e4fa}', '\u{1e5cf}'), + ('\u{1e5fb}', '\u{1e5fe}'), + ('\u{1e600}', '\u{1e7df}'), ('\u{1e7e7}', '\u{1e7e7}'), ('\u{1e7ec}', '\u{1e7ec}'), ('\u{1e7ef}', '\u{1e7ef}'), @@ -3546,24 +3636,24 @@ pub const OTHER: &'static [(char, char)] = &[ ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8b2}', '\u{1f8ff}'), + ('\u{1f8bc}', '\u{1f8bf}'), + ('\u{1f8c2}', '\u{1f8ff}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), ('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa89}', '\u{1fa8f}'), - ('\u{1fabe}', '\u{1fabe}'), - ('\u{1fac6}', '\u{1facd}'), - ('\u{1fadc}', '\u{1fadf}'), - ('\u{1fae9}', '\u{1faef}'), + ('\u{1fa8a}', '\u{1fa8e}'), + ('\u{1fac7}', '\u{1facd}'), + ('\u{1fadd}', '\u{1fade}'), + ('\u{1faea}', '\u{1faef}'), ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), ('\u{2a6e0}', '\u{2a6ff}'), ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2f7ff}'), + ('\u{2ebe1}', '\u{2ebef}'), + ('\u{2ee5e}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), ('\u{3134b}', '\u{3134f}'), ('\u{323b0}', '\u{e00ff}'), @@ -3880,6 +3970,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -3911,8 +4002,11 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), + ('𐵊', '𐵍'), + ('𐵏', '𐵏'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -3951,6 
+4045,13 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -3984,6 +4085,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -4006,7 +4108,9 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -4014,11 +4118,12 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𖬀', '𖬯'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵃', '𖵪'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𛀀', '𛄢'), ('𛄲', '𛄲'), ('𛅐', '𛅒'), @@ -4035,6 +4140,8 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓪'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -4078,6 +4185,7 @@ pub const OTHER_LETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -4218,8 +4326,9 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), + ('᭎', '᭏'), ('᭚', '᭠'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4314,6 +4423,8 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), + ('𑏗', '𑏘'), ('𑑋', '𑑏'), ('𑑚', '𑑛'), ('𑑝', '𑑝'), @@ -4330,6 +4441,7 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑬀', '𑬉'), + ('𑯡', '𑯡'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), @@ -4341,10 +4453,12 @@ pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), + ('𖵭', '𖵯'), ('𖺗', '𖺚'), ('𖿢', '𖿢'), ('𛲟', '𛲟'), ('𝪇', '𝪋'), + ('𞗿', '𞗿'), ('𞥞', '𞥟'), ]; @@ -4415,7 +4529,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('⌫', 
'⍻'), ('⍽', '⎚'), ('⎴', '⏛'), - ('⏢', '␦'), + ('⏢', '␩'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '▶'), @@ -4435,7 +4549,7 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), + ('⿰', '⿿'), ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), @@ -4443,7 +4557,8 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('〾', '〿'), ('㆐', '㆑'), ('㆖', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'), @@ -4477,6 +4592,8 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜰀', '𜳯'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), @@ -4521,18 +4638,18 @@ pub const OTHER_SYMBOL: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), + ('🮔', '🯯'), ]; pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = @@ -4610,8 +4727,9 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), + ('᭎', '᭏'), ('᭚', '᭠'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), @@ -4690,6 +4808,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𐫰', '𐫶'), ('𐬹', '𐬿'), ('𐮙', '𐮜'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ('𐽕', '𐽙'), ('𐾆', '𐾉'), @@ -4704,6 +4823,8 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), + ('𑏗', '𑏘'), ('𑑋', '𑑏'), ('𑑚', '𑑛'), ('𑑝', '𑑝'), @@ -4720,6 +4841,7 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑬀', '𑬉'), + ('𑯡', '𑯡'), ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𑻷', '𑻸'), @@ -4731,10 +4853,12 @@ pub const PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), + ('𖵭', '𖵯'), ('𖺗', '𖺚'), ('𖿢', '𖿢'), ('𛲟', '𛲟'), ('𝪇', '𝪋'), + ('𞗿', '𞗿'), ('𞥞', '𞥟'), ]; @@ -4791,9 +4915,9 @@ pub const 
SPACING_MARK: &'static [(char, char)] = &[ ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), - ('ೀ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), + ('\u{cc0}', 'ೄ'), + ('\u{cc7}', '\u{cc8}'), + ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('ೳ', 'ೳ'), ('ം', 'ഃ'), @@ -4818,8 +4942,8 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ႇ', 'ႌ'), ('ႏ', 'ႏ'), ('ႚ', 'ႜ'), - ('᜕', '᜕'), - ('᜴', '᜴'), + ('\u{1715}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -4835,17 +4959,17 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), ('\u{1b35}', '\u{1b35}'), - ('ᬻ', 'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', '᭄'), + ('\u{1b3b}', '\u{1b3b}'), + ('\u{1b3d}', 'ᭁ'), + ('\u{1b43}', '\u{1b44}'), ('ᮂ', 'ᮂ'), ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), + ('\u{1baa}', '\u{1baa}'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), + ('\u{1bf2}', '\u{1bf3}'), ('ᰤ', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᳡', '᳡'), @@ -4855,11 +4979,11 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), - ('ꥒ', '꥓'), + ('ꥒ', '\u{a953}'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧀'), + ('ꦾ', '\u{a9c0}'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩍ', 'ꩍ'), @@ -4881,19 +5005,25 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('𑆂', '𑆂'), ('𑆳', '𑆵'), - ('𑆿', '𑇀'), + ('𑆿', '\u{111c0}'), ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), + ('\u{11235}', '\u{11235}'), ('𑋠', '𑋢'), ('𑌂', '𑌃'), ('\u{1133e}', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), + ('\u{113b8}', '𑎺'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), + ('\u{113cf}', '\u{113cf}'), ('𑐵', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -4909,14 +5039,15 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑘾', '𑘾'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), + ('\u{116b6}', '\u{116b6}'), + ('𑜞', '𑜞'), ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), ('\u{11930}', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), + 
('\u{1193d}', '\u{1193d}'), ('𑥀', '𑥀'), ('𑥂', '𑥂'), ('𑧑', '𑧓'), @@ -4937,11 +5068,12 @@ pub const SPACING_MARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), + ('\u{11f41}', '\u{11f41}'), + ('𖄪', '𖄬'), ('𖽑', '𖾇'), - ('𖿰', '𖿱'), - ('\u{1d165}', '𝅦'), - ('𝅭', '\u{1d172}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1d165}', '\u{1d166}'), + ('\u{1d16d}', '\u{1d172}'), ]; pub const SYMBOL: &'static [(char, char)] = &[ @@ -5035,7 +5167,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('↊', '↋'), ('←', '⌇'), ('⌌', '⌨'), - ('⌫', '␦'), + ('⌫', '␩'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '❧'), @@ -5052,7 +5184,7 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), + ('⿰', '⿿'), ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), @@ -5061,7 +5193,8 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('゛', '゜'), ('㆐', '㆑'), ('㆖', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'), @@ -5104,11 +5237,14 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('𐇐', '𐇼'), ('𐡷', '𐡸'), ('𐫈', '𐫈'), + ('𐶎', '𐶏'), ('𑜿', '𑜿'), ('𑿕', '𑿱'), ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'), + ('𜰀', '𜳯'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), @@ -5165,18 +5301,18 @@ pub const SYMBOL: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), + ('🮔', '🯯'), ]; pub const TITLECASE_LETTER: &'static [(char, char)] = &[ @@ -5215,7 +5351,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{86f}'), ('\u{88f}', '\u{88f}'), - ('\u{892}', '\u{897}'), + ('\u{892}', '\u{896}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), @@ -5407,12 +5543,11 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1a9a}', 
'\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'), ('\u{1acf}', '\u{1aff}'), - ('\u{1b4d}', '\u{1b4f}'), - ('\u{1b7f}', '\u{1b7f}'), + ('\u{1b4d}', '\u{1b4d}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), - ('\u{1c89}', '\u{1c8f}'), + ('\u{1c8b}', '\u{1c8f}'), ('\u{1cbb}', '\u{1cbc}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfb}', '\u{1cff}'), @@ -5439,7 +5574,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{20c1}', '\u{20cf}'), ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), - ('\u{2427}', '\u{243f}'), + ('\u{242a}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b96}'), @@ -5462,22 +5597,21 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), - ('\u{2ffc}', '\u{2fff}'), ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}', '\u{3104}'), ('\u{3130}', '\u{3130}'), ('\u{318f}', '\u{318f}'), - ('\u{31e4}', '\u{31ef}'), + ('\u{31e6}', '\u{31ee}'), ('\u{321f}', '\u{321f}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'), - ('\u{a7cb}', '\u{a7cf}'), + ('\u{a7ce}', '\u{a7cf}'), ('\u{a7d2}', '\u{a7d2}'), ('\u{a7d4}', '\u{a7d4}'), - ('\u{a7da}', '\u{a7f1}'), + ('\u{a7dd}', '\u{a7f1}'), ('\u{a82d}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'), @@ -5567,7 +5701,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{105a2}', '\u{105a2}'), ('\u{105b2}', '\u{105b2}'), ('\u{105ba}', '\u{105ba}'), - ('\u{105bd}', '\u{105ff}'), + ('\u{105bd}', '\u{105bf}'), + ('\u{105f4}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{1077f}'), @@ -5610,11 +5745,15 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'), ('\u{10d28}', '\u{10d2f}'), - ('\u{10d3a}', '\u{10e5f}'), + ('\u{10d3a}', '\u{10d3f}'), + ('\u{10d66}', '\u{10d68}'), + ('\u{10d86}', '\u{10d8d}'), + 
('\u{10d90}', '\u{10e5f}'), ('\u{10e7f}', '\u{10e7f}'), ('\u{10eaa}', '\u{10eaa}'), ('\u{10eae}', '\u{10eaf}'), - ('\u{10eb2}', '\u{10efc}'), + ('\u{10eb2}', '\u{10ec1}'), + ('\u{10ec5}', '\u{10efb}'), ('\u{10f28}', '\u{10f2f}'), ('\u{10f5a}', '\u{10f6f}'), ('\u{10f8a}', '\u{10faf}'), @@ -5654,7 +5793,18 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11358}', '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'), - ('\u{11375}', '\u{113ff}'), + ('\u{11375}', '\u{1137f}'), + ('\u{1138a}', '\u{1138a}'), + ('\u{1138c}', '\u{1138d}'), + ('\u{1138f}', '\u{1138f}'), + ('\u{113b6}', '\u{113b6}'), + ('\u{113c1}', '\u{113c1}'), + ('\u{113c3}', '\u{113c4}'), + ('\u{113c6}', '\u{113c6}'), + ('\u{113cb}', '\u{113cb}'), + ('\u{113d6}', '\u{113d6}'), + ('\u{113d9}', '\u{113e0}'), + ('\u{113e3}', '\u{113ff}'), ('\u{1145c}', '\u{1145c}'), ('\u{11462}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'), @@ -5665,7 +5815,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1165a}', '\u{1165f}'), ('\u{1166d}', '\u{1167f}'), ('\u{116ba}', '\u{116bf}'), - ('\u{116ca}', '\u{116ff}'), + ('\u{116ca}', '\u{116cf}'), + ('\u{116e4}', '\u{116ff}'), ('\u{1171b}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11747}', '\u{117ff}'), @@ -5685,7 +5836,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11a48}', '\u{11a4f}'), ('\u{11aa3}', '\u{11aaf}'), ('\u{11af9}', '\u{11aff}'), - ('\u{11b0a}', '\u{11bff}'), + ('\u{11b0a}', '\u{11bbf}'), + ('\u{11be2}', '\u{11bef}'), + ('\u{11bfa}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'), ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'), @@ -5709,7 +5862,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{11ef9}', '\u{11eff}'), ('\u{11f11}', '\u{11f11}'), ('\u{11f3b}', '\u{11f3d}'), - ('\u{11f5a}', '\u{11faf}'), + ('\u{11f5b}', '\u{11faf}'), ('\u{11fb1}', '\u{11fbf}'), ('\u{11ff2}', '\u{11ffe}'), ('\u{1239a}', '\u{123ff}'), @@ -5717,8 +5870,10 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{12475}', 
'\u{1247f}'), ('\u{12544}', '\u{12f8f}'), ('\u{12ff3}', '\u{12fff}'), - ('\u{13456}', '\u{143ff}'), - ('\u{14647}', '\u{167ff}'), + ('\u{13456}', '\u{1345f}'), + ('\u{143fb}', '\u{143ff}'), + ('\u{14647}', '\u{160ff}'), + ('\u{1613a}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), @@ -5730,7 +5885,8 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'), - ('\u{16b90}', '\u{16e3f}'), + ('\u{16b90}', '\u{16d3f}'), + ('\u{16d7a}', '\u{16e3f}'), ('\u{16e9b}', '\u{16eff}'), ('\u{16f4b}', '\u{16f4e}'), ('\u{16f88}', '\u{16f8e}'), @@ -5738,7 +5894,7 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{16fe5}', '\u{16fef}'), ('\u{16ff2}', '\u{16fff}'), ('\u{187f8}', '\u{187ff}'), - ('\u{18cd6}', '\u{18cff}'), + ('\u{18cd6}', '\u{18cfe}'), ('\u{18d09}', '\u{1afef}'), ('\u{1aff4}', '\u{1aff4}'), ('\u{1affc}', '\u{1affc}'), @@ -5753,7 +5909,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), - ('\u{1bca4}', '\u{1ceff}'), + ('\u{1bca4}', '\u{1cbff}'), + ('\u{1ccfa}', '\u{1ccff}'), + ('\u{1ceb4}', '\u{1ceff}'), ('\u{1cf2e}', '\u{1cf2f}'), ('\u{1cf47}', '\u{1cf4f}'), ('\u{1cfc4}', '\u{1cfff}'), @@ -5804,7 +5962,9 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1e2af}', '\u{1e2bf}'), ('\u{1e2fa}', '\u{1e2fe}'), ('\u{1e300}', '\u{1e4cf}'), - ('\u{1e4fa}', '\u{1e7df}'), + ('\u{1e4fa}', '\u{1e5cf}'), + ('\u{1e5fb}', '\u{1e5fe}'), + ('\u{1e600}', '\u{1e7df}'), ('\u{1e7e7}', '\u{1e7e7}'), ('\u{1e7ec}', '\u{1e7ec}'), ('\u{1e7ef}', '\u{1e7ef}'), @@ -5874,24 +6034,24 @@ pub const UNASSIGNED: &'static [(char, char)] = &[ ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8af}'), - ('\u{1f8b2}', '\u{1f8ff}'), + ('\u{1f8bc}', '\u{1f8bf}'), + ('\u{1f8c2}', '\u{1f8ff}'), ('\u{1fa54}', '\u{1fa5f}'), ('\u{1fa6e}', '\u{1fa6f}'), 
('\u{1fa7d}', '\u{1fa7f}'), - ('\u{1fa89}', '\u{1fa8f}'), - ('\u{1fabe}', '\u{1fabe}'), - ('\u{1fac6}', '\u{1facd}'), - ('\u{1fadc}', '\u{1fadf}'), - ('\u{1fae9}', '\u{1faef}'), + ('\u{1fa8a}', '\u{1fa8e}'), + ('\u{1fac7}', '\u{1facd}'), + ('\u{1fadd}', '\u{1fade}'), + ('\u{1faea}', '\u{1faef}'), ('\u{1faf9}', '\u{1faff}'), ('\u{1fb93}', '\u{1fb93}'), - ('\u{1fbcb}', '\u{1fbef}'), ('\u{1fbfa}', '\u{1ffff}'), ('\u{2a6e0}', '\u{2a6ff}'), ('\u{2b73a}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'), - ('\u{2ebe1}', '\u{2f7ff}'), + ('\u{2ebe1}', '\u{2ebef}'), + ('\u{2ee5e}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{2ffff}'), ('\u{3134b}', '\u{3134f}'), ('\u{323b0}', '\u{e0000}'), @@ -6179,6 +6339,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -6503,9 +6664,12 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -6515,6 +6679,7 @@ pub const UPPERCASE_LETTER: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), diff --git a/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs b/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs index 294dfbdcc..6a6ec2af5 100644 --- a/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs +++ b/regex-syntax/src/unicode_tables/grapheme_cluster_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate grapheme-cluster-break ucd-15.0.0 --chars +// ucd-generate grapheme-cluster-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("CR", CR), @@ -71,7 +71,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -125,10 +125,10 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), + ('\u{cbf}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), @@ -172,8 +172,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{108d}', '\u{108d}'), ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -200,18 +200,16 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3a}'), - ('\u{1b3c}', '\u{1b3c}'), - ('\u{1b42}', '\u{1b42}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), + ('\u{1ba8}', '\u{1bad}'), ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), + ('\u{1bef}', '\u{1bf3}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), @@ -242,10 +240,12 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), 
('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'), @@ -277,8 +277,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -295,11 +296,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('\u{111cf}', '\u{111cf}'), ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), + ('\u{11234}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), @@ -308,9 +309,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -331,16 +341,15 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), ('\u{11839}', '\u{1183a}'), ('\u{11930}', '\u{11930}'), - 
('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), + ('\u{1193b}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119d4}', '\u{119d7}'), ('\u{119da}', '\u{119db}'), @@ -371,21 +380,23 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{11ef3}', '\u{11ef4}'), ('\u{11f00}', '\u{11f01}'), ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), + ('\u{11f40}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d165}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1d165}', '\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -406,6 +417,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -1231,6 +1243,7 @@ pub const PREPEND: &'static [(char, char)] = &[ ('\u{110bd}', '\u{110bd}'), ('\u{110cd}', '\u{110cd}'), ('𑇂', '𑇃'), + ('𑏑', '𑏑'), ('𑤿', '𑤿'), ('𑥁', '𑥁'), ('𑨺', '𑨺'), @@ -1269,10 +1282,8 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), - ('ೀ', 'ು'), + ('ು', 'ು'), ('ೃ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), ('ೳ', 'ೳ'), ('ം', 'ഃ'), ('ി', 'ീ'), @@ -1290,8 +1301,6 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ႄ', 'ႄ'), - ('᜕', '᜕'), - ('᜴', '᜴'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), @@ -1304,17 +1313,13 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ᩗ', 'ᩗ'), ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), - ('ᬻ', 
'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', '᭄'), + ('ᬾ', 'ᭁ'), ('ᮂ', 'ᮂ'), ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), ('ᰤ', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᳡', '᳡'), @@ -1323,11 +1328,11 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), - ('ꥒ', '꥓'), + ('ꥒ', 'ꥒ'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧀'), + ('ꦾ', 'ꦿ'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩍ', 'ꩍ'), @@ -1347,18 +1352,20 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('𑆂', '𑆂'), ('𑆳', '𑆵'), - ('𑆿', '𑇀'), + ('𑆿', '𑆿'), ('𑇎', '𑇎'), ('𑈬', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), ('𑋠', '𑋢'), ('𑌂', '𑌃'), ('𑌿', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '𑍌'), ('𑍢', '𑍣'), + ('𑎹', '𑎺'), + ('𑏊', '𑏊'), + ('𑏌', '𑏍'), ('𑐵', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -1375,13 +1382,12 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑘾', '𑘾'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), + ('𑜞', '𑜞'), ('𑜦', '𑜦'), ('𑠬', '𑠮'), ('𑠸', '𑠸'), ('𑤱', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), ('𑥀', '𑥀'), ('𑥂', '𑥂'), ('𑧑', '𑧓'), @@ -1402,15 +1408,13 @@ pub const SPACINGMARK: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), + ('𖄪', '𖄬'), ('𖽑', '𖾇'), - ('𖿰', '𖿱'), - ('𝅦', '𝅦'), - ('𝅭', '𝅭'), ]; pub const T: &'static [(char, char)] = &[('ᆨ', 'ᇿ'), ('ퟋ', 'ퟻ')]; -pub const V: &'static [(char, char)] = &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ')]; +pub const V: &'static [(char, char)] = + &[('ᅠ', 'ᆧ'), ('ힰ', 'ퟆ'), ('𖵣', '𖵣'), ('𖵧', '𖵪')]; pub const ZWJ: &'static [(char, char)] = &[('\u{200d}', '\u{200d}')]; diff --git a/regex-syntax/src/unicode_tables/perl_decimal.rs b/regex-syntax/src/unicode_tables/perl_decimal.rs index 4f4c08a12..18996c2bf 100644 --- a/regex-syntax/src/unicode_tables/perl_decimal.rs +++ b/regex-syntax/src/unicode_tables/perl_decimal.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. 
IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate general-category ucd-15.0.0 --chars --include decimalnumber +// ucd-generate general-category ucd-16.0.0 --chars --include decimalnumber // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("Decimal_Number", DECIMAL_NUMBER)]; @@ -49,6 +49,7 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), @@ -58,20 +59,26 @@ pub const DECIMAL_NUMBER: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; diff --git a/regex-syntax/src/unicode_tables/perl_space.rs b/regex-syntax/src/unicode_tables/perl_space.rs index 174169579..c969e3733 100644 --- a/regex-syntax/src/unicode_tables/perl_space.rs +++ b/regex-syntax/src/unicode_tables/perl_space.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-15.0.0 --chars --include whitespace +// ucd-generate property-bool ucd-16.0.0 --chars --include whitespace // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[("White_Space", WHITE_SPACE)]; diff --git a/regex-syntax/src/unicode_tables/perl_word.rs b/regex-syntax/src/unicode_tables/perl_word.rs index c1b66bd9a..21c8c0f9c 100644 --- a/regex-syntax/src/unicode_tables/perl_word.rs +++ b/regex-syntax/src/unicode_tables/perl_word.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate perl-word ucd-15.0.0 --chars +// ucd-generate perl-word ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PERL_WORD: &'static [(char, char)] = &[ ('0', '9'), @@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -379,9 +379,9 @@ pub 
const PERL_WORD: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -618,7 +636,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -626,7 +644,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', 
'𖩩'), @@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/unicode_tables/property_bool.rs b/regex-syntax/src/unicode_tables/property_bool.rs index a3e84b519..3d62edc42 100644 --- a/regex-syntax/src/unicode_tables/property_bool.rs +++ b/regex-syntax/src/unicode_tables/property_bool.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-bool ucd-15.0.0 --chars +// ucd-generate property-bool ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), @@ -36,13 +36,18 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Hyphen", HYPHEN), ("IDS_Binary_Operator", IDS_BINARY_OPERATOR), ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR), + ("IDS_Unary_Operator", IDS_UNARY_OPERATOR), + ("ID_Compat_Math_Continue", ID_COMPAT_MATH_CONTINUE), + ("ID_Compat_Math_Start", ID_COMPAT_MATH_START), ("ID_Continue", ID_CONTINUE), ("ID_Start", ID_START), ("Ideographic", IDEOGRAPHIC), + ("InCB", INCB), ("Join_Control", JOIN_CONTROL), ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION), ("Lowercase", LOWERCASE), ("Math", MATH), + ("Modifier_Combining_Mark", MODIFIER_COMBINING_MARK), ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT), ("Other_Alphabetic", OTHER_ALPHABETIC), ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT), @@ -86,7 +91,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('\u{345}', '\u{345}'), - ('Ͱ', 'ʹ'), + ('\u{363}', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), @@ -127,6 +132,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), + ('\u{897}', '\u{897}'), ('ࢠ', 'ࣉ'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), @@ -225,8 +231,8 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccc}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -331,7 +337,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('\u{1abf}', '\u{1ac0}'), ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬳ'), - ('\u{1b35}', 'ᭃ'), + ('\u{1b35}', '\u{1b43}'), ('ᭅ', 'ᭌ'), ('\u{1b80}', '\u{1ba9}'), ('\u{1bac}', 'ᮯ'), @@ -340,7 +346,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᰀ', '\u{1c36}'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -348,7 
+354,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ᳵ', 'ᳶ'), ('ᳺ', 'ᳺ'), ('ᴀ', 'ᶿ'), - ('\u{1de7}', '\u{1df4}'), + ('\u{1dd3}', '\u{1df4}'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), @@ -428,10 +434,10 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('ꙿ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠅ'), ('ꠇ', 'ꠧ'), ('ꡀ', 'ꡳ'), @@ -522,6 +528,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -559,9 +566,14 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), + ('𐵊', '𐵥'), + ('\u{10d69}', '\u{10d69}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10efc}'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -605,6 +617,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑑁'), ('\u{11443}', '𑑅'), ('𑑇', '𑑊'), @@ -643,6 +666,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𑩐', '𑪗'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑰾'), @@ -675,7 +699,9 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '\u{1612e}'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -684,16 +710,17 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -753,6 +780,8 @@ pub const ALPHABETIC: 
&'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -802,6 +831,7 @@ pub const ALPHABETIC: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -846,7 +876,7 @@ pub const BIDI_MIRRORED: &'static [(char, char)] = &[ ('≟', '≠'), ('≢', '≢'), ('≤', '≫'), - ('≮', '⊌'), + ('≭', '⊌'), ('⊏', '⊒'), ('⊘', '⊘'), ('⊢', '⊣'), @@ -976,7 +1006,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{859}', '\u{85b}'), ('࢈', '࢈'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('ࣉ', '\u{902}'), ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), @@ -1233,8 +1263,11 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('𐵎', '𐵎'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐵯'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -1267,6 +1300,11 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11340}', '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113ce}', '\u{113ce}'), + ('\u{113d0}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -1286,7 +1324,8 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), @@ -1325,11 +1364,16 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{11f36}', '\u{11f3a}'), ('\u{11f40}', '\u{11f40}'), ('\u{11f42}', '\u{11f42}'), 
+ ('\u{11f5a}', '\u{11f5a}'), ('\u{13430}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('𖭀', '𖭃'), + ('𖵀', '𖵂'), + ('𖵫', '𖵬'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), @@ -1363,6 +1407,7 @@ pub const CASE_IGNORABLE: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('𞓫', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '𞥋'), ('🏻', '🏿'), @@ -1406,7 +1451,7 @@ pub const CASED: &'static [(char, char)] = &[ ('ჼ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᴀ', 'ᶿ'), @@ -1459,10 +1504,10 @@ pub const CASED: &'static [(char, char)] = &[ ('Ꚁ', 'ꚝ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'ꞎ'), - ('Ꞑ', 'ꟊ'), + ('Ꞑ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟶ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), @@ -1489,6 +1534,8 @@ pub const CASED: &'static [(char, char)] = &[ ('𐞲', '𐞺'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𝐀', '𝑔'), @@ -1814,7 +1861,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -2136,9 +2183,12 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), @@ -2151,6 +2201,7 @@ pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𞤀', '𞤡'), @@ -2164,8 +2215,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ø', 'ö'), ('ø', 'ķ'), ('Ĺ', 'ƌ'), - ('Ǝ', 'ƚ'), - ('Ɯ', 'Ʃ'), + ('Ǝ', 'Ʃ'), ('Ƭ', 'ƹ'), ('Ƽ', 'ƽ'), ('ƿ', 'ƿ'), @@ -2176,8 +2226,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('ə', 'ə'), 
('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -2211,7 +2260,7 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᵹ', 'ᵹ'), @@ -2262,9 +2311,9 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('Ꞌ', 'Ɥ'), ('Ꞑ', 'ꞔ'), ('Ꞗ', 'Ɪ'), - ('Ʞ', 'ꟊ'), + ('Ʞ', 'ꟍ'), ('Ꟑ', 'ꟑ'), - ('Ꟗ', 'ꟙ'), + ('Ꟗ', 'Ƛ'), ('Ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -2285,6 +2334,8 @@ pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[ ('𐖻', '𐖼'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), + ('𐵐', '𐵥'), + ('𐵰', '𐶅'), ('𑢠', '𑣟'), ('𖹀', '𖹿'), ('𞤀', '𞥃'), @@ -2566,6 +2617,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -2885,9 +2937,12 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -2897,6 +2952,7 @@ pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𞤀', '𞤡'), @@ -2975,7 +3031,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ƌ', 'ƌ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), - ('ƙ', 'ƚ'), + ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), @@ -3052,8 +3108,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -3191,6 +3246,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ա', 'և'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'), ('ᶎ', 'ᶎ'), @@ -3510,9 +3566,11 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟗ', 'ꟗ'), 
('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -3526,6 +3584,7 @@ pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𞤢', '𞥃'), @@ -3604,7 +3663,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ƌ', 'ƌ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), - ('ƙ', 'ƚ'), + ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), @@ -3680,8 +3739,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), - ('ɣ', 'ɣ'), - ('ɥ', 'ɦ'), + ('ɣ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), @@ -3821,6 +3879,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ჽ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'), ('ᶎ', 'ᶎ'), @@ -4140,9 +4199,11 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟶ', 'ꟶ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), @@ -4156,6 +4217,7 @@ pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[ ('𐖳', '𐖹'), ('𐖻', '𐖼'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𞤢', '𞥃'), @@ -4184,6 +4246,7 @@ pub const DASH: &'static [(char, char)] = &[ ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'), + ('𐵮', '𐵮'), ('𐺭', '𐺭'), ]; @@ -4272,6 +4335,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{d3b}', '\u{d3c}'), ('\u{d4d}', '\u{d4d}'), ('\u{dca}', '\u{dca}'), + ('\u{e3a}', '\u{e3a}'), ('\u{e47}', '\u{e4c}'), ('\u{e4e}', '\u{e4e}'), ('\u{eba}', '\u{eba}'), @@ -4292,18 +4356,22 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', 'ႛ'), ('\u{135d}', '\u{135f}'), - ('\u{1714}', '᜕'), + ('\u{1714}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{1939}', '\u{193b}'), + ('\u{1a60}', '\u{1a60}'), ('\u{1a75}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abe}'), ('\u{1ac1}', '\u{1acb}'), ('\u{1b34}', '\u{1b34}'), - 
('᭄', '᭄'), + ('\u{1b44}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), - ('᮪', '\u{1bab}'), + ('\u{1baa}', '\u{1bab}'), + ('\u{1be6}', '\u{1be6}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{1c36}', '\u{1c37}'), ('ᱸ', 'ᱽ'), ('\u{1cd0}', '\u{1ce8}'), @@ -4332,12 +4400,14 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('꜀', '꜡'), ('ꞈ', '꞊'), ('ꟸ', 'ꟹ'), + ('\u{a806}', '\u{a806}'), + ('\u{a82c}', '\u{a82c}'), ('\u{a8c4}', '\u{a8c4}'), ('\u{a8e0}', '\u{a8f1}'), ('\u{a92b}', '꤮'), - ('꥓', '꥓'), + ('\u{a953}', '\u{a953}'), ('\u{a9b3}', '\u{a9b3}'), - ('꧀', '꧀'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('ꩻ', 'ꩽ'), ('\u{aabf}', 'ꫂ'), @@ -4356,8 +4426,12 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('𐞀', '𐞅'), ('𐞇', '𐞰'), ('𐞲', '𐞺'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('𐴢', '\u{10d27}'), + ('𐵎', '𐵎'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10efd}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), @@ -4366,23 +4440,26 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{110b9}', '\u{110ba}'), ('\u{11133}', '\u{11134}'), ('\u{11173}', '\u{11173}'), - ('𑇀', '𑇀'), + ('\u{111c0}', '\u{111c0}'), ('\u{111ca}', '\u{111cc}'), - ('𑈵', '\u{11236}'), + ('\u{11235}', '\u{11236}'), ('\u{112e9}', '\u{112ea}'), - ('\u{1133c}', '\u{1133c}'), - ('𑍍', '𑍍'), + ('\u{1133b}', '\u{1133c}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('\u{11442}', '\u{11442}'), ('\u{11446}', '\u{11446}'), ('\u{114c2}', '\u{114c3}'), ('\u{115bf}', '\u{115c0}'), ('\u{1163f}', '\u{1163f}'), - ('𑚶', '\u{116b7}'), + ('\u{116b6}', '\u{116b7}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{1183a}'), - ('𑤽', '\u{1193e}'), + ('\u{1193d}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), @@ -4392,18 +4469,22 @@ pub const DIACRITIC: &'static [(char, char)] = &[ 
('\u{11d42}', '\u{11d42}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), + ('\u{11f41}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13447}', '\u{13455}'), + ('\u{1612f}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), + ('𖵫', '𖵬'), ('\u{16f8f}', '𖾟'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d167}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -4411,6 +4492,7 @@ pub const DIACRITIC: &'static [(char, char)] = &[ ('\u{1e130}', '\u{1e136}'), ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e946}'), ('\u{1e948}', '\u{1e94a}'), @@ -4562,11 +4644,10 @@ pub const EMOJI: &'static [(char, char)] = &[ ('🤼', '🥅'), ('🥇', '🧿'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ]; @@ -4704,11 +4785,10 @@ pub const EMOJI_PRESENTATION: &'static [(char, char)] = &[ ('🤼', '🥅'), ('🥇', '🧿'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ]; @@ -4798,6 +4878,8 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ː', 'ˑ'), ('ـ', 'ـ'), ('ߺ', 'ߺ'), + ('\u{a71}', '\u{a71}'), + ('\u{afb}', '\u{afb}'), ('\u{b55}', '\u{b55}'), ('ๆ', 'ๆ'), ('ໆ', 'ໆ'), @@ -4819,13 +4901,19 @@ pub const EXTENDER: &'static [(char, char)] = &[ ('ꫳ', 'ꫴ'), ('ー', 'ー'), ('𐞁', '𐞂'), + ('𐵎', '𐵎'), + ('\u{10d6a}', '\u{10d6a}'), + ('𐵯', '𐵯'), + ('\u{11237}', '\u{11237}'), ('𑍝', '𑍝'), + ('\u{113d2}', '𑏓'), ('𑗆', '𑗈'), ('\u{11a98}', '\u{11a98}'), ('𖭂', '𖭃'), ('𖿠', '𖿡'), ('𖿣', '𖿣'), ('𞄼', '𞄽'), + ('\u{1e5ef}', '\u{1e5ef}'), ('\u{1e944}', '\u{1e946}'), ]; @@ -4972,10 +5060,8 @@ pub const 
GRAPHEME_BASE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಾ'), - ('ೀ', 'ು'), + ('ು', 'ು'), ('ೃ', 'ೄ'), - ('ೇ', 'ೈ'), - ('ೊ', 'ೋ'), ('ೝ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), @@ -5066,9 +5152,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('᐀', '᚜'), ('ᚠ', 'ᛸ'), ('ᜀ', 'ᜑ'), - ('᜕', '᜕'), ('ᜟ', 'ᜱ'), - ('᜴', '᜶'), + ('᜵', '᜶'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -5108,23 +5193,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('᪐', '᪙'), ('᪠', '᪭'), ('ᬄ', 'ᬳ'), - ('ᬻ', 'ᬻ'), - ('ᬽ', 'ᭁ'), - ('ᭃ', 'ᭌ'), - ('᭐', '᭪'), - ('᭴', '᭾'), + ('ᬾ', 'ᭁ'), + ('ᭅ', 'ᭌ'), + ('᭎', '᭪'), + ('᭴', '᭿'), ('ᮂ', 'ᮡ'), ('ᮦ', 'ᮧ'), - ('᮪', '᮪'), ('ᮮ', 'ᯥ'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), - ('᯲', '᯳'), ('᯼', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᰻', '᱉'), - ('ᱍ', 'ᲈ'), + ('ᱍ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', '᳇'), ('᳓', '᳓'), @@ -5158,7 +5240,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ₐ', 'ₜ'), ('₠', '⃀'), ('℀', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⭳'), ('⭶', '⮕'), @@ -5182,15 +5264,14 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), - ('⿰', '⿻'), - ('\u{3000}', '〩'), + ('⿰', '〩'), ('〰', '〿'), ('ぁ', 'ゖ'), ('゛', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), - ('㆐', '㇣'), - ('ㇰ', '㈞'), + ('㆐', '㇥'), + ('㇯', '㈞'), ('㈠', 'ꒌ'), ('꒐', '꓆'), ('ꓐ', 'ꘫ'), @@ -5199,10 +5280,10 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('꙾', 'ꚝ'), ('ꚠ', 'ꛯ'), ('꛲', '꛷'), - ('꜀', 'ꟊ'), + ('꜀', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -5215,12 +5296,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('ꣲ', 'ꣾ'), ('꤀', 'ꤥ'), ('꤮', 'ꥆ'), - ('ꥒ', '꥓'), + ('ꥒ', 'ꥒ'), ('꥟', 'ꥼ'), ('ꦃ', 'ꦲ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), - ('ꦾ', '꧍'), + ('ꦾ', 'ꦿ'), + ('꧁', '꧍'), ('ꧏ', '꧙'), ('꧞', 'ꧤ'), ('ꧦ', 'ꧾ'), @@ -5319,6 +5401,7 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -5360,10 +5443,14 
@@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐳺', '𐴣'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('𐵮', '𐶅'), + ('𐶎', '𐶏'), ('𐹠', '𐹾'), ('𐺀', '𐺩'), ('𐺭', '𐺭'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼧'), ('𐼰', '𐽅'), ('𐽑', '𐽙'), @@ -5389,14 +5476,14 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑅐', '𑅲'), ('𑅴', '𑅶'), ('𑆂', '𑆵'), - ('𑆿', '𑇈'), + ('𑆿', '𑆿'), + ('𑇁', '𑇈'), ('𑇍', '𑇎'), ('𑇐', '𑇟'), ('𑇡', '𑇴'), ('𑈀', '𑈑'), ('𑈓', '𑈮'), ('𑈲', '𑈳'), - ('𑈵', '𑈵'), ('𑈸', '𑈽'), ('𑈿', '𑉀'), ('𑊀', '𑊆'), @@ -5418,9 +5505,20 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑌿', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '𑍌'), ('𑍐', '𑍐'), ('𑍝', '𑍣'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑎹', '𑎺'), + ('𑏊', '𑏊'), + ('𑏌', '𑏍'), + ('𑏑', '𑏑'), + ('𑏓', '𑏕'), + ('𑏗', '𑏘'), ('𑐀', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), @@ -5449,10 +5547,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑚀', '𑚪'), ('𑚬', '𑚬'), ('𑚮', '𑚯'), - ('𑚶', '𑚶'), ('𑚸', '𑚹'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), + ('𑜞', '𑜞'), ('𑜠', '𑜡'), ('𑜦', '𑜦'), ('𑜰', '𑝆'), @@ -5467,7 +5566,6 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑤘', '𑤯'), ('𑤱', '𑤵'), ('𑤷', '𑤸'), - ('𑤽', '𑤽'), ('𑤿', '𑥂'), ('𑥄', '𑥆'), ('𑥐', '𑥙'), @@ -5486,6 +5584,8 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑪚', '𑪢'), ('𑪰', '𑫸'), ('𑬀', '𑬉'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '𑰯'), ('𑰾', '𑰾'), @@ -5512,7 +5612,6 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𑼂', '𑼐'), ('𑼒', '𑼵'), ('𑼾', '𑼿'), - ('𑽁', '𑽁'), ('𑽃', '𑽙'), ('𑾰', '𑾰'), ('𑿀', '𑿱'), @@ -5523,7 +5622,11 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𒾐', '𒿲'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), + ('𖄪', '𖄬'), + ('𖄰', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -5537,15 +5640,15 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵹'), ('𖹀', '𖺚'), ('𖼀', '𖽊'), ('𖽐', '𖾇'), ('𖾓', '𖾟'), ('𖿠', '𖿣'), 
- ('𖿰', '𖿱'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -5561,12 +5664,13 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𛲐', '𛲙'), ('𛲜', '𛲜'), ('𛲟', '𛲟'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), - ('𝅦', '𝅦'), - ('𝅪', '𝅭'), + ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇪'), @@ -5614,6 +5718,9 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('𞋿', '𞋿'), ('𞓐', '𞓫'), ('𞓰', '𞓹'), + ('𞗐', '𞗭'), + ('𞗰', '𞗺'), + ('𞗿', '𞗿'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -5684,24 +5791,24 @@ pub const GRAPHEME_BASE: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -5732,7 +5839,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', '\u{902}'), ('\u{93a}', '\u{93a}'), @@ -5786,10 +5893,10 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'), - ('\u{cbf}', '\u{cbf}'), + ('\u{cbf}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), - ('\u{cc6}', '\u{cc6}'), - ('\u{ccc}', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d00}', '\u{d01}'), @@ -5833,8 +5940,8 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{108d}', '\u{108d}'), ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '\u{1714}'), - ('\u{1732}', '\u{1733}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', 
'\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), @@ -5861,18 +5968,16 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', '\u{1b03}'), - ('\u{1b34}', '\u{1b3a}'), - ('\u{1b3c}', '\u{1b3c}'), - ('\u{1b42}', '\u{1b42}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'), - ('\u{1ba8}', '\u{1ba9}'), - ('\u{1bab}', '\u{1bad}'), + ('\u{1ba8}', '\u{1bad}'), ('\u{1be6}', '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'), - ('\u{1bef}', '\u{1bf1}'), + ('\u{1bef}', '\u{1bf3}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), @@ -5903,10 +6008,12 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'), @@ -5938,8 +6045,9 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('\u{11001}', '\u{11001}'), @@ -5956,11 +6064,11 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('\u{111cf}', '\u{111cf}'), ('\u{1122f}', '\u{11231}'), - ('\u{11234}', '\u{11234}'), - ('\u{11236}', '\u{11237}'), + ('\u{11234}', '\u{11237}'), ('\u{1123e}', '\u{1123e}'), ('\u{11241}', '\u{11241}'), ('\u{112df}', '\u{112df}'), @@ -5969,9 +6077,18 @@ 
pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('\u{11438}', '\u{1143f}'), ('\u{11442}', '\u{11444}'), ('\u{11446}', '\u{11446}'), @@ -5992,16 +6109,15 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), - ('\u{116b0}', '\u{116b5}'), - ('\u{116b7}', '\u{116b7}'), - ('\u{1171d}', '\u{1171f}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), ('\u{11722}', '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{1182f}', '\u{11837}'), ('\u{11839}', '\u{1183a}'), ('\u{11930}', '\u{11930}'), - ('\u{1193b}', '\u{1193c}'), - ('\u{1193e}', '\u{1193e}'), + ('\u{1193b}', '\u{1193e}'), ('\u{11943}', '\u{11943}'), ('\u{119d4}', '\u{119d7}'), ('\u{119da}', '\u{119db}'), @@ -6032,21 +6148,23 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{11ef3}', '\u{11ef4}'), ('\u{11f00}', '\u{11f01}'), ('\u{11f36}', '\u{11f3a}'), - ('\u{11f40}', '\u{11f40}'), - ('\u{11f42}', '\u{11f42}'), + ('\u{11f40}', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), - ('\u{1d165}', '\u{1d165}'), - ('\u{1d167}', '\u{1d169}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1d165}', 
'\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -6067,6 +6185,7 @@ pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -6089,19 +6208,19 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{eba}', '\u{eba}'), ('\u{f84}', '\u{f84}'), ('\u{1039}', '\u{103a}'), - ('\u{1714}', '᜕'), - ('᜴', '᜴'), + ('\u{1714}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{17d2}', '\u{17d2}'), ('\u{1a60}', '\u{1a60}'), - ('᭄', '᭄'), - ('᮪', '\u{1bab}'), - ('᯲', '᯳'), + ('\u{1b44}', '\u{1b44}'), + ('\u{1baa}', '\u{1bab}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{2d7f}', '\u{2d7f}'), ('\u{a806}', '\u{a806}'), ('\u{a82c}', '\u{a82c}'), ('\u{a8c4}', '\u{a8c4}'), - ('꥓', '꥓'), - ('꧀', '꧀'), + ('\u{a953}', '\u{a953}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{aaf6}', '\u{aaf6}'), ('\u{abed}', '\u{abed}'), ('\u{10a3f}', '\u{10a3f}'), @@ -6110,18 +6229,19 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{1107f}', '\u{1107f}'), ('\u{110b9}', '\u{110b9}'), ('\u{11133}', '\u{11134}'), - ('𑇀', '𑇀'), - ('𑈵', '𑈵'), + ('\u{111c0}', '\u{111c0}'), + ('\u{11235}', '\u{11235}'), ('\u{112ea}', '\u{112ea}'), - ('𑍍', '𑍍'), + ('\u{1134d}', '\u{1134d}'), + ('\u{113ce}', '\u{113d0}'), ('\u{11442}', '\u{11442}'), ('\u{114c2}', '\u{114c2}'), ('\u{115bf}', '\u{115bf}'), ('\u{1163f}', '\u{1163f}'), - ('𑚶', '𑚶'), + ('\u{116b6}', '\u{116b6}'), ('\u{1172b}', '\u{1172b}'), ('\u{11839}', '\u{11839}'), - ('𑤽', '\u{1193e}'), + ('\u{1193d}', '\u{1193e}'), ('\u{119e0}', '\u{119e0}'), ('\u{11a34}', '\u{11a34}'), ('\u{11a47}', '\u{11a47}'), @@ -6129,7 +6249,8 @@ pub const GRAPHEME_LINK: &'static [(char, char)] = &[ ('\u{11c3f}', '\u{11c3f}'), ('\u{11d44}', '\u{11d45}'), ('\u{11d97}', '\u{11d97}'), - ('𑽁', '\u{11f42}'), + ('\u{11f41}', 
'\u{11f42}'), + ('\u{1612f}', '\u{1612f}'), ]; pub const HEX_DIGIT: &'static [(char, char)] = &[ @@ -6155,10 +6276,49 @@ pub const HYPHEN: &'static [(char, char)] = &[ ]; pub const IDS_BINARY_OPERATOR: &'static [(char, char)] = - &[('⿰', '⿱'), ('⿴', '⿻')]; + &[('⿰', '⿱'), ('⿴', '⿽'), ('㇯', '㇯')]; pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[('⿲', '⿳')]; +pub const IDS_UNARY_OPERATOR: &'static [(char, char)] = &[('⿾', '⿿')]; + +pub const ID_COMPAT_MATH_CONTINUE: &'static [(char, char)] = &[ + ('²', '³'), + ('¹', '¹'), + ('⁰', '⁰'), + ('⁴', '⁾'), + ('₀', '₎'), + ('∂', '∂'), + ('∇', '∇'), + ('∞', '∞'), + ('𝛁', '𝛁'), + ('𝛛', '𝛛'), + ('𝛻', '𝛻'), + ('𝜕', '𝜕'), + ('𝜵', '𝜵'), + ('𝝏', '𝝏'), + ('𝝯', '𝝯'), + ('𝞉', '𝞉'), + ('𝞩', '𝞩'), + ('𝟃', '𝟃'), +]; + +pub const ID_COMPAT_MATH_START: &'static [(char, char)] = &[ + ('∂', '∂'), + ('∇', '∇'), + ('∞', '∞'), + ('𝛁', '𝛁'), + ('𝛛', '𝛛'), + ('𝛻', '𝛻'), + ('𝜕', '𝜕'), + ('𝜵', '𝜵'), + ('𝝏', '𝝏'), + ('𝝯', '𝝯'), + ('𝞉', '𝞉'), + ('𝞩', '𝞩'), + ('𝟃', '𝟃'), +]; + pub const ID_CONTINUE: &'static [(char, char)] = &[ ('0', '9'), ('A', 'Z'), @@ -6213,7 +6373,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -6312,8 +6472,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -6398,8 +6558,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -6432,11 +6592,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', 
'᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -6460,6 +6620,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'), ('ⁱ', 'ⁱ'), @@ -6504,8 +6665,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('〸', '〼'), ('ぁ', 'ゖ'), ('\u{3099}', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), + ('ァ', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆿ'), @@ -6520,10 +6680,10 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -6532,9 +6692,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -6583,7 +6743,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('A', 'Z'), ('_', '_'), ('a', 'z'), - ('ヲ', 'ᄒ'), + ('・', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), @@ -6621,6 +6781,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -6661,10 +6822,14 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -6704,12 +6869,22 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + 
('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -6724,6 +6899,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -6747,6 +6923,8 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -6771,7 +6949,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -6779,7 +6957,9 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -6792,16 +6972,18 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -6816,10 +6998,11 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -6877,6 +7060,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + ('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -6924,6 +7108,7 @@ pub const ID_CONTINUE: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), 
('𱍐', '𲎯'), @@ -7155,7 +7340,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -7238,10 +7423,10 @@ pub const ID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -7339,6 +7524,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -7375,8 +7561,11 @@ pub const ID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -7415,6 +7604,13 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -7449,6 +7645,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -7472,7 +7669,9 @@ pub const ID_START: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -7481,6 +7680,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -7489,7 +7689,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -7542,6 +7742,8 @@ pub const ID_START: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -7587,6 +7789,7 @@ pub const ID_START: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', 
'𱍊'), ('𱍐', '𲎯'), @@ -7603,18 +7806,420 @@ pub const IDEOGRAPHIC: &'static [(char, char)] = &[ ('\u{16fe4}', '\u{16fe4}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𛅰', '𛋻'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), ]; +pub const INCB: &'static [(char, char)] = &[ + ('\u{300}', '\u{36f}'), + ('\u{483}', '\u{489}'), + ('\u{591}', '\u{5bd}'), + ('\u{5bf}', '\u{5bf}'), + ('\u{5c1}', '\u{5c2}'), + ('\u{5c4}', '\u{5c5}'), + ('\u{5c7}', '\u{5c7}'), + ('\u{610}', '\u{61a}'), + ('\u{64b}', '\u{65f}'), + ('\u{670}', '\u{670}'), + ('\u{6d6}', '\u{6dc}'), + ('\u{6df}', '\u{6e4}'), + ('\u{6e7}', '\u{6e8}'), + ('\u{6ea}', '\u{6ed}'), + ('\u{711}', '\u{711}'), + ('\u{730}', '\u{74a}'), + ('\u{7a6}', '\u{7b0}'), + ('\u{7eb}', '\u{7f3}'), + ('\u{7fd}', '\u{7fd}'), + ('\u{816}', '\u{819}'), + ('\u{81b}', '\u{823}'), + ('\u{825}', '\u{827}'), + ('\u{829}', '\u{82d}'), + ('\u{859}', '\u{85b}'), + ('\u{897}', '\u{89f}'), + ('\u{8ca}', '\u{8e1}'), + ('\u{8e3}', '\u{902}'), + ('क', '\u{93a}'), + ('\u{93c}', '\u{93c}'), + ('\u{941}', '\u{948}'), + ('\u{94d}', '\u{94d}'), + ('\u{951}', 'य़'), + ('\u{962}', '\u{963}'), + ('ॸ', 'ॿ'), + ('\u{981}', '\u{981}'), + ('ক', 'ন'), + ('প', 'র'), + ('ল', 'ল'), + ('শ', 'হ'), + ('\u{9bc}', '\u{9bc}'), + ('\u{9be}', '\u{9be}'), + ('\u{9c1}', '\u{9c4}'), + ('\u{9cd}', '\u{9cd}'), + ('\u{9d7}', '\u{9d7}'), + ('ড়', 'ঢ়'), + ('য়', 'য়'), + ('\u{9e2}', '\u{9e3}'), + ('ৰ', 'ৱ'), + ('\u{9fe}', '\u{9fe}'), + ('\u{a01}', '\u{a02}'), + ('\u{a3c}', '\u{a3c}'), + ('\u{a41}', '\u{a42}'), + ('\u{a47}', '\u{a48}'), + ('\u{a4b}', '\u{a4d}'), + ('\u{a51}', '\u{a51}'), + ('\u{a70}', '\u{a71}'), + ('\u{a75}', '\u{a75}'), + ('\u{a81}', '\u{a82}'), + ('ક', 'ન'), + ('પ', 'ર'), + ('લ', 'ળ'), + ('વ', 'હ'), + ('\u{abc}', '\u{abc}'), + ('\u{ac1}', '\u{ac5}'), + ('\u{ac7}', '\u{ac8}'), + ('\u{acd}', '\u{acd}'), + ('\u{ae2}', '\u{ae3}'), + ('ૹ', '\u{aff}'), + ('\u{b01}', '\u{b01}'), + ('କ', 'ନ'), + 
('ପ', 'ର'), + ('ଲ', 'ଳ'), + ('ଵ', 'ହ'), + ('\u{b3c}', '\u{b3c}'), + ('\u{b3e}', '\u{b3f}'), + ('\u{b41}', '\u{b44}'), + ('\u{b4d}', '\u{b4d}'), + ('\u{b55}', '\u{b57}'), + ('ଡ଼', 'ଢ଼'), + ('ୟ', 'ୟ'), + ('\u{b62}', '\u{b63}'), + ('ୱ', 'ୱ'), + ('\u{b82}', '\u{b82}'), + ('\u{bbe}', '\u{bbe}'), + ('\u{bc0}', '\u{bc0}'), + ('\u{bcd}', '\u{bcd}'), + ('\u{bd7}', '\u{bd7}'), + ('\u{c00}', '\u{c00}'), + ('\u{c04}', '\u{c04}'), + ('క', 'న'), + ('ప', 'హ'), + ('\u{c3c}', '\u{c3c}'), + ('\u{c3e}', '\u{c40}'), + ('\u{c46}', '\u{c48}'), + ('\u{c4a}', '\u{c4d}'), + ('\u{c55}', '\u{c56}'), + ('ౘ', 'ౚ'), + ('\u{c62}', '\u{c63}'), + ('\u{c81}', '\u{c81}'), + ('\u{cbc}', '\u{cbc}'), + ('\u{cbf}', '\u{cc0}'), + ('\u{cc2}', '\u{cc2}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), + ('\u{cd5}', '\u{cd6}'), + ('\u{ce2}', '\u{ce3}'), + ('\u{d00}', '\u{d01}'), + ('ക', '\u{d3c}'), + ('\u{d3e}', '\u{d3e}'), + ('\u{d41}', '\u{d44}'), + ('\u{d4d}', '\u{d4d}'), + ('\u{d57}', '\u{d57}'), + ('\u{d62}', '\u{d63}'), + ('\u{d81}', '\u{d81}'), + ('\u{dca}', '\u{dca}'), + ('\u{dcf}', '\u{dcf}'), + ('\u{dd2}', '\u{dd4}'), + ('\u{dd6}', '\u{dd6}'), + ('\u{ddf}', '\u{ddf}'), + ('\u{e31}', '\u{e31}'), + ('\u{e34}', '\u{e3a}'), + ('\u{e47}', '\u{e4e}'), + ('\u{eb1}', '\u{eb1}'), + ('\u{eb4}', '\u{ebc}'), + ('\u{ec8}', '\u{ece}'), + ('\u{f18}', '\u{f19}'), + ('\u{f35}', '\u{f35}'), + ('\u{f37}', '\u{f37}'), + ('\u{f39}', '\u{f39}'), + ('\u{f71}', '\u{f7e}'), + ('\u{f80}', '\u{f84}'), + ('\u{f86}', '\u{f87}'), + ('\u{f8d}', '\u{f97}'), + ('\u{f99}', '\u{fbc}'), + ('\u{fc6}', '\u{fc6}'), + ('\u{102d}', '\u{1030}'), + ('\u{1032}', '\u{1037}'), + ('\u{1039}', '\u{103a}'), + ('\u{103d}', '\u{103e}'), + ('\u{1058}', '\u{1059}'), + ('\u{105e}', '\u{1060}'), + ('\u{1071}', '\u{1074}'), + ('\u{1082}', '\u{1082}'), + ('\u{1085}', '\u{1086}'), + ('\u{108d}', '\u{108d}'), + ('\u{109d}', '\u{109d}'), + ('\u{135d}', '\u{135f}'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), + ('\u{1752}', '\u{1753}'), + 
('\u{1772}', '\u{1773}'), + ('\u{17b4}', '\u{17b5}'), + ('\u{17b7}', '\u{17bd}'), + ('\u{17c6}', '\u{17c6}'), + ('\u{17c9}', '\u{17d3}'), + ('\u{17dd}', '\u{17dd}'), + ('\u{180b}', '\u{180d}'), + ('\u{180f}', '\u{180f}'), + ('\u{1885}', '\u{1886}'), + ('\u{18a9}', '\u{18a9}'), + ('\u{1920}', '\u{1922}'), + ('\u{1927}', '\u{1928}'), + ('\u{1932}', '\u{1932}'), + ('\u{1939}', '\u{193b}'), + ('\u{1a17}', '\u{1a18}'), + ('\u{1a1b}', '\u{1a1b}'), + ('\u{1a56}', '\u{1a56}'), + ('\u{1a58}', '\u{1a5e}'), + ('\u{1a60}', '\u{1a60}'), + ('\u{1a62}', '\u{1a62}'), + ('\u{1a65}', '\u{1a6c}'), + ('\u{1a73}', '\u{1a7c}'), + ('\u{1a7f}', '\u{1a7f}'), + ('\u{1ab0}', '\u{1ace}'), + ('\u{1b00}', '\u{1b03}'), + ('\u{1b34}', '\u{1b3d}'), + ('\u{1b42}', '\u{1b44}'), + ('\u{1b6b}', '\u{1b73}'), + ('\u{1b80}', '\u{1b81}'), + ('\u{1ba2}', '\u{1ba5}'), + ('\u{1ba8}', '\u{1bad}'), + ('\u{1be6}', '\u{1be6}'), + ('\u{1be8}', '\u{1be9}'), + ('\u{1bed}', '\u{1bed}'), + ('\u{1bef}', '\u{1bf3}'), + ('\u{1c2c}', '\u{1c33}'), + ('\u{1c36}', '\u{1c37}'), + ('\u{1cd0}', '\u{1cd2}'), + ('\u{1cd4}', '\u{1ce0}'), + ('\u{1ce2}', '\u{1ce8}'), + ('\u{1ced}', '\u{1ced}'), + ('\u{1cf4}', '\u{1cf4}'), + ('\u{1cf8}', '\u{1cf9}'), + ('\u{1dc0}', '\u{1dff}'), + ('\u{200d}', '\u{200d}'), + ('\u{20d0}', '\u{20f0}'), + ('\u{2cef}', '\u{2cf1}'), + ('\u{2d7f}', '\u{2d7f}'), + ('\u{2de0}', '\u{2dff}'), + ('\u{302a}', '\u{302f}'), + ('\u{3099}', '\u{309a}'), + ('\u{a66f}', '\u{a672}'), + ('\u{a674}', '\u{a67d}'), + ('\u{a69e}', '\u{a69f}'), + ('\u{a6f0}', '\u{a6f1}'), + ('\u{a802}', '\u{a802}'), + ('\u{a806}', '\u{a806}'), + ('\u{a80b}', '\u{a80b}'), + ('\u{a825}', '\u{a826}'), + ('\u{a82c}', '\u{a82c}'), + ('\u{a8c4}', '\u{a8c5}'), + ('\u{a8e0}', '\u{a8f1}'), + ('\u{a8ff}', '\u{a8ff}'), + ('\u{a926}', '\u{a92d}'), + ('\u{a947}', '\u{a951}'), + ('\u{a953}', '\u{a953}'), + ('\u{a980}', '\u{a982}'), + ('\u{a9b3}', '\u{a9b3}'), + ('\u{a9b6}', '\u{a9b9}'), + ('\u{a9bc}', '\u{a9bd}'), + ('\u{a9c0}', '\u{a9c0}'), + 
('\u{a9e5}', '\u{a9e5}'), + ('\u{aa29}', '\u{aa2e}'), + ('\u{aa31}', '\u{aa32}'), + ('\u{aa35}', '\u{aa36}'), + ('\u{aa43}', '\u{aa43}'), + ('\u{aa4c}', '\u{aa4c}'), + ('\u{aa7c}', '\u{aa7c}'), + ('\u{aab0}', '\u{aab0}'), + ('\u{aab2}', '\u{aab4}'), + ('\u{aab7}', '\u{aab8}'), + ('\u{aabe}', '\u{aabf}'), + ('\u{aac1}', '\u{aac1}'), + ('\u{aaec}', '\u{aaed}'), + ('\u{aaf6}', '\u{aaf6}'), + ('\u{abe5}', '\u{abe5}'), + ('\u{abe8}', '\u{abe8}'), + ('\u{abed}', '\u{abed}'), + ('\u{fb1e}', '\u{fb1e}'), + ('\u{fe00}', '\u{fe0f}'), + ('\u{fe20}', '\u{fe2f}'), + ('\u{ff9e}', '\u{ff9f}'), + ('\u{101fd}', '\u{101fd}'), + ('\u{102e0}', '\u{102e0}'), + ('\u{10376}', '\u{1037a}'), + ('\u{10a01}', '\u{10a03}'), + ('\u{10a05}', '\u{10a06}'), + ('\u{10a0c}', '\u{10a0f}'), + ('\u{10a38}', '\u{10a3a}'), + ('\u{10a3f}', '\u{10a3f}'), + ('\u{10ae5}', '\u{10ae6}'), + ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), + ('\u{10eab}', '\u{10eac}'), + ('\u{10efc}', '\u{10eff}'), + ('\u{10f46}', '\u{10f50}'), + ('\u{10f82}', '\u{10f85}'), + ('\u{11001}', '\u{11001}'), + ('\u{11038}', '\u{11046}'), + ('\u{11070}', '\u{11070}'), + ('\u{11073}', '\u{11074}'), + ('\u{1107f}', '\u{11081}'), + ('\u{110b3}', '\u{110b6}'), + ('\u{110b9}', '\u{110ba}'), + ('\u{110c2}', '\u{110c2}'), + ('\u{11100}', '\u{11102}'), + ('\u{11127}', '\u{1112b}'), + ('\u{1112d}', '\u{11134}'), + ('\u{11173}', '\u{11173}'), + ('\u{11180}', '\u{11181}'), + ('\u{111b6}', '\u{111be}'), + ('\u{111c0}', '\u{111c0}'), + ('\u{111c9}', '\u{111cc}'), + ('\u{111cf}', '\u{111cf}'), + ('\u{1122f}', '\u{11231}'), + ('\u{11234}', '\u{11237}'), + ('\u{1123e}', '\u{1123e}'), + ('\u{11241}', '\u{11241}'), + ('\u{112df}', '\u{112df}'), + ('\u{112e3}', '\u{112ea}'), + ('\u{11300}', '\u{11301}'), + ('\u{1133b}', '\u{1133c}'), + ('\u{1133e}', '\u{1133e}'), + ('\u{11340}', '\u{11340}'), + ('\u{1134d}', '\u{1134d}'), + ('\u{11357}', '\u{11357}'), + ('\u{11366}', '\u{1136c}'), + ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113b8}'), + 
('\u{113bb}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113ce}', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), + ('\u{11438}', '\u{1143f}'), + ('\u{11442}', '\u{11444}'), + ('\u{11446}', '\u{11446}'), + ('\u{1145e}', '\u{1145e}'), + ('\u{114b0}', '\u{114b0}'), + ('\u{114b3}', '\u{114b8}'), + ('\u{114ba}', '\u{114ba}'), + ('\u{114bd}', '\u{114bd}'), + ('\u{114bf}', '\u{114c0}'), + ('\u{114c2}', '\u{114c3}'), + ('\u{115af}', '\u{115af}'), + ('\u{115b2}', '\u{115b5}'), + ('\u{115bc}', '\u{115bd}'), + ('\u{115bf}', '\u{115c0}'), + ('\u{115dc}', '\u{115dd}'), + ('\u{11633}', '\u{1163a}'), + ('\u{1163d}', '\u{1163d}'), + ('\u{1163f}', '\u{11640}'), + ('\u{116ab}', '\u{116ab}'), + ('\u{116ad}', '\u{116ad}'), + ('\u{116b0}', '\u{116b7}'), + ('\u{1171d}', '\u{1171d}'), + ('\u{1171f}', '\u{1171f}'), + ('\u{11722}', '\u{11725}'), + ('\u{11727}', '\u{1172b}'), + ('\u{1182f}', '\u{11837}'), + ('\u{11839}', '\u{1183a}'), + ('\u{11930}', '\u{11930}'), + ('\u{1193b}', '\u{1193e}'), + ('\u{11943}', '\u{11943}'), + ('\u{119d4}', '\u{119d7}'), + ('\u{119da}', '\u{119db}'), + ('\u{119e0}', '\u{119e0}'), + ('\u{11a01}', '\u{11a0a}'), + ('\u{11a33}', '\u{11a38}'), + ('\u{11a3b}', '\u{11a3e}'), + ('\u{11a47}', '\u{11a47}'), + ('\u{11a51}', '\u{11a56}'), + ('\u{11a59}', '\u{11a5b}'), + ('\u{11a8a}', '\u{11a96}'), + ('\u{11a98}', '\u{11a99}'), + ('\u{11c30}', '\u{11c36}'), + ('\u{11c38}', '\u{11c3d}'), + ('\u{11c3f}', '\u{11c3f}'), + ('\u{11c92}', '\u{11ca7}'), + ('\u{11caa}', '\u{11cb0}'), + ('\u{11cb2}', '\u{11cb3}'), + ('\u{11cb5}', '\u{11cb6}'), + ('\u{11d31}', '\u{11d36}'), + ('\u{11d3a}', '\u{11d3a}'), + ('\u{11d3c}', '\u{11d3d}'), + ('\u{11d3f}', '\u{11d45}'), + ('\u{11d47}', '\u{11d47}'), + ('\u{11d90}', '\u{11d91}'), + ('\u{11d95}', '\u{11d95}'), + ('\u{11d97}', '\u{11d97}'), + ('\u{11ef3}', '\u{11ef4}'), + ('\u{11f00}', '\u{11f01}'), + ('\u{11f36}', '\u{11f3a}'), + ('\u{11f40}', 
'\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), + ('\u{13440}', '\u{13440}'), + ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{16129}'), + ('\u{1612d}', '\u{1612f}'), + ('\u{16af0}', '\u{16af4}'), + ('\u{16b30}', '\u{16b36}'), + ('\u{16f4f}', '\u{16f4f}'), + ('\u{16f8f}', '\u{16f92}'), + ('\u{16fe4}', '\u{16fe4}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1bc9d}', '\u{1bc9e}'), + ('\u{1cf00}', '\u{1cf2d}'), + ('\u{1cf30}', '\u{1cf46}'), + ('\u{1d165}', '\u{1d169}'), + ('\u{1d16d}', '\u{1d172}'), + ('\u{1d17b}', '\u{1d182}'), + ('\u{1d185}', '\u{1d18b}'), + ('\u{1d1aa}', '\u{1d1ad}'), + ('\u{1d242}', '\u{1d244}'), + ('\u{1da00}', '\u{1da36}'), + ('\u{1da3b}', '\u{1da6c}'), + ('\u{1da75}', '\u{1da75}'), + ('\u{1da84}', '\u{1da84}'), + ('\u{1da9b}', '\u{1da9f}'), + ('\u{1daa1}', '\u{1daaf}'), + ('\u{1e000}', '\u{1e006}'), + ('\u{1e008}', '\u{1e018}'), + ('\u{1e01b}', '\u{1e021}'), + ('\u{1e023}', '\u{1e024}'), + ('\u{1e026}', '\u{1e02a}'), + ('\u{1e08f}', '\u{1e08f}'), + ('\u{1e130}', '\u{1e136}'), + ('\u{1e2ae}', '\u{1e2ae}'), + ('\u{1e2ec}', '\u{1e2ef}'), + ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), + ('\u{1e8d0}', '\u{1e8d6}'), + ('\u{1e944}', '\u{1e94a}'), + ('🏻', '🏿'), + ('\u{e0020}', '\u{e007f}'), + ('\u{e0100}', '\u{e01ef}'), +]; + pub const JOIN_CONTROL: &'static [(char, char)] = &[('\u{200c}', '\u{200d}')]; pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[ @@ -7907,6 +8512,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ჼ', 'ჿ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), @@ -8239,11 +8845,13 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟲ', 'ꟴ'), ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), @@ -8264,6 +8872,7 @@ pub const LOWERCASE: &'static [(char, char)] = &[ ('𐞇', '𐞰'), ('𐞲', '𐞺'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -8385,6 +8994,7 @@ pub const MATH: 
&'static [(char, char)] = &[ ('~', '~'), ('¬', '¬'), ('←', '↓'), + ('𐶎', '𐶏'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), @@ -8442,6 +9052,18 @@ pub const MATH: &'static [(char, char)] = &[ ('𞻰', '𞻱'), ]; +pub const MODIFIER_COMBINING_MARK: &'static [(char, char)] = &[ + ('\u{654}', '\u{655}'), + ('\u{658}', '\u{658}'), + ('\u{6dc}', '\u{6dc}'), + ('\u{6e3}', '\u{6e3}'), + ('\u{6e7}', '\u{6e8}'), + ('\u{8ca}', '\u{8cb}'), + ('\u{8cd}', '\u{8cf}'), + ('\u{8d3}', '\u{8d3}'), + ('\u{8f3}', '\u{8f3}'), +]; + pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[ ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'), @@ -8465,6 +9087,7 @@ pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{345}', '\u{345}'), + ('\u{363}', '\u{36f}'), ('\u{5b0}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), @@ -8485,6 +9108,7 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'), ('\u{829}', '\u{82c}'), + ('\u{897}', '\u{897}'), ('\u{8d4}', '\u{8df}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', 'ः'), @@ -8531,8 +9155,8 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{c62}', '\u{c63}'), ('\u{c81}', 'ಃ'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccc}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -8583,13 +9207,13 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{1abf}', '\u{1ac0}'), ('\u{1acc}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b35}', 'ᭃ'), + ('\u{1b35}', '\u{1b43}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1ba9}'), ('\u{1bac}', '\u{1bad}'), ('ᯧ', '\u{1bf1}'), ('ᰤ', '\u{1c36}'), - ('\u{1de7}', '\u{1df4}'), + ('\u{1dd3}', '\u{1df4}'), ('Ⓐ', 'ⓩ'), ('\u{2de0}', '\u{2dff}'), ('\u{a674}', '\u{a67b}'), @@ -8623,7 +9247,9 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), ('\u{10d24}', '\u{10d27}'), + 
('\u{10d69}', '\u{10d69}'), ('\u{10eab}', '\u{10eac}'), + ('\u{10efc}', '\u{10efc}'), ('𑀀', '𑀂'), ('\u{11038}', '\u{11045}'), ('\u{11073}', '\u{11074}'), @@ -8647,6 +9273,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('𑍋', '𑍌'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏍'), ('𑐵', '𑑁'), ('\u{11443}', '𑑅'), ('\u{114b0}', '𑓁'), @@ -8689,10 +9320,11 @@ pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f40}'), + ('\u{1611e}', '\u{1612e}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1e000}', '\u{1e006}'), ('\u{1e008}', '\u{1e018}'), @@ -8727,29 +9359,60 @@ pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[ ('\u{b57}', '\u{b57}'), ('\u{bbe}', '\u{bbe}'), ('\u{bd7}', '\u{bd7}'), + ('\u{cc0}', '\u{cc0}'), ('\u{cc2}', '\u{cc2}'), + ('\u{cc7}', '\u{cc8}'), + ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('\u{d3e}', '\u{d3e}'), ('\u{d57}', '\u{d57}'), ('\u{dcf}', '\u{dcf}'), ('\u{ddf}', '\u{ddf}'), + ('\u{1715}', '\u{1715}'), + ('\u{1734}', '\u{1734}'), ('\u{1b35}', '\u{1b35}'), + ('\u{1b3b}', '\u{1b3b}'), + ('\u{1b3d}', '\u{1b3d}'), + ('\u{1b43}', '\u{1b44}'), + ('\u{1baa}', '\u{1baa}'), + ('\u{1bf2}', '\u{1bf3}'), ('\u{200c}', '\u{200c}'), ('\u{302e}', '\u{302f}'), + ('\u{a953}', '\u{a953}'), + ('\u{a9c0}', '\u{a9c0}'), ('\u{ff9e}', '\u{ff9f}'), + ('\u{111c0}', '\u{111c0}'), + ('\u{11235}', '\u{11235}'), ('\u{1133e}', '\u{1133e}'), + ('\u{1134d}', '\u{1134d}'), ('\u{11357}', '\u{11357}'), + ('\u{113b8}', '\u{113b8}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '\u{113c9}'), + ('\u{113cf}', '\u{113cf}'), ('\u{114b0}', '\u{114b0}'), ('\u{114bd}', '\u{114bd}'), ('\u{115af}', '\u{115af}'), + ('\u{116b6}', '\u{116b6}'), ('\u{11930}', '\u{11930}'), - 
('\u{1d165}', '\u{1d165}'), - ('\u{1d16e}', '\u{1d172}'), + ('\u{1193d}', '\u{1193d}'), + ('\u{11f41}', '\u{11f41}'), + ('\u{16ff0}', '\u{16ff1}'), + ('\u{1d165}', '\u{1d166}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{e0020}', '\u{e007f}'), ]; -pub const OTHER_ID_CONTINUE: &'static [(char, char)] = - &[('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚')]; +pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[ + ('·', '·'), + ('·', '·'), + ('፩', '፱'), + ('᧚', '᧚'), + ('\u{200c}', '\u{200d}'), + ('・', '・'), + ('・', '・'), +]; pub const OTHER_ID_START: &'static [(char, char)] = &[('\u{1885}', '\u{1886}'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜')]; @@ -9013,17 +9676,21 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'), + ('។', '៕'), ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭞', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰼'), ('᱾', '᱿'), + ('․', '․'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹓', '⹔'), @@ -9039,6 +9706,8 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹒', '﹒'), ('﹖', '﹗'), ('!', '!'), @@ -9057,6 +9726,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), @@ -9073,6 +9743,7 @@ pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺘', '𖺘'), ('𛲟', '𛲟'), ('𝪈', '𝪈'), @@ -9132,7 +9803,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('܀', '܊'), ('܌', '܌'), ('߸', '߹'), - ('࠰', '࠾'), + ('࠰', '࠵'), + ('࠷', '࠾'), ('࡞', '࡞'), ('।', '॥'), ('๚', '๛'), @@ -9149,13 +9821,16 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('᠈', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭝', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰿'), ('᱾', '᱿'), + ('․', '․'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹁', 
'⹁'), @@ -9174,6 +9849,8 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('꫟', '꫟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹐', '﹒'), ('﹔', '﹗'), ('!', '!'), @@ -9201,6 +9878,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𑇞', '𑇟'), ('𑈸', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑍'), ('𑑚', '𑑛'), ('𑗂', '𑗅'), @@ -9221,6 +9899,7 @@ pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬹'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺗', '𖺘'), ('𛲟', '𛲟'), ('𝪇', '𝪊'), @@ -9241,6 +9920,7 @@ pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), ]; @@ -9522,6 +10202,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('Ḁ', 'Ḁ'), @@ -9848,9 +10529,12 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -9860,6 +10544,7 @@ pub const UPPERCASE: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), @@ -9973,7 +10658,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ࡠ', 'ࡪ'), ('ࡰ', 'ࢇ'), ('ࢉ', 'ࢎ'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{963}'), ('०', '९'), ('ॱ', 'ঃ'), @@ -10072,8 +10757,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -10158,8 +10843,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), - ('ᜀ', '᜕'), - ('ᜟ', '᜴'), + ('ᜀ', '\u{1715}'), + ('ᜟ', '\u{1734}'), ('ᝀ', '\u{1753}'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), @@ -10192,11 +10877,11 @@ pub const XID_CONTINUE: &'static [(char, 
char)] = &[ ('\u{1b00}', 'ᭌ'), ('᭐', '᭙'), ('\u{1b6b}', '\u{1b73}'), - ('\u{1b80}', '᯳'), + ('\u{1b80}', '\u{1bf3}'), ('ᰀ', '\u{1c37}'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('\u{1cd0}', '\u{1cd2}'), @@ -10220,6 +10905,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), + ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'), ('ⁱ', 'ⁱ'), @@ -10265,8 +10951,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ぁ', 'ゖ'), ('\u{3099}', '\u{309a}'), ('ゝ', 'ゟ'), - ('ァ', 'ヺ'), - ('ー', 'ヿ'), + ('ァ', 'ヿ'), ('ㄅ', 'ㄯ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆿ'), @@ -10281,10 +10966,10 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('ꙿ', '\u{a6f1}'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠧ'), ('\u{a82c}', '\u{a82c}'), ('ꡀ', 'ꡳ'), @@ -10293,9 +10978,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{a8e0}', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', '\u{a92d}'), - ('ꤰ', '꥓'), + ('ꤰ', '\u{a953}'), ('ꥠ', 'ꥼ'), - ('\u{a980}', '꧀'), + ('\u{a980}', '\u{a9c0}'), ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', '\u{aa36}'), @@ -10350,7 +11035,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('A', 'Z'), ('_', '_'), ('a', 'z'), - ('ヲ', 'ᄒ'), + ('・', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), @@ -10388,6 +11073,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -10428,10 +11114,14 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𐳀', '𐳲'), ('𐴀', '\u{10d27}'), ('𐴰', '𐴹'), + ('𐵀', '𐵥'), + ('\u{10d69}', '\u{10d6d}'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('\u{10eab}', '\u{10eac}'), ('𐺰', '𐺱'), - ('\u{10efd}', '𐼜'), + ('𐻂', '𐻄'), + ('\u{10efc}', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '\u{10f50}'), ('𐽰', '\u{10f85}'), @@ -10471,12 +11161,22 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), 
('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏓'), + ('\u{113e1}', '\u{113e2}'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('\u{1145e}', '𑑡'), @@ -10491,6 +11191,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑙐', '𑙙'), ('𑚀', '𑚸'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑜹'), @@ -10514,6 +11215,8 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𑩐', '\u{11a99}'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), + ('𑯰', '𑯹'), ('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱀'), @@ -10538,7 +11241,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f42}'), - ('𑽐', '𑽙'), + ('𑽐', '\u{11f5a}'), ('𑾰', '𑾰'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), @@ -10546,7 +11249,9 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('\u{13440}', '\u{13455}'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄹'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), @@ -10559,16 +11264,18 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), + ('𖵰', '𖵹'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟'), ('𖿠', '𖿡'), ('𖿣', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -10583,10 +11290,11 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('\u{1bc9d}', '\u{1bc9e}'), + ('𜳰', '𜳹'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -10644,6 +11352,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𞊐', '\u{1e2ae}'), ('𞋀', '𞋹'), ('𞓐', '𞓹'), + 
('𞗐', '𞗺'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -10691,6 +11400,7 @@ pub const XID_CONTINUE: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -10922,7 +11632,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -11005,10 +11715,10 @@ pub const XID_START: &'static [(char, char)] = &[ ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -11113,6 +11823,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -11149,8 +11860,11 @@ pub const XID_START: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -11189,6 +11903,13 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -11223,6 +11944,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -11246,7 +11968,9 @@ pub const XID_START: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -11255,6 +11979,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -11263,7 +11988,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -11316,6 +12041,8 @@ pub const XID_START: &'static [(char, char)] = &[ ('𞊐', '𞊭'), 
('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -11361,6 +12088,7 @@ pub const XID_START: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), diff --git a/regex-syntax/src/unicode_tables/property_names.rs b/regex-syntax/src/unicode_tables/property_names.rs index 599a123ae..a27b49133 100644 --- a/regex-syntax/src/unicode_tables/property_names.rs +++ b/regex-syntax/src/unicode_tables/property_names.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-names ucd-15.0.0 +// ucd-generate property-names ucd-16.0.0 // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("age", "Age"), @@ -116,6 +116,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("hst", "Hangul_Syllable_Type"), ("hyphen", "Hyphen"), ("idc", "ID_Continue"), + ("idcompatmathcontinue", "ID_Compat_Math_Continue"), + ("idcompatmathstart", "ID_Compat_Math_Start"), ("idcontinue", "ID_Continue"), ("ideo", "Ideographic"), ("ideographic", "Ideographic"), @@ -125,6 +127,10 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("idst", "IDS_Trinary_Operator"), ("idstart", "ID_Start"), ("idstrinaryoperator", "IDS_Trinary_Operator"), + ("idsu", "IDS_Unary_Operator"), + ("idsunaryoperator", "IDS_Unary_Operator"), + ("incb", "Indic_Conjunct_Break"), + ("indicconjunctbreak", "Indic_Conjunct_Break"), ("indicpositionalcategory", "Indic_Positional_Category"), ("indicsyllabiccategory", "Indic_Syllabic_Category"), ("inpc", "Indic_Positional_Category"), @@ -140,6 +146,13 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("jt", "Joining_Type"), ("kaccountingnumeric", "kAccountingNumeric"), ("kcompatibilityvariant", 
"kCompatibilityVariant"), + ("kehcat", "kEH_Cat"), + ("kehdesc", "kEH_Desc"), + ("kehhg", "kEH_HG"), + ("kehifao", "kEH_IFAO"), + ("kehjsesh", "kEH_JSesh"), + ("kehnomirror", "kEH_NoMirror"), + ("kehnorotate", "kEH_NoRotate"), ("kiicore", "kIICore"), ("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"), @@ -164,6 +177,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("lowercase", "Lowercase"), ("lowercasemapping", "Lowercase_Mapping"), ("math", "Math"), + ("mcm", "Modifier_Combining_Mark"), + ("modifiercombiningmark", "Modifier_Combining_Mark"), ("na", "Name"), ("na1", "Unicode_1_Name"), ("name", "Name"), @@ -177,6 +192,8 @@ pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[ ("nfkccf", "NFKC_Casefold"), ("nfkcqc", "NFKC_Quick_Check"), ("nfkcquickcheck", "NFKC_Quick_Check"), + ("nfkcscf", "NFKC_Simple_Casefold"), + ("nfkcsimplecasefold", "NFKC_Simple_Casefold"), ("nfkdqc", "NFKD_Quick_Check"), ("nfkdquickcheck", "NFKD_Quick_Check"), ("noncharactercodepoint", "Noncharacter_Code_Point"), diff --git a/regex-syntax/src/unicode_tables/property_values.rs b/regex-syntax/src/unicode_tables/property_values.rs index cb2d32fb7..2270d6638 100644 --- a/regex-syntax/src/unicode_tables/property_values.rs +++ b/regex-syntax/src/unicode_tables/property_values.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate property-values ucd-15.0.0 --include gc,script,scx,age,gcb,wb,sb +// ucd-generate property-values ucd-16.0.0 --include gc,script,scx,age,gcb,wb,sb // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. 
pub const PROPERTY_VALUES: &'static [( &'static str, @@ -21,6 +21,8 @@ pub const PROPERTY_VALUES: &'static [( ("13.0", "V13_0"), ("14.0", "V14_0"), ("15.0", "V15_0"), + ("15.1", "V15_1"), + ("16.0", "V16_0"), ("2.0", "V2_0"), ("2.1", "V2_1"), ("3.0", "V3_0"), @@ -48,6 +50,8 @@ pub const PROPERTY_VALUES: &'static [( ("v130", "V13_0"), ("v140", "V14_0"), ("v150", "V15_0"), + ("v151", "V15_1"), + ("v160", "V16_0"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"), @@ -262,6 +266,8 @@ pub const PROPERTY_VALUES: &'static [( ("elymaic", "Elymaic"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"), + ("gara", "Garay"), + ("garay", "Garay"), ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"), @@ -276,9 +282,11 @@ pub const PROPERTY_VALUES: &'static [( ("grek", "Greek"), ("gujarati", "Gujarati"), ("gujr", "Gujarati"), + ("gukh", "Gurung_Khema"), ("gunjalagondi", "Gunjala_Gondi"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"), + ("gurungkhema", "Gurung_Khema"), ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), @@ -320,8 +328,10 @@ pub const PROPERTY_VALUES: &'static [( ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"), + ("kiratrai", "Kirat_Rai"), ("kits", "Khitan_Small_Script"), ("knda", "Kannada"), + ("krai", "Kirat_Rai"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"), ("lao", "Lao"), @@ -401,6 +411,8 @@ pub const PROPERTY_VALUES: &'static [( ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), ("olduyghur", "Old_Uyghur"), + ("olonal", "Ol_Onal"), + ("onao", "Ol_Onal"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -456,6 +468,8 @@ pub const PROPERTY_VALUES: &'static [( ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"), + ("sunu", "Sunuwar"), + ("sunuwar", "Sunuwar"), ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"), ("syrc", "Syriac"), @@ -489,7 +503,11 @@ pub const PROPERTY_VALUES: &'static [( ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("tnsa", "Tangsa"), + 
("todhri", "Todhri"), + ("todr", "Todhri"), ("toto", "Toto"), + ("tulutigalari", "Tulu_Tigalari"), + ("tutg", "Tulu_Tigalari"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), @@ -591,6 +609,8 @@ pub const PROPERTY_VALUES: &'static [( ("elymaic", "Elymaic"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"), + ("gara", "Garay"), + ("garay", "Garay"), ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"), @@ -605,9 +625,11 @@ pub const PROPERTY_VALUES: &'static [( ("grek", "Greek"), ("gujarati", "Gujarati"), ("gujr", "Gujarati"), + ("gukh", "Gurung_Khema"), ("gunjalagondi", "Gunjala_Gondi"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"), + ("gurungkhema", "Gurung_Khema"), ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), @@ -649,8 +671,10 @@ pub const PROPERTY_VALUES: &'static [( ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"), + ("kiratrai", "Kirat_Rai"), ("kits", "Khitan_Small_Script"), ("knda", "Kannada"), + ("krai", "Kirat_Rai"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"), ("lao", "Lao"), @@ -730,6 +754,8 @@ pub const PROPERTY_VALUES: &'static [( ("oldsoutharabian", "Old_South_Arabian"), ("oldturkic", "Old_Turkic"), ("olduyghur", "Old_Uyghur"), + ("olonal", "Ol_Onal"), + ("onao", "Ol_Onal"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"), ("orya", "Oriya"), @@ -785,6 +811,8 @@ pub const PROPERTY_VALUES: &'static [( ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"), + ("sunu", "Sunuwar"), + ("sunuwar", "Sunuwar"), ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"), ("syrc", "Syriac"), @@ -818,7 +846,11 @@ pub const PROPERTY_VALUES: &'static [( ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("tnsa", "Tangsa"), + ("todhri", "Todhri"), + ("todr", "Todhri"), ("toto", "Toto"), + ("tulutigalari", "Tulu_Tigalari"), + ("tutg", "Tulu_Tigalari"), ("ugar", "Ugaritic"), ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), diff --git a/regex-syntax/src/unicode_tables/script.rs 
b/regex-syntax/src/unicode_tables/script.rs index cc5c400dd..3e437ca9c 100644 --- a/regex-syntax/src/unicode_tables/script.rs +++ b/regex-syntax/src/unicode_tables/script.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script ucd-15.0.0 --chars +// ucd-generate script ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -46,6 +46,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Elbasan", ELBASAN), ("Elymaic", ELYMAIC), ("Ethiopic", ETHIOPIC), + ("Garay", GARAY), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC), ("Gothic", GOTHIC), @@ -54,6 +55,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Gujarati", GUJARATI), ("Gunjala_Gondi", GUNJALA_GONDI), ("Gurmukhi", GURMUKHI), + ("Gurung_Khema", GURUNG_KHEMA), ("Han", HAN), ("Hangul", HANGUL), ("Hanifi_Rohingya", HANIFI_ROHINGYA), @@ -76,6 +78,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Khmer", KHMER), ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), + ("Kirat_Rai", KIRAT_RAI), ("Lao", LAO), ("Latin", LATIN), ("Lepcha", LEPCHA), @@ -113,6 +116,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI), + ("Ol_Onal", OL_ONAL), ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC), ("Old_North_Arabian", OLD_NORTH_ARABIAN), @@ -144,6 +148,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO), ("Sundanese", SUNDANESE), + ("Sunuwar", SUNUWAR), ("Syloti_Nagri", SYLOTI_NAGRI), ("Syriac", SYRIAC), ("Tagalog", TAGALOG), @@ -161,7 +166,9 @@ pub const BY_NAME: 
&'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Todhri", TODHRI), ("Toto", TOTO), + ("Tulu_Tigalari", TULU_TIGALARI), ("Ugaritic", UGARITIC), ("Vai", VAI), ("Vithkuqi", VITHKUQI), @@ -193,7 +200,7 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ݐ', 'ݿ'), ('ࡰ', 'ࢎ'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), ('ﭐ', '﯂'), ('ﯓ', 'ﴽ'), @@ -204,7 +211,8 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('𐹠', '𐹾'), - ('\u{10efd}', '\u{10eff}'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -246,14 +254,14 @@ pub const ARMENIAN: &'static [(char, char)] = pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; pub const BASSA_VAH: &'static [(char, char)] = &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')]; +pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; pub const BENGALI: &'static [(char, char)] = &[ ('ঀ', 'ঃ'), @@ -354,15 +362,14 @@ pub const COMMON: &'static [(char, char)] = &[ ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮗', '⯿'), ('⸀', '⹝'), - ('⿰', '⿻'), - ('\u{3000}', '〄'), + ('⿰', '〄'), ('〆', '〆'), ('〈', '〠'), ('〰', '〷'), @@ -371,7 +378,8 @@ pub const COMMON: &'static [(char, char)] = &[ ('゠', '゠'), ('・', 'ー'), ('㆐', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈠', '㉟'), ('㉿', '㋏'), ('㋿', '㋿'), @@ -405,10 +413,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('𐇐', '𐇼'), ('𐋡', '𐋻'), ('\u{1bca0}', '\u{1bca3}'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), 
('𝄀', '𝄦'), - ('𝄩', '𝅦'), + ('𝄩', '\u{1d166}'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), @@ -465,19 +475,18 @@ pub const COMMON: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; @@ -496,7 +505,7 @@ pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𒾐', '𒿲')]; pub const CYRILLIC: &'static [(char, char)] = &[ ('Ѐ', '\u{484}'), ('\u{487}', 'ԯ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'), ('\u{2de0}', '\u{2dff}'), @@ -533,7 +542,7 @@ pub const DUPLOYAN: &'static [(char, char)] = &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '𛲟')]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}')]; + &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; @@ -578,6 +587,9 @@ pub const ETHIOPIC: &'static [(char, char)] = &[ ('𞟰', '𞟾'), ]; +pub const GARAY: &'static [(char, char)] = + &[('𐵀', '𐵥'), ('\u{10d69}', '𐶅'), ('𐶎', '𐶏')]; + pub const GEORGIAN: &'static [(char, char)] = &[ ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), @@ -612,7 +624,7 @@ pub const GRANTHA: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133c}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), @@ -704,6 +716,8 @@ pub const GURMUKHI: &'static [(char, char)] = &[ ('੦', '੶'), ]; +pub const GURUNG_KHEMA: &'static [(char, char)] = &[('𖄀', '𖄹')]; + pub const HAN: &'static [(char, char)] = &[ ('⺀', '⺙'), ('⺛', '⻳'), @@ -717,12 +731,13 @@ pub const HAN: &'static [(char, char)] = &[ ('豈', '舘'), ('並', '龎'), ('𖿢', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𠀀', '𪛟'), ('𪜀', '𫜹'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), 
('𱍐', '𲎯'), @@ -748,7 +763,7 @@ pub const HANGUL: &'static [(char, char)] = &[ pub const HANIFI_ROHINGYA: &'static [(char, char)] = &[('𐴀', '\u{10d27}'), ('𐴰', '𐴹')]; -pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '᜴')]; +pub const HANUNOO: &'static [(char, char)] = &[('ᜠ', '\u{1734}')]; pub const HATRAN: &'static [(char, char)] = &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; @@ -828,8 +843,8 @@ pub const KANNADA: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), @@ -855,7 +870,7 @@ pub const KATAKANA: &'static [(char, char)] = &[ ]; pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')]; pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '\u{a92d}'), ('꤯', '꤯')]; @@ -871,7 +886,7 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; @@ -881,6 +896,8 @@ pub const KHOJKI: &'static [(char, char)] = &[('𑈀', '𑈑'), ('𑈓', '\u{112 pub const KHUDAWADI: &'static [(char, char)] = &[('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; +pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; + pub const LAO: &'static [(char, char)] = &[ ('ກ', 'ຂ'), ('ຄ', 'ຄ'), @@ -919,10 +936,10 @@ pub const LATIN: &'static [(char, char)] = &[ ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟿ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), @@ -1026,7 +1043,7 @@ pub const MULTANI: &'static [(char, char)] = &[('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; pub const MYANMAR: &'static [(char, char)] = - &[('က', 
'႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')]; + &[('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; @@ -1051,6 +1068,8 @@ pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')]; pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')]; +pub const OL_ONAL: &'static [(char, char)] = &[('𞗐', '𞗺'), ('𞗿', '𞗿')]; + pub const OLD_HUNGARIAN: &'static [(char, char)] = &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')]; @@ -1105,7 +1124,7 @@ pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', ' pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')]; +pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')]; @@ -1149,12 +1168,14 @@ pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; pub const SUNDANESE: &'static [(char, char)] = &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; +pub const SUNUWAR: &'static [(char, char)] = &[('𑯀', '𑯡'), ('𑯰', '𑯹')]; + pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('ꠀ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[('܀', '܍'), ('\u{70f}', '\u{74a}'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ')]; -pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ')]; +pub const TAGALOG: &'static [(char, char)] = &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ')]; pub const TAGBANWA: &'static [(char, char)] = &[('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; @@ -1234,8 +1255,24 @@ pub const TIFINAGH: &'static [(char, char)] = pub const TIRHUTA: &'static [(char, char)] = &[('𑒀', '𑓇'), ('𑓐', '𑓙')]; +pub const TODHRI: &'static [(char, char)] = &[('𐗀', '𐗳')]; + pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; +pub const TULU_TIGALARI: &'static [(char, char)] = &[ + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', 
'\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), +]; + pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; pub const VAI: &'static [(char, char)] = &[('ꔀ', 'ꘫ')]; diff --git a/regex-syntax/src/unicode_tables/script_extension.rs b/regex-syntax/src/unicode_tables/script_extension.rs index 42625e21b..e3f492e2d 100644 --- a/regex-syntax/src/unicode_tables/script_extension.rs +++ b/regex-syntax/src/unicode_tables/script_extension.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate script-extension ucd-15.0.0 --chars +// ucd-generate script-extension ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. +// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Adlam", ADLAM), @@ -46,6 +46,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Elbasan", ELBASAN), ("Elymaic", ELYMAIC), ("Ethiopic", ETHIOPIC), + ("Garay", GARAY), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC), ("Gothic", GOTHIC), @@ -54,6 +55,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Gujarati", GUJARATI), ("Gunjala_Gondi", GUNJALA_GONDI), ("Gurmukhi", GURMUKHI), + ("Gurung_Khema", GURUNG_KHEMA), ("Han", HAN), ("Hangul", HANGUL), ("Hanifi_Rohingya", HANIFI_ROHINGYA), @@ -76,6 +78,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Khmer", KHMER), ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), + ("Kirat_Rai", KIRAT_RAI), ("Lao", LAO), ("Latin", LATIN), ("Lepcha", LEPCHA), @@ -113,6 +116,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Nyiakeng_Puachue_Hmong", NYIAKENG_PUACHUE_HMONG), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI), + ("Ol_Onal", OL_ONAL), ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC), 
("Old_North_Arabian", OLD_NORTH_ARABIAN), @@ -144,6 +148,7 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO), ("Sundanese", SUNDANESE), + ("Sunuwar", SUNUWAR), ("Syloti_Nagri", SYLOTI_NAGRI), ("Syriac", SYRIAC), ("Tagalog", TAGALOG), @@ -161,7 +166,9 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Tibetan", TIBETAN), ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), + ("Todhri", TODHRI), ("Toto", TOTO), + ("Tulu_Tigalari", TULU_TIGALARI), ("Ugaritic", UGARITIC), ("Vai", VAI), ("Vithkuqi", VITHKUQI), @@ -172,8 +179,15 @@ pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("Zanabazar_Square", ZANABAZAR_SQUARE), ]; -pub const ADLAM: &'static [(char, char)] = - &[('؟', '؟'), ('ـ', 'ـ'), ('𞤀', '𞥋'), ('𞥐', '𞥙'), ('𞥞', '𞥟')]; +pub const ADLAM: &'static [(char, char)] = &[ + ('؟', '؟'), + ('ـ', 'ـ'), + ('⁏', '⁏'), + ('⹁', '⹁'), + ('𞤀', '𞥋'), + ('𞥐', '𞥙'), + ('𞥞', '𞥟'), +]; pub const AHOM: &'static [(char, char)] = &[('𑜀', '𑜚'), ('\u{1171d}', '\u{1172b}'), ('𑜰', '𑝆')]; @@ -187,8 +201,10 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ݐ', 'ݿ'), ('ࡰ', 'ࢎ'), ('\u{890}', '\u{891}'), - ('\u{898}', '\u{8e1}'), + ('\u{897}', '\u{8e1}'), ('\u{8e3}', '\u{8ff}'), + ('⁏', '⁏'), + ('⹁', '⹁'), ('ﭐ', '﯂'), ('ﯓ', 'ﶏ'), ('ﶒ', 'ﷇ'), @@ -198,7 +214,8 @@ pub const ARABIC: &'static [(char, char)] = &[ ('ﹶ', 'ﻼ'), ('\u{102e0}', '𐋻'), ('𐹠', '𐹾'), - ('\u{10efd}', '\u{10eff}'), + ('𐻂', '𐻄'), + ('\u{10efc}', '\u{10eff}'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), @@ -236,20 +253,22 @@ pub const ARABIC: &'static [(char, char)] = &[ ]; pub const ARMENIAN: &'static [(char, char)] = - &[('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; + &[('\u{308}', '\u{308}'), ('Ա', 'Ֆ'), ('ՙ', '֊'), ('֍', '֏'), ('ﬓ', 'ﬗ')]; -pub const AVESTAN: &'static [(char, char)] = &[('𐬀', '𐬵'), ('𐬹', '𐬿')]; +pub const AVESTAN: &'static [(char, char)] = + &[('·', '·'), ('⸰', '⸱'), ('𐬀', '𐬵'), ('𐬹', 
'𐬿')]; -pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭐', '᭾')]; +pub const BALINESE: &'static [(char, char)] = &[('\u{1b00}', 'ᭌ'), ('᭎', '᭿')]; pub const BAMUM: &'static [(char, char)] = &[('ꚠ', '꛷'), ('𖠀', '𖨸')]; pub const BASSA_VAH: &'static [(char, char)] = &[('𖫐', '𖫭'), ('\u{16af0}', '𖫵')]; -pub const BATAK: &'static [(char, char)] = &[('ᯀ', '᯳'), ('᯼', '᯿')]; +pub const BATAK: &'static [(char, char)] = &[('ᯀ', '\u{1bf3}'), ('᯼', '᯿')]; pub const BENGALI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), ('\u{951}', '\u{952}'), ('।', '॥'), ('ঀ', 'ঃ'), @@ -282,6 +301,9 @@ pub const BHAIKSUKI: &'static [(char, char)] = &[('𑰀', '𑰈'), ('𑰊', '\u{11c36}'), ('\u{11c38}', '𑱅'), ('𑱐', '𑱬')]; pub const BOPOMOFO: &'static [(char, char)] = &[ + ('ˇ', 'ˇ'), + ('ˉ', 'ˋ'), + ('˙', '˙'), ('˪', '˫'), ('、', '〃'), ('〈', '】'), @@ -309,10 +331,16 @@ pub const BUHID: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝀ', '\u{1753}') pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'), ('𑪰', '𑪿')]; -pub const CARIAN: &'static [(char, char)] = &[('𐊠', '𐋐')]; +pub const CARIAN: &'static [(char, char)] = + &[('·', '·'), ('⁚', '⁚'), ('⁝', '⁝'), ('⸱', '⸱'), ('𐊠', '𐋐')]; -pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = - &[('𐔰', '𐕣'), ('𐕯', '𐕯')]; +pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[ + ('\u{304}', '\u{304}'), + ('\u{331}', '\u{331}'), + ('\u{35e}', '\u{35e}'), + ('𐔰', '𐕣'), + ('𐕯', '𐕯'), +]; pub const CHAKMA: &'static [(char, char)] = &[('০', '৯'), ('၀', '၉'), ('\u{11100}', '\u{11134}'), ('𑄶', '𑅇')]; @@ -320,8 +348,16 @@ pub const CHAKMA: &'static [(char, char)] = pub const CHAM: &'static [(char, char)] = &[('ꨀ', '\u{aa36}'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟')]; -pub const CHEROKEE: &'static [(char, char)] = - &[('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ')]; +pub const CHEROKEE: &'static [(char, char)] = &[ + ('\u{300}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{30b}', '\u{30c}'), + ('\u{323}', '\u{324}'), + ('\u{330}', 
'\u{331}'), + ('Ꭰ', 'Ᏽ'), + ('ᏸ', 'ᏽ'), + ('ꭰ', 'ꮿ'), +]; pub const CHORASMIAN: &'static [(char, char)] = &[('𐾰', '𐿋')]; @@ -329,14 +365,20 @@ pub const COMMON: &'static [(char, char)] = &[ ('\0', '@'), ('[', '`'), ('{', '©'), - ('«', '¹'), + ('«', '¶'), + ('¸', '¹'), ('»', '¿'), ('×', '×'), ('÷', '÷'), - ('ʹ', '˟'), + ('ʹ', 'ʻ'), + ('ʽ', 'ˆ'), + ('ˈ', 'ˈ'), + ('ˌ', 'ˌ'), + ('ˎ', '˖'), + ('˘', '˘'), + ('˚', '˟'), ('˥', '˩'), ('ˬ', '˿'), - ('ʹ', 'ʹ'), (';', ';'), ('΅', '΅'), ('·', '·'), @@ -345,10 +387,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{8e2}', '\u{8e2}'), ('฿', '฿'), ('࿕', '࿘'), - ('᛫', '᛭'), ('\u{2000}', '\u{200b}'), ('\u{200e}', '\u{202e}'), - ('‰', '\u{2064}'), + ('‰', '⁎'), + ('⁐', '⁙'), + ('⁛', '⁜'), + ('⁞', '\u{2064}'), ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), @@ -359,15 +403,18 @@ pub const COMMON: &'static [(char, char)] = &[ ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'), - ('←', '␦'), + ('←', '␩'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮗', '⯿'), - ('⸀', '⹂'), + ('⸀', '⸖'), + ('⸘', 'ⸯ'), + ('⸲', '⸻'), + ('⸽', '⹀'), + ('⹂', '⹂'), ('⹄', '⹝'), - ('⿰', '⿻'), ('\u{3000}', '\u{3000}'), ('〄', '〄'), ('〒', '〒'), @@ -399,10 +446,12 @@ pub const COMMON: &'static [(char, char)] = &[ ('\u{fff9}', '�'), ('𐆐', '𐆜'), ('𐇐', '𐇼'), + ('𜰀', '𜳹'), + ('𜴀', '𜺳'), ('𜽐', '𜿃'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), - ('𝄩', '𝅦'), + ('𝄩', '\u{1d166}'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), @@ -458,25 +507,34 @@ pub const COMMON: &'static [(char, char)] = &[ ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), - ('🢰', '🢱'), + ('🢰', '🢻'), + ('🣀', '🣁'), ('🤀', '🩓'), ('🩠', '🩭'), ('🩰', '🩼'), - ('🪀', '🪈'), - ('🪐', '🪽'), - ('🪿', '🫅'), - ('🫎', '🫛'), - ('🫠', '🫨'), + ('🪀', '🪉'), + ('🪏', '🫆'), + ('🫎', '🫜'), + ('🫟', '🫩'), ('🫰', '🫸'), ('🬀', '🮒'), - ('🮔', '🯊'), - ('🯰', '🯹'), + ('🮔', '🯹'), ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'), ]; -pub const COPTIC: &'static [(char, char)] = - &[('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'), ('\u{102e0}', '𐋻')]; +pub const COPTIC: &'static [(char, char)] = &[ + 
('·', '·'), + ('\u{300}', '\u{300}'), + ('\u{304}', '\u{305}'), + ('\u{307}', '\u{307}'), + ('ʹ', '͵'), + ('Ϣ', 'ϯ'), + ('Ⲁ', 'ⳳ'), + ('⳹', '⳿'), + ('⸗', '⸗'), + ('\u{102e0}', '𐋻'), +]; pub const CUNEIFORM: &'static [(char, char)] = &[('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃')]; @@ -496,8 +554,15 @@ pub const CYPRIOT: &'static [(char, char)] = &[ pub const CYPRO_MINOAN: &'static [(char, char)] = &[('𐄀', '𐄁'), ('𒾐', '𒿲')]; pub const CYRILLIC: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), + ('\u{300}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{306}', '\u{306}'), + ('\u{308}', '\u{308}'), + ('\u{30b}', '\u{30b}'), + ('\u{311}', '\u{311}'), ('Ѐ', 'ԯ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'), ('\u{1df8}', '\u{1df8}'), @@ -512,6 +577,7 @@ pub const CYRILLIC: &'static [(char, char)] = &[ pub const DESERET: &'static [(char, char)] = &[('𐐀', '𐑏')]; pub const DEVANAGARI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), ('\u{900}', '\u{952}'), ('\u{955}', 'ॿ'), ('\u{1cd0}', 'ᳶ'), @@ -536,17 +602,29 @@ pub const DIVES_AKURU: &'static [(char, char)] = &[ pub const DOGRA: &'static [(char, char)] = &[('।', '९'), ('꠰', '꠹'), ('𑠀', '𑠻')]; -pub const DUPLOYAN: &'static [(char, char)] = - &[('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}')]; +pub const DUPLOYAN: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{307}', '\u{308}'), + ('\u{30a}', '\u{30a}'), + ('\u{323}', '\u{324}'), + ('⸼', '⸼'), + ('𛰀', '𛱪'), + ('𛱰', '𛱼'), + ('𛲀', '𛲈'), + ('𛲐', '𛲙'), + ('𛲜', '\u{1bca3}'), +]; pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = - &[('𓀀', '\u{13455}')]; + &[('𓀀', '\u{13455}'), ('𓑠', '𔏺')]; -pub const ELBASAN: &'static [(char, char)] = &[('𐔀', '𐔧')]; +pub const ELBASAN: &'static [(char, char)] = + &[('·', '·'), ('\u{305}', '\u{305}'), ('𐔀', '𐔧')]; pub const ELYMAIC: &'static [(char, char)] = &[('𐿠', '𐿶')]; pub const ETHIOPIC: &'static [(char, char)] = &[ + ('\u{30e}', '\u{30e}'), ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), @@ -585,21 +663,40 @@ pub const 
ETHIOPIC: &'static [(char, char)] = &[ ('𞟰', '𞟾'), ]; +pub const GARAY: &'static [(char, char)] = &[ + ('،', '،'), + ('؛', '؛'), + ('؟', '؟'), + ('𐵀', '𐵥'), + ('\u{10d69}', '𐶅'), + ('𐶎', '𐶏'), +]; + pub const GEORGIAN: &'static [(char, char)] = &[ + ('·', '·'), + ('։', '։'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჿ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), + ('⁚', '⁚'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), + ('⸱', '⸱'), ]; pub const GLAGOLITIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{303}', '\u{303}'), + ('\u{305}', '\u{305}'), ('\u{484}', '\u{484}'), ('\u{487}', '\u{487}'), + ('։', '։'), + ('჻', '჻'), + ('⁚', '⁚'), ('Ⰰ', 'ⱟ'), ('⹃', '⹃'), ('\u{a66f}', '\u{a66f}'), @@ -610,7 +707,13 @@ pub const GLAGOLITIC: &'static [(char, char)] = &[ ('\u{1e026}', '\u{1e02a}'), ]; -pub const GOTHIC: &'static [(char, char)] = &[('𐌰', '𐍊')]; +pub const GOTHIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{304}', '\u{305}'), + ('\u{308}', '\u{308}'), + ('\u{331}', '\u{331}'), + ('𐌰', '𐍊'), +]; pub const GRANTHA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -630,7 +733,7 @@ pub const GRANTHA: &'static [(char, char)] = &[ ('𑌵', '𑌹'), ('\u{1133b}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('𑍐', '𑍐'), ('\u{11357}', '\u{11357}'), ('𑍝', '𑍣'), @@ -641,10 +744,15 @@ pub const GRANTHA: &'static [(char, char)] = &[ ]; pub const GREEK: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{300}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{306}', '\u{306}'), + ('\u{308}', '\u{308}'), + ('\u{313}', '\u{313}'), ('\u{342}', '\u{342}'), ('\u{345}', '\u{345}'), - ('Ͱ', 'ͳ'), - ('͵', 'ͷ'), + ('Ͱ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('΄', '΄'), @@ -674,6 +782,7 @@ pub const GREEK: &'static [(char, char)] = &[ ('῝', '`'), ('ῲ', 'ῴ'), ('ῶ', '῾'), + ('⁝', '⁝'), ('Ω', 'Ω'), ('ꭥ', 'ꭥ'), ('𐅀', '𐆎'), @@ -702,6 +811,7 @@ pub const GUJARATI: &'static [(char, char)] = &[ ]; pub const GUNJALA_GONDI: &'static [(char, char)] = &[ + ('·', '·'), ('।', '॥'), ('𑵠', '𑵥'), ('𑵧', '𑵨'), @@ -733,10 
+843,14 @@ pub const GURMUKHI: &'static [(char, char)] = &[ ('꠰', '꠹'), ]; +pub const GURUNG_KHEMA: &'static [(char, char)] = &[('॥', '॥'), ('𖄀', '𖄹')]; + pub const HAN: &'static [(char, char)] = &[ + ('·', '·'), ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), + ('⿰', '⿿'), ('、', '〃'), ('々', '】'), ('〓', '〟'), @@ -745,7 +859,8 @@ pub const HAN: &'static [(char, char)] = &[ ('〷', '〿'), ('・', '・'), ('㆐', '㆟'), - ('㇀', '㇣'), + ('㇀', '㇥'), + ('㇯', '㇯'), ('㈠', '㉇'), ('㊀', '㊰'), ('㋀', '㋋'), @@ -761,7 +876,7 @@ pub const HAN: &'static [(char, char)] = &[ ('﹅', '﹆'), ('。', '・'), ('𖿢', '𖿣'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('𝍠', '𝍱'), ('🉐', '🉑'), ('𠀀', '𪛟'), @@ -769,6 +884,7 @@ pub const HAN: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -814,6 +930,7 @@ pub const HATRAN: &'static [(char, char)] = &[('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿')]; pub const HEBREW: &'static [(char, char)] = &[ + ('\u{307}', '\u{308}'), ('\u{591}', '\u{5c7}'), ('א', 'ת'), ('ׯ', '״'), @@ -849,9 +966,17 @@ pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[('𐡀', '𐡕'), ('𐡗', '𐡟')]; pub const INHERITED: &'static [(char, char)] = &[ - ('\u{300}', '\u{341}'), + ('\u{30f}', '\u{30f}'), + ('\u{312}', '\u{312}'), + ('\u{314}', '\u{31f}'), + ('\u{321}', '\u{322}'), + ('\u{326}', '\u{32c}'), + ('\u{32f}', '\u{32f}'), + ('\u{332}', '\u{341}'), ('\u{343}', '\u{344}'), - ('\u{346}', '\u{362}'), + ('\u{346}', '\u{357}'), + ('\u{359}', '\u{35d}'), + ('\u{35f}', '\u{362}'), ('\u{953}', '\u{954}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1dc2}', '\u{1df7}'), @@ -882,6 +1007,7 @@ pub const JAVANESE: &'static [(char, char)] = pub const KAITHI: &'static [(char, char)] = &[ ('०', '९'), + ('⸱', '⸱'), ('꠰', '꠹'), ('\u{11080}', '\u{110c2}'), ('\u{110cd}', '\u{110cd}'), @@ -896,15 +1022,15 @@ pub const KANNADA: &'static [(char, char)] = &[ ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('\u{cbc}', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', 
'\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('ೝ', 'ೞ'), ('ೠ', '\u{ce3}'), ('೦', '೯'), ('ೱ', 'ೳ'), ('\u{1cd0}', '\u{1cd0}'), - ('\u{1cd2}', '\u{1cd2}'), + ('\u{1cd2}', '᳓'), ('\u{1cda}', '\u{1cda}'), ('ᳲ', 'ᳲ'), ('\u{1cf4}', '\u{1cf4}'), @@ -912,6 +1038,8 @@ pub const KANNADA: &'static [(char, char)] = &[ ]; pub const KATAKANA: &'static [(char, char)] = &[ + ('\u{305}', '\u{305}'), + ('\u{323}', '\u{323}'), ('、', '〃'), ('〈', '】'), ('〓', '〟'), @@ -935,7 +1063,7 @@ pub const KATAKANA: &'static [(char, char)] = &[ ]; pub const KAWI: &'static [(char, char)] = - &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '𑽙')]; + &[('\u{11f00}', '𑼐'), ('𑼒', '\u{11f3a}'), ('𑼾', '\u{11f5a}')]; pub const KAYAH_LI: &'static [(char, char)] = &[('꤀', '꤯')]; @@ -951,7 +1079,7 @@ pub const KHAROSHTHI: &'static [(char, char)] = &[ ]; pub const KHITAN_SMALL_SCRIPT: &'static [(char, char)] = - &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕')]; + &[('\u{16fe4}', '\u{16fe4}'), ('𘬀', '𘳕'), ('𘳿', '𘳿')]; pub const KHMER: &'static [(char, char)] = &[('ក', '\u{17dd}'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿')]; @@ -962,6 +1090,8 @@ pub const KHOJKI: &'static [(char, char)] = pub const KHUDAWADI: &'static [(char, char)] = &[('।', '॥'), ('꠰', '꠹'), ('𑊰', '\u{112ea}'), ('𑋰', '𑋹')]; +pub const KIRAT_RAI: &'static [(char, char)] = &[('𖵀', '𖵹')]; + pub const LAO: &'static [(char, char)] = &[ ('ກ', 'ຂ'), ('ຄ', 'ຄ'), @@ -980,11 +1110,27 @@ pub const LATIN: &'static [(char, char)] = &[ ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), + ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ʸ'), + ('ʼ', 'ʼ'), + ('ˇ', 'ˇ'), + ('ˉ', 'ˋ'), + ('ˍ', 'ˍ'), + ('˗', '˗'), + ('˙', '˙'), ('ˠ', 'ˤ'), + ('\u{300}', '\u{30e}'), + ('\u{310}', '\u{311}'), + ('\u{313}', '\u{313}'), + ('\u{320}', '\u{320}'), + ('\u{323}', '\u{325}'), + ('\u{32d}', '\u{32e}'), + ('\u{330}', '\u{331}'), + ('\u{358}', '\u{358}'), + ('\u{35e}', '\u{35e}'), ('\u{363}', '\u{36f}'), ('\u{485}', '\u{486}'), ('\u{951}', '\u{952}'), @@ -994,6 +1140,7 @@ pub const LATIN: &'static [(char, 
char)] = &[ ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), + ('\u{1df8}', '\u{1df8}'), ('Ḁ', 'ỿ'), ('\u{202f}', '\u{202f}'), ('ⁱ', 'ⁱ'), @@ -1005,12 +1152,13 @@ pub const LATIN: &'static [(char, char)] = &[ ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'), + ('⸗', '⸗'), ('꜀', '꜇'), ('Ꜣ', 'ꞇ'), - ('Ꞌ', 'ꟊ'), + ('Ꞌ', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꟿ'), ('꤮', '꤮'), ('ꬰ', 'ꭚ'), @@ -1054,14 +1202,16 @@ pub const LINEAR_B: &'static [(char, char)] = &[ ('𐄷', '𐄿'), ]; -pub const LISU: &'static [(char, char)] = &[('ꓐ', '꓿'), ('𑾰', '𑾰')]; +pub const LISU: &'static [(char, char)] = + &[('ʼ', 'ʼ'), ('ˍ', 'ˍ'), ('《', '》'), ('ꓐ', '꓿'), ('𑾰', '𑾰')]; -pub const LYCIAN: &'static [(char, char)] = &[('𐊀', '𐊜')]; +pub const LYCIAN: &'static [(char, char)] = &[('⁚', '⁚'), ('𐊀', '𐊜')]; -pub const LYDIAN: &'static [(char, char)] = &[('𐤠', '𐤹'), ('𐤿', '𐤿')]; +pub const LYDIAN: &'static [(char, char)] = + &[('·', '·'), ('⸱', '⸱'), ('𐤠', '𐤹'), ('𐤿', '𐤿')]; pub const MAHAJANI: &'static [(char, char)] = - &[('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')]; + &[('·', '·'), ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶')]; pub const MAKASAR: &'static [(char, char)] = &[('𑻠', '𑻸')]; @@ -1076,6 +1226,7 @@ pub const MALAYALAM: &'static [(char, char)] = &[ ('ൔ', '\u{d63}'), ('൦', 'ൿ'), ('\u{1cda}', '\u{1cda}'), + ('ᳲ', 'ᳲ'), ('꠰', '꠲'), ]; @@ -1110,7 +1261,8 @@ pub const MENDE_KIKAKUI: &'static [(char, char)] = pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿')]; -pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[('𐦀', '𐦟')]; +pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = + &[('⁝', '⁝'), ('𐦀', '𐦟')]; pub const MIAO: &'static [(char, char)] = &[('𖼀', '𖽊'), ('\u{16f4f}', '𖾇'), ('\u{16f8f}', '𖾟')]; @@ -1123,6 +1275,8 @@ pub const MONGOLIAN: &'static [(char, char)] = &[ ('ᠠ', 'ᡸ'), ('ᢀ', 'ᢪ'), ('\u{202f}', '\u{202f}'), + ('、', '。'), + ('〈', '》'), ('𑙠', '𑙬'), ]; @@ -1132,7 +1286,7 @@ pub const MULTANI: &'static [(char, char)] = &[('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', 
'𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩')]; pub const MYANMAR: &'static [(char, char)] = - &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ')]; + &[('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'), ('𑛐', '𑛣')]; pub const NABATAEAN: &'static [(char, char)] = &[('𐢀', '𐢞'), ('𐢧', '𐢯')]; @@ -1173,15 +1327,31 @@ pub const OGHAM: &'static [(char, char)] = &[('\u{1680}', '᚜')]; pub const OL_CHIKI: &'static [(char, char)] = &[('᱐', '᱿')]; -pub const OLD_HUNGARIAN: &'static [(char, char)] = - &[('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿')]; +pub const OL_ONAL: &'static [(char, char)] = + &[('।', '॥'), ('𞗐', '𞗺'), ('𞗿', '𞗿')]; + +pub const OLD_HUNGARIAN: &'static [(char, char)] = &[ + ('⁚', '⁚'), + ('⁝', '⁝'), + ('⸱', '⸱'), + ('⹁', '⹁'), + ('𐲀', '𐲲'), + ('𐳀', '𐳲'), + ('𐳺', '𐳿'), +]; pub const OLD_ITALIC: &'static [(char, char)] = &[('𐌀', '𐌣'), ('𐌭', '𐌯')]; pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[('𐪀', '𐪟')]; -pub const OLD_PERMIC: &'static [(char, char)] = - &[('\u{483}', '\u{483}'), ('𐍐', '\u{1037a}')]; +pub const OLD_PERMIC: &'static [(char, char)] = &[ + ('·', '·'), + ('\u{300}', '\u{300}'), + ('\u{306}', '\u{308}'), + ('\u{313}', '\u{313}'), + ('\u{483}', '\u{483}'), + ('𐍐', '\u{1037a}'), +]; pub const OLD_PERSIAN: &'static [(char, char)] = &[('𐎠', '𐏃'), ('𐏈', '𐏕')]; @@ -1189,7 +1359,8 @@ pub const OLD_SOGDIAN: &'static [(char, char)] = &[('𐼀', '𐼧')]; pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[('𐩠', '𐩿')]; -pub const OLD_TURKIC: &'static [(char, char)] = &[('𐰀', '𐱈')]; +pub const OLD_TURKIC: &'static [(char, char)] = + &[('⁚', '⁚'), ('⸰', '⸰'), ('𐰀', '𐱈')]; pub const OLD_UYGHUR: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐫲', '𐫲'), ('𐽰', '𐾉')]; @@ -1215,7 +1386,14 @@ pub const ORIYA: &'static [(char, char)] = &[ ('ᳲ', 'ᳲ'), ]; -pub const OSAGE: &'static [(char, char)] = &[('𐒰', '𐓓'), ('𐓘', '𐓻')]; +pub const OSAGE: &'static [(char, char)] = &[ + ('\u{301}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{30b}', '\u{30b}'), + ('\u{358}', '\u{358}'), + ('𐒰', 
'𐓓'), + ('𐓘', '𐓻'), +]; pub const OSMANYA: &'static [(char, char)] = &[('𐒀', '𐒝'), ('𐒠', '𐒩')]; @@ -1226,19 +1404,25 @@ pub const PALMYRENE: &'static [(char, char)] = &[('𐡠', '𐡿')]; pub const PAU_CIN_HAU: &'static [(char, char)] = &[('𑫀', '𑫸')]; -pub const PHAGS_PA: &'static [(char, char)] = - &[('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷')]; +pub const PHAGS_PA: &'static [(char, char)] = &[ + ('᠂', '᠃'), + ('᠅', '᠅'), + ('\u{202f}', '\u{202f}'), + ('。', '。'), + ('ꡀ', '꡷'), +]; pub const PHOENICIAN: &'static [(char, char)] = &[('𐤀', '𐤛'), ('𐤟', '𐤟')]; pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯')]; -pub const REJANG: &'static [(char, char)] = &[('ꤰ', '꥓'), ('꥟', '꥟')]; +pub const REJANG: &'static [(char, char)] = &[('ꤰ', '\u{a953}'), ('꥟', '꥟')]; -pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ')]; +pub const RUNIC: &'static [(char, char)] = &[('ᚠ', 'ᛸ')]; -pub const SAMARITAN: &'static [(char, char)] = &[('ࠀ', '\u{82d}'), ('࠰', '࠾')]; +pub const SAMARITAN: &'static [(char, char)] = + &[('ࠀ', '\u{82d}'), ('࠰', '࠾'), ('⸱', '⸱')]; pub const SAURASHTRA: &'static [(char, char)] = &[('ꢀ', '\u{a8c5}'), ('꣎', '꣙')]; @@ -1249,10 +1433,12 @@ pub const SHARADA: &'static [(char, char)] = &[ ('\u{1cd9}', '\u{1cd9}'), ('\u{1cdc}', '\u{1cdd}'), ('\u{1ce0}', '\u{1ce0}'), + ('꠰', '꠵'), + ('꠸', '꠸'), ('\u{11180}', '𑇟'), ]; -pub const SHAVIAN: &'static [(char, char)] = &[('𐑐', '𐑿')]; +pub const SHAVIAN: &'static [(char, char)] = &[('·', '·'), ('𐑐', '𐑿')]; pub const SIDDHAM: &'static [(char, char)] = &[('𑖀', '\u{115b5}'), ('𑖸', '\u{115dd}')]; @@ -1274,6 +1460,7 @@ pub const SINHALA: &'static [(char, char)] = &[ ('ෘ', '\u{ddf}'), ('෦', '෯'), ('ෲ', '෴'), + ('ᳲ', 'ᳲ'), ('𑇡', '𑇴'), ]; @@ -1286,10 +1473,28 @@ pub const SOYOMBO: &'static [(char, char)] = &[('𑩐', '𑪢')]; pub const SUNDANESE: &'static [(char, char)] = &[('\u{1b80}', 'ᮿ'), ('᳀', '᳇')]; +pub const SUNUWAR: &'static [(char, char)] = &[ + ('\u{300}', '\u{301}'), + 
('\u{303}', '\u{303}'), + ('\u{30d}', '\u{30d}'), + ('\u{310}', '\u{310}'), + ('\u{32d}', '\u{32d}'), + ('\u{331}', '\u{331}'), + ('𑯀', '𑯡'), + ('𑯰', '𑯹'), +]; + pub const SYLOTI_NAGRI: &'static [(char, char)] = &[('।', '॥'), ('০', '৯'), ('ꠀ', '\u{a82c}')]; pub const SYRIAC: &'static [(char, char)] = &[ + ('\u{303}', '\u{304}'), + ('\u{307}', '\u{308}'), + ('\u{30a}', '\u{30a}'), + ('\u{320}', '\u{320}'), + ('\u{323}', '\u{325}'), + ('\u{32d}', '\u{32e}'), + ('\u{330}', '\u{330}'), ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), @@ -1305,13 +1510,19 @@ pub const SYRIAC: &'static [(char, char)] = &[ ]; pub const TAGALOG: &'static [(char, char)] = - &[('ᜀ', '᜕'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; + &[('ᜀ', '\u{1715}'), ('ᜟ', 'ᜟ'), ('᜵', '᜶')]; pub const TAGBANWA: &'static [(char, char)] = &[('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('\u{1772}', '\u{1773}')]; -pub const TAI_LE: &'static [(char, char)] = - &[('၀', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ')]; +pub const TAI_LE: &'static [(char, char)] = &[ + ('\u{300}', '\u{301}'), + ('\u{307}', '\u{308}'), + ('\u{30c}', '\u{30c}'), + ('၀', '၉'), + ('ᥐ', 'ᥭ'), + ('ᥰ', 'ᥴ'), +]; pub const TAI_THAM: &'static [(char, char)] = &[ ('ᨠ', '\u{1a5e}'), @@ -1356,8 +1567,14 @@ pub const TAMIL: &'static [(char, char)] = &[ pub const TANGSA: &'static [(char, char)] = &[('𖩰', '𖪾'), ('𖫀', '𖫉')]; -pub const TANGUT: &'static [(char, char)] = - &[('𖿠', '𖿠'), ('𗀀', '𘟷'), ('𘠀', '𘫿'), ('𘴀', '𘴈')]; +pub const TANGUT: &'static [(char, char)] = &[ + ('⿰', '⿿'), + ('㇯', '㇯'), + ('𖿠', '𖿠'), + ('𗀀', '𘟷'), + ('𘠀', '𘫿'), + ('𘴀', '𘴈'), +]; pub const TELUGU: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1389,7 +1606,14 @@ pub const THAANA: &'static [(char, char)] = &[ ('﷽', '﷽'), ]; -pub const THAI: &'static [(char, char)] = &[('ก', '\u{e3a}'), ('เ', '๛')]; +pub const THAI: &'static [(char, char)] = &[ + ('ʼ', 'ʼ'), + ('˗', '˗'), + ('\u{303}', '\u{303}'), + ('\u{331}', '\u{331}'), + ('ก', '\u{e3a}'), + ('เ', '๛'), +]; pub const TIBETAN: &'static [(char, char)] = &[ ('ༀ', 
'ཇ'), @@ -1399,10 +1623,18 @@ pub const TIBETAN: &'static [(char, char)] = &[ ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'), + ('〈', '》'), ]; -pub const TIFINAGH: &'static [(char, char)] = - &[('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('\u{2d7f}', '\u{2d7f}')]; +pub const TIFINAGH: &'static [(char, char)] = &[ + ('\u{302}', '\u{302}'), + ('\u{304}', '\u{304}'), + ('\u{307}', '\u{307}'), + ('\u{309}', '\u{309}'), + ('ⴰ', 'ⵧ'), + ('ⵯ', '⵰'), + ('\u{2d7f}', '\u{2d7f}'), +]; pub const TIRHUTA: &'static [(char, char)] = &[ ('\u{951}', '\u{952}'), @@ -1413,7 +1645,36 @@ pub const TIRHUTA: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ]; -pub const TOTO: &'static [(char, char)] = &[('𞊐', '\u{1e2ae}')]; +pub const TODHRI: &'static [(char, char)] = &[ + ('\u{301}', '\u{301}'), + ('\u{304}', '\u{304}'), + ('\u{307}', '\u{307}'), + ('\u{311}', '\u{311}'), + ('\u{313}', '\u{313}'), + ('\u{35e}', '\u{35e}'), + ('𐗀', '𐗳'), +]; + +pub const TOTO: &'static [(char, char)] = &[('ʼ', 'ʼ'), ('𞊐', '\u{1e2ae}')]; + +pub const TULU_TIGALARI: &'static [(char, char)] = &[ + ('೦', '೯'), + ('ᳲ', 'ᳲ'), + ('\u{1cf4}', '\u{1cf4}'), + ('꠰', '꠵'), + ('\u{a8f1}', '\u{a8f1}'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '𑏕'), + ('𑏗', '𑏘'), + ('\u{113e1}', '\u{113e2}'), +]; pub const UGARITIC: &'static [(char, char)] = &[('𐎀', '𐎝'), ('𐎟', '𐎟')]; diff --git a/regex-syntax/src/unicode_tables/sentence_break.rs b/regex-syntax/src/unicode_tables/sentence_break.rs index 24348736f..af1c5bea9 100644 --- a/regex-syntax/src/unicode_tables/sentence_break.rs +++ b/regex-syntax/src/unicode_tables/sentence_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate sentence-break ucd-15.0.0 --chars +// ucd-generate sentence-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. 
+// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ATerm", ATERM), @@ -101,7 +101,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -153,8 +153,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -197,8 +197,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -215,11 +215,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -248,9 +248,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -279,8 +279,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', 
'\u{10f85}'), ('𑀀', '𑀂'), @@ -295,7 +296,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', '\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), @@ -306,11 +307,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -352,20 +360,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -386,6 +396,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('\u{e0020}', '\u{e007f}'), @@ -394,12 +405,8 @@ pub const EXTEND: &'static [(char, char)] = &[ pub const FORMAT: &'static [(char, char)] = &[ ('\u{ad}', '\u{ad}'), - ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - 
('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200b}'), ('\u{200e}', '\u{200f}'), @@ -408,8 +415,6 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), @@ -696,6 +701,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('ჼ', 'ჼ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), + ('ᲊ', 'ᲊ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), @@ -1028,11 +1034,13 @@ pub const LOWER: &'static [(char, char)] = &[ ('ꟃ', 'ꟃ'), ('ꟈ', 'ꟈ'), ('ꟊ', 'ꟊ'), + ('ꟍ', 'ꟍ'), ('ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), ('ꟕ', 'ꟕ'), ('ꟗ', 'ꟗ'), ('ꟙ', 'ꟙ'), + ('ꟛ', 'ꟛ'), ('ꟲ', 'ꟴ'), ('ꟶ', 'ꟶ'), ('ꟸ', 'ꟺ'), @@ -1053,6 +1061,7 @@ pub const LOWER: &'static [(char, char)] = &[ ('𐞇', '𐞰'), ('𐞲', '𐞺'), ('𐳀', '𐳲'), + ('𐵰', '𐶅'), ('𑣀', '𑣟'), ('𖹠', '𖹿'), ('𝐚', '𝐳'), @@ -1092,10 +1101,14 @@ pub const LOWER: &'static [(char, char)] = &[ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), + ('\u{600}', '\u{605}'), ('٠', '٩'), ('٫', '٬'), + ('\u{6dd}', '\u{6dd}'), ('۰', '۹'), ('߀', '߉'), + ('\u{890}', '\u{891}'), + ('\u{8e2}', '\u{8e2}'), ('०', '९'), ('০', '৯'), ('੦', '੯'), @@ -1114,7 +1127,7 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('០', '៩'), ('᠐', '᠙'), ('᥆', '᥏'), - ('᧐', '᧙'), + ('᧐', '᧚'), ('᪀', '᪉'), ('᪐', '᪙'), ('᭐', '᭙'), @@ -1131,7 +1144,10 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), + ('\u{110bd}', '\u{110bd}'), + ('\u{110cd}', '\u{110cd}'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), @@ -1140,20 +1156,26 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', 
'𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; @@ -1490,6 +1512,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -1522,8 +1545,11 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐴀', '𐴣'), + ('𐵊', '𐵏'), + ('𐵯', '𐵯'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -1562,6 +1588,13 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -1595,6 +1628,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -1618,7 +1652,9 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -1627,6 +1663,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), ('𖾓', '𖾟'), @@ -1634,7 +1671,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𖿣', '𖿣'), ('𗀀', '𘟷'), ('𘠀', '𘳕'), - ('𘴀', '𘴈'), + ('𘳿', '𘴈'), ('𚿰', '𚿳'), ('𚿵', '𚿻'), ('𚿽', '𚿾'), @@ -1655,6 +1692,8 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -1699,6 +1738,7 @@ pub const OLETTER: &'static [(char, char)] = &[ ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), + ('𮯰', '𮹝'), ('丽', '𪘀'), ('𰀀', '𱍊'), ('𱍐', '𲎯'), @@ -1706,7 +1746,8 @@ pub const OLETTER: &'static [(char, char)] = &[ pub const SCONTINUE: &'static [(char, char)] = &[ (',', '-'), - (':', ':'), + (':', ';'), + (';', ';'), ('՝', '՝'), ('،', '؍'), ('߸', '߸'), @@ -1715,14 +1756,14 @@ pub const SCONTINUE: &'static [(char, char)] = &[ ('–', '—'), ('、', '、'), ('︐', '︑'), - ('︓', '︓'), + ('︓', '︔'), ('︱', '︲'), ('﹐', 
'﹑'), - ('﹕', '﹕'), + ('﹔', '﹕'), ('﹘', '﹘'), ('﹣', '﹣'), (',', '-'), - (':', ':'), + (':', ';'), ('、', '、'), ]; @@ -1743,17 +1784,20 @@ pub const STERM: &'static [(char, char)] = &[ ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'), + ('។', '៕'), ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'), + ('᭎', '᭏'), ('᭚', '᭛'), ('᭞', '᭟'), - ('᭽', '᭾'), + ('᭽', '᭿'), ('᰻', '᰼'), ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), + ('⳹', '⳻'), ('⸮', '⸮'), ('⸼', '⸼'), ('⹓', '⹔'), @@ -1769,6 +1813,8 @@ pub const STERM: &'static [(char, char)] = &[ ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'), + ('︒', '︒'), + ('︕', '︖'), ('﹖', '﹗'), ('!', '!'), ('?', '?'), @@ -1785,6 +1831,7 @@ pub const STERM: &'static [(char, char)] = &[ ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'), + ('𑏔', '𑏕'), ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), @@ -1801,6 +1848,7 @@ pub const STERM: &'static [(char, char)] = &[ ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'), + ('𖵮', '𖵯'), ('𖺘', '𖺘'), ('𛲟', '𛲟'), ('𝪈', '𝪈'), @@ -2098,6 +2146,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('Ꭰ', 'Ᏽ'), + ('Ᲊ', 'Ᲊ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'), @@ -2425,9 +2474,12 @@ pub const UPPER: &'static [(char, char)] = &[ ('Ꟃ', 'Ꟃ'), ('Ꞔ', 'Ꟈ'), ('Ꟊ', 'Ꟊ'), + ('Ɤ', 'Ꟍ'), ('Ꟑ', 'Ꟑ'), ('Ꟗ', 'Ꟗ'), ('Ꟙ', 'Ꟙ'), + ('Ꟛ', 'Ꟛ'), + ('Ƛ', 'Ƛ'), ('Ꟶ', 'Ꟶ'), ('A', 'Z'), ('𐐀', '𐐧'), @@ -2437,6 +2489,7 @@ pub const UPPER: &'static [(char, char)] = &[ ('𐖌', '𐖒'), ('𐖔', '𐖕'), ('𐲀', '𐲲'), + ('𐵐', '𐵥'), ('𑢠', '𑢿'), ('𖹀', '𖹟'), ('𝐀', '𝐙'), diff --git a/regex-syntax/src/unicode_tables/word_break.rs b/regex-syntax/src/unicode_tables/word_break.rs index c0714956f..b764d34ac 100644 --- a/regex-syntax/src/unicode_tables/word_break.rs +++ b/regex-syntax/src/unicode_tables/word_break.rs @@ -1,10 +1,10 @@ // DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY: // -// ucd-generate word-break ucd-15.0.0 --chars +// ucd-generate word-break ucd-16.0.0 --chars // -// Unicode version: 15.0.0. +// Unicode version: 16.0.0. // -// ucd-generate 0.2.14 is available on crates.io. 
+// ucd-generate 0.3.1 is available on crates.io. pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[ ("ALetter", ALETTER), @@ -62,7 +62,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'), - ('ܐ', 'ܐ'), + ('\u{70f}', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), @@ -219,7 +219,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), - ('ᲀ', 'ᲈ'), + ('ᲀ', 'ᲊ'), ('Ა', 'Ჺ'), ('Ჽ', 'Ჿ'), ('ᳩ', 'ᳬ'), @@ -295,10 +295,10 @@ pub const ALETTER: &'static [(char, char)] = &[ ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', 'ꛯ'), - ('꜈', 'ꟊ'), + ('꜈', 'ꟍ'), ('Ꟑ', 'ꟑ'), ('ꟓ', 'ꟓ'), - ('ꟕ', 'ꟙ'), + ('ꟕ', 'Ƛ'), ('ꟲ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), @@ -374,6 +374,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐖣', '𐖱'), ('𐖳', '𐖹'), ('𐖻', '𐖼'), + ('𐗀', '𐗳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), @@ -410,8 +411,11 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐴀', '𐴣'), + ('𐵊', '𐵥'), + ('𐵯', '𐶅'), ('𐺀', '𐺩'), ('𐺰', '𐺱'), + ('𐻂', '𐻄'), ('𐼀', '𐼜'), ('𐼧', '𐼧'), ('𐼰', '𐽅'), @@ -450,6 +454,13 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'), + ('𑎀', '𑎉'), + ('𑎋', '𑎋'), + ('𑎎', '𑎎'), + ('𑎐', '𑎵'), + ('𑎷', '𑎷'), + ('𑏑', '𑏑'), + ('𑏓', '𑏓'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑑟', '𑑡'), @@ -482,6 +493,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𑩜', '𑪉'), ('𑪝', '𑪝'), ('𑪰', '𑫸'), + ('𑯀', '𑯠'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), @@ -505,7 +517,9 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𒾐', '𒿰'), ('𓀀', '𓐯'), ('𓑁', '𓑆'), + ('𓑠', '𔏺'), ('𔐀', '𔙆'), + ('𖄀', '𖄝'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩰', '𖪾'), @@ -514,6 +528,7 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), + ('𖵀', '𖵬'), ('𖹀', '𖹿'), ('𖼀', '𖽊'), ('𖽐', '𖽐'), @@ -563,6 +578,8 @@ pub const ALETTER: &'static [(char, char)] = &[ ('𞊐', '𞊭'), ('𞋀', '𞋫'), ('𞓐', '𞓫'), + ('𞗐', '𞗭'), + ('𞗰', '𞗰'), ('𞟠', '𞟦'), ('𞟨', '𞟫'), ('𞟭', '𞟮'), @@ -637,7 +654,7 @@ pub const 
EXTEND: &'static [(char, char)] = &[ ('\u{825}', '\u{827}'), ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), - ('\u{898}', '\u{89f}'), + ('\u{897}', '\u{89f}'), ('\u{8ca}', '\u{8e1}'), ('\u{8e3}', 'ः'), ('\u{93a}', '\u{93c}'), @@ -689,8 +706,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{c81}', 'ಃ'), ('\u{cbc}', '\u{cbc}'), ('ಾ', 'ೄ'), - ('\u{cc6}', 'ೈ'), - ('ೊ', '\u{ccd}'), + ('\u{cc6}', '\u{cc8}'), + ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('ೳ', 'ೳ'), @@ -733,8 +750,8 @@ pub const EXTEND: &'static [(char, char)] = &[ ('ႏ', 'ႏ'), ('ႚ', '\u{109d}'), ('\u{135d}', '\u{135f}'), - ('\u{1712}', '᜕'), - ('\u{1732}', '᜴'), + ('\u{1712}', '\u{1715}'), + ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17d3}'), @@ -751,11 +768,11 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1ace}'), ('\u{1b00}', 'ᬄ'), - ('\u{1b34}', '᭄'), + ('\u{1b34}', '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', 'ᮂ'), ('ᮡ', '\u{1bad}'), - ('\u{1be6}', '᯳'), + ('\u{1be6}', '\u{1bf3}'), ('ᰤ', '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'), @@ -784,9 +801,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{a8e0}', '\u{a8f1}'), ('\u{a8ff}', '\u{a8ff}'), ('\u{a926}', '\u{a92d}'), - ('\u{a947}', '꥓'), + ('\u{a947}', '\u{a953}'), ('\u{a980}', 'ꦃ'), - ('\u{a9b3}', '꧀'), + ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), @@ -815,8 +832,9 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{10d24}', '\u{10d27}'), + ('\u{10d69}', '\u{10d6d}'), ('\u{10eab}', '\u{10eac}'), - ('\u{10efd}', '\u{10eff}'), + ('\u{10efc}', '\u{10eff}'), ('\u{10f46}', '\u{10f50}'), ('\u{10f82}', '\u{10f85}'), ('𑀀', '𑀂'), @@ -831,7 +849,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑅅', '𑅆'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '𑆂'), - ('𑆳', '𑇀'), + ('𑆳', 
'\u{111c0}'), ('\u{111c9}', '\u{111cc}'), ('𑇎', '\u{111cf}'), ('𑈬', '\u{11237}'), @@ -842,11 +860,18 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1133b}', '\u{1133c}'), ('\u{1133e}', '𑍄'), ('𑍇', '𑍈'), - ('𑍋', '𑍍'), + ('𑍋', '\u{1134d}'), ('\u{11357}', '\u{11357}'), ('𑍢', '𑍣'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'), + ('\u{113b8}', '\u{113c0}'), + ('\u{113c2}', '\u{113c2}'), + ('\u{113c5}', '\u{113c5}'), + ('\u{113c7}', '𑏊'), + ('𑏌', '\u{113d0}'), + ('\u{113d2}', '\u{113d2}'), + ('\u{113e1}', '\u{113e2}'), ('𑐵', '\u{11446}'), ('\u{1145e}', '\u{1145e}'), ('\u{114b0}', '\u{114c3}'), @@ -888,20 +913,22 @@ pub const EXTEND: &'static [(char, char)] = &[ ('𑼃', '𑼃'), ('𑼴', '\u{11f3a}'), ('𑼾', '\u{11f42}'), + ('\u{11f5a}', '\u{11f5a}'), ('\u{13440}', '\u{13440}'), ('\u{13447}', '\u{13455}'), + ('\u{1611e}', '\u{1612f}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f4f}', '\u{16f4f}'), ('𖽑', '𖾇'), ('\u{16f8f}', '\u{16f92}'), ('\u{16fe4}', '\u{16fe4}'), - ('𖿰', '𖿱'), + ('\u{16ff0}', '\u{16ff1}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1cf00}', '\u{1cf2d}'), ('\u{1cf30}', '\u{1cf46}'), ('\u{1d165}', '\u{1d169}'), - ('𝅭', '\u{1d172}'), + ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), @@ -922,6 +949,7 @@ pub const EXTEND: &'static [(char, char)] = &[ ('\u{1e2ae}', '\u{1e2ae}'), ('\u{1e2ec}', '\u{1e2ef}'), ('\u{1e4ec}', '\u{1e4ef}'), + ('\u{1e5ee}', '\u{1e5ef}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1e944}', '\u{1e94a}'), ('🏻', '🏿'), @@ -941,12 +969,7 @@ pub const EXTENDNUMLET: &'static [(char, char)] = &[ pub const FORMAT: &'static [(char, char)] = &[ ('\u{ad}', '\u{ad}'), - ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'), - ('\u{6dd}', '\u{6dd}'), - ('\u{70f}', '\u{70f}'), - ('\u{890}', '\u{891}'), - ('\u{8e2}', '\u{8e2}'), ('\u{180e}', '\u{180e}'), ('\u{200e}', '\u{200f}'), ('\u{202a}', '\u{202e}'), @@ -954,8 +977,6 @@ pub const FORMAT: &'static [(char, char)] = &[ ('\u{2066}', 
'\u{206f}'), ('\u{feff}', '\u{feff}'), ('\u{fff9}', '\u{fffb}'), - ('\u{110bd}', '\u{110bd}'), - ('\u{110cd}', '\u{110cd}'), ('\u{13430}', '\u{1343f}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), @@ -1016,8 +1037,6 @@ pub const MIDNUM: &'static [(char, char)] = &[ ('٬', '٬'), ('߸', '߸'), ('⁄', '⁄'), - ('︐', '︐'), - ('︔', '︔'), ('﹐', '﹐'), ('﹔', '﹔'), (',', ','), @@ -1038,10 +1057,14 @@ pub const NEWLINE: &'static [(char, char)] = pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), + ('\u{600}', '\u{605}'), ('٠', '٩'), ('٫', '٫'), + ('\u{6dd}', '\u{6dd}'), ('۰', '۹'), ('߀', '߉'), + ('\u{890}', '\u{891}'), + ('\u{8e2}', '\u{8e2}'), ('०', '९'), ('০', '৯'), ('੦', '੯'), @@ -1060,7 +1083,7 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('០', '៩'), ('᠐', '᠙'), ('᥆', '᥏'), - ('᧐', '᧙'), + ('᧐', '᧚'), ('᪀', '᪉'), ('᪐', '᪙'), ('᭐', '᭙'), @@ -1077,7 +1100,10 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('0', '9'), ('𐒠', '𐒩'), ('𐴰', '𐴹'), + ('𐵀', '𐵉'), ('𑁦', '𑁯'), + ('\u{110bd}', '\u{110bd}'), + ('\u{110cd}', '\u{110cd}'), ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), @@ -1086,20 +1112,26 @@ pub const NUMERIC: &'static [(char, char)] = &[ ('𑓐', '𑓙'), ('𑙐', '𑙙'), ('𑛀', '𑛉'), + ('𑛐', '𑛣'), ('𑜰', '𑜹'), ('𑣠', '𑣩'), ('𑥐', '𑥙'), + ('𑯰', '𑯹'), ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𑶠', '𑶩'), ('𑽐', '𑽙'), + ('𖄰', '𖄹'), ('𖩠', '𖩩'), ('𖫀', '𖫉'), ('𖭐', '𖭙'), + ('𖵰', '𖵹'), + ('𜳰', '𜳹'), ('𝟎', '𝟿'), ('𞅀', '𞅉'), ('𞋰', '𞋹'), ('𞓰', '𞓹'), + ('𞗱', '𞗺'), ('𞥐', '𞥙'), ('🯰', '🯹'), ]; From 1533257e7d337022bbfae9df99fb99948044aa87 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:25:58 -0400 Subject: [PATCH 28/36] changelog: 1.11.0 --- CHANGELOG.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df7977c39..6ec827d9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,17 @@ +1.11.0 (2024-09-29) +=================== +This is a new minor release of `regex` that brings in an update to the +Unicode Character Database. 
Specifically, this updates the Unicode data +used by `regex` internally to the version 16 release. + +New features: + +* [FEATURE #1228](https://github.com/rust-lang/regex/pull/1228): +Add new `regex::SetMatches::matched_all` method. +* [FEATURE #1229](https://github.com/rust-lang/regex/pull/1229): +Update to Unicode Character Database (UCD) version 16. + + 1.10.6 (2024-08-02) =================== This is a new patch release with a fix for the `unstable` crate feature that From cba0fbc0194456f644040d7558ae6ed261d57cc2 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:26:56 -0400 Subject: [PATCH 29/36] regex-syntax-0.8.5 --- regex-syntax/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-syntax/Cargo.toml b/regex-syntax/Cargo.toml index 3f213542b..0cbcde5e7 100644 --- a/regex-syntax/Cargo.toml +++ b/regex-syntax/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-syntax" -version = "0.8.4" #:version +version = "0.8.5" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" repository = "https://github.com/rust-lang/regex/tree/master/regex-syntax" From 4bb1e3d992a4e237c5ad535f1798f7a46552c34d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:28:21 -0400 Subject: [PATCH 30/36] deps: bump regex-syntax --- Cargo.toml | 2 +- regex-automata/Cargo.toml | 2 +- regex-cli/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 37696cf46..cd0054995 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -183,7 +183,7 @@ features = ["alloc", "syntax", "meta", "nfa-pikevm"] # For parsing regular expressions. 
[dependencies.regex-syntax] path = "regex-syntax" -version = "0.8.2" +version = "0.8.5" default-features = false [dev-dependencies] diff --git a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 97bfacfec..075a9aff3 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -86,7 +86,7 @@ internal-instrument-pikevm = ["logging", "std"] aho-corasick = { version = "1.0.0", optional = true, default-features = false } log = { version = "0.4.14", optional = true } memchr = { version = "2.6.0", optional = true, default-features = false } -regex-syntax = { path = "../regex-syntax", version = "0.8.2", optional = true, default-features = false } +regex-syntax = { path = "../regex-syntax", version = "0.8.5", optional = true, default-features = false } [dev-dependencies] anyhow = "1.0.69" diff --git a/regex-cli/Cargo.toml b/regex-cli/Cargo.toml index 543732285..e756887d9 100644 --- a/regex-cli/Cargo.toml +++ b/regex-cli/Cargo.toml @@ -31,6 +31,6 @@ memmap2 = "0.9.4" regex = { version = "1.9.0", path = ".." 
} regex-automata = { version = "0.4.0", path = "../regex-automata", features = ["logging"] } regex-lite = { version = "0.1.0", path = "../regex-lite" } -regex-syntax = { version = "0.8.0", path = "../regex-syntax" } +regex-syntax = { version = "0.8.5", path = "../regex-syntax" } tabwriter = { version = "1.2.1", features = ["ansi_formatting"] } textwrap = { version = "0.16.0", default-features = false } From 58e16f50f07729bf856570d1a8be0de0b4d5e9e0 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:28:28 -0400 Subject: [PATCH 31/36] regex-automata-0.4.8 --- regex-automata/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 075a9aff3..61a51e51c 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex-automata" -version = "0.4.7" #:version +version = "0.4.8" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] description = "Automata construction and matching using regular expressions." documentation = "https://docs.rs/regex-automata" From 9e17e56d3bd77f135f8ffccefcea3f49cbb0cd44 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:29:26 -0400 Subject: [PATCH 32/36] deps: bump regex-automata --- Cargo.toml | 2 +- regex-cli/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index cd0054995..49034e087 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -176,7 +176,7 @@ default-features = false # For the actual regex engines. 
[dependencies.regex-automata] path = "regex-automata" -version = "0.4.4" +version = "0.4.8" default-features = false features = ["alloc", "syntax", "meta", "nfa-pikevm"] diff --git a/regex-cli/Cargo.toml b/regex-cli/Cargo.toml index e756887d9..d7fd44b7b 100644 --- a/regex-cli/Cargo.toml +++ b/regex-cli/Cargo.toml @@ -29,7 +29,7 @@ lexopt = "0.3.0" log = { version = "0.4.17", features = ["std"] } memmap2 = "0.9.4" regex = { version = "1.9.0", path = ".." } -regex-automata = { version = "0.4.0", path = "../regex-automata", features = ["logging"] } +regex-automata = { version = "0.4.8", path = "../regex-automata", features = ["logging"] } regex-lite = { version = "0.1.0", path = "../regex-lite" } regex-syntax = { version = "0.8.5", path = "../regex-syntax" } tabwriter = { version = "1.2.1", features = ["ansi_formatting"] } From bcbe40342628b15ab2543d386c745f7f0811b791 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 29 Sep 2024 09:30:39 -0400 Subject: [PATCH 33/36] 1.11.0 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 49034e087..12516c6dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.10.6" #:version +version = "1.11.0" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" readme = "README.md" From 991ba8836b3dea741bdcb4e6f680543ab10331d6 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 24 Oct 2024 09:58:32 -0400 Subject: [PATCH 34/36] unstable: fix `Pattern` trait implementation I am teetering on removing this cursed implementation. 
Fixes #1231 --- src/pattern.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pattern.rs b/src/pattern.rs index 5c4260e95..d7bf148d5 100644 --- a/src/pattern.rs +++ b/src/pattern.rs @@ -1,4 +1,4 @@ -use core::str::pattern::{Pattern, SearchStep, Searcher}; +use core::str::pattern::{Pattern, SearchStep, Searcher, Utf8Pattern}; use crate::{Matches, Regex}; @@ -21,6 +21,10 @@ impl<'r> Pattern for &'r Regex { next_match: None, } } + + fn as_utf8_pattern<'p>(&'p self) -> Option<Utf8Pattern<'p>> { + None + } } unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> { From 80df54e4b9ed4a9b14c502668856c3fc47b30cf4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 24 Oct 2024 10:16:57 -0400 Subject: [PATCH 35/36] changelog: 1.11.1 --- CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ec827d9e..b88e2aa49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,15 @@ +1.11.1 (2024-10-24) +=================== +This is a new patch release of `regex` that fixes compilation on nightly +Rust when the unstable `pattern` crate feature is enabled. Users on nightly +Rust without this feature enabled are unaffected. + +Bug fixes: + +* [BUG #1231](https://github.com/rust-lang/regex/issues/1231): +Fix the `Pattern` trait implementation as a result of nightly API breakage.
+ + 1.11.0 (2024-09-29) =================== This is a new minor release of `regex` that brings in an update to the From 9870c06e6c772daaad7ab612faab29130753e41c Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 24 Oct 2024 10:17:01 -0400 Subject: [PATCH 36/36] 1.11.1 --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 12516c6dd..60be5b9d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regex" -version = "1.11.0" #:version +version = "1.11.1" #:version authors = ["The Rust Project Developers", "Andrew Gallant "] license = "MIT OR Apache-2.0" readme = "README.md"