diff --git a/lib/Regex/Executor.cpp b/lib/Regex/Executor.cpp index a8fea998b58..e2d7d50c49d 100644 --- a/lib/Regex/Executor.cpp +++ b/lib/Regex/Executor.cpp @@ -1048,7 +1048,6 @@ auto Context::match(State *s, bool onlyAtStart) case Opcode::U16MatchCharICase32: { const auto *insn = llvh::cast<U16MatchCharICase32Insn>(base); - assert(insn->c >= 0x010000 && "Character should be astral"); bool matched = false; if (!c.atEnd()) { CodePoint cp = c.consumeUTF16(); diff --git a/test/hermes/regexp_unicode.js b/test/hermes/regexp_unicode.js index 853f1d72952..bc817383ba3 100644 --- a/test/hermes/regexp_unicode.js +++ b/test/hermes/regexp_unicode.js @@ -89,6 +89,13 @@ print(/.*/u.exec("\u0101bc\ndef")[0].length); // We should not match a low surrogate in a Unicode regexp. print(!! /\uDE42/u.exec("\uD83D\uDE42ZZZ")); // CHECK-NEXT: false +// We should match an unpaired surrogate. +print(!! /\uDC00/u.exec("\uDC00")); +// CHECK-NEXT: true +// Test the case insensitive variant. +print(!! /\uDC00/iu.exec("\uDC00")); +// CHECK-NEXT: true +// We should match the low surrogate when Unicode is off. print(!! /\uDE42/.exec("\uD83D\uDE42ZZZ")); // CHECK-NEXT: true