swiftlang · hamishknight · May 10, 2022 · May 10, 2022 · May 10, 2022
@@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none,
 ERROR(regex_literal_parsing_error,none,
       "%0", (StringRef))
 
-ERROR(prefix_slash_not_allowed,none,
-      "prefix operator may not contain '/'", ())
-
 //------------------------------------------------------------------------------
 // MARK: Lexer diagnostics
 //------------------------------------------------------------------------------

@@ -608,6 +608,10 @@ class Lexer {
   void formStringLiteralToken(const char *TokStart, bool IsMultilineString,
                               unsigned CustomDelimiterLen);
 
+  /// Form an operator token starting at \p TokStart. \p OperEnd points one past
+  /// the operator's last character, not including any closing backtick.
+  void formOperatorToken(const char *TokStart, const char *OperEnd);
+
   /// Advance to the end of the line.
   /// If EatNewLine is true, CurPtr will be at end of newline character.
   /// Otherwise, CurPtr will be at newline character.

@@ -1763,10 +1763,7 @@ class Parser {
   /// Try re-lex a '/' operator character as a regex literal. This should be
   /// called when parsing in an expression position to ensure a regex literal is
   /// correctly parsed.
-  ///
-  /// If \p mustBeRegex is set to true, a regex literal will always be lexed if
-  /// enabled. Otherwise, it will not be lexed if it may be ambiguous.
-  void tryLexRegexLiteral(bool mustBeRegex);
+  void tryLexRegexLiteral(bool forUnappliedOperator);
 
   void validateCollectionElement(ParserResult<Expr> element);
 

@@ -41,6 +41,8 @@ class Token {
 
   /// Whether this token is an escaped `identifier` token.
   unsigned EscapedIdentifier : 1;
+
+  unsigned EscapedOperator : 1;
 
   /// Modifiers for string literals
   unsigned MultilineString : 1;
@@ -65,8 +67,8 @@ class Token {
 public:
   Token(tok Kind, StringRef Text, unsigned CommentLength = 0)
           : Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false),
-            MultilineString(false), CustomDelimiterLen(0),
-            CommentLength(CommentLength), Text(Text) {}
+            EscapedOperator(false), MultilineString(false),
+            CustomDelimiterLen(0), CommentLength(CommentLength), Text(Text) {}
 
   Token() : Token(tok::NUM_TOKENS, {}, 0) {}
 
@@ -128,6 +130,11 @@ class Token {
            "only identifiers can be escaped identifiers");
     EscapedIdentifier = value;
   }
+
+  /// Whether this token is an operator token escaped with backticks.
+  bool isEscapedOperator() const { return EscapedOperator; }
+  /// Mark (or unmark) this token as an escaped operator.
+  void setEscapedOperator(bool value) { EscapedOperator = value; }
 
   bool isContextualKeyword(StringRef ContextKW) const {
     return isAny(tok::identifier, tok::contextual_keyword) &&
@@ -276,7 +283,7 @@ class Token {
   }
 
   StringRef getText() const {
-    if (EscapedIdentifier) {
+    if (EscapedIdentifier || EscapedOperator) {
       // Strip off the backticks on either side.
       assert(Text.front() == '`' && Text.back() == '`');
       return Text.slice(1, Text.size() - 1);
@@ -292,6 +299,7 @@ class Token {
     Text = T;
     this->CommentLength = CommentLength;
     EscapedIdentifier = false;
+    EscapedOperator = false;
     this->MultilineString = false;
     this->CustomDelimiterLen = 0;
     assert(this->CustomDelimiterLen == CustomDelimiterLen &&

@@ -792,57 +792,15 @@ static bool rangeContainsPlaceholderEnd(const char *CurPtr,
   return false;
 }
 
-/// lexOperatorIdentifier - Match identifiers formed out of punctuation.
-void Lexer::lexOperatorIdentifier() {
-  const char *TokStart = CurPtr-1;
-  CurPtr = TokStart;
-  bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd);
-  assert(didStart && "unexpected operator start");
-  (void) didStart;
-
-  do {
-    if (CurPtr != BufferEnd && InSILBody &&
-        (*CurPtr == '!' || *CurPtr == '?'))
-      // When parsing SIL body, '!' and '?' are special token and can't be
-      // in the middle of an operator.
-      break;
-
-    // '.' cannot appear in the middle of an operator unless the operator
-    // started with a '.'.
-    if (*CurPtr == '.' && *TokStart != '.')
-      break;
-    if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) &&
-        rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
-      break;
-    }
-
-    // If we are lexing a `/.../` regex literal, we don't consider `/` to be an
-    // operator character.
-    if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
-        *CurPtr == '/') {
-      break;
-    }
-  } while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));
-
-  if (CurPtr-TokStart > 2) {
-    // If there is a "//" or "/*" in the middle of an identifier token, 
-    // it starts a comment.
-    for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) {
-      if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) {
-        CurPtr = Ptr;
-        break;
-      }
-    }
-  }
-
+void Lexer::formOperatorToken(const char *TokStart, const char *OperEnd) {
   // Decide between the binary, prefix, and postfix cases.
   // It's binary if either both sides are bound or both sides are not bound.
   // Otherwise, it's postfix if left-bound and prefix if right-bound.
   bool leftBound = isLeftBound(TokStart, ContentStart);
   bool rightBound = isRightBound(CurPtr, leftBound, CodeCompletionPtr);
 
   // Match various reserved words.
-  if (CurPtr-TokStart == 1) {
+  if (OperEnd-TokStart == 1) {
     switch (TokStart[0]) {
     case '=':
       // Refrain from emitting this message in operator name position.
@@ -901,7 +859,7 @@ void Lexer::lexOperatorIdentifier() {
         return formToken(tok::question_postfix, TokStart);
       return formToken(tok::question_infix, TokStart);
     }
-  } else if (CurPtr-TokStart == 2) {
+  } else if (OperEnd-TokStart == 2) {
     switch ((TokStart[0] << 8) | TokStart[1]) {
     case ('-' << 8) | '>': // ->
       return formToken(tok::arrow, TokStart);
@@ -912,7 +870,7 @@ void Lexer::lexOperatorIdentifier() {
   } else {
     // Verify there is no "*/" in the middle of the identifier token, we reject
     // it as potentially ending a block comment.
-    auto Pos = StringRef(TokStart, CurPtr-TokStart).find("*/");
+    auto Pos = StringRef(TokStart, OperEnd-TokStart).find("*/");
     if (Pos != StringRef::npos) {
       diagnose(TokStart+Pos, diag::lex_unexpected_block_comment_end);
       return formToken(tok::unknown, TokStart);
@@ -926,6 +884,75 @@ void Lexer::lexOperatorIdentifier() {
   return formToken(leftBound ? tok::oper_postfix : tok::oper_prefix, TokStart);
 }
 
+/// lexOperatorIdentifier - Match identifiers formed out of punctuation,
+/// optionally escaped by surrounding backticks.
+void Lexer::lexOperatorIdentifier() {
+  auto *const TokStart = CurPtr-1;
+  // When escaped, CurPtr is already just past the opening backtick.
+  const bool HadBacktick = (*TokStart == '`');
+  if (!HadBacktick)
+    CurPtr = TokStart;
+  auto *const OperStart = CurPtr;
+
+  bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd);
+  assert(didStart && "unexpected operator start");
+  (void) didStart;
+
+  do {
+    if (CurPtr != BufferEnd && InSILBody &&
+        (*CurPtr == '!' || *CurPtr == '?'))
+      // When parsing SIL body, '!' and '?' are special token and can't be
+      // in the middle of an operator.
+      break;
+
+    // '.' cannot appear in the middle of an operator unless the operator
+    // itself (OperStart, i.e. skipping any opening backtick) started with '.'.
+    if (*CurPtr == '.' && *OperStart != '.')
+      break;
+    if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) &&
+        rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
+      break;
+    }
+
+    // If we are lexing a `/.../` regex literal, we don't consider `/` to be an
+    // operator character.
+    if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
+        *CurPtr == '/') {
+      break;
+    }
+  } while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));
+
+  if (CurPtr-TokStart > 2) {
+    // If there is a "//" or "/*" in the middle of an identifier token,
+    // it starts a comment.
+    for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) {
+      if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) {
+        CurPtr = Ptr;
+        break;
+      }
+    }
+  }
+
+  auto *const OperEnd = CurPtr;
+  if (HadBacktick) {
+    if (*OperEnd != '`') {
+      // No closing backtick: lex the opening backtick alone as punctuation.
+      CurPtr = OperStart;
+      return formToken(tok::backtick, TokStart);
+    }
+    ++CurPtr; // Consume the closing backtick.
+  }
+
+  formOperatorToken(TokStart, OperEnd);
+  if (HadBacktick) {
+    // If this token is at ArtificialEOF, it's forced to be tok::eof. Don't mark
+    // this as escaped-operator in this case. Also don't mark if we had
+    // something unrecoverable.
+    if (!NextToken.is(tok::eof) && !NextToken.is(tok::unknown))
+      NextToken.setEscapedOperator(true);
+  }
+}
+
 /// lexDollarIdent - Match $[0-9a-zA-Z_$]+
 void Lexer::lexDollarIdent() {
   const char *tokStart = CurPtr-1;
@@ -2652,6 +2679,10 @@ void Lexer::lexImpl() {
     return lexStringLiteral();
 
   case '`':
+    auto *Tmp = CurPtr;
+    if (advanceIfValidStartOfOperator(Tmp, BufferEnd))
+      return lexOperatorIdentifier();
+
     return lexEscapedIdentifier();
   }
 }

@@ -8535,17 +8535,10 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) {
   // Postfix operators starting with ? or ! conflict with builtin
   // unwrapping operators.
   if (Attributes.hasAttribute<PostfixAttr>())
-    if (!Tok.getText().empty() && (Tok.getRawText().front() == '?' ||
-                                   Tok.getRawText().front() == '!'))
+    if (!Tok.getText().empty() && (Tok.getText().front() == '?' ||
+                                   Tok.getText().front() == '!'))
       diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap);
 
-  // Prefix operators may not contain the `/` character when `/.../` regex
-  // literals are enabled.
-  if (Context.LangOpts.EnableBareSlashRegexLiterals) {
-    if (Attributes.hasAttribute<PrefixAttr>() && Tok.getText().contains("/"))
-      diagnose(Tok, diag::prefix_slash_not_allowed);
-  }
-
   // A common error is to try to define an operator with something in the
   // unicode plane considered to be an operator, or to try to define an
   // operator like "not".  Analyze and diagnose this specifically.

@@ -513,7 +513,7 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
   UnresolvedDeclRefExpr *Operator;
 
   // First check to see if we have the start of a regex literal `/.../`.
-  tryLexRegexLiteral(/*mustBeRegex*/ true);
+  tryLexRegexLiteral(/*forUnappliedOperator*/ false);
 
   switch (Tok.getKind()) {
   default:
@@ -880,56 +880,70 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() {
   return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
 }
 
-void Parser::tryLexRegexLiteral(bool mustBeRegex) {
+void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
   if (!Context.LangOpts.EnableBareSlashRegexLiterals)
     return;
 
+  // A backtick-escaped operator is never a regex literal.
+  if (Tok.isEscapedOperator())
+    return;
+
   // Check to see if we have a regex literal `/.../`, optionally with a prefix
   // operator e.g `!/.../`.
+  bool mustBeRegex = false;
   switch (Tok.getKind()) {
   case tok::oper_prefix:
+    // Prefix operators may contain `/` characters, so this may not be a regex,
+    // and as such we need to make sure we have a closing `/`. The first
+    // character heuristics aren't relevant here as a right-bound operator will
+    // not have a space, tab, or `)` character.
+    break;
   case tok::oper_binary_spaced:
-  case tok::oper_binary_unspaced: {
-    // Check to see if we have an operator containing '/'.
-    auto slashIdx = Tok.getText().find("/");
-    if (slashIdx == StringRef::npos)
-      break;
+  case tok::oper_binary_unspaced:
+    // When re-lexing for a 'proper' expression, binary operators are always
+    // invalid, so we can be confident in always lexing a regex literal.
+    mustBeRegex = !forUnappliedOperator;
+    break;
+  default:
+    // We only re-lex regex literals for operator tokens.
+    return;
+  }
 
-    CancellableBacktrackingScope backtrack(*this);
-    {
-      Optional<Lexer::ForwardSlashRegexRAII> regexScope;
-      regexScope.emplace(*L, mustBeRegex);
-
-      // Try re-lex as a `/.../` regex literal, this will split an operator if
-      // necessary.
-      L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
-
-      // If we didn't split a prefix operator, reset the regex lexing scope.
-      // Otherwise, we want to keep it in place for the next token.
-      auto didSplit = L->peekNextToken().getLength() == slashIdx;
-      if (!didSplit)
-        regexScope.reset();
-
-      // Discard the current token, which will be replaced by the re-lexed
-      // token, which will either be a regex literal token, a prefix operator,
-      // or the original unchanged token.
-      discardToken();
-
-      // If we split a prefix operator from the regex literal, and are not sure
-      // whether this should be a regex, backtrack if we didn't end up lexing a
-      // regex literal.
-      if (didSplit && !mustBeRegex &&
-          !L->peekNextToken().is(tok::regex_literal)) {
-        return;
-      }
+  // Check to see if we have an operator containing '/'.
+  auto slashIdx = Tok.getText().find("/");
+  if (slashIdx == StringRef::npos)
+    return;
+
+  CancellableBacktrackingScope backtrack(*this);
+  {
+    Optional<Lexer::ForwardSlashRegexRAII> regexScope;
+    regexScope.emplace(*L, mustBeRegex);
+
+    // Try re-lex as a `/.../` regex literal, this will split an operator if
+    // necessary.
+    L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);
+
+    // If we didn't split a prefix operator, reset the regex lexing scope.
+    // Otherwise, we want to keep it in place for the next token.
+    auto didSplit = L->peekNextToken().getLength() == slashIdx;
+    if (!didSplit)
+      regexScope.reset();
+
+    // Discard the current token, which will be replaced by the re-lexed
+    // token, which will either be a regex literal token, a prefix operator,
+    // or the original unchanged token.
+    discardToken();
 
-      // Otherwise, accept the result.
-      backtrack.cancelBacktrack();
+    // If we split a prefix operator from the regex literal, and are not sure
+    // whether this should be a regex, backtrack if we didn't end up lexing a
+    // regex literal.
+    if (didSplit && !mustBeRegex &&
+        !L->peekNextToken().is(tok::regex_literal)) {
+      return;
     }
-    break;
-  }
-  default:
-    break;
+
+    // Otherwise, accept the result.
+    backtrack.cancelBacktrack();
   }
 }
 
@@ -3226,7 +3240,7 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
     // First check to see if we have the start of a regex literal `/.../`. We
     // need to do this before handling unapplied operator references, as e.g
     // `(/, /)` might be a regex literal.
-    tryLexRegexLiteral(/*mustBeRegex*/ false);
+    tryLexRegexLiteral(/*forUnappliedOperator*/ true);
 
     // See if we have an operator decl ref '(<op>)'. The operator token in
     // this case lexes as a binary operator because it neither leads nor