Skip to content

[WIP] Backtick operators #58726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions include/swift/AST/DiagnosticsParse.def
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ ERROR(forbidden_extended_escaping_string,none,
ERROR(regex_literal_parsing_error,none,
"%0", (StringRef))

ERROR(prefix_slash_not_allowed,none,
"prefix operator may not contain '/'", ())

//------------------------------------------------------------------------------
// MARK: Lexer diagnostics
//------------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions include/swift/Parse/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,10 @@ class Lexer {
void formStringLiteralToken(const char *TokStart, bool IsMultilineString,
unsigned CustomDelimiterLen);

/// Form an operator token starting at \p TokStart, which is the first
/// character of the token (the opening backtick for an escaped operator).
/// \p OperEnd points just past the last operator character, i.e. at the
/// closing backtick when the operator is backtick-escaped.
void formOperatorToken(const char *TokStart, const char *OperEnd);

/// Advance to the end of the line.
/// If EatNewLine is true, CurPtr will be at end of newline character.
/// Otherwise, CurPtr will be at newline character.
Expand Down
5 changes: 1 addition & 4 deletions include/swift/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -1763,10 +1763,7 @@ class Parser {
/// Try re-lex a '/' operator character as a regex literal. This should be
/// called when parsing in an expression position to ensure a regex literal is
/// correctly parsed.
///
/// If \p mustBeRegex is set to true, a regex literal will always be lexed if
/// enabled. Otherwise, it will not be lexed if it may be ambiguous.
void tryLexRegexLiteral(bool mustBeRegex);
void tryLexRegexLiteral(bool forUnappliedOperator);

void validateCollectionElement(ParserResult<Expr> element);

Expand Down
14 changes: 11 additions & 3 deletions include/swift/Parse/Token.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class Token {

/// Whether this token is an escaped `identifier` token.
unsigned EscapedIdentifier : 1;

/// Whether this token is an operator token written between backticks.
unsigned EscapedOperator : 1;

/// Modifiers for string literals
unsigned MultilineString : 1;
Expand All @@ -65,8 +67,8 @@ class Token {
public:
Token(tok Kind, StringRef Text, unsigned CommentLength = 0)
: Kind(Kind), AtStartOfLine(false), EscapedIdentifier(false),
MultilineString(false), CustomDelimiterLen(0),
CommentLength(CommentLength), Text(Text) {}
EscapedOperator(false), MultilineString(false),
CustomDelimiterLen(0), CommentLength(CommentLength), Text(Text) {}

Token() : Token(tok::NUM_TOKENS, {}, 0) {}

Expand Down Expand Up @@ -128,6 +130,11 @@ class Token {
"only identifiers can be escaped identifiers");
EscapedIdentifier = value;
}

/// Whether this token was marked as a backtick-escaped operator.
bool isEscapedOperator() const { return EscapedOperator; }
/// Set or clear the escaped-operator flag. While the flag is set, getText()
/// expects the token text to begin and end with a backtick and strips them.
void setEscapedOperator(bool value) {
EscapedOperator = value;
}

bool isContextualKeyword(StringRef ContextKW) const {
return isAny(tok::identifier, tok::contextual_keyword) &&
Expand Down Expand Up @@ -276,7 +283,7 @@ class Token {
}

StringRef getText() const {
if (EscapedIdentifier) {
if (EscapedIdentifier || EscapedOperator) {
// Strip off the backticks on either side.
assert(Text.front() == '`' && Text.back() == '`');
return Text.slice(1, Text.size() - 1);
Expand All @@ -292,6 +299,7 @@ class Token {
Text = T;
this->CommentLength = CommentLength;
EscapedIdentifier = false;
EscapedOperator = false;
this->MultilineString = false;
this->CustomDelimiterLen = 0;
assert(this->CustomDelimiterLen == CustomDelimiterLen &&
Expand Down
123 changes: 77 additions & 46 deletions lib/Parse/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -792,57 +792,15 @@ static bool rangeContainsPlaceholderEnd(const char *CurPtr,
return false;
}

/// lexOperatorIdentifier - Match identifiers formed out of punctuation.
void Lexer::lexOperatorIdentifier() {
const char *TokStart = CurPtr-1;
CurPtr = TokStart;
bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd);
assert(didStart && "unexpected operator start");
(void) didStart;

do {
if (CurPtr != BufferEnd && InSILBody &&
(*CurPtr == '!' || *CurPtr == '?'))
// When parsing SIL body, '!' and '?' are special token and can't be
// in the middle of an operator.
break;

// '.' cannot appear in the middle of an operator unless the operator
// started with a '.'.
if (*CurPtr == '.' && *TokStart != '.')
break;
if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) &&
rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
break;
}

// If we are lexing a `/.../` regex literal, we don't consider `/` to be an
// operator character.
if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
*CurPtr == '/') {
break;
}
} while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));

if (CurPtr-TokStart > 2) {
// If there is a "//" or "/*" in the middle of an identifier token,
// it starts a comment.
for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) {
if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) {
CurPtr = Ptr;
break;
}
}
}

void Lexer::formOperatorToken(const char *TokStart, const char *OperEnd) {
// Decide between the binary, prefix, and postfix cases.
// It's binary if either both sides are bound or both sides are not bound.
// Otherwise, it's postfix if left-bound and prefix if right-bound.
bool leftBound = isLeftBound(TokStart, ContentStart);
bool rightBound = isRightBound(CurPtr, leftBound, CodeCompletionPtr);

// Match various reserved words.
if (CurPtr-TokStart == 1) {
if (OperEnd-TokStart == 1) {
switch (TokStart[0]) {
case '=':
// Refrain from emitting this message in operator name position.
Expand Down Expand Up @@ -901,7 +859,7 @@ void Lexer::lexOperatorIdentifier() {
return formToken(tok::question_postfix, TokStart);
return formToken(tok::question_infix, TokStart);
}
} else if (CurPtr-TokStart == 2) {
} else if (OperEnd-TokStart == 2) {
switch ((TokStart[0] << 8) | TokStart[1]) {
case ('-' << 8) | '>': // ->
return formToken(tok::arrow, TokStart);
Expand All @@ -912,7 +870,7 @@ void Lexer::lexOperatorIdentifier() {
} else {
// Verify there is no "*/" in the middle of the identifier token, we reject
// it as potentially ending a block comment.
auto Pos = StringRef(TokStart, CurPtr-TokStart).find("*/");
auto Pos = StringRef(TokStart, OperEnd-TokStart).find("*/");
if (Pos != StringRef::npos) {
diagnose(TokStart+Pos, diag::lex_unexpected_block_comment_end);
return formToken(tok::unknown, TokStart);
Expand All @@ -926,6 +884,75 @@ void Lexer::lexOperatorIdentifier() {
return formToken(leftBound ? tok::oper_postfix : tok::oper_prefix, TokStart);
}

/// lexOperatorIdentifier - Match identifiers formed out of punctuation.
///
/// On entry, CurPtr is one past the first character of the token. That
/// character is either the first operator character or, for an escaped
/// operator, an opening backtick. If an opening backtick is not matched by a
/// closing backtick directly after the operator characters, only the backtick
/// itself is formed as a tok::backtick token and the operator is left to be
/// lexed separately.
void Lexer::lexOperatorIdentifier() {
  auto *const TokStart = CurPtr-1;

  // For an unescaped operator, rewind so that the first operator character is
  // re-validated below. For an escaped one, CurPtr already points at the first
  // operator character, just past the backtick.
  auto HadBacktick = (*TokStart == '`');
  if (!HadBacktick)
    CurPtr = TokStart;

  auto *const OperStart = CurPtr;

  bool didStart = advanceIfValidStartOfOperator(CurPtr, BufferEnd);
  assert(didStart && "unexpected operator start");
  (void) didStart;

  do {
    if (CurPtr != BufferEnd && InSILBody &&
        (*CurPtr == '!' || *CurPtr == '?'))
      // When parsing SIL body, '!' and '?' are special token and can't be
      // in the middle of an operator.
      break;

    // '.' cannot appear in the middle of an operator unless the operator
    // started with a '.'. Compare against the first operator character, not
    // the token start: for an escaped operator the token starts with a
    // backtick, which would otherwise make every dot-operator stop here.
    if (*CurPtr == '.' && *OperStart != '.')
      break;
    if (Identifier::isEditorPlaceholder(StringRef(CurPtr, BufferEnd-CurPtr)) &&
        rangeContainsPlaceholderEnd(CurPtr + 2, BufferEnd)) {
      break;
    }

    // If we are lexing a `/.../` regex literal, we don't consider `/` to be an
    // operator character.
    if (ForwardSlashRegexMode != LexerForwardSlashRegexMode::None &&
        *CurPtr == '/') {
      break;
    }
  } while (advanceIfValidContinuationOfOperator(CurPtr, BufferEnd));

  if (CurPtr-TokStart > 2) {
    // If there is a "//" or "/*" in the middle of an identifier token,
    // it starts a comment.
    for (auto Ptr = TokStart+1; Ptr != CurPtr-1; ++Ptr) {
      if (Ptr[0] == '/' && (Ptr[1] == '/' || Ptr[1] == '*')) {
        CurPtr = Ptr;
        break;
      }
    }
  }

  // OperEnd is one past the last operator character; for an escaped operator
  // it must be the closing backtick.
  auto *const OperEnd = CurPtr;
  if (HadBacktick) {
    if (*OperEnd != '`') {
      // The backtick is punctuation.
      CurPtr = OperStart;
      return formToken(tok::backtick, TokStart);
    }
    // Consume the closing backtick so it becomes part of the token.
    ++CurPtr;
  }

  formOperatorToken(TokStart, OperEnd);
  if (HadBacktick) {
    // If this token is at ArtificialEOF, it's forced to be tok::eof. Don't mark
    // this as escaped-operator in this case. Also don't mark if we had
    // something unrecoverable.
    if (!NextToken.is(tok::eof) && !NextToken.is(tok::unknown))
      NextToken.setEscapedOperator(true);
  }
}

/// lexDollarIdent - Match $[0-9a-zA-Z_$]+
void Lexer::lexDollarIdent() {
const char *tokStart = CurPtr-1;
Expand Down Expand Up @@ -2652,6 +2679,10 @@ void Lexer::lexImpl() {
return lexStringLiteral();

case '`':
auto *Tmp = CurPtr;
if (advanceIfValidStartOfOperator(Tmp, BufferEnd))
return lexOperatorIdentifier();

return lexEscapedIdentifier();
}
}
Expand Down
11 changes: 2 additions & 9 deletions lib/Parse/ParseDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8535,17 +8535,10 @@ Parser::parseDeclOperator(ParseDeclOptions Flags, DeclAttributes &Attributes) {
// Postfix operators starting with ? or ! conflict with builtin
// unwrapping operators.
if (Attributes.hasAttribute<PostfixAttr>())
if (!Tok.getText().empty() && (Tok.getRawText().front() == '?' ||
Tok.getRawText().front() == '!'))
if (!Tok.getText().empty() && (Tok.getText().front() == '?' ||
Tok.getText().front() == '!'))
diagnose(Tok, diag::postfix_operator_name_cannot_start_with_unwrap);

// Prefix operators may not contain the `/` character when `/.../` regex
// literals are enabled.
if (Context.LangOpts.EnableBareSlashRegexLiterals) {
if (Attributes.hasAttribute<PrefixAttr>() && Tok.getText().contains("/"))
diagnose(Tok, diag::prefix_slash_not_allowed);
}

// A common error is to try to define an operator with something in the
// unicode plane considered to be an operator, or to try to define an
// operator like "not". Analyze and diagnose this specifically.
Expand Down
96 changes: 55 additions & 41 deletions lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ ParserResult<Expr> Parser::parseExprUnary(Diag<> Message, bool isExprBasic) {
UnresolvedDeclRefExpr *Operator;

// First check to see if we have the start of a regex literal `/.../`.
tryLexRegexLiteral(/*mustBeRegex*/ true);
tryLexRegexLiteral(/*forUnappliedOperator*/ false);

switch (Tok.getKind()) {
default:
Expand Down Expand Up @@ -880,56 +880,70 @@ UnresolvedDeclRefExpr *Parser::parseExprOperator() {
return new (Context) UnresolvedDeclRefExpr(name, refKind, DeclNameLoc(loc));
}

void Parser::tryLexRegexLiteral(bool mustBeRegex) {
void Parser::tryLexRegexLiteral(bool forUnappliedOperator) {
if (!Context.LangOpts.EnableBareSlashRegexLiterals)
return;

// Never a regex literal.
if (Tok.isEscapedOperator())
return;

// Check to see if we have a regex literal `/.../`, optionally with a prefix
// operator e.g `!/.../`.
bool mustBeRegex = false;
switch (Tok.getKind()) {
case tok::oper_prefix:
// Prefix operators may contain `/` characters, so this may not be a regex,
// and as such need to make sure we have a closing `/`. The first character
// heuristics aren't relevant here as a right-bound operator will not have
// a space, tab, or `)` character.
break;
case tok::oper_binary_spaced:
case tok::oper_binary_unspaced: {
// Check to see if we have an operator containing '/'.
auto slashIdx = Tok.getText().find("/");
if (slashIdx == StringRef::npos)
break;
case tok::oper_binary_unspaced:
// When re-lexing for a 'proper' expression, binary operators are always
// invalid, so we can be confident in always lexing a regex literal.
mustBeRegex = !forUnappliedOperator;
break;
default:
// We only re-lex regex literals for operator tokens.
return;
}

CancellableBacktrackingScope backtrack(*this);
{
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
regexScope.emplace(*L, mustBeRegex);

// Try re-lex as a `/.../` regex literal, this will split an operator if
// necessary.
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);

// If we didn't split a prefix operator, reset the regex lexing scope.
// Otherwise, we want to keep it in place for the next token.
auto didSplit = L->peekNextToken().getLength() == slashIdx;
if (!didSplit)
regexScope.reset();

// Discard the current token, which will be replaced by the re-lexed
// token, which will either be a regex literal token, a prefix operator,
// or the original unchanged token.
discardToken();

// If we split a prefix operator from the regex literal, and are not sure
// whether this should be a regex, backtrack if we didn't end up lexing a
// regex literal.
if (didSplit && !mustBeRegex &&
!L->peekNextToken().is(tok::regex_literal)) {
return;
}
// Check to see if we have an operator containing '/'.
auto slashIdx = Tok.getText().find("/");
if (slashIdx == StringRef::npos)
return;

CancellableBacktrackingScope backtrack(*this);
{
Optional<Lexer::ForwardSlashRegexRAII> regexScope;
regexScope.emplace(*L, mustBeRegex);

// Try re-lex as a `/.../` regex literal, this will split an operator if
// necessary.
L->restoreState(getParserPosition().LS, /*enableDiagnostics*/ true);

// If we didn't split a prefix operator, reset the regex lexing scope.
// Otherwise, we want to keep it in place for the next token.
auto didSplit = L->peekNextToken().getLength() == slashIdx;
if (!didSplit)
regexScope.reset();

// Discard the current token, which will be replaced by the re-lexed
// token, which will either be a regex literal token, a prefix operator,
// or the original unchanged token.
discardToken();

// Otherwise, accept the result.
backtrack.cancelBacktrack();
// If we split a prefix operator from the regex literal, and are not sure
// whether this should be a regex, backtrack if we didn't end up lexing a
// regex literal.
if (didSplit && !mustBeRegex &&
!L->peekNextToken().is(tok::regex_literal)) {
return;
}
break;
}
default:
break;

// Otherwise, accept the result.
backtrack.cancelBacktrack();
}
}

Expand Down Expand Up @@ -3226,7 +3240,7 @@ ParserStatus Parser::parseExprList(tok leftTok, tok rightTok,
// First check to see if we have the start of a regex literal `/.../`. We
// need to do this before handling unapplied operator references, as e.g
// `(/, /)` might be a regex literal.
tryLexRegexLiteral(/*mustBeRegex*/ false);
tryLexRegexLiteral(/*forUnappliedOperator*/ true);

// See if we have an operator decl ref '(<op>)'. The operator token in
// this case lexes as a binary operator because it neither leads nor
Expand Down
Loading