From 3cca818efabbccdde36b06609cf75ee7caa8e012 Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Thu, 9 Jul 2020 17:09:57 +0200 Subject: [PATCH] Refactored NumericLiteralParser to not require a Preprocessor Summary: We would like to use NumericLiteralParser in the implementation of the syntax tree builder, and plumbing a preprocessor there seems inconvenient and superfluous. Reviewers: eduucaldas Reviewed By: eduucaldas Subscribers: gribozavr2, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D83480 --- clang/include/clang/Lex/LiteralSupport.h | 10 ++- clang/lib/Lex/LiteralSupport.cpp | 104 +++++++++++++---------- clang/lib/Lex/PPExpressions.cpp | 4 +- clang/lib/Lex/Preprocessor.cpp | 4 +- clang/lib/Sema/SemaExpr.cpp | 4 +- 5 files changed, 74 insertions(+), 52 deletions(-) diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h index 6829771b283088..0c4f0fe277b7c6 100644 --- a/clang/include/clang/Lex/LiteralSupport.h +++ b/clang/include/clang/Lex/LiteralSupport.h @@ -40,7 +40,9 @@ void expandUCNs(SmallVectorImpl &Buf, StringRef Input); /// of a ppnumber, classifying it as either integer, floating, or erroneous, /// determines the radix of the value and can convert it to a useful value. class NumericLiteralParser { - Preprocessor &PP; // needed for diagnostics + const SourceManager &SM; + const LangOptions &LangOpts; + DiagnosticsEngine &Diags; const char *const ThisTokBegin; const char *const ThisTokEnd; @@ -54,9 +56,9 @@ class NumericLiteralParser { SmallString<32> UDSuffixBuf; public: - NumericLiteralParser(StringRef TokSpelling, - SourceLocation TokLoc, - Preprocessor &PP); + NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, + const SourceManager &SM, const LangOptions &LangOpts, + const TargetInfo &Target, DiagnosticsEngine &Diags); bool hadError : 1; bool isUnsigned : 1; bool isLong : 1; // This is *not* set for long long. diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index f44614b4bec466..eb16bc8c7da2d0 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -525,8 +525,12 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, /// NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, - Preprocessor &PP) - : PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) { + const SourceManager &SM, + const LangOptions &LangOpts, + const TargetInfo &Target, + DiagnosticsEngine &Diags) + : SM(SM), LangOpts(LangOpts), Diags(Diags), + ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) { // This routine assumes that the range begin/end matches the regex for integer // and FP constants (specifically, the 'pp-number' regex), and assumes that @@ -572,7 +576,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, checkSeparator(TokLoc, s, CSK_AfterDigits); // Initial scan to lookahead for fixed point suffix. - if (PP.getLangOpts().FixedPoint) { + if (LangOpts.FixedPoint) { for (const char *c = s; c != ThisTokEnd; ++c) { if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') { saw_fixed_point_suffix = true; @@ -592,14 +596,16 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, switch (*s) { case 'R': case 'r': - if (!PP.getLangOpts().FixedPoint) break; + if (!LangOpts.FixedPoint) + break; if (isFract || isAccum) break; if (!(saw_period || saw_exponent)) break; isFract = true; continue; case 'K': case 'k': - if (!PP.getLangOpts().FixedPoint) break; + if (!LangOpts.FixedPoint) + break; if (isFract || isAccum) break; if (!(saw_period || saw_exponent)) break; isAccum = true; @@ -607,7 +613,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, case 'h': // FP Suffix for "half". case 'H': // OpenCL Extension v1.2 s9.5 - h or H suffix for half type. - if (!(PP.getLangOpts().Half || PP.getLangOpts().FixedPoint)) break; + if (!(LangOpts.Half || LangOpts.FixedPoint)) + break; if (isIntegerLiteral()) break; // Error for integer constant. if (isHalf || isFloat || isLong) break; // HH, FH, LH invalid. isHalf = true; @@ -621,8 +628,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // CUDA host and device may have different _Float16 support, therefore // allows f16 literals to avoid false alarm. // ToDo: more precise check for CUDA. - if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) && - s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') { + if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd && + s[1] == '1' && s[2] == '6') { s += 2; // success, eat up 2 characters. isFloat16 = true; continue; @@ -657,10 +664,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } else { isLong = true; } - continue; // Success. + continue; // Success. case 'i': case 'I': - if (PP.getLangOpts().MicrosoftExt) { + if (LangOpts.MicrosoftExt) { if (isLong || isLongLong || MicrosoftInteger) break; @@ -713,7 +720,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (s != ThisTokEnd || isImaginary) { // FIXME: Don't bother expanding UCNs if !tok.hasUCN(). expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)); - if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) { + if (isValidUDSuffix(LangOpts, UDSuffixBuf)) { if (!isImaginary) { // Any suffix pieces we might have parsed are actually part of the // ud-suffix. @@ -736,8 +743,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, if (s != ThisTokEnd) { // Report an error if there are any. - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), - diag::err_invalid_suffix_constant) + Diags.Report(Lexer::AdvanceToTokenCharacter( + TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts), + diag::err_invalid_suffix_constant) << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin) << (isFixedPointConstant ? 2 : isFPConstant); hadError = true; @@ -758,9 +766,11 @@ void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){ // If we have a hex digit other than 'e' (which denotes a FP exponent) then // the code is using an incorrect base. if (isHexDigit(*s) && *s != 'e' && *s != 'E' && - !isValidUDSuffix(PP.getLangOpts(), StringRef(s, ThisTokEnd - s))) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ? 1 : 0); + !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) { + Diags.Report( + Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts), + diag::err_invalid_digit) + << StringRef(s, 1) << (radix == 8 ? 1 : 0); hadError = true; return; } @@ -786,8 +796,9 @@ void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){ s = first_non_digit; } else { if (!hadError) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), - diag::err_exponent_has_no_digits); + Diags.Report(Lexer::AdvanceToTokenCharacter( + TokLoc, Exponent - ThisTokBegin, SM, LangOpts), + diag::err_exponent_has_no_digits); hadError = true; } return; @@ -833,9 +844,10 @@ void NumericLiteralParser::checkSeparator(SourceLocation TokLoc, return; if (isDigitSeparator(*Pos)) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin), - diag::err_digit_separator_not_between_digits) - << IsAfterDigits; + Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM, + LangOpts), + diag::err_digit_separator_not_between_digits) + << IsAfterDigits; hadError = true; } } @@ -873,9 +885,10 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { } if (!HasSignificandDigits) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), - diag::err_hex_constant_requires) - << PP.getLangOpts().CPlusPlus << 1; + Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, + LangOpts), + diag::err_hex_constant_requires) + << LangOpts.CPlusPlus << 1; hadError = true; return; } @@ -891,8 +904,9 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { const char *first_non_digit = SkipDigits(s); if (!containsDigits(s, first_non_digit)) { if (!hadError) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin), - diag::err_exponent_has_no_digits); + Diags.Report(Lexer::AdvanceToTokenCharacter( + TokLoc, Exponent - ThisTokBegin, SM, LangOpts), + diag::err_exponent_has_no_digits); hadError = true; } return; @@ -900,16 +914,17 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { checkSeparator(TokLoc, s, CSK_BeforeDigits); s = first_non_digit; - if (!PP.getLangOpts().HexFloats) - PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus - ? diag::ext_hex_literal_invalid - : diag::ext_hex_constant_invalid); - else if (PP.getLangOpts().CPlusPlus17) - PP.Diag(TokLoc, diag::warn_cxx17_hex_literal); + if (!LangOpts.HexFloats) + Diags.Report(TokLoc, LangOpts.CPlusPlus + ? diag::ext_hex_literal_invalid + : diag::ext_hex_constant_invalid); + else if (LangOpts.CPlusPlus17) + Diags.Report(TokLoc, diag::warn_cxx17_hex_literal); } else if (saw_period) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin), - diag::err_hex_constant_requires) - << PP.getLangOpts().CPlusPlus << 0; + Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, + LangOpts), + diag::err_hex_constant_requires) + << LangOpts.CPlusPlus << 0; hadError = true; } return; @@ -918,12 +933,10 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { // Handle simple binary numbers 0b01010 if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) { // 0b101010 is a C++1y / GCC extension. - PP.Diag(TokLoc, - PP.getLangOpts().CPlusPlus14 - ? diag::warn_cxx11_compat_binary_literal - : PP.getLangOpts().CPlusPlus - ? diag::ext_binary_literal_cxx14 - : diag::ext_binary_literal); + Diags.Report(TokLoc, LangOpts.CPlusPlus14 + ? diag::warn_cxx11_compat_binary_literal + : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14 + : diag::ext_binary_literal); ++s; assert(s < ThisTokEnd && "didn't maximally munch?"); radix = 2; @@ -932,10 +945,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { if (s == ThisTokEnd) { // Done. } else if (isHexDigit(*s) && - !isValidUDSuffix(PP.getLangOpts(), - StringRef(s, ThisTokEnd - s))) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), - diag::err_invalid_digit) << StringRef(s, 1) << 2; + !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) { + Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, + LangOpts), + diag::err_invalid_digit) + << StringRef(s, 1) << 2; hadError = true; } // Other suffixes will be diagnosed by the caller. diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 7a158a31490d29..8c120c13d7d26e 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -295,7 +295,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT, if (NumberInvalid) return true; // a diagnostic was already reported - NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP); + NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), + PP.getSourceManager(), PP.getLangOpts(), + PP.getTargetInfo(), PP.getDiagnostics()); if (Literal.hadError) return true; // a diagnostic was already reported. diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 105aa6683c8b56..160e2b6ed88463 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1370,7 +1370,9 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); if (NumberInvalid) return false; - NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); + NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(), + getLangOpts(), getTargetInfo(), + getDiagnostics()); if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) return false; llvm::APInt APVal(64, 0); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e1433d293286ec..986c03bb872b8a 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3634,7 +3634,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { if (Invalid) return ExprError(); - NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP); + NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), + PP.getSourceManager(), PP.getLangOpts(), + PP.getTargetInfo(), PP.getDiagnostics()); if (Literal.hadError) return ExprError();