From 3cca818efabbccdde36b06609cf75ee7caa8e012 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko <gribozavr@gmail.com>
Date: Thu, 9 Jul 2020 17:09:57 +0200
Subject: [PATCH] Refactored NumericLiteralParser to not require a Preprocessor

Summary:
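We would like to use NumericLiteralParser in the implementation of the
syntax tree builder, and plumbing a preprocessor there seems
inconvenient and superfluous. The parser now takes the SourceManager,
LangOptions, TargetInfo, and DiagnosticsEngine it needs directly, and
existing callers obtain them from their Preprocessor.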

Reviewers: eduucaldas

Reviewed By: eduucaldas

Subscribers: gribozavr2, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D83480
---
 clang/include/clang/Lex/LiteralSupport.h |  10 ++-
 clang/lib/Lex/LiteralSupport.cpp         | 104 +++++++++++++----------
 clang/lib/Lex/PPExpressions.cpp          |   4 +-
 clang/lib/Lex/Preprocessor.cpp           |   4 +-
 clang/lib/Sema/SemaExpr.cpp              |   4 +-
 5 files changed, 74 insertions(+), 52 deletions(-)
diff --git a/clang/include/clang/Lex/LiteralSupport.h b/clang/include/clang/Lex/LiteralSupport.h
index 6829771b283088..0c4f0fe277b7c6 100644
--- a/clang/include/clang/Lex/LiteralSupport.h
+++ b/clang/include/clang/Lex/LiteralSupport.h
@@ -40,7 +40,9 @@ void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
 /// of a ppnumber, classifying it as either integer, floating, or erroneous,
 /// determines the radix of the value and can convert it to a useful value.
 class NumericLiteralParser {
-  Preprocessor &PP; // needed for diagnostics
+  const SourceManager &SM;
+  const LangOptions &LangOpts;
+  DiagnosticsEngine &Diags;
 
   const char *const ThisTokBegin;
   const char *const ThisTokEnd;
@@ -54,9 +56,9 @@ class NumericLiteralParser {
   SmallString<32> UDSuffixBuf;
 
 public:
-  NumericLiteralParser(StringRef TokSpelling,
-                       SourceLocation TokLoc,
-                       Preprocessor &PP);
+  NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
+                       const SourceManager &SM, const LangOptions &LangOpts,
+                       const TargetInfo &Target, DiagnosticsEngine &Diags);
   bool hadError : 1;
   bool isUnsigned : 1;
   bool isLong : 1;          // This is *not* set for long long.
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index f44614b4bec466..eb16bc8c7da2d0 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -525,8 +525,12 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
 ///
 NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
                                            SourceLocation TokLoc,
-                                           Preprocessor &PP)
-  : PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
+                                           const SourceManager &SM,
+                                           const LangOptions &LangOpts,
+                                           const TargetInfo &Target,
+                                           DiagnosticsEngine &Diags)
+    : SM(SM), LangOpts(LangOpts), Diags(Diags),
+      ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
 
   // This routine assumes that the range begin/end matches the regex for integer
   // and FP constants (specifically, the 'pp-number' regex), and assumes that
@@ -572,7 +576,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   checkSeparator(TokLoc, s, CSK_AfterDigits);
 
   // Initial scan to lookahead for fixed point suffix.
-  if (PP.getLangOpts().FixedPoint) {
+  if (LangOpts.FixedPoint) {
     for (const char *c = s; c != ThisTokEnd; ++c) {
       if (*c == 'r' || *c == 'k' || *c == 'R' || *c == 'K') {
         saw_fixed_point_suffix = true;
@@ -592,14 +596,16 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
     switch (*s) {
     case 'R':
     case 'r':
-      if (!PP.getLangOpts().FixedPoint) break;
+      if (!LangOpts.FixedPoint)
+        break;
       if (isFract || isAccum) break;
       if (!(saw_period || saw_exponent)) break;
       isFract = true;
       continue;
     case 'K':
     case 'k':
-      if (!PP.getLangOpts().FixedPoint) break;
+      if (!LangOpts.FixedPoint)
+        break;
       if (isFract || isAccum) break;
       if (!(saw_period || saw_exponent)) break;
       isAccum = true;
@@ -607,7 +613,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
     case 'h':      // FP Suffix for "half".
     case 'H':
       // OpenCL Extension v1.2 s9.5 - h or H suffix for half type.
-      if (!(PP.getLangOpts().Half || PP.getLangOpts().FixedPoint)) break;
+      if (!(LangOpts.Half || LangOpts.FixedPoint))
+        break;
       if (isIntegerLiteral()) break;  // Error for integer constant.
       if (isHalf || isFloat || isLong) break; // HH, FH, LH invalid.
       isHalf = true;
@@ -621,8 +628,8 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       // CUDA host and device may have different _Float16 support, therefore
       // allows f16 literals to avoid false alarm.
       // ToDo: more precise check for CUDA.
-      if ((PP.getTargetInfo().hasFloat16Type() || PP.getLangOpts().CUDA) &&
-          s + 2 < ThisTokEnd && s[1] == '1' && s[2] == '6') {
+      if ((Target.hasFloat16Type() || LangOpts.CUDA) && s + 2 < ThisTokEnd &&
+          s[1] == '1' && s[2] == '6') {
         s += 2; // success, eat up 2 characters.
         isFloat16 = true;
         continue;
@@ -657,10 +664,10 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
       } else {
         isLong = true;
       }
-      continue;  // Success.
+      continue; // Success.
     case 'i':
     case 'I':
-      if (PP.getLangOpts().MicrosoftExt) {
+      if (LangOpts.MicrosoftExt) {
         if (isLong || isLongLong || MicrosoftInteger)
           break;
 
@@ -713,7 +720,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
   if (s != ThisTokEnd || isImaginary) {
     // FIXME: Don't bother expanding UCNs if !tok.hasUCN().
     expandUCNs(UDSuffixBuf, StringRef(SuffixBegin, ThisTokEnd - SuffixBegin));
-    if (isValidUDSuffix(PP.getLangOpts(), UDSuffixBuf)) {
+    if (isValidUDSuffix(LangOpts, UDSuffixBuf)) {
       if (!isImaginary) {
         // Any suffix pieces we might have parsed are actually part of the
         // ud-suffix.
@@ -736,8 +743,9 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
 
     if (s != ThisTokEnd) {
       // Report an error if there are any.
-      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
-              diag::err_invalid_suffix_constant)
+      Diags.Report(Lexer::AdvanceToTokenCharacter(
+                       TokLoc, SuffixBegin - ThisTokBegin, SM, LangOpts),
+                   diag::err_invalid_suffix_constant)
           << StringRef(SuffixBegin, ThisTokEnd - SuffixBegin)
           << (isFixedPointConstant ? 2 : isFPConstant);
       hadError = true;
@@ -758,9 +766,11 @@ void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
   // If we have a hex digit other than 'e' (which denotes a FP exponent) then
   // the code is using an incorrect base.
   if (isHexDigit(*s) && *s != 'e' && *s != 'E' &&
-      !isValidUDSuffix(PP.getLangOpts(), StringRef(s, ThisTokEnd - s))) {
-    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
-            diag::err_invalid_digit) << StringRef(s, 1) << (radix == 8 ? 1 : 0);
+      !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
+    Diags.Report(
+        Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM, LangOpts),
+        diag::err_invalid_digit)
+        << StringRef(s, 1) << (radix == 8 ? 1 : 0);
     hadError = true;
     return;
   }
@@ -786,8 +796,9 @@ void NumericLiteralParser::ParseDecimalOrOctalCommon(SourceLocation TokLoc){
       s = first_non_digit;
     } else {
       if (!hadError) {
-        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
-                diag::err_exponent_has_no_digits);
+        Diags.Report(Lexer::AdvanceToTokenCharacter(
+                         TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
+                     diag::err_exponent_has_no_digits);
         hadError = true;
       }
       return;
@@ -833,9 +844,10 @@ void NumericLiteralParser::checkSeparator(SourceLocation TokLoc,
     return;
 
   if (isDigitSeparator(*Pos)) {
-    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin),
-            diag::err_digit_separator_not_between_digits)
-      << IsAfterDigits;
+    Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, Pos - ThisTokBegin, SM,
+                                                LangOpts),
+                 diag::err_digit_separator_not_between_digits)
+        << IsAfterDigits;
     hadError = true;
   }
 }
@@ -873,9 +885,10 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     }
 
     if (!HasSignificandDigits) {
-      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
-              diag::err_hex_constant_requires)
-          << PP.getLangOpts().CPlusPlus << 1;
+      Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
+                                                  LangOpts),
+                   diag::err_hex_constant_requires)
+          << LangOpts.CPlusPlus << 1;
       hadError = true;
       return;
     }
@@ -891,8 +904,9 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
       const char *first_non_digit = SkipDigits(s);
       if (!containsDigits(s, first_non_digit)) {
         if (!hadError) {
-          PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
-                  diag::err_exponent_has_no_digits);
+          Diags.Report(Lexer::AdvanceToTokenCharacter(
+                           TokLoc, Exponent - ThisTokBegin, SM, LangOpts),
+                       diag::err_exponent_has_no_digits);
           hadError = true;
         }
         return;
@@ -900,16 +914,17 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
       checkSeparator(TokLoc, s, CSK_BeforeDigits);
       s = first_non_digit;
 
-      if (!PP.getLangOpts().HexFloats)
-        PP.Diag(TokLoc, PP.getLangOpts().CPlusPlus
-                            ? diag::ext_hex_literal_invalid
-                            : diag::ext_hex_constant_invalid);
-      else if (PP.getLangOpts().CPlusPlus17)
-        PP.Diag(TokLoc, diag::warn_cxx17_hex_literal);
+      if (!LangOpts.HexFloats)
+        Diags.Report(TokLoc, LangOpts.CPlusPlus
+                                 ? diag::ext_hex_literal_invalid
+                                 : diag::ext_hex_constant_invalid);
+      else if (LangOpts.CPlusPlus17)
+        Diags.Report(TokLoc, diag::warn_cxx17_hex_literal);
     } else if (saw_period) {
-      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
-              diag::err_hex_constant_requires)
-          << PP.getLangOpts().CPlusPlus << 0;
+      Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
+                                                  LangOpts),
+                   diag::err_hex_constant_requires)
+          << LangOpts.CPlusPlus << 0;
       hadError = true;
     }
     return;
@@ -918,12 +933,10 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
   // Handle simple binary numbers 0b01010
   if ((c1 == 'b' || c1 == 'B') && (s[1] == '0' || s[1] == '1')) {
     // 0b101010 is a C++1y / GCC extension.
-    PP.Diag(TokLoc,
-            PP.getLangOpts().CPlusPlus14
-              ? diag::warn_cxx11_compat_binary_literal
-              : PP.getLangOpts().CPlusPlus
-                ? diag::ext_binary_literal_cxx14
-                : diag::ext_binary_literal);
+    Diags.Report(TokLoc, LangOpts.CPlusPlus14
+                             ? diag::warn_cxx11_compat_binary_literal
+                         : LangOpts.CPlusPlus ? diag::ext_binary_literal_cxx14
+                                              : diag::ext_binary_literal);
     ++s;
     assert(s < ThisTokEnd && "didn't maximally munch?");
     radix = 2;
@@ -932,10 +945,11 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
     if (s == ThisTokEnd) {
       // Done.
     } else if (isHexDigit(*s) &&
-               !isValidUDSuffix(PP.getLangOpts(),
-                                StringRef(s, ThisTokEnd - s))) {
-      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
-              diag::err_invalid_digit) << StringRef(s, 1) << 2;
+               !isValidUDSuffix(LangOpts, StringRef(s, ThisTokEnd - s))) {
+      Diags.Report(Lexer::AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin, SM,
+                                                  LangOpts),
+                   diag::err_invalid_digit)
+          << StringRef(s, 1) << 2;
       hadError = true;
     }
     // Other suffixes will be diagnosed by the caller.
diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp
index 7a158a31490d29..8c120c13d7d26e 100644
--- a/clang/lib/Lex/PPExpressions.cpp
+++ b/clang/lib/Lex/PPExpressions.cpp
@@ -295,7 +295,9 @@ static bool EvaluateValue(PPValue &Result, Token &PeekTok, DefinedTracker &DT,
     if (NumberInvalid)
       return true; // a diagnostic was already reported
 
-    NumericLiteralParser Literal(Spelling, PeekTok.getLocation(), PP);
+    NumericLiteralParser Literal(Spelling, PeekTok.getLocation(),
+                                 PP.getSourceManager(), PP.getLangOpts(),
+                                 PP.getTargetInfo(), PP.getDiagnostics());
     if (Literal.hadError)
       return true; // a diagnostic was already reported.
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 105aa6683c8b56..160e2b6ed88463 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1370,7 +1370,9 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
   if (NumberInvalid)
     return false;
-  NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
+  NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
+                               getLangOpts(), getTargetInfo(),
+                               getDiagnostics());
   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
     return false;
   llvm::APInt APVal(64, 0);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index e1433d293286ec..986c03bb872b8a 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3634,7 +3634,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
   if (Invalid)
     return ExprError();
 
-  NumericLiteralParser Literal(TokSpelling, Tok.getLocation(), PP);
+  NumericLiteralParser Literal(TokSpelling, Tok.getLocation(),
+                               PP.getSourceManager(), PP.getLangOpts(),
+                               PP.getTargetInfo(), PP.getDiagnostics());
   if (Literal.hadError)
     return ExprError();