-
Notifications
You must be signed in to change notification settings - Fork 14k
[clang] Refactor IdentifierInfo::ObjcOrBuiltinID
#71709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch refactors how values are stored inside `IdentifierInfo::ObjcOrBuiltinID` bit-field, and annotates it with `preferred_type`. In order to make the value easier to interpret by debuggers, a new `ObjCKeywordOrInterestingOrBuiltin` is added. Previous "layout" of this fields couldn't be represented with this new enum, because it skipped over some arbitrary enumerators, so a new "layout" was invented based on `ObjCKeywordOrInterestingOrBuiltin` enum. I believe the new layout is simpler than the new one.
@llvm/pr-subscribers-clang-modules @llvm/pr-subscribers-clang Author: Vlad Serebrennikov (Endilll) ChangesThis patch refactors how values are stored inside Full diff: https://github.com/llvm/llvm-project/pull/71709.diff 1 Files Affected:
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 0898e7d39dd7dee..fa76228da2b143a 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
#define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
+#include "clang/Basic/Builtins.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/TokenKinds.h"
@@ -86,19 +87,26 @@ enum { IdentifierInfoAlignment = 8 };
static constexpr int ObjCOrBuiltinIDBits = 16;
/// The "layout" of ObjCOrBuiltinID is:
-/// - The first value (0) represents "not a special identifier".
-/// - The next (NUM_OBJC_KEYWORDS - 1) values represent ObjCKeywordKinds (not
-/// including objc_not_keyword).
-/// - The next (NUM_INTERESTING_IDENTIFIERS - 1) values represent
-/// InterestingIdentifierKinds (not including not_interesting).
-/// - The rest of the values represent builtin IDs (not including NotBuiltin).
-static constexpr int FirstObjCKeywordID = 1;
-static constexpr int LastObjCKeywordID =
- FirstObjCKeywordID + tok::NUM_OBJC_KEYWORDS - 2;
-static constexpr int FirstInterestingIdentifierID = LastObjCKeywordID + 1;
-static constexpr int LastInterestingIdentifierID =
- FirstInterestingIdentifierID + tok::NUM_INTERESTING_IDENTIFIERS - 2;
-static constexpr int FirstBuiltinID = LastInterestingIdentifierID + 1;
+/// - ObjCKeywordKind enumerators
+/// - InterestingIdentifierKind enumerators
+/// - Builtin::ID enumerators
+/// - NonSpecialIdentifier
+enum class ObjCKeywordOrInterestingOrBuiltin {
+#define OBJC_AT_KEYWORD(X) objc_##X,
+#include "clang/Basic/TokenKinds.def"
+ NUM_OBJC_KEYWORDS,
+
+#define INTERESTING_IDENTIFIER(X) X,
+#include "clang/Basic/TokenKinds.def"
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS,
+
+ NotBuiltin,
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
+#include "clang/Basic/Builtins.def"
+ FirstTSBuiltin,
+
+ NonSpecialIdentifier = 65534
+};
/// One of these records is kept for each identifier that
/// is lexed. This contains information about whether the token was \#define'd,
@@ -113,9 +121,7 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
LLVM_PREFERRED_TYPE(tok::TokenKind)
unsigned TokenID : 9;
- // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
- // First NUM_OBJC_KEYWORDS values are for Objective-C,
- // the remaining values are for builtins.
+ LLVM_PREFERRED_TYPE(ObjCKeywordOrInterestingOrBuiltin)
unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
// True if there is a #define for this.
@@ -198,13 +204,16 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
IdentifierInfo()
- : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
- HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
- IsPoisoned(false), IsCPPOperatorKeyword(false),
- NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
- FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
- IsModulesImport(false), IsMangledOpenMPVariantName(false),
- IsDeprecatedMacro(false), IsRestrictExpansion(false), IsFinal(false) {}
+ : TokenID(tok::identifier),
+ ObjCOrBuiltinID(llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)),
+ HasMacro(false), HadMacro(false), IsExtension(false),
+ IsFutureCompatKeyword(false), IsPoisoned(false),
+ IsCPPOperatorKeyword(false), NeedsHandleIdentifier(false),
+ IsFromAST(false), ChangedAfterLoad(false), FEChangedAfterLoad(false),
+ RevertedTokenID(false), OutOfDate(false), IsModulesImport(false),
+ IsMangledOpenMPVariantName(false), IsDeprecatedMacro(false),
+ IsRestrictExpansion(false), IsFinal(false) {}
public:
IdentifierInfo(const IdentifierInfo &) = delete;
@@ -332,42 +341,62 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
///
/// For example, 'class' will return tok::objc_class if ObjC is enabled.
tok::ObjCKeywordKind getObjCKeywordID() const {
- static_assert(FirstObjCKeywordID == 1,
- "hard-coding this assumption to simplify code");
- if (ObjCOrBuiltinID <= LastObjCKeywordID)
- return tok::ObjCKeywordKind(ObjCOrBuiltinID);
- else
- return tok::objc_not_keyword;
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value < ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS)
+ return static_cast<tok::ObjCKeywordKind>(ObjCOrBuiltinID);
+ return tok::objc_not_keyword;
+ }
+ void setObjCKeywordID(tok::ObjCKeywordKind ID) {
+ ObjCOrBuiltinID = ID;
+ assert(getObjCKeywordID() == ID && "ID too large for field!");
}
- void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
/// Return a value indicating whether this is a builtin function.
- ///
- /// 0 is not-built-in. 1+ are specific builtin functions.
unsigned getBuiltinID() const {
- if (ObjCOrBuiltinID >= FirstBuiltinID)
- return 1 + (ObjCOrBuiltinID - FirstBuiltinID);
- else
- return 0;
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value > ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS &&
+ Value != ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier)
+ return static_cast<Builtin::ID>(
+ ObjCOrBuiltinID - 1 -
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS));
+ return Builtin::ID::NotBuiltin;
}
void setBuiltinID(unsigned ID) {
- assert(ID != 0);
- ObjCOrBuiltinID = FirstBuiltinID + (ID - 1);
+ assert(ID != Builtin::ID::NotBuiltin);
+ ObjCOrBuiltinID =
+ ID + 1 +
+ llvm::to_underlying(ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS);
assert(getBuiltinID() == ID && "ID too large for field!");
}
- void clearBuiltinID() { ObjCOrBuiltinID = 0; }
+ void clearBuiltinID() {
+ ObjCOrBuiltinID = llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NonSpecialIdentifier);
+ }
tok::InterestingIdentifierKind getInterestingIdentifierID() const {
- if (ObjCOrBuiltinID >= FirstInterestingIdentifierID &&
- ObjCOrBuiltinID <= LastInterestingIdentifierID)
- return tok::InterestingIdentifierKind(
- 1 + (ObjCOrBuiltinID - FirstInterestingIdentifierID));
+ auto Value =
+ static_cast<ObjCKeywordOrInterestingOrBuiltin>(ObjCOrBuiltinID);
+ if (Value > ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS &&
+ Value < ObjCKeywordOrInterestingOrBuiltin::
+ NUM_OBJC_KEYWORDS_AND_INTERESTING_IDENTIFIERS)
+ return static_cast<tok::InterestingIdentifierKind>(
+ ObjCOrBuiltinID - 1 -
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS));
else
return tok::not_interesting;
}
void setInterestingIdentifierID(unsigned ID) {
assert(ID != tok::not_interesting);
- ObjCOrBuiltinID = FirstInterestingIdentifierID + (ID - 1);
+ ObjCOrBuiltinID = ID + 1 +
+ llvm::to_underlying(
+ ObjCKeywordOrInterestingOrBuiltin::NUM_OBJC_KEYWORDS);
assert(getInterestingIdentifierID() == ID && "ID too large for field!");
}
|
@ChuanqiXu9 Can I use some of your help? This PR breaks two module tests:
Apparently I somehow break import or export of header unit macros. In llvm-project/clang/test/Modules/cxx20-module-file-info-macros.cpp Lines 51 to 58 in e3c120a
In llvm-project/clang/test/Modules/cxx20-hu-04.cpp Lines 83 to 94 in e3c120a
produces some unexpected diagnostic:
which is again a missing macro definition. |
Oh, I didn't look into the identifier's system before. I took a while to look at the patch but I failed to understand it and I failed to find the relationships between this patch and header units... |
Yeah, the part this PR touches in not the most straightforward one. Thank you for you time! |
Finally got past previous test failures. But the following tests started to fail:
Here is full test output (don't mind |
Aaron suggested to me offline that |
@AaronBallman this looks sensible. WDYT? |
It's sensible but still seems to be failing CI. |
Yeah, I remember I was able to get past the first round of test failures, only to find another one waiting for me. |
Also do refactoing in IdentifierTable.h
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Precommit CI didn't run for Windows, but I tested the changes locally and all tests came back clean for me.
this seems to break
the call to
But the newly included I think we can probably just remove the code in |
@aeubanks Thank you for reporting! I'll take a look momentarily |
I'll send out a PR to remove that code, and potentially also remove the |
#4885 for why |
PR llvm#71709 broke the Linux PIE build with `undefined symbol: alloca` errors. With the newly included `clang/Basic/Builtins.h` in that PR, it surfaces an issue with a combination of two previous patches. 26670dc added `#undef alloca` so clang builtins handling of alloca would work under MSVC (unsure if this is still necessary). 194b6a3 added code that calls `alloca` to workaround a Linux kernel < 4.1 bug. Given that Linux 4.1 was EOL in 2018, it should be ok to remove this workaround.
…81533) PR #71709 broke the Linux PIE build with `undefined symbol: alloca` errors. With the newly included `clang/Basic/Builtins.h` in that PR, it surfaces an issue with a combination of two previous patches. 26670dc added `#undef alloca` so clang builtins handling of alloca would work under MSVC (unsure if this is still necessary). 194b6a3 added code that calls `alloca` to workaround a Linux kernel < 4.1 bug. Given that Linux 4.1 was EOL in 2018, it should be ok to remove this workaround.
Added in 26670dc to workaround #4885. Windows CI and a local Windows build are happy with this change, so it seems like this has been properly fixed at some point. If this does break somebody, this can be easily reverted. (Also, Linux does the same `#define alloca` in system headers, so I'm not sure why it'd be different on Windows) This is tech debt that caused breakages, see comments on #71709.
@@ -3597,8 +3597,13 @@ class ASTIdentifierTableTrait { | |||
/// doesn't check whether the name has macros defined; use PublicMacroIterator | |||
/// to check that. | |||
bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) { | |||
if (MacroOffset || II->isPoisoned() || | |||
(!IsModule && II->getObjCOrBuiltinID()) || | |||
II->getObjCOrBuiltinID(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @Endilll, Does this call to getObjCOrBuiltinID do anything? Static verifiers report it 'useless' since it only returns a value and the value is not used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for reporting this!
No, might be a leftover from debugging sessions (this PR was a pain).
Fixed in 662ef86
A follow-up to #71709, addressing the static analysis finding reported in https://github.com/llvm/llvm-project/pull/71709/files#r1576846306
This patch refactors how values are stored inside
IdentifierInfo::ObjcOrBuiltinID
bit-field, and annotates it withpreferred_type
. In order to make the value easier to interpret by debuggers, a newObjCKeywordOrInterestingOrBuiltin
is added. Previous "layout" of this fields couldn't be represented with this new enum, because it skipped over some arbitrary enumerators, so a new "layout" was invented based onObjCKeywordOrInterestingOrBuiltin
enum. I believe the new layout is simpler than the new one.