Skip to content

Commit 2e70da3

Browse files
authored
[clang][bytecode] Partially address string literal uniqueness (#142555)
This still leaves one test case from cxx20.cpp broken: `constexpr auto b3 = name1() == name1();`.
1 parent d9df710 commit 2e70da3

File tree

6 files changed

+102
-2
lines changed

6 files changed

+102
-2
lines changed

clang/lib/AST/ByteCode/Interp.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,6 +2008,51 @@ bool DiagTypeid(InterpState &S, CodePtr OpPC) {
20082008
return false;
20092009
}
20102010

2011+
bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
2012+
const Pointer &RHS) {
2013+
unsigned LHSOffset = LHS.getIndex();
2014+
unsigned RHSOffset = RHS.getIndex();
2015+
unsigned LHSLength = (LHS.getNumElems() - 1) * LHS.elemSize();
2016+
unsigned RHSLength = (RHS.getNumElems() - 1) * RHS.elemSize();
2017+
2018+
StringRef LHSStr((const char *)LHS.atIndex(0).getRawAddress(), LHSLength);
2019+
StringRef RHSStr((const char *)RHS.atIndex(0).getRawAddress(), RHSLength);
2020+
int32_t IndexDiff = RHSOffset - LHSOffset;
2021+
if (IndexDiff < 0) {
2022+
if (static_cast<int32_t>(LHSLength) < -IndexDiff)
2023+
return false;
2024+
LHSStr = LHSStr.drop_front(-IndexDiff);
2025+
} else {
2026+
if (static_cast<int32_t>(RHSLength) < IndexDiff)
2027+
return false;
2028+
RHSStr = RHSStr.drop_front(IndexDiff);
2029+
}
2030+
2031+
unsigned ShorterCharWidth;
2032+
StringRef Shorter;
2033+
StringRef Longer;
2034+
if (LHSLength < RHSLength) {
2035+
ShorterCharWidth = LHS.elemSize();
2036+
Shorter = LHSStr;
2037+
Longer = RHSStr;
2038+
} else {
2039+
ShorterCharWidth = RHS.elemSize();
2040+
Shorter = RHSStr;
2041+
Longer = LHSStr;
2042+
}
2043+
2044+
// The null terminator isn't included in the string data, so check for it
2045+
// manually. If the longer string doesn't have a null terminator where the
2046+
// shorter string ends, they aren't potentially overlapping.
2047+
for (unsigned NullByte : llvm::seq(ShorterCharWidth)) {
2048+
if (Shorter.size() + NullByte >= Longer.size())
2049+
break;
2050+
if (Longer[Shorter.size() + NullByte])
2051+
return false;
2052+
}
2053+
return Shorter == Longer.take_front(Shorter.size());
2054+
}
2055+
20112056
// https://github.com/llvm/llvm-project/issues/102513
20122057
#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
20132058
#pragma optimize("", off)

clang/lib/AST/ByteCode/Interp.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,6 +1035,9 @@ static inline bool IsOpaqueConstantCall(const CallExpr *E) {
10351035
Builtin == Builtin::BI__builtin_function_start);
10361036
}
10371037

1038+
/// Returns true if the string literals pointed into by \p LHS and \p RHS could
/// overlap such that the two pointers compare equal, and false if the
/// literals' contents rule that out.
bool arePotentiallyOverlappingStringLiterals(const Pointer &LHS,
1039+
const Pointer &RHS);
1040+
10381041
template <>
10391042
inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
10401043
using BoolT = PrimConv<PT_Bool>::T;
@@ -1069,6 +1072,18 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
10691072
return true;
10701073
}
10711074

1075+
// FIXME: The source check here isn't entirely correct.
1076+
if (LHS.pointsToStringLiteral() && RHS.pointsToStringLiteral() &&
1077+
LHS.getFieldDesc()->asExpr() != RHS.getFieldDesc()->asExpr()) {
1078+
if (arePotentiallyOverlappingStringLiterals(LHS, RHS)) {
1079+
const SourceInfo &Loc = S.Current->getSource(OpPC);
1080+
S.FFDiag(Loc, diag::note_constexpr_literal_comparison)
1081+
<< LHS.toDiagnosticString(S.getASTContext())
1082+
<< RHS.toDiagnosticString(S.getASTContext());
1083+
return false;
1084+
}
1085+
}
1086+
10721087
if (Pointer::hasSameBase(LHS, RHS)) {
10731088
if (LHS.inUnion() && RHS.inUnion()) {
10741089
// If the pointers point into a union, things are a little more

clang/lib/AST/ByteCode/Pointer.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,17 @@ bool Pointer::pointsToLiteral() const {
571571
return E && !isa<MaterializeTemporaryExpr, StringLiteral>(E);
572572
}
573573

574+
bool Pointer::pointsToStringLiteral() const {
575+
if (isZero() || !isBlockPointer())
576+
return false;
577+
578+
if (block()->isDynamic())
579+
return false;
580+
581+
const Expr *E = block()->getDescriptor()->asExpr();
582+
return E && isa<StringLiteral>(E);
583+
}
584+
574585
std::optional<std::pair<Pointer, Pointer>>
575586
Pointer::computeSplitPoint(const Pointer &A, const Pointer &B) {
576587
if (!A.isBlockPointer() || !B.isBlockPointer())

clang/lib/AST/ByteCode/Pointer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,7 @@ class Pointer {
756756
/// Whether this points to a block that's been created for a "literal lvalue",
757757
/// i.e. a non-MaterializeTemporaryExpr Expr.
758758
bool pointsToLiteral() const;
759+
/// Whether this points to a block whose descriptor expression is a
/// StringLiteral. Always false for null pointers, non-block pointers and
/// blocks reporting isDynamic().
bool pointsToStringLiteral() const;
759760

760761
/// Prints the pointer.
761762
void print(llvm::raw_ostream &OS) const;

clang/test/AST/ByteCode/cxx11.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,3 +260,31 @@ namespace ZeroSizeCmp {
260260
static_assert(&start != &end, ""); // both-error {{constant expression}} \
261261
// both-note {{comparison of pointers '&start' and '&end' to unrelated zero-sized objects}}
262262
}
263+
264+
// Checks equality comparisons between pointers into string literals in
// constant expressions.
namespace OverlappingStrings {
// Literals whose contents differ at the compared position: the inequality is
// accepted as a constant expression.
265+
static_assert(+"foo" != +"bar", "");
266+
static_assert(&"xfoo"[1] != &"yfoo"[1], "");
267+
static_assert(+"foot" != +"foo", "");
268+
static_assert(+"foo\0bar" != +"foo\0baz", "");
269+
270+
// fold(x) evaluates to x on both branches of the __builtin_constant_p
// conditional; presumably used so the pointer casts below can be
// constant-folded -- TODO confirm.
271+
#define fold(x) (__builtin_constant_p(x) ? (x) : (x))
// A u"" literal compared against a narrow literal through const char* casts.
272+
static_assert(fold((const char*)u"A" != (const char*)"\0A\0x"), "");
273+
static_assert(fold((const char*)u"A" != (const char*)"A\0\0x"), "");
274+
static_assert(fold((const char*)u"AAA" != (const char*)"AAA\0\0x"), "");
275+
// Pointers obtained from the same variable (or a copy of it) compare equal.
276+
constexpr const char *string = "hello";
277+
constexpr const char *also_string = string;
278+
static_assert(string == string, "");
279+
static_assert(string == also_string, "");
280+
281+
282+
// These strings may overlap, and so the result of the comparison is unknown.
283+
constexpr bool may_overlap_1 = +"foo" == +"foo"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
284+
constexpr bool may_overlap_2 = +"foo" == +"foo\0bar"; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
285+
constexpr bool may_overlap_3 = +"foo" == &"bar\0foo"[4]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
286+
constexpr bool may_overlap_4 = &"xfoo"[1] == &"xfoo"[1]; // both-error {{}} both-note {{addresses of potentially overlapping literals}}
287+
288+
289+
290+
}

clang/test/AST/ByteCode/cxx20.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ static_assert(!b4);
122122
// Returns p advanced by __builtin_strlen(p), i.e. a pointer to the position of
// p's terminating null character.
constexpr auto bar(const char *p) { return p + __builtin_strlen(p); }
123123
constexpr auto b5 = bar(p1) == p1;
124124
static_assert(!b5);
125-
constexpr auto b6 = bar(p1) == ""; // ref-error {{must be initialized by a constant expression}} \
126-
// ref-note {{comparison of addresses of potentially overlapping literals}}
125+
constexpr auto b6 = bar(p1) == ""; // both-error {{must be initialized by a constant expression}} \
126+
// both-note {{comparison of addresses of potentially overlapping literals}}
127127
constexpr auto b7 = bar(p1) + 1 == ""; // both-error {{must be initialized by a constant expression}} \
128128
// both-note {{comparison against pointer '&"test1"[6]' that points past the end of a complete object has unspecified value}}
129129

0 commit comments

Comments
 (0)