Skip to content

Commit 2ccac07

Browse files
authored
[C++20][Modules] Fix crash when function and lambda inside loaded from different modules (#109167)
Summary: Because AST loading code is lazy and happens in unpredictable order, it is possible that a function and lambda inside the function can be loaded from different modules. As a result, the captured DeclRefExpr won’t match the corresponding VarDecl inside the function. This situation is reflected in the AST as follows: ``` FunctionDecl 0x555564f4aff0 <Conv.h:33:1, line:41:1> line:33:35 imported in ./thrift_cpp2_base.h hidden tryTo 'Expected<Tgt, const char *> ()' inline |-also in ./folly-conv.h `-CompoundStmt 0x555564f7cfc8 <col:43, line:41:1> |-DeclStmt 0x555564f7ced8 <line:34:3, col:17> | `-VarDecl 0x555564f7cef8 <col:3, col:16> col:7 imported in ./thrift_cpp2_base.h hidden referenced result 'Tgt' cinit | `-IntegerLiteral 0x555564f7d080 <col:16> 'int' 0 |-CallExpr 0x555564f7cea8 <line:39:3, col:76> '<dependent type>' | |-UnresolvedLookupExpr 0x555564f7bea0 <col:3, col:19> '<overloaded function type>' lvalue (no ADL) = 'then_' 0x555564f7bef0 | |-CXXTemporaryObjectExpr 0x555564f7bcb0 <col:25, col:45> 'Expected<bool, int>':'folly::Expected<bool, int>' 'void () noexcept' zeroing | `-LambdaExpr 0x555564f7bc88 <col:48, col:75> '(lambda at Conv.h:39:48)' | |-CXXRecordDecl 0x555564f76b88 <col:48> col:48 imported in ./folly-conv.h hidden implicit <undeserialized declarations> class definition | | |-also in ./thrift_cpp2_base.h | | `-DefinitionData lambda empty standard_layout trivially_copyable literal can_const_default_init | | |-DefaultConstructor defaulted_is_constexpr | | |-CopyConstructor simple trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveConstructor exists simple trivial needs_implicit | | |-CopyAssignment trivial has_const_param needs_implicit implicit_has_const_param | | |-MoveAssignment | | `-Destructor simple irrelevant trivial constexpr needs_implicit | `-CompoundStmt 0x555564f7d1a8 <col:58, col:75> | `-ReturnStmt 0x555564f7d198 <col:60, col:67> | `-DeclRefExpr 0x555564f7d0a0 <col:67> 'Tgt' lvalue Var 0x555564f7d0c8 'result' 'Tgt' refers_to_enclosing_variable_or_capture `-ReturnStmt 0x555564f7bc78 <line:40:3, col:11> `-InitListExpr 0x555564f7bc38 <col:10, col:11> 'void' ``` This diff modifies the AST deserialization process to load lambdas within the canonical function declaration sooner, immediately following the function, ensuring that they are loaded from the same module. Re-land #104512 Added test case that caused crash due to multiple enclosed lambdas deserialization. Test Plan: check-clang
1 parent ae7b454 commit 2ccac07

File tree

10 files changed

+206
-0
lines changed

10 files changed

+206
-0
lines changed

clang/include/clang/Serialization/ASTBitCodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,12 @@ enum ASTRecordTypes {
724724

725725
/// Record code for vtables to emit.
726726
VTABLES_TO_EMIT = 70,
727+
728+
/// Record code for the FunctionDecl to lambdas mapping. These lambdas have to
729+
/// be loaded right after the function they belong to. It is required to have
730+
/// canonical declaration for the lambda class from the same module as
731+
/// enclosing function.
732+
FUNCTION_DECL_TO_LAMBDAS_MAP = 71,
727733
};
728734

729735
/// Record types used within a source manager block.

clang/include/clang/Serialization/ASTReader.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,18 @@ class ASTReader
532532
/// namespace as if it is not delayed.
533533
DelayedNamespaceOffsetMapTy DelayedNamespaceOffsetMap;
534534

535+
/// Mapping from FunctionDecl IDs to the corresponding lambda IDs.
536+
///
537+
/// These lambdas have to be loaded right after the function they belong to.
538+
/// It is required to have canonical declaration for lambda class from the
539+
/// same module as enclosing function. This is required to correctly resolve
540+
/// captured variables in the lambda. Without this, due to lazy
541+
/// deserialization, canonical declarations for the function and lambdas can
542+
/// be selected from different modules and DeclRefExprs may refer to the AST
543+
/// nodes that don't exist in the function.
544+
llvm::DenseMap<GlobalDeclID, SmallVector<GlobalDeclID, 4>>
545+
FunctionToLambdasMap;
546+
535547
struct PendingUpdateRecord {
536548
Decl *D;
537549
GlobalDeclID ID;

clang/include/clang/Serialization/ASTWriter.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,14 @@ class ASTWriter : public ASTDeserializationListener,
233233
/// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
234234
llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
235235

236+
/// Mapping from FunctionDecl to the list of lambda IDs inside the function.
237+
///
238+
/// These lambdas have to be loaded right after the function they belong to.
239+
/// In order to have canonical declaration for lambda class from the same
240+
/// module as enclosing function during deserialization.
241+
llvm::DenseMap<const Decl *, SmallVector<LocalDeclID, 4>>
242+
FunctionToLambdasMap;
243+
236244
/// Offset of each declaration in the bitstream, indexed by
237245
/// the declaration's ID.
238246
std::vector<serialization::DeclOffset> DeclOffsets;

clang/lib/Serialization/ASTReader.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3856,6 +3856,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
38563856
break;
38573857
}
38583858

3859+
case FUNCTION_DECL_TO_LAMBDAS_MAP:
3860+
for (unsigned I = 0, N = Record.size(); I != N; /*in loop*/) {
3861+
GlobalDeclID ID = ReadDeclID(F, Record, I);
3862+
auto &Lambdas = FunctionToLambdasMap[ID];
3863+
unsigned NN = Record[I++];
3864+
Lambdas.reserve(NN);
3865+
for (unsigned II = 0; II < NN; II++)
3866+
Lambdas.push_back(ReadDeclID(F, Record, I));
3867+
}
3868+
break;
3869+
38593870
case OBJC_CATEGORIES_MAP:
38603871
if (F.LocalNumObjCCategoriesInMap != 0)
38613872
return llvm::createStringError(

clang/lib/Serialization/ASTReaderDecl.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4351,6 +4351,16 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
43514351
reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
43524352
DC->setHasExternalVisibleStorage(true);
43534353
}
4354+
4355+
// Load any pending lambdas for the function.
4356+
if (auto *FD = dyn_cast<FunctionDecl>(D); FD && FD->isCanonicalDecl()) {
4357+
if (auto IT = FunctionToLambdasMap.find(ID);
4358+
IT != FunctionToLambdasMap.end()) {
4359+
for (auto LID : IT->second)
4360+
GetDecl(LID);
4361+
FunctionToLambdasMap.erase(IT);
4362+
}
4363+
}
43544364
}
43554365

43564366
void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {

clang/lib/Serialization/ASTWriter.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,7 @@ void ASTWriter::WriteBlockInfoBlock() {
903903
RECORD(PENDING_IMPLICIT_INSTANTIATIONS);
904904
RECORD(UPDATE_VISIBLE);
905905
RECORD(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD);
906+
RECORD(FUNCTION_DECL_TO_LAMBDAS_MAP);
906907
RECORD(DECL_UPDATE_OFFSETS);
907908
RECORD(DECL_UPDATES);
908909
RECORD(CUDA_SPECIAL_DECL_REFS);
@@ -5707,6 +5708,27 @@ void ASTWriter::WriteDeclAndTypes(ASTContext &Context) {
57075708
Stream.EmitRecord(DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD,
57085709
DelayedNamespaceRecord);
57095710

5711+
if (!FunctionToLambdasMap.empty()) {
5712+
// TODO: on disk hash table for function to lambda mapping might be more
5713+
// efficent becuase it allows lazy deserialization.
5714+
RecordData FunctionToLambdasMapRecord;
5715+
for (const auto &Pair : FunctionToLambdasMap) {
5716+
FunctionToLambdasMapRecord.push_back(
5717+
GetDeclRef(Pair.first).getRawValue());
5718+
FunctionToLambdasMapRecord.push_back(Pair.second.size());
5719+
for (const auto &Lambda : Pair.second)
5720+
FunctionToLambdasMapRecord.push_back(Lambda.getRawValue());
5721+
}
5722+
5723+
auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
5724+
Abv->Add(llvm::BitCodeAbbrevOp(FUNCTION_DECL_TO_LAMBDAS_MAP));
5725+
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));
5726+
Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));
5727+
unsigned FunctionToLambdaMapAbbrev = Stream.EmitAbbrev(std::move(Abv));
5728+
Stream.EmitRecord(FUNCTION_DECL_TO_LAMBDAS_MAP, FunctionToLambdasMapRecord,
5729+
FunctionToLambdaMapAbbrev);
5730+
}
5731+
57105732
const TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
57115733
// Create a lexical update block containing all of the declarations in the
57125734
// translation unit that do not come from other AST files.

clang/lib/Serialization/ASTWriterDecl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,11 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) {
15211521
} else {
15221522
Record.push_back(0);
15231523
}
1524+
// For lambdas inside canonical FunctionDecl remember the mapping.
1525+
if (auto FD = llvm::dyn_cast_or_null<FunctionDecl>(D->getDeclContext());
1526+
FD && FD->isCanonicalDecl()) {
1527+
Writer.FunctionToLambdasMap[FD].push_back(Writer.GetDeclRef(D));
1528+
}
15241529
} else {
15251530
Record.push_back(CXXRecNotTemplate);
15261531
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// RUN: rm -fR %t
2+
// RUN: split-file %s %t
3+
// RUN: cd %t
4+
// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized folly-conv.h
5+
// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized thrift_cpp2_base.h
6+
// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header -Werror=uninitialized -fmodule-file=folly-conv.pcm -fmodule-file=thrift_cpp2_base.pcm logger_base.h
7+
8+
//--- Conv.h
9+
#pragma once
10+
11+
template <typename _Tp, typename _Up = _Tp&&>
12+
_Up __declval(int);
13+
14+
template <typename _Tp>
15+
auto declval() noexcept -> decltype(__declval<_Tp>(0));
16+
17+
namespace folly {
18+
19+
template <class Value, class Error>
20+
struct Expected {
21+
template <class Yes>
22+
auto thenOrThrow() -> decltype(declval<Value&>()) {
23+
return 1;
24+
}
25+
};
26+
27+
struct ExpectedHelper {
28+
template <class Error, class T>
29+
static constexpr Expected<T, Error> return_(T) {
30+
return Expected<T, Error>();
31+
}
32+
33+
template <class This, class Fn, class E = int, class T = ExpectedHelper>
34+
static auto then_(This&&, Fn&&)
35+
-> decltype(T::template return_<E>((declval<Fn>()(true), 0))) {
36+
return Expected<int, int>();
37+
}
38+
};
39+
40+
template <class Tgt>
41+
inline Expected<Tgt, const char*> tryTo() {
42+
Tgt result = 0;
43+
// In build with asserts:
44+
// clang/lib/Sema/SemaTemplateInstantiate.cpp: llvm::PointerUnion<Decl *, LocalInstantiationScope::DeclArgumentPack *> *clang::LocalInstantiationScope::findInstantiationOf(const Decl *): Assertion `isa<LabelDecl>(D) && "declaration not instantiated in this scope"' failed.
45+
// In release build compilation error on the line below inside lambda:
46+
// error: variable 'result' is uninitialized when used here [-Werror,-Wuninitialized]
47+
ExpectedHelper::then_(Expected<bool, int>(), [&](bool) { return result; });
48+
return {};
49+
}
50+
51+
} // namespace folly
52+
53+
inline void bar() {
54+
folly::tryTo<int>();
55+
}
56+
// expected-no-diagnostics
57+
58+
//--- folly-conv.h
59+
#pragma once
60+
#include "Conv.h"
61+
// expected-no-diagnostics
62+
63+
//--- thrift_cpp2_base.h
64+
#pragma once
65+
#include "Conv.h"
66+
// expected-no-diagnostics
67+
68+
//--- logger_base.h
69+
#pragma once
70+
import "folly-conv.h";
71+
import "thrift_cpp2_base.h";
72+
73+
inline void foo() {
74+
folly::tryTo<unsigned>();
75+
}
76+
// expected-no-diagnostics
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: rm -fR %t
2+
// RUN: split-file %s %t
3+
// RUN: cd %t
4+
// RUN: %clang_cc1 -std=c++20 -emit-header-unit -xc++-user-header header.h
5+
// RUN: %clang_cc1 -std=c++20 -fmodule-file=header.pcm main.cpp
6+
7+
//--- header.h
8+
template <typename T>
9+
void f(T) {}
10+
11+
class A {
12+
virtual ~A();
13+
};
14+
15+
inline A::~A() {
16+
f([](){});
17+
}
18+
19+
struct B {
20+
void g() {
21+
f([](){
22+
[](){};
23+
});
24+
}
25+
};
26+
// expected-no-diagnostics
27+
28+
//--- main.cpp
29+
import "header.h";
30+
// expected-no-diagnostics
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: %clang_cc1 %s -std=c++11 -emit-pch -o %t
2+
// RUN: %clang_cc1 %s -std=c++11 -include-pch %t -fsyntax-only -verify
3+
4+
// expected-no-diagnostics
5+
#ifndef HEADER
6+
#define HEADER
7+
8+
// No crash or assertion failure on multiple nested lambdas deserialization.
9+
template <typename T>
10+
void b() {
11+
[] {
12+
[]{
13+
[]{
14+
[]{
15+
[]{
16+
}();
17+
}();
18+
}();
19+
}();
20+
}();
21+
}
22+
23+
void foo() {
24+
b<int>();
25+
}
26+
#endif

0 commit comments

Comments
 (0)