Skip to content

[libtooling] DefineImplicitCopyAssignment crashes on structure with array member #67862

Open
@smcpeak

Description

@smcpeak

When using the Clang C++ libtooling API, if I call Sema::ForceDeclarationOfImplicitMembers to declare implicit class members, then call Sema::DefineImplicitCopyAssignment to define the copy assignment operator, for a class that has an array-typed member, it crashes inside Sema::PushOnScopeChains.

Example input:

// test.cc
// Test input for define-implicit.exe.

struct HasArray {
  int arr[2];
};

// EOF

Program to run:

// define-implicit.cc
// Force definition of implicit members.

#include "clang/AST/RecursiveASTVisitor.h"                 // clang::RecursiveASTVisitor
#include "clang/Frontend/ASTUnit.h"                        // clang::ASTUnit
#include "clang/Frontend/CompilerInstance.h"               // clang::CompilerInstance
#include "clang/Frontend/CompilerInvocation.h"             // clang::CompilerInvocation
#include "clang/Frontend/Utils.h"                          // clang::createInvocation
#include "clang/Sema/Sema.h"                               // clang::Sema
#include "clang/Serialization/PCHContainerOperations.h"    // clang::PCHContainerOperations

using clang::dyn_cast;


// Declare and define all implicit class members.
//
// Visiting a class asks Sema to declare that class's implicit members;
// visiting a method asks Sema to define it when it is defaulted, has
// no body, and is not deleted.
class DefineImplicit : public clang::RecursiveASTVisitor<DefineImplicit> {
public:      // data
  // The ASTUnit we want to process.  This class never deletes it.
  clang::ASTUnit *m_astUnit;

public:      // methods
  // 'explicit' so that an ASTUnit pointer is never silently converted
  // into a visitor.  No destructor is declared: there is nothing to
  // release, so the compiler-generated one is sufficient.
  explicit DefineImplicit(clang::ASTUnit *astUnit)
    : m_astUnit(astUnit)
  {}

  // Convenience accessors that forward to the ASTUnit.
  clang::Sema &getSema()
    { return m_astUnit->getSema(); }
  clang::ASTContext &getASTContext()
    { return m_astUnit->getASTContext(); }
  clang::SourceManager &getSourceManager()
    { return m_astUnit->getSourceManager(); }

  // Visit everything, including template instantiations and implicit
  // code.
  bool shouldVisitTemplateInstantiations() const
    { return true; }
  bool shouldVisitImplicitCode() const
    { return true; }

  // Declare the implicit members of 'decl'.
  bool VisitCXXRecordDecl(clang::CXXRecordDecl *decl);

  // Define 'decl' if it is an implicitly definable method.
  bool VisitCXXMethodDecl(clang::CXXMethodDecl *decl);
};


// Visitor hook for classes: ask Sema to declare the implicit members
// of 'decl'.  Always returns true.
bool DefineImplicit::VisitCXXRecordDecl(clang::CXXRecordDecl *decl)
{
  clang::Sema &sema = getSema();
  sema.ForceDeclarationOfImplicitMembers(decl);

  return true;
}


// Visitor hook for methods.  When 'decl' is defaulted, has no body in
// this declaration, and is not deleted, print a line describing it to
// llvm::outs() and then ask Sema to define it through the entry point
// matching its kind (default/copy/move constructor, destructor, or
// copy/move assignment operator).  Always returns true.
bool DefineImplicit::VisitCXXMethodDecl(
  clang::CXXMethodDecl *decl)
{
  // Check the common preconditions for a method that can be implicitly
  // defined, and skip the method as soon as one of them fails.
  if (!decl->isDefaulted() ||
      decl->doesThisDeclarationHaveABody() ||
      decl->isDeleted()) {
    return true;
  }

  clang::SourceLocation const where = decl->getLocation();

  // Announce the method before touching it.  The stream is flushed
  // immediately, presumably so the line is visible even if the
  // definition step below does not return.
  llvm::raw_ostream &out = llvm::outs();
  out << "visiting method \"" << decl->getNameAsString();
  out << "\" with type \"" << decl->getType().getAsString();
  out << "\" at " << where.printToString(getSourceManager());
  out << "\n";
  out.flush();

  // Constructors: only the default, copy and move flavors are handled.
  if (auto *ctorDecl = dyn_cast<clang::CXXConstructorDecl>(decl)) {
    if (ctorDecl->isDefaultConstructor()) {
      getSema().DefineImplicitDefaultConstructor(where, ctorDecl);
    }
    else if (ctorDecl->isCopyConstructor()) {
      getSema().DefineImplicitCopyConstructor(where, ctorDecl);
    }
    else if (ctorDecl->isMoveConstructor()) {
      getSema().DefineImplicitMoveConstructor(where, ctorDecl);
    }
    return true;
  }

  // Destructor.
  if (auto *dtorDecl = dyn_cast<clang::CXXDestructorDecl>(decl)) {
    getSema().DefineImplicitDestructor(where, dtorDecl);
    return true;
  }

  // Assignment operators; any other kind of method is left alone.
  if (decl->isCopyAssignmentOperator()) {
    getSema().DefineImplicitCopyAssignment(where, decl);
  }
  else if (decl->isMoveAssignmentOperator()) {
    getSema().DefineImplicitMoveAssignment(where, decl);
  }

  return true;
}


int main(int argc, char **argv)
{
  std::vector<char const *> commandLine;
  commandLine.push_back(CLANG_LLVM_INSTALL_DIR "/bin/clang");
  for (int i = 1; i < argc; ++i) {
    commandLine.push_back(argv[i]);
  }

  std::shared_ptr<clang::CompilerInvocation> compilerInvocation(
    clang::createInvocation(llvm::ArrayRef(commandLine)));
  if (!compilerInvocation) {
    return 2;
  }

  // Boilerplate.
  std::shared_ptr<clang::PCHContainerOperations> pchContainerOps(
    new clang::PCHContainerOperations());
  clang::DiagnosticOptions *diagnosticOptions =
    &(compilerInvocation->getDiagnosticOpts());
  clang::IntrusiveRefCntPtr<clang::DiagnosticsEngine> diagnosticsEngine(
    clang::CompilerInstance::createDiagnostics(
      diagnosticOptions /*callee takes refcount ownership*/));

  // Run the Clang parser to produce an AST.
  std::unique_ptr<clang::ASTUnit> ast(
    clang::ASTUnit::LoadFromCompilerInvocationAction(
      compilerInvocation,
      pchContainerOps,
      diagnosticsEngine));
  if (ast == nullptr || diagnosticsEngine->getNumErrors() > 0) {
    return 2;
  }
  clang::TranslationUnitDecl *tu =
    ast->getASTContext().getTranslationUnitDecl();

  llvm::outs() << "---- before ----\n";
  tu->dump(llvm::outs());
  llvm::outs().flush();

  llvm::outs() << "---- defining implicit members... ----\n";
  llvm::outs().flush();
  DefineImplicit di(ast.get());
  di.TraverseDecl(tu);

  llvm::outs() << "---- after ----\n";
  tu->dump(llvm::outs());
  llvm::outs().flush();

  return 0;
}


// EOF

Makefile:

# define-implicit/Makefile

# ---- Configuration ----
# Set to 1 if I am using a build from source, 0 for a binary
# distribution.
#
# NOTE: the conditional just below is evaluated as the makefile is
# read, i.e. before pre-config.mk is included, so setting
# USE_SOURCE_BUILD in pre-config.mk does not change which branch is
# taken here.  Edit the value on this line instead.
USE_SOURCE_BUILD := 0

ifeq ($(USE_SOURCE_BUILD),1)
  # Use my own build.
  CLANG_LLVM_SRC_DIR = $(HOME)/wrk/clang/llvm-project
  CLANG_LLVM_INSTALL_DIR = $(CLANG_LLVM_SRC_DIR)/build

else
  # Installation directory from a binary distribution.
  # Has five subdirectories: bin include lib libexec share.
  CLANG_LLVM_INSTALL_DIR = $(HOME)/opt/clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04

endif

# Let the user override my defaults.  The leading '-' means a missing
# pre-config.mk is silently ignored.
-include pre-config.mk


# ---- llvm-config query results ----
# All variables in this section use ':=', so each llvm-config command
# runs once, while the makefile is being read, using the value that
# CLANG_LLVM_INSTALL_DIR has at this point.

# Program to query the various LLVM configuration options.
LLVM_CONFIG := $(CLANG_LLVM_INSTALL_DIR)/bin/llvm-config

# C++ compiler options to ensure ABI compatibility.
LLVM_CXXFLAGS := $(shell $(LLVM_CONFIG) --cxxflags)

# Directory containing the clang library files, both static and dynamic.
LLVM_LIBDIR := $(shell $(LLVM_CONFIG) --libdir)

# Other flags needed for linking, whether statically or dynamically.
LLVM_LDFLAGS_AND_SYSTEM_LIBS := $(shell $(LLVM_CONFIG) --ldflags --system-libs)


# ---- Compiler options ----
# C++ compiler.  Uses the clang++ from the same installation that
# provides the libraries.
CXX = $(CLANG_LLVM_INSTALL_DIR)/bin/clang++

# Compiler options, including preprocessor options.
CXXFLAGS =
CXXFLAGS += -Wall
CXXFLAGS += -Werror

# Silence a warning about a multi-line comment in DeclOpenMP.h.
CXXFLAGS += -Wno-comment

# Get llvm compilation flags.
CXXFLAGS += $(LLVM_CXXFLAGS)

ifeq ($(USE_SOURCE_BUILD),1)
  # When using my own build, I need to separately point at clang includes.
  CXXFLAGS += -I$(CLANG_LLVM_SRC_DIR)/clang/include
  CXXFLAGS += -I$(CLANG_LLVM_INSTALL_DIR)/tools/clang/include
endif

# Tell the source code where the clang installation directory is.
# The single quotes keep the inner double quotes intact through the
# shell, so the macro expands to a C string literal.
CXXFLAGS += -DCLANG_LLVM_INSTALL_DIR='"$(CLANG_LLVM_INSTALL_DIR)"'

# Switch to enable creation of .d files.
GENDEPS_FLAGS = -MMD


# Linker options.  These are appended after the object files on the
# link command line.
LDFLAGS =

# Pull in clang+llvm via libclang-cpp.so, which has everything, but is
# only available as a dynamic library.
LDFLAGS += -lclang-cpp

# Arrange for the compiled binary to search the libdir for that library.
# Otherwise, one can set the LD_LIBRARY_PATH envvar before running it.
# Note: the -rpath switch does not work on Windows.
LDFLAGS += -Wl,-rpath=$(LLVM_LIBDIR)

# It appears that llvm::raw_os_ostream::~raw_os_ostream is missing from
# libclang-cpp, so I have to link with LLVMSupport statically.
LDFLAGS += -lLLVMSupport

# Get the needed -L search path, plus things like -ldl.
LDFLAGS += $(LLVM_LDFLAGS_AND_SYSTEM_LIBS)

# Optional custom modifications.  The leading '-' means a missing
# config.mk is silently ignored.
-include config.mk


# ---- Recipes ----
# Default target.  Declared here, before any other rule, so that it is
# the default goal; its prerequisites are added further down.
all:
.PHONY: all

# Pull in automatic dependencies (the .d files written by the compile
# rule via GENDEPS_FLAGS, if any exist yet).
-include $(wildcard *.d)

# Compile a C++ source file.  $@ is the object file, $< the source.
%.o: %.cc
	$(CXX) -c -o $@ $(GENDEPS_FLAGS) $(CXXFLAGS) $<

# Object files that make up the executable.
OBJS :=
OBJS += define-implicit.o

# Executable.
all: define-implicit.exe
define-implicit.exe: $(OBJS)
	$(CXX) -g -Wall -o $@ $(OBJS) $(LDFLAGS)

# Test: run the tool on the sample input.
.PHONY: check
check: define-implicit.exe test.cc
	./define-implicit.exe test.cc

# Remove build products.
# NOTE(review): nothing visible in this Makefile creates an 'out'
# directory; presumably it is left over from another setup.
.PHONY: clean
clean:
	$(RM) *.o *.d *.exe
	$(RM) -r out


# EOF

Output of lldb -- ./define-implicit.exe test.cc:

[...]
visiting method "operator=" with type "HasArray &(const HasArray &)" at test.cc:4:8
Process 406256 stopped
* thread #1, name = 'define-implicit', stop reason = signal SIGSEGV: invalid address (fault address: 0x8)
    frame #0: 0x00007fffef85d019 libclang-cpp.so.16`clang::Sema::PushOnScopeChains(clang::NamedDecl*, clang::Scope*, bool) + 25
libclang-cpp.so.16`clang::Sema::PushOnScopeChains:
->  0x7fffef85d019 <+25>: testb  $-0x80, 0x8(%rdx)
    0x7fffef85d01d <+29>: jne    0x7fffef85d03f            ; <+63>
    0x7fffef85d01f <+31>: nop    
    0x7fffef85d020 <+32>: movq   0x168(%r15), %rdi
(lldb) bt
* thread #1, name = 'define-implicit', stop reason = signal SIGSEGV: invalid address (fault address: 0x8)
  * frame #0: 0x00007fffef85d019 libclang-cpp.so.16`clang::Sema::PushOnScopeChains(clang::NamedDecl*, clang::Scope*, bool) + 25
    frame #1: 0x00007fffef861392 libclang-cpp.so.16`clang::Sema::LazilyCreateBuiltin(clang::IdentifierInfo*, unsigned int, clang::Scope*, bool, clang::SourceLocation) + 738
    frame #2: 0x00007fffefb8313a libclang-cpp.so.16`clang::Sema::LookupBuiltin(clang::LookupResult&) + 3930
    frame #3: 0x00007fffefb8fda6 libclang-cpp.so.16`clang::Sema::LookupName(clang::LookupResult&, clang::Scope*, bool, bool) + 742
    frame #4: 0x00007fffef94520c libclang-cpp.so.16`buildMemcpyForAssignmentOp(clang::Sema&, clang::SourceLocation, clang::QualType, (anonymous namespace)::ExprBuilder const&, (anonymous namespace)::ExprBuilder const&) + 716
    frame #5: 0x00007fffef92ede2 libclang-cpp.so.16`buildSingleCopyAssign(clang::Sema&, clang::SourceLocation, clang::QualType, (anonymous namespace)::ExprBuilder const&, (anonymous namespace)::ExprBuilder const&, bool, bool) + 178
    frame #6: 0x00007fffef92e748 libclang-cpp.so.16`clang::Sema::DefineImplicitCopyAssignment(clang::SourceLocation, clang::CXXMethodDecl*) + 2872
    frame #7: 0x0000555555558af4 define-implicit.exe`DefineImplicit::VisitCXXMethodDecl(clang::CXXMethodDecl*) + 756
    frame #8: 0x000055555564b35d define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::WalkUpFromCXXMethodDecl(clang::CXXMethodDecl*) + 93
    frame #9: 0x000055555556aeef define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseCXXMethodDecl(clang::CXXMethodDecl*) + 79
    frame #10: 0x000055555555a5ec define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseDecl(clang::Decl*) + 2620
    frame #11: 0x00005555555e9ff8 define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseDeclContextHelper(clang::DeclContext*) + 200
    frame #12: 0x00005555555683f5 define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseCXXRecordDecl(clang::CXXRecordDecl*) + 213
    frame #13: 0x000055555555a2bb define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseDecl(clang::Decl*) + 1803
    frame #14: 0x00005555555e9ff8 define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseDeclContextHelper(clang::DeclContext*) + 200
    frame #15: 0x000055555556f251 define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseTranslationUnitDecl(clang::TranslationUnitDecl*) + 513
    frame #16: 0x000055555555aaf6 define-implicit.exe`clang::RecursiveASTVisitor<DefineImplicit>::TraverseDecl(clang::Decl*) + 3910
    frame #17: 0x0000555555558ee3 define-implicit.exe`main + 899
    frame #18: 0x00007fffed50f0b3 libc.so.6`__libc_start_main + 243
    frame #19: 0x00005555555586fe define-implicit.exe`_start + 46

The above is observed with Clang-16.0.0. The same thing happens with a trunk build from 2023-09-13.

Metadata

Metadata

Assignees

No one assigned

    Labels

    clang:tooling (LibTooling), crash (Prefer [crash-on-valid] or [crash-on-invalid])

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions