Skip to content
This repository has been archived by the owner on Apr 23, 2020. It is now read-only.

Commit

Permalink
Introduce a new lexer function to compute the "preamble" of a file,
Browse files Browse the repository at this point in the history
which is the part of the file that contains all of the initial
comments, includes, and preprocessor directives that occur before any
of the actual code. Added a new -print-preamble cc1 action that is
only used for testing.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108913 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
DougGregor committed Jul 20, 2010
1 parent 4751a53 commit f033f1d
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 1 deletion.
3 changes: 3 additions & 0 deletions include/clang/Driver/CC1Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ def fixit_EQ : Joined<"-fixit=">,
HelpText<"Apply fix-it advice creating a file with the given suffix">;
def parse_print_callbacks : Flag<"-parse-print-callbacks">,
HelpText<"Run parser and print each callback invoked">;
// -print-preamble: cc1 action that prints the leading "preamble" of the input
// file. It was added for testing the lexer's preamble computation.
def print_preamble : Flag<"-print-preamble">,
HelpText<"Print the \"preamble\" of a file, which is a candidate for implicit"
" precompiled headers.">;
def emit_html : Flag<"-emit-html">,
HelpText<"Output input source as HTML">;
def ast_print : Flag<"-ast-print">,
Expand Down
12 changes: 11 additions & 1 deletion include/clang/Frontend/FrontendActions.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ class ASTMergeAction : public FrontendAction {
virtual bool hasCodeCompletionSupport() const;
};

/// \brief Frontend action that prints the "preamble" of the current input
/// file (see PrintPreambleAction::ExecuteAction in FrontendActions.cpp).
class PrintPreambleAction : public FrontendAction {
protected:
/// Performs the action; defined out of line.
void ExecuteAction();
/// This action never supplies an AST consumer: it always returns null.
virtual ASTConsumer *CreateASTConsumer(CompilerInstance &, llvm::StringRef) {
return 0;
}

/// Always reports true.
/// NOTE(review): presumably this tells the frontend that no AST/Sema setup is
/// needed for this action -- confirm against FrontendAction.
virtual bool usesPreprocessorOnly() const { return true; }
};

//===----------------------------------------------------------------------===//
// Preprocessor Actions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -174,7 +184,7 @@ class PrintPreprocessedAction : public PreprocessorFrontendAction {

virtual bool hasPCHSupport() const { return true; }
};

} // end namespace clang

#endif
1 change: 1 addition & 0 deletions include/clang/Frontend/FrontendOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace frontend {
ParseSyntaxOnly, ///< Parse and perform semantic analysis.
PluginAction, ///< Run a plugin action, \see ActionName.
PrintDeclContext, ///< Print DeclContext and their Decls.
PrintPreamble, ///< Print the "preamble" of the input file
PrintPreprocessedInput, ///< -E mode.
RewriteMacros, ///< Expand macros but not #includes.
RewriteObjC, ///< ObjC->C Rewriter.
Expand Down
13 changes: 13 additions & 0 deletions include/clang/Lex/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,19 @@ class Lexer : public PreprocessorLexer {
const SourceManager &SM,
const LangOptions &LangOpts);

/// \brief Compute the preamble of the given file.
///
/// The preamble of a file contains the initial comments, include directives,
/// and other preprocessor directives that occur before the code in this
/// particular file actually begins. The preamble of the main source file is
/// a potential prefix header.
///
/// The buffer is scanned with a raw lexer using default-constructed language
/// options; no macro expansion or include processing takes place. If a
/// conditional block (#if, #ifdef, #ifndef) opened inside the candidate
/// preamble is still unterminated when the scan stops, the preamble is cut
/// back to the '#' of the outermost such conditional.
///
/// \param Buffer The memory buffer containing the file's contents.
///
/// \returns The offset into the file where the preamble ends and the rest
/// of the file begins.
static unsigned ComputePreamble(const llvm::MemoryBuffer *Buffer);

//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
private:
Expand Down
3 changes: 3 additions & 0 deletions lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ static const char *getActionName(frontend::ActionKind Kind) {
case frontend::ParsePrintCallbacks: return "-parse-print-callbacks";
case frontend::ParseSyntaxOnly: return "-fsyntax-only";
case frontend::PrintDeclContext: return "-print-decl-contexts";
case frontend::PrintPreamble: return "-print-preamble";
case frontend::PrintPreprocessedInput: return "-E";
case frontend::RewriteMacros: return "-rewrite-macros";
case frontend::RewriteObjC: return "-rewrite-objc";
Expand Down Expand Up @@ -989,6 +990,8 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
Opts.ProgramAction = frontend::ParseSyntaxOnly; break;
case OPT_print_decl_contexts:
Opts.ProgramAction = frontend::PrintDeclContext; break;
case OPT_print_preamble:
Opts.ProgramAction = frontend::PrintPreamble; break;
case OPT_E:
Opts.ProgramAction = frontend::PrintPreprocessedInput; break;
case OPT_rewrite_macros:
Expand Down
30 changes: 30 additions & 0 deletions lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;

Expand Down Expand Up @@ -192,3 +193,32 @@ void PrintPreprocessedAction::ExecuteAction() {
DoPrintPreprocessedInput(CI.getPreprocessor(), OS,
CI.getPreprocessorOutputOpts());
}

/// Print the preamble of the current input file to standard output.
///
/// Only unpreprocessed source inputs (C, C++, Objective-C, Objective-C++ and
/// OpenCL) are handled; for every other input kind the action does nothing.
/// The file is read into a memory buffer, Lexer::ComputePreamble determines
/// the length of the preamble, and exactly that many leading bytes are
/// written to llvm::outs().
void PrintPreambleAction::ExecuteAction() {
  switch (getCurrentFileKind()) {
  case IK_C:
  case IK_CXX:
  case IK_ObjC:
  case IK_ObjCXX:
  case IK_OpenCL:
    break;

  case IK_None:
  case IK_Asm:
  case IK_PreprocessedC:
  case IK_PreprocessedCXX:
  case IK_PreprocessedObjC:
  case IK_PreprocessedObjCXX:
  case IK_AST:
  case IK_LLVM_IR:
    // We can't do anything with these.
    return;
  }

  // Hold the buffer in an OwningPtr so that it is released on every path out
  // of this function instead of relying on a manual delete.
  llvm::OwningPtr<llvm::MemoryBuffer> Buffer(
      llvm::MemoryBuffer::getFile(getCurrentFile()));

  // No buffer (presumably the file could not be read): print nothing, as
  // before.
  if (!Buffer)
    return;

  unsigned Preamble = Lexer::ComputePreamble(Buffer.get());
  llvm::outs().write(Buffer->getBufferStart(), Preamble);
}
125 changes: 125 additions & 0 deletions lib/Lex/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
Expand Down Expand Up @@ -247,6 +248,130 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
return TheTok.getLength();
}

namespace {
/// How Lexer::ComputePreamble treats a preprocessor directive, keyed by the
/// directive's name.
enum PreambleDirectiveKind {
/// A directive that is allowed in the preamble and simply skipped over.
PDK_Skipped,
/// Opens a conditional block (#if, #ifdef, #ifndef); increases the nesting
/// count.
PDK_StartIf,
/// Closes a conditional block (#endif); decreases the nesting count, or ends
/// the preamble when there is no open conditional to close.
PDK_EndIf,
/// Any directive name that is not recognized; the preamble ends at its '#'.
PDK_Unknown
};
}

/// Compute the length, in bytes, of the preamble at the start of \p Buffer.
///
/// The buffer is scanned with a raw lexer (default language options). Every
/// recognized preprocessor directive is skipped up to the first token of the
/// next line. The scan stops at the first token that is not part of a
/// recognized directive: ordinary code, an unknown directive, a '#' that is
/// not followed on the same line by a directive name, an unmatched #endif, or
/// the end of the file. If a conditional block opened during the scan is still
/// open at that point, the preamble is cut back to the '#' of the outermost
/// open #if/#ifdef/#ifndef.
///
/// \returns the offset from the start of the buffer at which the preamble
/// ends.
unsigned Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer) {
  // Create a lexer starting at the beginning of the file. Note that we use a
  // "fake" file source location at offset 1 so that the lexer will track our
  // position within the file.
  const unsigned StartOffset = 1;
  SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
  LangOptions LangOpts;
  Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
                 Buffer->getBufferStart(), Buffer->getBufferEnd());

  bool InPreprocessorDirective = false;
  Token TheTok;
  // The '#' of the outermost conditional that is currently open; only
  // meaningful while IfCount is non-zero.
  Token IfStartTok;
  unsigned IfCount = 0;
  do {
    TheLexer.LexFromRawLexer(TheTok);

    if (InPreprocessorDirective) {
      // If we've hit the end of the file, we're done.
      if (TheTok.getKind() == tok::eof) {
        InPreprocessorDirective = false;
        break;
      }

      // If we haven't hit the end of the preprocessor directive, skip this
      // token.
      if (!TheTok.isAtStartOfLine())
        continue;

      // We've passed the end of the preprocessor directive, and will look
      // at this token again below.
      InPreprocessorDirective = false;
    }

    // Comments are okay; skip over them.
    if (TheTok.getKind() == tok::comment)
      continue;

    if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
      // This is the start of a preprocessor directive.
      Token HashTok = TheTok;
      InPreprocessorDirective = true;

      // Figure out which directive this is. Since we're lexing raw tokens,
      // we don't have an identifier table available. Instead, just look at
      // the raw identifier to recognize and categorize preprocessor directives.
      //
      // The directive name must be on the same line as the '#'. An identifier
      // that starts a new line is not a directive name: the '#' was a null
      // directive and the identifier is ordinary code, so we must not skip it
      // as if it were, e.g., an #include.
      TheLexer.LexFromRawLexer(TheTok);
      if (TheTok.getKind() == tok::identifier && !TheTok.isAtStartOfLine() &&
          !TheTok.needsCleaning()) {
        // Map the token's fake source location back to a buffer pointer.
        const char *IdStart =
            Buffer->getBufferStart() +
            (TheTok.getLocation().getRawEncoding() - StartOffset);
        llvm::StringRef Keyword(IdStart, TheTok.getLength());
        PreambleDirectiveKind PDK
          = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
              .Case("include", PDK_Skipped)
              .Case("__include_macros", PDK_Skipped)
              .Case("define", PDK_Skipped)
              .Case("undef", PDK_Skipped)
              .Case("line", PDK_Skipped)
              .Case("error", PDK_Skipped)
              .Case("pragma", PDK_Skipped)
              .Case("import", PDK_Skipped)
              .Case("include_next", PDK_Skipped)
              .Case("warning", PDK_Skipped)
              .Case("ident", PDK_Skipped)
              .Case("sccs", PDK_Skipped)
              .Case("assert", PDK_Skipped)
              .Case("unassert", PDK_Skipped)
              .Case("if", PDK_StartIf)
              .Case("ifdef", PDK_StartIf)
              .Case("ifndef", PDK_StartIf)
              .Case("elif", PDK_Skipped)
              .Case("else", PDK_Skipped)
              .Case("endif", PDK_EndIf)
              .Default(PDK_Unknown);

        switch (PDK) {
        case PDK_Skipped:
          continue;

        case PDK_StartIf:
          // Remember where the outermost open conditional begins so that the
          // preamble can be cut back to it if it is never closed.
          if (IfCount == 0)
            IfStartTok = HashTok;

          ++IfCount;
          continue;

        case PDK_EndIf:
          // Mismatched #endif. The preamble ends here.
          if (IfCount == 0)
            break;

          --IfCount;
          continue;

        case PDK_Unknown:
          // We don't know what this directive is; stop at the '#'.
          break;
        }
      }

      // We only end up here if we didn't recognize the preprocessor
      // directive or it was one that can't occur in the preamble at this
      // point. Roll back the current token to the location of the '#'.
      InPreprocessorDirective = false;
      TheTok = HashTok;
    }

    // We hit a token that does not belong to the preamble (or rolled back to
    // the '#' of a directive that ends it).
    break;
  } while (true);

  // With an unterminated conditional, end the preamble where that conditional
  // starts; otherwise end it at the token that stopped the scan.
  SourceLocation End = IfCount? IfStartTok.getLocation() : TheTok.getLocation();
  return End.getRawEncoding() - StartLoc.getRawEncoding();
}

//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//
Expand Down
11 changes: 11 additions & 0 deletions test/Lexer/Inputs/preamble.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Preamble detection test: see below for comments and test commands.

#include <blah>
#ifndef FOO
#else
#ifdef BAR
#elif WIBBLE
#endif
#pragma unknown
#endif

25 changes: 25 additions & 0 deletions test/Lexer/preamble.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Preamble detection test: see below for comments and test commands.

#include <blah>
#ifndef FOO
#else
#ifdef BAR
#elif WIBBLE
#endif
#pragma unknown
#endif

#ifdef WIBBLE
#include "honk"
#else
int foo();
#endif

// This test checks for detection of the preamble of a file, which
// includes all of the starting comments and #includes. Note that any
// changes to the preamble part of this file must be mirrored in
// Inputs/preamble.txt, since we diff against it.

// RUN: %clang_cc1 -print-preamble %s > %t
// RUN: diff %t %S/Inputs/preamble.txt

1 change: 1 addition & 0 deletions tools/driver/cc1_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
}

case PrintDeclContext: return new DeclContextPrintAction();
case PrintPreamble: return new PrintPreambleAction();
case PrintPreprocessedInput: return new PrintPreprocessedAction();
case RewriteMacros: return new RewriteMacrosAction();
case RewriteObjC: return new RewriteObjCAction();
Expand Down

0 comments on commit f033f1d

Please sign in to comment.