Skip to content

[Clang][Comments] Support for parsing headers in Doxygen \par commands #91100

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,15 @@ here. Generic improvements to Clang as a whole or to its underlying
infrastructure are described first, followed by language-specific
sections with improvements to Clang's support for those languages.

- The ``\par`` documentation comment command now supports an optional
argument, which denotes the header of the paragraph started by
an instance of the ``\par`` command. The implementation
of the argument handling matches its semantics
`in Doxygen <https://www.doxygen.nl/manual/commands.html#cmdpar>`_.
Namely, any text on the same line as the ``\par`` command will become
a header for the paragraph, and if there is no text then the command
will start a new paragraph.

C++ Language Changes
--------------------
- C++17 support is now completed, with the enablement of the
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/AST/CommentCommandTraits.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ struct CommandInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsHeaderfileCommand : 1;

/// True if this is a \\par command.
LLVM_PREFERRED_TYPE(bool)
unsigned IsParCommand : 1;

/// True if we don't want to warn about this command being passed an empty
/// paragraph. Meaningful only for block commands.
LLVM_PREFERRED_TYPE(bool)
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/AST/CommentCommands.td
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class Command<string name> {
bit IsThrowsCommand = 0;
bit IsDeprecatedCommand = 0;
bit IsHeaderfileCommand = 0;
bit IsParCommand = 0;

bit IsEmptyParagraphAllowed = 0;

Expand Down Expand Up @@ -156,7 +157,7 @@ def Date : BlockCommand<"date">;
def Invariant : BlockCommand<"invariant">;
def Li : BlockCommand<"li">;
def Note : BlockCommand<"note">;
def Par : BlockCommand<"par">;
// \par accepts a single optional argument (the paragraph heading);
// IsParCommand marks the command so the parser can treat it specially.
def Par : BlockCommand<"par"> { let IsParCommand = 1; let NumArgs = 1; }
def Post : BlockCommand<"post">;
def Pre : BlockCommand<"pre">;
def Remark : BlockCommand<"remark">;
Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/AST/CommentParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ class Parser {
ArrayRef<Comment::Argument>
parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);

/// Parse the heading argument(s) of a \\par block command.
///
/// \param Retokenizer source of the text following the command.
/// \param NumArgs maximum number of arguments to parse.
/// \returns the arguments that were actually parsed; this may be fewer than
/// \p NumArgs (including none) when no heading text is present.
ArrayRef<Comment::Argument>
parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);

BlockCommandComment *parseBlockCommand();
InlineCommandComment *parseInlineCommand();

Expand All @@ -123,4 +126,3 @@ class Parser {
} // end namespace clang

#endif

78 changes: 78 additions & 0 deletions clang/lib/AST/CommentParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,63 @@ class TextTokenRetokenizer {
return true;
}

// Check if this line starts with @par or \par
bool startsWithParCommand() {
unsigned Offset = 1;

// Skip all whitespace characters at the beginning.
// This needs to backtrack because Pos has already advanced past the
// actual \par or @par command by the time this function is called.
while (isWhitespace(*(Pos.BufferPtr - Offset)))
Offset++;

// Once we've reached the whitespace, backtrack and check if the previous
// four characters are \par or @par.
llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
}

/// Lex the heading argument of a \\par command.
///
/// \param Tok receives the heading token on success.
/// \returns true if non-empty heading text was read into \p Tok; false if
/// the retokenizer is at its end, the preceding text is not a \\par / @par
/// command, or no heading characters were found.
bool lexParHeading(Token &Tok) {
  if (isEnd())
    return false;

  const Position StartPos = Pos;

  consumeWhitespace();
  const char *HeadingBegin = Pos.BufferPtr;
  SourceLocation HeadingLoc = getSourceLocation();

  // Note: on this path Pos is left after the whitespace skipped above; it is
  // not rewound to StartPos.
  if (!startsWithParCommand())
    return false;

  // Collect characters up to and including the last one of the current
  // token, which is effectively the end of the line. That text is the
  // heading, if there is one.
  SmallString<32> Heading;
  while (!isEnd()) {
    Heading.push_back(peek());
    // Decide before consuming: consumeChar() may move Pos on to another
    // token, which would change BufferPtr / BufferEnd.
    const bool WasLastCharOfToken = Pos.BufferPtr + 1 == Pos.BufferEnd;
    consumeChar();
    if (WasLastCharOfToken)
      break;
  }

  const unsigned Length = Heading.size();
  if (Length == 0) {
    Pos = StartPos;
    return false;
  }

  // Copy the heading (plus its terminating NUL) into allocator-owned storage
  // so the token text does not point into the local buffer.
  char *Storage = Allocator.Allocate<char>(Length + 1);
  memcpy(Storage, Heading.c_str(), Length + 1);
  StringRef Text(Storage, Length);

  formTokenWithChars(Tok, HeadingLoc, HeadingBegin, Length, Text);
  return true;
}

/// Extract a word -- sequence of non-whitespace characters.
bool lexWord(Token &Tok) {
if (isEnd())
Expand Down Expand Up @@ -394,6 +451,24 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
return llvm::ArrayRef(Args, ParsedArgs);
}

/// Parse up to \p NumArgs heading arguments for a \\par command.
///
/// Storage for \p NumArgs arguments is taken from the parser's allocator;
/// the returned ArrayRef covers only the arguments that were actually lexed.
ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
                            unsigned NumArgs) {
  assert(NumArgs > 0);
  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
      Comment::Argument[NumArgs];

  unsigned ParsedArgs = 0;
  // Stop as soon as every slot is filled or no further heading can be lexed.
  for (Token Heading;
       ParsedArgs < NumArgs && Retokenizer.lexParHeading(Heading);
       ++ParsedArgs) {
    const SourceRange HeadingRange(Heading.getLocation(),
                                   Heading.getEndLocation());
    Args[ParsedArgs] = Comment::Argument{HeadingRange, Heading.getText()};
  }

  return llvm::ArrayRef(Args, ParsedArgs);
}

BlockCommandComment *Parser::parseBlockCommand() {
assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));

Expand Down Expand Up @@ -449,6 +524,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
else if (Info->IsThrowsCommand)
S.actOnBlockCommandArgs(
BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
else if (Info->IsParCommand)
S.actOnBlockCommandArgs(BC,
parseParCommandArgs(Retokenizer, Info->NumArgs));
else
S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));

Expand Down
8 changes: 3 additions & 5 deletions clang/test/Index/comment-misc-tags.m
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,16 @@ @interface IOCommandGate

struct Test {int filler;};

// CHECK: (CXComment_BlockCommand CommandName=[par]
// CHECK: (CXComment_BlockCommand CommandName=[par] Arg[0]=User defined paragraph:
// CHECK-NEXT: (CXComment_Paragraph
// CHECK-NEXT: (CXComment_Text Text=[ User defined paragraph:] HasTrailingNewline)
// CHECK-NEXT: (CXComment_Text Text=[ Contents of the paragraph.])))
// CHECK: (CXComment_BlockCommand CommandName=[par]
// CHECK-NEXT: (CXComment_Paragraph
// CHECK-NEXT: (CXComment_Text Text=[ New paragraph under the same heading.])))
// CHECK-NEXT: (CXComment_Text Text=[New paragraph under the same heading.])))
// CHECK: (CXComment_BlockCommand CommandName=[note]
// CHECK-NEXT: (CXComment_Paragraph
// CHECK-NEXT: (CXComment_Text Text=[ This note consists of two paragraphs.] HasTrailingNewline)
// CHECK-NEXT: (CXComment_Text Text=[ This is the first paragraph.])))
// CHECK: (CXComment_BlockCommand CommandName=[par]
// CHECK-NEXT: (CXComment_Paragraph
// CHECK-NEXT: (CXComment_Text Text=[ And this is the second paragraph.])))

// CHECK-NEXT: (CXComment_Text Text=[And this is the second paragraph.])))
137 changes: 137 additions & 0 deletions clang/unittests/AST/CommentParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,143 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
}
}

// Text on the same line as @par becomes the command's single argument, and
// the command's paragraph has no children, whatever the line ending is.
TEST_F(CommentParserTest, ParCommandHasArg1) {
  const char *Sources[] = {
      "/// @par Paragraph header:",
      "/// @par Paragraph header:\n",
      "/// @par Paragraph header:\r\n",
      "/// @par Paragraph header:\n\r",
      "/** @par Paragraph header:*/",
  };

  for (const char *Source : Sources) {
    FullComment *FC = parseString(Source);
    ASSERT_TRUE(HasChildCount(FC, 2));
    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));

    BlockCommandComment *ParCommand;
    ParagraphComment *Body;
    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ParCommand, "par", Body));
    ASSERT_TRUE(HasChildCount(Body, 0));
    ASSERT_TRUE(ParCommand->getNumArgs() == 1);
    ASSERT_TRUE(ParCommand->getArgText(0) == "Paragraph header:");
  }
}

// Whitespace that trails the heading on the @par line is kept as part of the
// argument text.
TEST_F(CommentParserTest, ParCommandHasArg2) {
  const char *Sources[] = {
      "/// @par Paragraph header: ",
      "/// @par Paragraph header: \n",
      "/// @par Paragraph header: \r\n",
      "/// @par Paragraph header: \n\r",
      "/** @par Paragraph header: */",
  };

  for (const char *Source : Sources) {
    FullComment *FC = parseString(Source);
    ASSERT_TRUE(HasChildCount(FC, 2));
    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));

    BlockCommandComment *ParCommand;
    ParagraphComment *Body;
    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ParCommand, "par", Body));
    ASSERT_TRUE(HasChildCount(Body, 0));
    ASSERT_TRUE(ParCommand->getNumArgs() == 1);
    ASSERT_TRUE(ParCommand->getArgText(0) == "Paragraph header: ");
  }
}

// A heading followed by one further comment line: the heading is the
// argument and the following line is the paragraph's only text child.
TEST_F(CommentParserTest, ParCommandHasArg3) {
  const char *Sources[] = {
      ("/// @par Paragraph header:\n"
       "/// Paragraph body"),
      ("/// @par Paragraph header:\r\n"
       "/// Paragraph body"),
      ("/// @par Paragraph header:\n\r"
       "/// Paragraph body"),
  };

  for (const char *Source : Sources) {
    FullComment *FC = parseString(Source);
    ASSERT_TRUE(HasChildCount(FC, 2));
    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));

    BlockCommandComment *ParCommand;
    ParagraphComment *Body;
    TextComment *Line;
    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ParCommand, "par", Body));
    ASSERT_TRUE(HasChildCount(Body, 1));
    ASSERT_TRUE(ParCommand->getNumArgs() == 1);
    ASSERT_TRUE(ParCommand->getArgText(0) == "Paragraph header:");
    ASSERT_TRUE(GetChildAt(Body, 0, Line));
    ASSERT_TRUE(Line->getText() == " Paragraph body");
  }
}

// A heading followed by two further comment lines: the heading is the
// argument and each following line is a separate text child of the paragraph.
TEST_F(CommentParserTest, ParCommandHasArg4) {
  const char *Sources[] = {
      ("/// @par Paragraph header:\n"
       "/// Paragraph body1\n"
       "/// Paragraph body2"),
      ("/// @par Paragraph header:\r\n"
       "/// Paragraph body1\n"
       "/// Paragraph body2"),
      ("/// @par Paragraph header:\n\r"
       "/// Paragraph body1\n"
       "/// Paragraph body2"),
  };

  for (const char *Source : Sources) {
    FullComment *FC = parseString(Source);
    ASSERT_TRUE(HasChildCount(FC, 2));
    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));

    BlockCommandComment *ParCommand;
    ParagraphComment *Body;
    TextComment *Line;
    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ParCommand, "par", Body));
    ASSERT_TRUE(HasChildCount(Body, 2));
    ASSERT_TRUE(ParCommand->getNumArgs() == 1);
    ASSERT_TRUE(ParCommand->getArgText(0) == "Paragraph header:");
    ASSERT_TRUE(GetChildAt(Body, 0, Line));
    ASSERT_TRUE(Line->getText() == " Paragraph body1");
    ASSERT_TRUE(GetChildAt(Body, 1, Line));
    ASSERT_TRUE(Line->getText() == " Paragraph body2");
  }
}

// With nothing but whitespace after @par on its own line, the command has no
// argument and the next line is ordinary paragraph text (reported without
// leading whitespace).
TEST_F(CommentParserTest, ParCommandHasArg5) {
  const char *Sources[] = {
      ("/// @par \n"
       "/// Paragraphs with no text before newline have no heading"),
      ("/// @par \r\n"
       "/// Paragraphs with no text before newline have no heading"),
      ("/// @par \n\r"
       "/// Paragraphs with no text before newline have no heading"),
  };

  for (const char *Source : Sources) {
    FullComment *FC = parseString(Source);
    ASSERT_TRUE(HasChildCount(FC, 2));
    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));

    BlockCommandComment *ParCommand;
    ParagraphComment *Body;
    TextComment *Line;
    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ParCommand, "par", Body));
    ASSERT_TRUE(HasChildCount(Body, 1));
    ASSERT_TRUE(ParCommand->getNumArgs() == 0);
    ASSERT_TRUE(GetChildAt(Body, 0, Line));
    ASSERT_TRUE(Line->getText() ==
                "Paragraphs with no text before newline have no heading");
  }
}

} // unnamed namespace

Expand Down
7 changes: 3 additions & 4 deletions clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
Record &Tag = *Tags[i];
OS << " { "
<< "\"" << Tag.getValueAsString("Name") << "\", "
<< "\"" << Tag.getValueAsString("EndCommandName") << "\", "
<< i << ", "
<< "\"" << Tag.getValueAsString("EndCommandName") << "\", " << i << ", "
<< Tag.getValueAsInt("NumArgs") << ", "
<< Tag.getValueAsBit("IsInlineCommand") << ", "
<< Tag.getValueAsBit("IsBlockCommand") << ", "
Expand All @@ -44,6 +43,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
<< Tag.getValueAsBit("IsThrowsCommand") << ", "
<< Tag.getValueAsBit("IsDeprecatedCommand") << ", "
<< Tag.getValueAsBit("IsHeaderfileCommand") << ", "
<< Tag.getValueAsBit("IsParCommand") << ", "
<< Tag.getValueAsBit("IsEmptyParagraphAllowed") << ", "
<< Tag.getValueAsBit("IsVerbatimBlockCommand") << ", "
<< Tag.getValueAsBit("IsVerbatimBlockEndCommand") << ", "
Expand All @@ -52,8 +52,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
<< Tag.getValueAsBit("IsFunctionDeclarationCommand") << ", "
<< Tag.getValueAsBit("IsRecordLikeDetailCommand") << ", "
<< Tag.getValueAsBit("IsRecordLikeDeclarationCommand") << ", "
<< /* IsUnknownCommand = */ "0"
<< " }";
<< /* IsUnknownCommand = */ "0" << " }";
if (i + 1 != e)
OS << ",";
OS << "\n";
Expand Down
Loading