Skip to content

Commit 7cea784

Browse files
committed
[llvm][mustache] Support setting delimiters in templates
The base mustache spec allows setting custom delimiters, which slightly changes the parsing of partials. This patch implements that feature by adding a new token type and changing the tokenizer's behavior to allow setting custom delimiters.
1 parent c3b7fae commit 7cea784

File tree

3 files changed

+162
-92
lines changed

3 files changed

+162
-92
lines changed

llvm/lib/Support/Mustache.cpp

Lines changed: 151 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,14 @@
77
//===----------------------------------------------------------------------===//
88
#include "llvm/Support/Mustache.h"
99
#include "llvm/ADT/SmallVector.h"
10+
#include "llvm/Support/Debug.h"
1011
#include "llvm/Support/raw_ostream.h"
12+
13+
#include <cctype>
1114
#include <sstream>
1215

16+
#define DEBUG_TYPE "mustache"
17+
1318
using namespace llvm;
1419
using namespace llvm::mustache;
1520

@@ -62,6 +67,7 @@ class Token {
6267
InvertSectionOpen,
6368
UnescapeVariable,
6469
Comment,
70+
SetDelimiter,
6571
};
6672

6773
Token(std::string Str)
@@ -102,6 +108,8 @@ class Token {
102108
return Type::Partial;
103109
case '&':
104110
return Type::UnescapeVariable;
111+
case '=':
112+
return Type::SetDelimiter;
105113
default:
106114
return Type::Variable;
107115
}
@@ -189,27 +197,27 @@ class ASTNode {
189197
};
190198

191199
// Factory helpers for ASTNodes; each one allocates the node with
// std::make_unique.
192-
AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
193-
llvm::StringMap<Lambda> &Lambdas,
194-
llvm::StringMap<SectionLambda> &SectionLambdas,
195-
EscapeMap &Escapes) {
200+
static AstPtr createRootNode(llvm::StringMap<AstPtr> &Partials,
201+
llvm::StringMap<Lambda> &Lambdas,
202+
llvm::StringMap<SectionLambda> &SectionLambdas,
203+
EscapeMap &Escapes) {
196204
return std::make_unique<ASTNode>(Partials, Lambdas, SectionLambdas, Escapes);
197205
}
198206

199-
AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
200-
llvm::StringMap<AstPtr> &Partials,
201-
llvm::StringMap<Lambda> &Lambdas,
202-
llvm::StringMap<SectionLambda> &SectionLambdas,
203-
EscapeMap &Escapes) {
207+
static AstPtr createNode(ASTNode::Type T, Accessor A, ASTNode *Parent,
208+
llvm::StringMap<AstPtr> &Partials,
209+
llvm::StringMap<Lambda> &Lambdas,
210+
llvm::StringMap<SectionLambda> &SectionLambdas,
211+
EscapeMap &Escapes) {
204212
return std::make_unique<ASTNode>(T, std::move(A), Parent, Partials, Lambdas,
205213
SectionLambdas, Escapes);
206214
}
207215

208-
AstPtr createTextNode(std::string Body, ASTNode *Parent,
209-
llvm::StringMap<AstPtr> &Partials,
210-
llvm::StringMap<Lambda> &Lambdas,
211-
llvm::StringMap<SectionLambda> &SectionLambdas,
212-
EscapeMap &Escapes) {
216+
static AstPtr createTextNode(std::string Body, ASTNode *Parent,
217+
llvm::StringMap<AstPtr> &Partials,
218+
llvm::StringMap<Lambda> &Lambdas,
219+
llvm::StringMap<SectionLambda> &SectionLambdas,
220+
EscapeMap &Escapes) {
213221
return std::make_unique<ASTNode>(std::move(Body), Parent, Partials, Lambdas,
214222
SectionLambdas, Escapes);
215223
}
@@ -226,7 +234,7 @@ AstPtr createTextNode(std::string Body, ASTNode *Parent,
226234
// and the current token is the second token.
227235
// For example:
228236
// "{{#Section}}"
229-
bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
237+
static bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
230238
if (Idx == 0)
231239
return true;
232240

@@ -242,7 +250,7 @@ bool hasTextBehind(size_t Idx, const ArrayRef<Token> &Tokens) {
242250
// Function to check if there's no meaningful text ahead.
243251
// We determine if a token has text ahead if the left of previous
244252
// token does not start with a newline.
245-
bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
253+
static bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
246254
if (Idx >= Tokens.size() - 1)
247255
return true;
248256

@@ -255,11 +263,11 @@ bool hasTextAhead(size_t Idx, const ArrayRef<Token> &Tokens) {
255263
return !TokenBody.starts_with("\r\n") && !TokenBody.starts_with("\n");
256264
}
257265

258-
bool requiresCleanUp(Token::Type T) {
266+
static bool requiresCleanUp(Token::Type T) {
259267
// Tag kinds that require whitespace clean-up. Section tags can contain child
// nodes; Comment, Partial and SetDelimiter tags are cleaned up as well.
260268
return T == Token::Type::SectionOpen || T == Token::Type::InvertSectionOpen ||
261269
T == Token::Type::SectionClose || T == Token::Type::Comment ||
262-
T == Token::Type::Partial;
270+
T == Token::Type::Partial || T == Token::Type::SetDelimiter;
263271
}
264272

265273
// Adjust next token body if there is no text ahead.
@@ -268,7 +276,7 @@ bool requiresCleanUp(Token::Type T) {
268276
// "{{! Comment }} \nLine 2"
269277
// would be considered as no text ahead and should be rendered as
270278
// " Line 2"
271-
void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
279+
static void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
272280
Token &NextToken = Tokens[Idx + 1];
273281
StringRef NextTokenBody = NextToken.TokenBody;
274282
// Cut off the leading newline which could be \n or \r\n.
@@ -286,8 +294,8 @@ void stripTokenAhead(SmallVectorImpl<Token> &Tokens, size_t Idx) {
286294
// "A"
287295
// The exception for this is partial tag which requires us to
288296
// keep track of the indentation once it's rendered.
289-
void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
290-
Token &CurrentToken, Token::Type CurrentType) {
297+
static void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
298+
Token &CurrentToken, Token::Type CurrentType) {
291299
Token &PrevToken = Tokens[Idx - 1];
292300
StringRef PrevTokenBody = PrevToken.TokenBody;
293301
StringRef Unindented = PrevTokenBody.rtrim(" \r\t\v");
@@ -296,57 +304,129 @@ void stripTokenBefore(SmallVectorImpl<Token> &Tokens, size_t Idx,
296304
CurrentToken.setIndentation(Indentation);
297305
}
298306

307+
// Result of a single findNextTag() search: which delimiter form matched and
// which slices of the template the tag covers.
struct Tag {
308+
enum class Kind {
309+
None, // No complete tag was found.
310+
Normal, // Tag in the currently active delimiters ({{...}} by default).
311+
Triple, // {{{...}}}
312+
};
313+
314+
Kind TagKind = Kind::None; // Stays None unless findNextTag() finds a tag.
315+
StringRef Content; // The content between the delimiters.
316+
StringRef FullMatch; // The entire tag, including delimiters.
317+
size_t StartPosition = StringRef::npos; // Offset of FullMatch in the template; npos when no tag was found.
318+
};
319+
320+
// Locates the first mustache tag in Template at or after StartPos.
//
// Two openers are searched for: the currently active Open delimiter and the
// fixed triple-mustache opener "{{{". Whichever occurs first is chosen, with
// the triple form winning a tie. The matching closer (Close or "}}}") is then
// searched for after the opener.
//
// Returns a Tag with Kind::None when neither opener occurs, or when the chosen
// opener has no matching closer. Otherwise Content and FullMatch are slices of
// Template and StartPosition is the offset of the opener.
static Tag findNextTag(StringRef Template, size_t StartPos,
                       const SmallString<8> &Open,
                       const SmallString<8> &Close) {
  const StringLiteral TripleOpen("{{{");
  const StringLiteral TripleClose("}}}");

  const size_t NormalBegin = Template.find(Open, StartPos);
  const size_t TripleBegin = Template.find(TripleOpen, StartPos);

  const bool HasNormal = NormalBegin != StringRef::npos;
  const bool HasTriple = TripleBegin != StringRef::npos;
  // Prefer the triple form when it starts at or before the normal opener.
  const bool UseTriple =
      HasTriple && (!HasNormal || TripleBegin <= NormalBegin);

  Tag Found;
  if (!UseTriple && !HasNormal)
    return Found; // No opener of either kind.

  const size_t Begin = UseTriple ? TripleBegin : NormalBegin;
  const StringRef Opener = UseTriple ? StringRef(TripleOpen) : Open.str();
  const StringRef Closer = UseTriple ? StringRef(TripleClose) : Close.str();

  const size_t BodyBegin = Begin + Opener.size();
  const size_t BodyEnd = Template.find(Closer, BodyBegin);
  if (BodyEnd == StringRef::npos)
    return Found; // No closing tag found.

  Found.TagKind = UseTriple ? Tag::Kind::Triple : Tag::Kind::Normal;
  Found.StartPosition = Begin;
  Found.Content = Template.substr(BodyBegin, BodyEnd - BodyBegin);
  Found.FullMatch = Template.substr(Begin, (BodyEnd + Closer.size()) - Begin);
  return Found;
}
362+
363+
static void processTag(const Tag &T, SmallVectorImpl<Token> &Tokens,
364+
SmallString<8> &Open, SmallString<8> &Close) {
365+
LLVM_DEBUG(dbgs() << " Found tag: \"" << T.FullMatch << "\", Content: \""
366+
<< T.Content << "\"\n");
367+
if (T.TagKind == Tag::Kind::Triple) {
368+
Tokens.emplace_back(T.FullMatch.str(), "&" + T.Content.str(), '&');
369+
LLVM_DEBUG(dbgs() << " Created UnescapeVariable token.\n");
370+
return;
371+
}
372+
StringRef Interpolated = T.Content;
373+
std::string RawBody = T.FullMatch.str();
374+
if (!Interpolated.trim().starts_with("=")) {
375+
char Front = Interpolated.empty() ? ' ' : Interpolated.trim().front();
376+
Tokens.emplace_back(RawBody, Interpolated.str(), Front);
377+
LLVM_DEBUG(dbgs() << " Created tag token of type '" << Front << "'\n");
378+
return;
379+
}
380+
Tokens.emplace_back(RawBody, Interpolated.str(), '=');
381+
StringRef DelimSpec = Interpolated.trim();
382+
DelimSpec = DelimSpec.drop_front(1);
383+
DelimSpec = DelimSpec.take_until([](char C) { return C == '='; });
384+
DelimSpec = DelimSpec.trim();
385+
386+
auto [NewOpen, NewClose] = DelimSpec.split(' ');
387+
Open = NewOpen;
388+
Close = NewClose;
389+
390+
LLVM_DEBUG(dbgs() << " Found Set Delimiter tag. NewOpen='" << Open
391+
<< "', NewClose='" << Close << "'\n");
392+
}
393+
299394
// Simple tokenizer that splits the template into tokens.
300395
// The mustache spec allows {{{ }}} to unescape variables; such a tag is
301396
// tokenized the same way as {{& variable}}. A Set Delimiter tag, e.g.
302397
// {{=<% %>=}}, changes the delimiters used to find the tags that follow it.
303-
SmallVector<Token> tokenize(StringRef Template) {
398+
static SmallVector<Token> tokenize(StringRef Template) {
399+
LLVM_DEBUG(dbgs() << "Tokenizing template: \"" << Template << "\"\n");
304400
SmallVector<Token> Tokens;
305-
StringLiteral Open("{{");
306-
StringLiteral Close("}}");
307-
StringLiteral TripleOpen("{{{");
308-
StringLiteral TripleClose("}}}");
401+
SmallString<8> Open("{{");
402+
SmallString<8> Close("}}");
309403
size_t Start = 0;
310-
size_t DelimiterStart = Template.find(Open);
311-
if (DelimiterStart == StringRef::npos) {
312-
Tokens.emplace_back(Template.str());
313-
return Tokens;
314-
}
315-
while (DelimiterStart != StringRef::npos) {
316-
if (DelimiterStart != Start)
317-
Tokens.emplace_back(Template.substr(Start, DelimiterStart - Start).str());
318-
319-
if (Template.substr(DelimiterStart).starts_with(TripleOpen)) {
320-
size_t DelimiterEnd = Template.find(TripleClose, DelimiterStart);
321-
if (DelimiterEnd == StringRef::npos)
322-
break;
323-
size_t BodyStart = DelimiterStart + TripleOpen.size();
324-
std::string Body =
325-
Template.substr(BodyStart, DelimiterEnd - BodyStart).str();
326-
std::string RawBody =
327-
Template.substr(DelimiterStart, DelimiterEnd - DelimiterStart + 3)
328-
.str();
329-
Tokens.emplace_back(RawBody, "&" + Body, '&');
330-
Start = DelimiterEnd + TripleClose.size();
331-
} else {
332-
size_t DelimiterEnd = Template.find(Close, DelimiterStart);
333-
if (DelimiterEnd == StringRef::npos)
334-
break;
335-
336-
// Extract the Interpolated variable without delimiters.
337-
size_t InterpolatedStart = DelimiterStart + Open.size();
338-
size_t InterpolatedEnd = DelimiterEnd - DelimiterStart - Close.size();
339-
std::string Interpolated =
340-
Template.substr(InterpolatedStart, InterpolatedEnd).str();
341-
std::string RawBody = Open.str() + Interpolated + Close.str();
342-
Tokens.emplace_back(RawBody, Interpolated, Interpolated[0]);
343-
Start = DelimiterEnd + Close.size();
404+
405+
while (Start < Template.size()) {
406+
LLVM_DEBUG(dbgs() << "Loop start. Start=" << Start << ", Open='" << Open
407+
<< "', Close='" << Close << "'\n");
408+
Tag T = findNextTag(Template, Start, Open, Close);
409+
410+
if (T.TagKind == Tag::Kind::None) {
411+
// No more tags, the rest is text.
412+
Tokens.emplace_back(Template.substr(Start).str());
413+
LLVM_DEBUG(dbgs() << " No more tags. Created final Text token: \""
414+
<< Template.substr(Start) << "\"\n");
415+
break;
416+
}
417+
418+
// Add the text before the tag.
419+
if (T.StartPosition > Start) {
420+
StringRef Text = Template.substr(Start, T.StartPosition - Start);
421+
Tokens.emplace_back(Text.str());
422+
LLVM_DEBUG(dbgs() << " Created Text token: \"" << Text << "\"\n");
344423
}
345-
DelimiterStart = Template.find(Open, Start);
346-
}
347424

348-
if (Start < Template.size())
349-
Tokens.emplace_back(Template.substr(Start).str());
425+
processTag(T, Tokens, Open, Close);
426+
427+
// Move past the tag.
428+
Start = T.StartPosition + T.FullMatch.size();
429+
}
350430

351431
// Fix up white spaces for:
352432
// - open sections
@@ -388,6 +468,7 @@ SmallVector<Token> tokenize(StringRef Template) {
388468
if ((!HasTextBehind && !HasTextAhead) || (!HasTextBehind && Idx == LastIdx))
389469
stripTokenBefore(Tokens, Idx, CurrentToken, CurrentType);
390470
}
471+
LLVM_DEBUG(dbgs() << "Tokenizing finished.\n");
391472
return Tokens;
392473
}
393474

@@ -551,13 +632,14 @@ void Parser::parseMustache(ASTNode *Parent, llvm::StringMap<AstPtr> &Partials,
551632
break;
552633
}
553634
case Token::Type::Comment:
635+
case Token::Type::SetDelimiter:
554636
break;
555637
case Token::Type::SectionClose:
556638
return;
557639
}
558640
}
559641
}
560-
void toMustacheString(const json::Value &Data, raw_ostream &OS) {
642+
static void toMustacheString(const json::Value &Data, raw_ostream &OS) {
561643
switch (Data.kind()) {
562644
case json::Value::Null:
563645
return;
@@ -590,6 +672,8 @@ void toMustacheString(const json::Value &Data, raw_ostream &OS) {
590672
}
591673

592674
void ASTNode::render(const json::Value &CurrentCtx, raw_ostream &OS) {
675+
if (Ty != Root && Ty != Text && AccessorValue.empty())
676+
return;
593677
// Set the parent context to the incoming context so that we
594678
// can walk up the context tree correctly in findContext().
595679
ParentContext = &CurrentCtx;
@@ -789,3 +873,5 @@ Template &Template::operator=(Template &&Other) noexcept {
789873
return *this;
790874
}
791875
} // namespace llvm::mustache
876+
877+
#undef DEBUG_TYPE

0 commit comments

Comments
 (0)