@@ -368,141 +368,101 @@ static const char *jsonKindToString(json::Value::Kind K) {
368368 llvm_unreachable (" Unknown json::Value::Kind" );
369369}
370370
/// Locate the first mustache tag in \p Template at or after \p StartPos.
///
/// Two opening delimiters are searched for independently: the caller-supplied
/// \p Open and the fixed TripleOpen literal. Whichever occurs first is chosen;
/// when both start at the same offset the triple form wins (note the `<=`).
///
/// \param Template  Text being scanned.
/// \param StartPos  Offset in \p Template at which the search begins.
/// \param Open      Current opening delimiter for normal tags.
/// \param Close     Current closing delimiter for normal tags.
/// \returns A Tag whose StartPosition is the offset of the opening delimiter,
///          whose Content is the text strictly between the delimiters, and
///          whose FullMatch spans both delimiters. If no opening delimiter is
///          found, or the chosen one has no matching close, the
///          default-constructed Tag is returned unchanged (presumably
///          TagKind == Tag::Kind::None, which is what the tokenize loop tests
///          for -- Tag is declared outside this view).
371- static Tag findNextTag (StringRef Template, size_t StartPos, StringRef Open,
372- StringRef Close) {
373- const StringLiteral TripleOpen (" {{{" );
374- const StringLiteral TripleClose (" }}}" );
375-
// Both searches start from StartPos; either may yield StringRef::npos.
376- size_t NormalOpenPos = Template.find (Open, StartPos);
377- size_t TripleOpenPos = Template.find (TripleOpen, StartPos);
378-
379- Tag Result;
380-
381- // Determine which tag comes first.
382- if (TripleOpenPos != StringRef::npos &&
383- (NormalOpenPos == StringRef::npos || TripleOpenPos <= NormalOpenPos)) {
384- // Found a triple mustache tag.
385- size_t EndPos =
386- Template.find (TripleClose, TripleOpenPos + TripleOpen.size ());
// NOTE(review): returning here means an unterminated triple opener hides any
// well-formed normal tag that appears later in the template.
387- if (EndPos == StringRef::npos)
388- return Result; // No closing tag found.
389-
390- Result.TagKind = Tag::Kind::Triple;
391- Result.StartPosition = TripleOpenPos;
392- size_t ContentStart = TripleOpenPos + TripleOpen.size ();
393- Result.Content = Template.substr (ContentStart, EndPos - ContentStart);
394- Result.FullMatch = Template.substr (
395- TripleOpenPos, (EndPos + TripleClose.size ()) - TripleOpenPos);
396- } else if (NormalOpenPos != StringRef::npos) {
397- // Found a normal mustache tag.
398- size_t EndPos = Template.find (Close, NormalOpenPos + Open.size ());
// Same early exit as the triple branch: the first opener without a closer
// ends the search, even if later tags are complete.
399- if (EndPos == StringRef::npos)
400- return Result; // No closing tag found.
401-
402- Result.TagKind = Tag::Kind::Normal;
403- Result.StartPosition = NormalOpenPos;
404- size_t ContentStart = NormalOpenPos + Open.size ();
405- Result.Content = Template.substr (ContentStart, EndPos - ContentStart);
406- Result.FullMatch =
407- Template.substr (NormalOpenPos, (EndPos + Close.size ()) - NormalOpenPos);
408- }
409-
410- return Result;
411- }
412-
/// Append the Token for tag \p T to \p Tokens and report a delimiter change.
///
/// Exactly one Token is emplaced per call. Three cases are distinguished:
///  - Triple tags: the content is prefixed with "&", interned through
///    Ctx.Saver, and emplaced with an ampersand as the third argument, i.e. it
///    is handed to the Token constructor in the same form as an `&` tag.
///  - Normal tags whose trimmed content does not start with '=': emplaced with
///    the first non-whitespace character of the content as the third argument.
///  - Set-delimiter tags (trimmed content starts with '='): emplaced with '='
///    and the new delimiter pair is parsed out of the content.
///
/// \param T       Tag to convert; its Content/FullMatch views are reused.
/// \param Tokens  Output list that receives the new Token.
/// \param Ctx     Passed to the Token constructor; Ctx.Saver owns the
///                synthesized "&..." string for triple tags.
/// \returns The (open, close) pair for a set-delimiter tag, std::nullopt for
///          every other tag.
413- static std::optional<std::pair<StringRef, StringRef>>
414- processTag (const Tag &T, SmallVectorImpl<Token> &Tokens, MustacheContext &Ctx) {
415- LLVM_DEBUG (dbgs () << " [Tag] " << T.FullMatch << " , Content: " << T.Content
416- << " , Kind: " << tagKindToString (T.TagKind ) << " \n " );
417- if (T.TagKind == Tag::Kind::Triple) {
418- Tokens.emplace_back (T.FullMatch , Ctx.Saver .save (" &" + T.Content ), ' &' , Ctx);
419- return std::nullopt ;
420- }
421- StringRef Interpolated = T.Content ;
422- if (!Interpolated.trim ().starts_with (" =" )) {
// NOTE(review): emptiness is tested on the untrimmed content, but front() is
// taken from the trimmed view; whitespace-only content would reach front() on
// an empty StringRef -- confirm whether callers can produce that.
423- char Front = Interpolated.empty () ? ' ' : Interpolated.trim ().front ();
424- Tokens.emplace_back (T.FullMatch , Interpolated, Front, Ctx);
425- return std::nullopt ;
426- }
427- Tokens.emplace_back (T.FullMatch , Interpolated, ' =' , Ctx);
// Peel the spec out of "=OPEN CLOSE=": drop the leading '=', keep everything
// up to the next '=', then strip surrounding whitespace.
428- StringRef DelimSpec = Interpolated.trim ();
429- DelimSpec = DelimSpec.drop_front (1 );
430- DelimSpec = DelimSpec.take_until ([](char C) { return C == ' =' ; });
431- DelimSpec = DelimSpec.trim ();
432-
// split() cuts at the first separator only; the second half is not trimmed
// again, so extra spaces between the two delimiters stay in Ret.second.
433- std::pair<StringRef, StringRef> Ret = DelimSpec.split (' ' );
434- LLVM_DEBUG (dbgs () << " [Set Delimiter] NewOpen: " << Ret.first
435- << " , NewClose: " << Ret.second << " \n " );
436- return Ret;
437- }
438-
439371// Simple tokenizer that splits the template into tokens.
440- // The mustache spec allows {{{ }}} to unescape variables,
441- // but we don't support that here. An unescape variable
442- // is represented only by {{& variable}}.
443372static SmallVector<Token> tokenize (StringRef Template, MustacheContext &Ctx) {
444373 LLVM_DEBUG (dbgs () << " [Tokenize Template] \" " << Template << " \"\n " );
445374 SmallVector<Token> Tokens;
446375 SmallString<8 > Open (" {{" );
447376 SmallString<8 > Close (" }}" );
448- size_t Start = 0 ;
377+ size_t Cursor = 0 ;
378+ size_t TextStart = 0 ;
379+
380+ const StringLiteral TripleOpen (" {{{" );
381+ const StringLiteral TripleClose (" }}}" );
449382
450- while (Start < Template.size ()) {
451- LLVM_DEBUG (dbgs () << " [Tokenize Loop] Start=" << Start << " , Open='" << Open
452- << " ', Close='" << Close << " '\n " );
453- Tag T = findNextTag (Template, Start, Open, Close);
383+ while (Cursor < Template.size ()) {
384+ StringRef TemplateSuffix = Template.substr (Cursor);
385+ StringRef TagOpen, TagClose;
386+ Tag::Kind Kind;
387+
388+ // Determine which tag we've encountered.
389+ if (TemplateSuffix.starts_with (TripleOpen)) {
390+ Kind = Tag::Kind::Triple;
391+ TagOpen = TripleOpen;
392+ TagClose = TripleClose;
393+ } else if (TemplateSuffix.starts_with (Open)) {
394+ Kind = Tag::Kind::Normal;
395+ TagOpen = Open;
396+ TagClose = Close;
397+ } else {
398+ // Not at a tag, continue scanning.
399+ ++Cursor;
400+ continue ;
401+ }
454402
455- if (T.TagKind == Tag::Kind::None) {
456- // No more tags, the rest is text.
457- Tokens.emplace_back (Template.substr (Start));
458- break ;
403+ // Found a tag, first add the preceding text.
404+ if (Cursor > TextStart) {
405+ Tokens.emplace_back (Template.slice (TextStart, Cursor));
459406 }
460407
461- // Add the text before the tag.
462- if (T.StartPosition > Start) {
463- StringRef Text = Template.substr (Start, T.StartPosition - Start);
464- Tokens.emplace_back (Text);
408+ // Find the closing tag.
409+ size_t EndPos = Template.find (TagClose, Cursor + TagOpen.size ());
410+ if (EndPos == StringRef::npos) {
411+ // No closing tag, the rest is text.
412+ Tokens.emplace_back (Template.substr (Cursor));
413+ TextStart = Cursor = Template.size ();
414+ break ;
465415 }
466416
467- if (auto NewDelims = processTag (T, Tokens, Ctx)) {
468- std::tie (Open, Close) = *NewDelims;
417+ // Extract tag content and full match.
418+ size_t ContentStart = Cursor + TagOpen.size ();
419+ StringRef Content = Template.substr (ContentStart, EndPos - ContentStart);
420+ StringRef FullMatch =
421+ Template.substr (Cursor, (EndPos + TagClose.size ()) - Cursor);
422+
423+ // Process the tag (inlined logic from processTag).
424+ LLVM_DEBUG (dbgs () << " [Tag] " << FullMatch << " , Content: " << Content
425+ << " , Kind: " << tagKindToString (Kind) << " \n " );
426+ if (Kind == Tag::Kind::Triple) {
427+ Tokens.emplace_back (FullMatch, Ctx.Saver .save (" &" + Content), ' &' , Ctx);
428+ } else { // Normal Tag
429+ StringRef Interpolated = Content;
430+ if (!Interpolated.trim ().starts_with (" =" )) {
431+ char Front = Interpolated.empty () ? ' ' : Interpolated.trim ().front ();
432+ Tokens.emplace_back (FullMatch, Interpolated, Front, Ctx);
433+ } else { // Set Delimiter
434+ Tokens.emplace_back (FullMatch, Interpolated, ' =' , Ctx);
435+ StringRef DelimSpec = Interpolated.trim ();
436+ DelimSpec = DelimSpec.drop_front (1 );
437+ DelimSpec = DelimSpec.take_until ([](char C) { return C == ' =' ; });
438+ DelimSpec = DelimSpec.trim ();
439+
440+ auto [NewOpen, NewClose] = DelimSpec.split (' ' );
441+ LLVM_DEBUG (dbgs () << " [Set Delimiter] NewOpen: " << NewOpen
442+ << " , NewClose: " << NewClose << " \n " );
443+ Open = NewOpen;
444+ Close = NewClose;
445+ }
469446 }
470447
471- // Move past the tag.
472- Start = T.StartPosition + T.FullMatch .size ();
448+ // Move past the tag for the next iteration.
449+ Cursor += FullMatch.size ();
450+ TextStart = Cursor;
473451 }
474452
475- // Fix up white spaces for:
476- // - open sections
477- // - inverted sections
478- // - close sections
479- // - comments
480- //
481- // This loop attempts to find standalone tokens and tries to trim out
482- // the surrounding whitespace.
483- // For example:
484- // if you have the template string
485- // {{#section}} \n Example \n{{/section}}
486- // The output should would be
487- // For example:
488- // \n Example \n
453+ // Add any remaining text after the last tag.
454+ if (TextStart < Template.size ()) {
455+ Tokens.emplace_back (Template.substr (TextStart));
456+ }
457+
458+ // Fix up white spaces for standalone tags.
489459 size_t LastIdx = Tokens.size () - 1 ;
490460 for (size_t Idx = 0 , End = Tokens.size (); Idx < End; ++Idx) {
491461 Token &CurrentToken = Tokens[Idx];
492462 Token::Type CurrentType = CurrentToken.getType ();
493- // Check if token type requires cleanup.
494- bool RequiresCleanUp = requiresCleanUp (CurrentType);
495-
496- if (!RequiresCleanUp)
463+ if (!requiresCleanUp (CurrentType))
497464 continue ;
498465
499- // We adjust the token body if there's no text behind or ahead.
500- // A token is considered to have no text ahead if the right of the previous
501- // token is a newline followed by spaces.
502- // A token is considered to have no text behind if the left of the next
503- // token is spaces followed by a newline.
504- // eg.
505- // "Line 1\n {{#section}} \n Line 2 \n {{/section}} \n Line 3"
506466 bool HasTextBehind = hasTextBehind (Idx, Tokens);
507467 bool HasTextAhead = hasTextAhead (Idx, Tokens);
508468
0 commit comments