Skip to content

SLua editor: WIP #3935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 132 additions & 107 deletions indra/llui/llkeywords.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,38 +34,20 @@
#include "llsdserialize.h"
#include "lltexteditor.h"
#include "llstl.h"
#include "llcontrol.h"

extern LLControlGroup gSavedSettings;

inline bool LLKeywordToken::isHead(const llwchar* s) const
{
// strncmp is much faster than string compare
bool res = true;
const llwchar* t = mToken.c_str();
auto len = mToken.size();
for (S32 i=0; i<len; i++)
{
if (s[i] != t[i])
{
res = false;
break;
}
}
return res;
size_t bytes = mToken.size() * sizeof(llwchar);
return std::memcmp(s, mToken.c_str(), bytes) == 0;
}

inline bool LLKeywordToken::isTail(const llwchar* s) const
{
bool res = true;
const llwchar* t = mDelimiter.c_str();
auto len = mDelimiter.size();
for (S32 i=0; i<len; i++)
{
if (s[i] != t[i])
{
res = false;
break;
}
}
return res;
size_t len_bytes = mDelimiter.size() * sizeof(llwchar);
return std::memcmp(s, mDelimiter.c_str(), len_bytes) == 0;
}

LLKeywords::LLKeywords()
Expand Down Expand Up @@ -282,6 +264,21 @@ void LLKeywords::processTokens()
}
}
}

// Pre-compile all regex patterns for tokens in mRegexTokenList
for (LLKeywordToken* regex_token : mRegexTokenList)
{
std::string start_pattern(regex_token->getToken().begin(), regex_token->getToken().end());
try
{
regex_token->setCompiledRegex(new std::regex(start_pattern));
}
catch (const std::regex_error& e)
{
LL_WARNS() << "Regex error in start pattern: " << e.what() << " in pattern: " << start_pattern << LL_ENDL;
}
}

LL_INFOS("SyntaxLSL") << "Finished processing tokens." << LL_ENDL;
}

Expand Down Expand Up @@ -509,17 +506,28 @@ LLTrace::BlockTimerStatHandle FTM_SYNTAX_COLORING("Syntax Coloring");
void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLWString& wtext, LLTextEditor& editor, LLStyleConstSP style)
{
LL_RECORD_BLOCK_TIME(FTM_SYNTAX_COLORING);
seg_list->clear();

if( wtext.empty() )
{
return;
}

// Clear the segment list
seg_list->clear();
// Reserve capacity for segments based on an estimated average of 8 characters per segment.
constexpr size_t AVERAGE_SEGMENT_LENGTH = 8;
seg_list->reserve(wtext.size() / AVERAGE_SEGMENT_LENGTH);

S32 text_len = static_cast<S32>(wtext.size()) + 1;

seg_list->push_back( new LLNormalTextSegment( style, 0, text_len, editor ) );

std::string text_to_search;
text_to_search.reserve(wtext.size());

bool has_regex = !mRegexTokenList.empty();
auto& delimiters = mDelimiterTokenList;

const llwchar* base = wtext.c_str();
const llwchar* cur = base;
while( *cur )
Expand Down Expand Up @@ -587,111 +595,133 @@ void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLW
cur++;
}

// Check if syntax highlighting is disabled
static LLCachedControl<bool> sDisableSyntaxHighlighting(gSavedSettings, "ScriptEditorDisableSyntaxHighlight", false);
if (sDisableSyntaxHighlighting)
{
if (*cur && *cur != '\n')
{
cur++;
}
continue; // skip processing any further syntax highlighting
}

while( *cur && *cur != '\n' )
{
// Check for regex matches first
bool regex_matched = false;
if (!mRegexTokenList.empty())
if (has_regex)
{
S32 seg_start = (S32)(cur - base);

std::string text_to_search(wtext.begin() + seg_start, wtext.end());
text_to_search.assign(wtext.begin() + seg_start, wtext.end());

for (LLKeywordToken* regex_token : mRegexTokenList)
{
std::string start_pattern(regex_token->getToken().begin(), regex_token->getToken().end());
std::string end_pattern(regex_token->getDelimiter().begin(), regex_token->getDelimiter().end());
std::regex* compiled_regex = regex_token->getCompiledRegex();

try
// If we have a pre-compiled regex, use it
if (compiled_regex)
{
std::regex start_regex_pattern(start_pattern);
std::smatch start_match;
std::string end_pattern(regex_token->getDelimiter().begin(), regex_token->getDelimiter().end());

if (std::regex_search(text_to_search, start_match, start_regex_pattern) && !start_match.empty())
try
{
if (start_match.position() == 0) // Match starts at current position
{
// Calculate segment boundaries for start pattern
S32 start_match_length = static_cast<S32>(start_match.str().length());
S32 start_seg_end = seg_start + start_match_length;

if (end_pattern.empty())
{
// If no end pattern is provided, treat the entire regex match as a single segment
// Move cursor past the matched segment
cur = base + start_seg_end;
std::smatch start_match;

// Insert the matched segment
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, start_seg_end, style, editor);
}
else
if (std::regex_search(text_to_search, start_match, *compiled_regex) && !start_match.empty())
{
if (start_match.position() == 0) // Match starts at current position
{
// Look for the end pattern after the start pattern
std::string remaining_text = text_to_search.substr(start_match_length);

// Process end pattern - replace any capture group references
std::string actual_end_pattern = end_pattern;
// Calculate segment boundaries for start pattern
S32 start_match_length = static_cast<S32>(start_match.str().length());
S32 start_seg_end = seg_start + start_match_length;

// Handle capture groups in the end pattern (replace \1, \2, etc. with their matched content)
for (size_t i = 1; i < start_match.size(); ++i)
if (end_pattern.empty())
{
std::string capture = start_match[i].str();
std::string placeholder = "\\" + std::to_string(i);
// If no end pattern is provided, treat the entire regex match as a single segment
// Move cursor past the matched segment
cur = base + start_seg_end;

// Replace all occurrences of the placeholder with the captured content
size_t pos = 0;
while ((pos = actual_end_pattern.find(placeholder, pos)) != std::string::npos)
{
actual_end_pattern.replace(pos, placeholder.length(), capture);
pos += capture.length();
}
// Insert the matched segment
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, start_seg_end, style, editor);
}
else
{ // TODO: better optimization for this part

try
{
std::regex end_regex_pattern(actual_end_pattern);
std::smatch end_match;
// Look for the end pattern after the start pattern
std::string remaining_text = text_to_search.substr(start_match_length);

S32 seg_end = start_seg_end;
// Process end pattern - replace any capture group references
std::string actual_end_pattern = end_pattern;

if (std::regex_search(remaining_text, end_match, end_regex_pattern) && !end_match.empty())
// Handle capture groups in the end pattern (replace \1, \2, etc. with their matched content)
for (size_t i = 1; i < start_match.size(); ++i)
{
// Calculate position of end match relative to the original text
S32 end_match_position = static_cast<S32>(end_match.position());
S32 end_match_length = static_cast<S32>(end_match.str().length());
std::string capture = start_match[i].str();
std::string placeholder = "\\" + std::to_string(i);

// Replace all occurrences of the placeholder with the captured content
size_t pos = 0;
while ((pos = actual_end_pattern.find(placeholder, pos)) != std::string::npos)
{
actual_end_pattern.replace(pos, placeholder.length(), capture);
pos += capture.length();
}
}

// Calculate the total length including both patterns and text between
seg_end += end_match_position + end_match_length;
try
{
std::regex end_regex_pattern(actual_end_pattern);
std::smatch end_match;

S32 seg_end = start_seg_end;

if (std::regex_search(remaining_text, end_match, end_regex_pattern) && !end_match.empty())
{
// Calculate position of end match relative to the original text
S32 end_match_position = static_cast<S32>(end_match.position());
S32 end_match_length = static_cast<S32>(end_match.str().length());

// Calculate the total length including both patterns and text between
seg_end += end_match_position + end_match_length;
}
else
{
// End pattern not found, treat everything up to EOF as the segment
seg_end += static_cast<S32>(remaining_text.length());
}

// Move cursor past the entire matched segment (start + content + end)
cur = base + seg_end;

// Insert the matched segment
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, seg_end, style, editor);
}
else
catch (const std::regex_error& e)
{
// End pattern not found, treat everything up to EOF as the segment
seg_end += static_cast<S32>(remaining_text.length());
LL_WARNS() << "Regex error in end pattern: " << e.what() << " in pattern: " << actual_end_pattern << LL_ENDL;
// Fall back to treating the start match as the entire segment
cur = base + start_seg_end;
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, start_seg_end, style, editor);
}

// Move cursor past the entire matched segment (start + content + end)
cur = base + seg_end;

// Insert the matched segment
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, seg_end, style, editor);
}
catch (const std::regex_error& e)
{
LL_WARNS() << "Regex error in end pattern: " << e.what() << " in pattern: " << actual_end_pattern << LL_ENDL;
// Fall back to treating the start match as the entire segment
cur = base + start_seg_end;
insertSegments(wtext, *seg_list, regex_token, text_len, seg_start, start_seg_end, style, editor);
}
}

regex_matched = true;
break;
regex_matched = true;
break;
}
}
}
catch (const std::regex_error& e)
{
LL_WARNS() << "Error using compiled regex: " << e.what() << LL_ENDL;
}
}
catch (const std::regex_error& e)
else
{
LL_WARNS() << "Regex error in start pattern: " << e.what() << " in pattern: " << start_pattern << LL_ENDL;
// Skip tokens that aren't pre-compiled
LL_WARNS() << "Skipping regex token due to missing pre-compiled pattern: "
<< wstring_to_utf8str(regex_token->getToken()) << LL_ENDL;
}
}

Expand All @@ -701,14 +731,12 @@ void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLW
}
}

// If no regex match, check against delimiters
// Check against delimiters
{
S32 seg_start = 0;
LLKeywordToken* cur_delimiter = NULL;
for (token_list_t::iterator iter = mDelimiterTokenList.begin();
iter != mDelimiterTokenList.end(); ++iter)
for (auto* delimiter : delimiters)
{
LLKeywordToken* delimiter = *iter;
if( delimiter->isHead( cur ) )
{
cur_delimiter = delimiter;
Expand Down Expand Up @@ -820,8 +848,7 @@ void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLW
S32 seg_end = seg_start + seg_len;

// First try to match the whole token (including dots for Lua namespaces)
WStringMapIndex whole_token(word_start, seg_len);
word_token_map_t::iterator map_iter = mWordTokenMap.find(whole_token);
word_token_map_t::iterator map_iter = mWordTokenMap.find(WStringMapIndex(word_start, seg_len));

if (map_iter != mWordTokenMap.end())
{
Expand All @@ -837,8 +864,7 @@ void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLW
{
// Get the namespace prefix (part before the first dot)
S32 prefix_len = (S32)(last_dot - word_start);
WStringMapIndex prefix_token(word_start, prefix_len);
map_iter = mWordTokenMap.find(prefix_token);
map_iter = mWordTokenMap.find(WStringMapIndex(word_start, prefix_len));

if (map_iter != mWordTokenMap.end())
{
Expand All @@ -853,8 +879,7 @@ void LLKeywords::findSegments(std::vector<LLTextSegmentPtr>* seg_list, const LLW
if (func_len > 0)
{
// Look for complete function matches
WStringMapIndex func_token(func_part, func_len);
map_iter = mWordTokenMap.find(func_token);
map_iter = mWordTokenMap.find(WStringMapIndex(func_part, func_len));

if (map_iter != mWordTokenMap.end())
{
Expand Down
16 changes: 15 additions & 1 deletion indra/llui/llkeywords.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <map>
#include <list>
#include <deque>
#include <regex>
#include "llpointer.h"

class LLTextSegment;
Expand Down Expand Up @@ -84,10 +85,20 @@ class LLKeywordToken
mToken( token ),
mColor( color ),
mToolTip( tool_tip ),
mDelimiter( delimiter ) // right delimiter
mDelimiter( delimiter ), // right delimiter
mCompiledRegex( nullptr )
{
}

// Releases the compiled regex held in mCompiledRegex, if one was stored.
~LLKeywordToken()
{
    // delete on a null pointer is a no-op, so no explicit null check is needed.
    delete mCompiledRegex;
    mCompiledRegex = nullptr;
}

S32 getLengthHead() const { return static_cast<S32>(mToken.size()); }
S32 getLengthTail() const { return static_cast<S32>(mDelimiter.size()); }
bool isHead(const llwchar* s) const;
Expand All @@ -97,6 +108,8 @@ class LLKeywordToken
ETokenType getType() const { return mType; }
const LLWString& getToolTip() const { return mToolTip; }
const LLWString& getDelimiter() const { return mDelimiter; }
// Returns the regex pointer stored via setCompiledRegex(), or nullptr when
// none has been set (the constructor initializes the member to nullptr).
// The token's destructor deletes this pointer, so callers must not delete it.
std::regex* getCompiledRegex() const { return mCompiledRegex; }
void setCompiledRegex(std::regex* regex) { mCompiledRegex = regex; }

#ifdef _DEBUG
void dump();
Expand All @@ -108,6 +121,7 @@ class LLKeywordToken
LLUIColor mColor;
LLWString mToolTip;
LLWString mDelimiter;
std::regex* mCompiledRegex;
};

class LLKeywords
Expand Down
Loading