Skip to content

Commit 26a5841

Browse files
authored
Merge pull request #74794 from lawnjelly/gdscript_parser_hashtable
[3.x] Use hash table for GDScript parsing
2 parents 16df341 + 19f2006 commit 26a5841

File tree

3 files changed

+114
-60
lines changed

3 files changed

+114
-60
lines changed

modules/gdscript/gdscript.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2095,6 +2095,8 @@ GDScriptWarning::Code GDScriptWarning::get_code_from_name(const String &p_name)
20952095
#endif // DEBUG_ENABLED
20962096

20972097
GDScriptLanguage::GDScriptLanguage() {
2098+
GDScriptTokenizer::initialize();
2099+
20982100
calls = 0;
20992101
ERR_FAIL_COND(singleton);
21002102
singleton = this;
@@ -2139,6 +2141,8 @@ GDScriptLanguage::GDScriptLanguage() {
21392141
}
21402142

21412143
GDScriptLanguage::~GDScriptLanguage() {
2144+
GDScriptTokenizer::terminate();
2145+
21422146
if (_call_stack) {
21432147
memdelete_arr(_call_stack);
21442148
}

modules/gdscript/gdscript_tokenizer.cpp

Lines changed: 96 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#include "core/print_string.h"
3636
#include "gdscript_functions.h"
3737

38+
OAHashMap<String, int> *GDScriptTokenizer::token_hashtable = nullptr;
39+
3840
const char *GDScriptTokenizer::token_names[TK_MAX] = {
3941
"Empty",
4042
"Identifier",
@@ -235,6 +237,96 @@ static const _kws _keyword_list[] = {
235237
{ GDScriptTokenizer::TK_ERROR, nullptr }
236238
};
237239

240+
// Prepare the hash table for parsing as a one off at startup.
241+
void GDScriptTokenizer::initialize() {
242+
token_hashtable = memnew((OAHashMap<String, int>));
243+
244+
token_hashtable->insert("null", 0);
245+
token_hashtable->insert("true", 1);
246+
token_hashtable->insert("false", 2);
247+
248+
// _type_list
249+
int id = TOKEN_HASH_TABLE_TYPE_START;
250+
int idx = 0;
251+
while (_type_list[idx].text) {
252+
token_hashtable->insert(_type_list[idx].text, id++);
253+
idx++;
254+
}
255+
256+
// built in funcs
257+
id = TOKEN_HASH_TABLE_BUILTIN_START;
258+
for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
259+
token_hashtable->insert(GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j)), id++);
260+
}
261+
262+
// keywords
263+
id = TOKEN_HASH_TABLE_KEYWORD_START;
264+
idx = 0;
265+
while (_keyword_list[idx].text) {
266+
token_hashtable->insert(_keyword_list[idx].text, id++);
267+
idx++;
268+
}
269+
}
270+
271+
void GDScriptTokenizer::terminate() {
272+
if (token_hashtable) {
273+
memdelete(token_hashtable);
274+
token_hashtable = nullptr;
275+
}
276+
}
277+
278+
// return whether found
279+
bool GDScriptTokenizerText::_parse_identifier(const String &p_str) {
280+
// N.B. GDScriptTokenizer::initialize() must have been called before using this function,
281+
// else token_hashtable will be NULL.
282+
const int *found = token_hashtable->lookup_ptr(p_str);
283+
284+
if (found) {
285+
int id = *found;
286+
if (id < TOKEN_HASH_TABLE_TYPE_START) {
287+
switch (id) {
288+
case 0: {
289+
_make_constant(Variant());
290+
} break;
291+
case 1: {
292+
_make_constant(true);
293+
} break;
294+
case 2: {
295+
_make_constant(false);
296+
} break;
297+
default: {
298+
DEV_ASSERT(0);
299+
} break;
300+
}
301+
return true;
302+
} else {
303+
// type list
304+
if (id < TOKEN_HASH_TABLE_BUILTIN_START) {
305+
int idx = id - TOKEN_HASH_TABLE_TYPE_START;
306+
_make_type(_type_list[idx].type);
307+
return true;
308+
}
309+
310+
// built in func
311+
if (id < TOKEN_HASH_TABLE_KEYWORD_START) {
312+
int idx = id - TOKEN_HASH_TABLE_BUILTIN_START;
313+
_make_built_in_func(GDScriptFunctions::Function(idx));
314+
return true;
315+
}
316+
317+
// keyword
318+
int idx = id - TOKEN_HASH_TABLE_KEYWORD_START;
319+
_make_token(_keyword_list[idx].token);
320+
return true;
321+
}
322+
323+
return true;
324+
}
325+
326+
// not found
327+
return false;
328+
}
329+
238330
const char *GDScriptTokenizer::get_token_name(Token p_token) {
239331
ERR_FAIL_INDEX_V(p_token, TK_MAX, "<error>");
240332
return token_names[p_token];
@@ -977,68 +1069,13 @@ void GDScriptTokenizerText::_advance() {
9771069
i++;
9781070
}
9791071

980-
bool identifier = false;
981-
982-
if (str == "null") {
983-
_make_constant(Variant());
984-
985-
} else if (str == "true") {
986-
_make_constant(true);
987-
988-
} else if (str == "false") {
989-
_make_constant(false);
990-
} else {
991-
bool found = false;
992-
993-
{
994-
int idx = 0;
995-
996-
while (_type_list[idx].text) {
997-
if (str == _type_list[idx].text) {
998-
_make_type(_type_list[idx].type);
999-
found = true;
1000-
break;
1001-
}
1002-
idx++;
1003-
}
1004-
}
1005-
1006-
if (!found) {
1007-
//built in func?
1008-
1009-
for (int j = 0; j < GDScriptFunctions::FUNC_MAX; j++) {
1010-
if (str == GDScriptFunctions::get_func_name(GDScriptFunctions::Function(j))) {
1011-
_make_built_in_func(GDScriptFunctions::Function(j));
1012-
found = true;
1013-
break;
1014-
}
1015-
}
1016-
}
1017-
1018-
if (!found) {
1019-
//keyword
1072+
// Detect preset keywords / functions using hashtable.
1073+
bool found = _parse_identifier(str);
10201074

1021-
int idx = 0;
1022-
found = false;
1023-
1024-
while (_keyword_list[idx].text) {
1025-
if (str == _keyword_list[idx].text) {
1026-
_make_token(_keyword_list[idx].token);
1027-
found = true;
1028-
break;
1029-
}
1030-
idx++;
1031-
}
1032-
}
1033-
1034-
if (!found) {
1035-
identifier = true;
1036-
}
1037-
}
1038-
1039-
if (identifier) {
1075+
if (!found) {
10401076
_make_identifier(str);
10411077
}
1078+
10421079
INCPOS(str.length());
10431080
return;
10441081
}

modules/gdscript/gdscript_tokenizer.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#ifndef GDSCRIPT_TOKENIZER_H
3232
#define GDSCRIPT_TOKENIZER_H
3333

34+
#include "core/oa_hash_map.h"
3435
#include "core/pair.h"
3536
#include "core/string_name.h"
3637
#include "core/ustring.h"
@@ -154,9 +155,20 @@ class GDScriptTokenizer {
154155

155156
static const char *token_names[TK_MAX];
156157

158+
// First id of each range of values stored in token_hashtable.
// Ids 0..2 (below TOKEN_HASH_TABLE_TYPE_START) are the "null", "true" and
// "false" constants. The type range is sized by Variant::VARIANT_MAX and the
// built-in function range by GDScriptFunctions::FUNC_MAX; keyword ids start
// after both.
enum {
159+
TOKEN_HASH_TABLE_TYPE_START = 3,
160+
TOKEN_HASH_TABLE_BUILTIN_START = TOKEN_HASH_TABLE_TYPE_START + Variant::VARIANT_MAX,
161+
TOKEN_HASH_TABLE_KEYWORD_START = TOKEN_HASH_TABLE_BUILTIN_START + GDScriptFunctions::FUNC_MAX,
162+
};
163+
164+
static OAHashMap<String, int> *token_hashtable;
165+
157166
public:
158167
static const char *get_token_name(Token p_token);
159168

169+
static void initialize();
170+
static void terminate();
171+
160172
bool is_token_literal(int p_offset = 0, bool variable_safe = false) const;
161173
StringName get_token_literal(int p_offset = 0) const;
162174

@@ -177,7 +189,7 @@ class GDScriptTokenizer {
177189
virtual bool is_ignoring_warnings() const = 0;
178190
#endif // DEBUG_ENABLED
179191

180-
virtual ~GDScriptTokenizer(){};
192+
virtual ~GDScriptTokenizer() {}
181193
};
182194

183195
class GDScriptTokenizerText : public GDScriptTokenizer {
@@ -230,6 +242,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer {
230242
#endif // DEBUG_ENABLED
231243

232244
void _advance();
245+
bool _parse_identifier(const String &p_str);
233246

234247
public:
235248
void set_code(const String &p_code);

0 commit comments

Comments
 (0)