[Parser][NFC] Improve performance of idchar lexing (#6515)

tlively · web-flow · commit c60fe154ac09 · 2024-04-19T16:37:28.000-07:00
Parsing of idchars was hot enough to show up when profiling the parse of a very
large module. Optimize it by checking for the few disallowed characters instead
of the many allowed ones, which speeds up the overall parse by about 16% in a
very unscientific measurement.
diff --git a/src/parser/lexer.cpp b/src/parser/lexer.cpp
@@ -753,37 +753,25 @@ std::optional<LexResult> idchar(std::string_view in) {
     return {};
   }
   uint8_t c = ctx.peek();
-  if (('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') ||
-      ('a' <= c && c <= 'z')) {
-    ctx.take(1);
-  } else {
-    switch (c) {
-      case '!':
-      case '#':
-      case '$':
-      case '%':
-      case '&':
-      case '\'':
-      case '*':
-      case '+':
-      case '-':
-      case '.':
-      case '/':
-      case ':':
-      case '<':
-      case '=':
-      case '>':
-      case '?':
-      case '@':
-      case '\\':
-      case '^':
-      case '_':
-      case '`':
-      case '|':
-      case '~':
-        ctx.take(1);
-    }
+  // All the allowed characters lie in the range '!' to '~', and within that
+  // range the vast majority of characters are allowed, so it is significantly
+  // faster to check for the disallowed characters instead.
+  if (c < '!' || c > '~') {
+    return ctx.lexed();
+  }
+  switch (c) {
+    case '"':
+    case '(':
+    case ')':
+    case ',':
+    case ';':
+    case '[':
+    case ']':
+    case '{':
+    case '}':
+      return ctx.lexed();
   }
+  ctx.take(1);
   return ctx.lexed();
 }