Skip to content

Commit cda08d1

Browse files
authored
perf(es/parser): Introduce checkpoint to reduce clone (#11001)
**Description:** There should be no need to clone the entire lexer and parser when calling `try_parse_xxx` in TypeScript. This PR introduces checkpoints, which contain only the data necessary to recover from a failed trial. This is also a prerequisite for #11000, and it will let us remove the `Rc<RefCell<T>>` in the lexer later. I'm not sure the checkpoint data is complete enough for correctness; let's see the test results.
1 parent 831eed5 commit cda08d1

File tree

12 files changed

+131
-25
lines changed

12 files changed

+131
-25
lines changed

crates/swc_ecma_lexer/src/common/input.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,17 @@ use crate::{common::syntax::SyntaxFlags, error::Error, lexer};
77
/// Clone should be cheap if you are parsing typescript because typescript
88
/// syntax requires backtracking.
99
pub trait Tokens<TokenAndSpan>: Clone + Iterator<Item = TokenAndSpan> {
10+
type Checkpoint;
11+
1012
fn set_ctx(&mut self, ctx: Context);
1113
fn ctx(&self) -> Context;
1214
fn ctx_mut(&mut self) -> &mut Context;
1315
fn syntax(&self) -> SyntaxFlags;
1416
fn target(&self) -> EsVersion;
1517

18+
fn checkpoint_save(&self) -> Self::Checkpoint;
19+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint);
20+
1621
fn start_pos(&self) -> BytePos {
1722
BytePos(0)
1823
}

crates/swc_ecma_lexer/src/common/lexer/token.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ pub trait TokenFactory<'a, TokenAndSpan, I: Tokens<TokenAndSpan>>: Sized + Parti
1111
'a,
1212
I = I,
1313
Token = Self,
14-
Lexer = Self::Lexer,
1514
TokenAndSpan = TokenAndSpan,
1615
>;
1716

crates/swc_ecma_lexer/src/common/parser/buffer.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ pub trait NextTokenAndSpan {
1919

2020
pub trait Buffer<'a> {
2121
type Token: std::fmt::Debug + PartialEq + Clone + TokenFactory<'a, Self::TokenAndSpan, Self::I>;
22-
type Lexer: super::super::lexer::Lexer<'a, Self::TokenAndSpan>;
2322
type Next: NextTokenAndSpan<Token = Self::Token>;
2423
type TokenAndSpan: TokenAndSpanTrait<Token = Self::Token>;
2524
type I: Tokens<Self::TokenAndSpan>;

crates/swc_ecma_lexer/src/common/parser/mod.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,23 @@ pub trait Parser<'a>: Sized + Clone {
5252
type Token: std::fmt::Debug
5353
+ Clone
5454
+ TokenFactory<'a, Self::TokenAndSpan, Self::I, Buffer = Self::Buffer>;
55-
type Lexer: super::lexer::Lexer<'a, Self::TokenAndSpan>;
5655
type Next: NextTokenAndSpan<Token = Self::Token>;
5756
type TokenAndSpan: TokenAndSpan<Token = Self::Token>;
5857
type I: Tokens<Self::TokenAndSpan>;
5958
type Buffer: self::buffer::Buffer<
6059
'a,
61-
Lexer = Self::Lexer,
6260
Token = Self::Token,
6361
TokenAndSpan = Self::TokenAndSpan,
6462
I = Self::I,
6563
>;
64+
type Checkpoint;
6665

6766
fn input(&self) -> &Self::Buffer;
6867
fn input_mut(&mut self) -> &mut Self::Buffer;
6968
fn state(&self) -> &State;
7069
fn state_mut(&mut self) -> &mut State;
70+
fn checkpoint_save(&self) -> Self::Checkpoint;
71+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint);
7172

7273
#[inline(always)]
7374
fn with_state<'w>(&'w mut self, state: State) -> WithState<'a, 'w, Self> {

crates/swc_ecma_lexer/src/common/parser/typescript.rs

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -79,20 +79,22 @@ where
7979
if !p.input().syntax().typescript() {
8080
return Ok(false);
8181
}
82+
8283
let prev_ignore_error = p.input().get_ctx().contains(Context::IgnoreError);
83-
let mut cloned = p.clone();
84-
cloned.set_ctx(p.ctx() | Context::IgnoreError);
85-
let res = op(&mut cloned);
84+
let checkpoint = p.checkpoint_save();
85+
p.set_ctx(p.ctx() | Context::IgnoreError);
86+
let res = op(p);
8687
match res {
8788
Ok(Some(res)) if res => {
88-
*p = cloned;
8989
let mut ctx = p.ctx();
9090
ctx.set(Context::IgnoreError, prev_ignore_error);
9191
p.input_mut().set_ctx(ctx);
9292
Ok(res)
9393
}
94-
Err(..) => Ok(false),
95-
_ => Ok(false),
94+
_ => {
95+
p.checkpoint_load(checkpoint);
96+
Ok(false)
97+
}
9698
}
9799
}
98100

@@ -207,12 +209,11 @@ where
207209
trace_cur!(p, try_parse_ts);
208210

209211
let prev_ignore_error = p.input().get_ctx().contains(Context::IgnoreError);
210-
let mut cloned = p.clone();
211-
cloned.set_ctx(p.ctx() | Context::IgnoreError);
212-
let res = op(&mut cloned);
212+
let checkpoint = p.checkpoint_save();
213+
p.set_ctx(p.ctx() | Context::IgnoreError);
214+
let res = op(p);
213215
match res {
214216
Ok(Some(res)) => {
215-
*p = cloned;
216217
trace_cur!(p, try_parse_ts__success_value);
217218
let mut ctx = p.ctx();
218219
ctx.set(Context::IgnoreError, prev_ignore_error);
@@ -221,10 +222,12 @@ where
221222
}
222223
Ok(None) => {
223224
trace_cur!(p, try_parse_ts__success_no_value);
225+
p.checkpoint_load(checkpoint);
224226
None
225227
}
226228
Err(..) => {
227229
trace_cur!(p, try_parse_ts__fail);
230+
p.checkpoint_load(checkpoint);
228231
None
229232
}
230233
}
@@ -450,9 +453,11 @@ where
450453
F: FnOnce(&mut P) -> T,
451454
{
452455
debug_assert!(p.input().syntax().typescript());
453-
let mut cloned = p.clone();
454-
cloned.set_ctx(p.ctx() | Context::IgnoreError);
455-
op(&mut cloned)
456+
let checkpoint = p.checkpoint_save();
457+
p.set_ctx(p.ctx() | Context::IgnoreError);
458+
let ret = op(p);
459+
p.checkpoint_load(checkpoint);
460+
ret
456461
}
457462

458463
/// `tsParseTypeArguments`

crates/swc_ecma_lexer/src/input.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,16 @@ impl Iterator for TokensInput {
5555
}
5656

5757
impl Tokens<TokenAndSpan> for TokensInput {
58+
type Checkpoint = Self;
59+
60+
fn checkpoint_save(&self) -> Self::Checkpoint {
61+
self.clone()
62+
}
63+
64+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
65+
*self = checkpoint;
66+
}
67+
5868
fn set_ctx(&mut self, ctx: Context) {
5969
if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
6070
let mut module_errors = self.module_errors.borrow_mut();
@@ -213,6 +223,16 @@ impl<I: Tokens<TokenAndSpan>> Iterator for Capturing<I> {
213223
}
214224

215225
impl<I: Tokens<TokenAndSpan>> Tokens<TokenAndSpan> for Capturing<I> {
226+
type Checkpoint = I::Checkpoint;
227+
228+
fn checkpoint_save(&self) -> Self::Checkpoint {
229+
self.inner.checkpoint_save()
230+
}
231+
232+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
233+
self.inner.checkpoint_load(checkpoint);
234+
}
235+
216236
#[inline(always)]
217237
fn set_ctx(&mut self, ctx: Context) {
218238
self.inner.set_ctx(ctx)
@@ -328,7 +348,6 @@ impl<I: Tokens<TokenAndSpan>> Buffer<I> {
328348

329349
impl<'a, I: Tokens<TokenAndSpan>> crate::common::parser::buffer::Buffer<'a> for Buffer<I> {
330350
type I = I;
331-
type Lexer = super::lexer::Lexer<'a>;
332351
type Next = TokenAndSpan;
333352
type Token = Token;
334353
type TokenAndSpan = TokenAndSpan;

crates/swc_ecma_lexer/src/lexer/state.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,8 @@ impl crate::common::lexer::state::TokenType for TokenType {
641641
}
642642

643643
impl Tokens<TokenAndSpan> for Lexer<'_> {
644+
type Checkpoint = Self;
645+
644646
#[inline]
645647
fn set_ctx(&mut self, ctx: Context) {
646648
if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {
@@ -660,6 +662,14 @@ impl Tokens<TokenAndSpan> for Lexer<'_> {
660662
&mut self.ctx
661663
}
662664

665+
fn checkpoint_save(&self) -> Self::Checkpoint {
666+
self.clone()
667+
}
668+
669+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
670+
*self = checkpoint;
671+
}
672+
663673
#[inline]
664674
fn syntax(&self) -> SyntaxFlags {
665675
self.syntax

crates/swc_ecma_lexer/src/parser/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ pub struct Parser<I: Tokens<TokenAndSpan>> {
3838

3939
impl<'a, I: Tokens<TokenAndSpan>> crate::common::parser::Parser<'a> for Parser<I> {
4040
type Buffer = Buffer<I>;
41+
type Checkpoint = Self;
4142
type I = I;
42-
type Lexer = crate::lexer::Lexer<'a>;
4343
type Next = TokenAndSpan;
4444
type Token = Token;
4545
type TokenAndSpan = TokenAndSpan;
@@ -64,6 +64,14 @@ impl<'a, I: Tokens<TokenAndSpan>> crate::common::parser::Parser<'a> for Parser<I
6464
&mut self.state
6565
}
6666

67+
fn checkpoint_save(&self) -> Self::Checkpoint {
68+
self.clone()
69+
}
70+
71+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
72+
*self = checkpoint;
73+
}
74+
6775
#[inline(always)]
6876
fn mark_found_module_item(&mut self) {
6977
self.found_module_item = true;

crates/swc_ecma_parser/src/lexer/capturing.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,16 @@ impl<I: Iterator<Item = TokenAndSpan>> Iterator for Capturing<I> {
6868
impl<I: swc_ecma_lexer::common::input::Tokens<TokenAndSpan>>
6969
swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Capturing<I>
7070
{
71+
type Checkpoint = I::Checkpoint;
72+
73+
fn checkpoint_save(&self) -> Self::Checkpoint {
74+
self.inner.checkpoint_save()
75+
}
76+
77+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
78+
self.inner.checkpoint_load(checkpoint);
79+
}
80+
7181
fn set_ctx(&mut self, ctx: swc_ecma_lexer::common::context::Context) {
7282
self.inner.set_ctx(ctx);
7383
}

crates/swc_ecma_parser/src/lexer/state.rs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
use std::mem::take;
22

3-
use swc_common::BytePos;
3+
use swc_common::{comments::Comments, BytePos};
44
use swc_ecma_ast::EsVersion;
55
use swc_ecma_lexer::{
66
common::{
77
lexer::{
88
char::CharExt,
9-
comments_buffer::{BufferedComment, BufferedCommentKind},
9+
comments_buffer::{BufferedComment, BufferedCommentKind, CommentsBuffer},
1010
state::State as StateTrait,
1111
LexResult,
1212
},
@@ -42,7 +42,35 @@ pub struct State {
4242
token_type: Option<Token>,
4343
}
4444

45-
impl swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'_> {
45+
pub struct LexerCheckpoint<'a> {
46+
comments: Option<&'a dyn Comments>,
47+
comments_buffer: Option<CommentsBuffer>,
48+
state: State,
49+
ctx: Context,
50+
input_last_pos: BytePos,
51+
}
52+
53+
impl<'a> swc_ecma_lexer::common::input::Tokens<TokenAndSpan> for Lexer<'a> {
54+
type Checkpoint = LexerCheckpoint<'a>;
55+
56+
fn checkpoint_save(&self) -> Self::Checkpoint {
57+
Self::Checkpoint {
58+
comments: self.comments,
59+
comments_buffer: self.comments_buffer.clone(),
60+
state: self.state.clone(),
61+
ctx: self.ctx,
62+
input_last_pos: self.input.last_pos(),
63+
}
64+
}
65+
66+
fn checkpoint_load(&mut self, checkpoint: Self::Checkpoint) {
67+
self.comments = checkpoint.comments;
68+
self.comments_buffer = checkpoint.comments_buffer;
69+
self.state = checkpoint.state;
70+
self.ctx = checkpoint.ctx;
71+
unsafe { self.input.reset_to(checkpoint.input_last_pos) };
72+
}
73+
4674
#[inline]
4775
fn set_ctx(&mut self, ctx: Context) {
4876
if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() {

0 commit comments

Comments
 (0)