1
1
use std:: collections:: {
2
2
BTreeMap as Map ,
3
3
BTreeSet as Set ,
4
+ VecDeque ,
4
5
} ;
6
+ use std:: fmt:: Debug ;
5
7
6
8
use regular_expression:: re:: RE ;
7
9
use finite_automata:: {
@@ -43,7 +45,7 @@ impl<'a, T> Iterator for TokenIds<'a, T> {
43
45
}
44
46
}
45
47
46
- pub fn lex < T : Clone + Ord > ( input : & str , productions : & Map < RE , Option < T > > ) -> Result < Vec < T > > {
48
+ pub fn lex < T : Clone + Debug + Ord > ( input : & str , productions : & Map < RE , Option < T > > ) -> Result < Vec < T > > {
47
49
let mut res = Vec :: new ( ) ;
48
50
for ( re, token) in productions {
49
51
res. push ( re. into_enfa ( & mut TokenIds :: new ( token) ) ) ;
@@ -60,9 +62,9 @@ pub fn lex<T: Clone + Ord>(input: &str, productions: &Map<RE, Option<T>>) -> Res
60
62
}
61
63
let dfa: DFA < Set < ( & Option < T > , u32 ) > , char > = DFA :: from ( alt) ;
62
64
let mut tokens = Vec :: new ( ) ;
63
- let mut characters: Vec < char > = input. chars ( ) . collect ( ) ;
65
+ let mut characters: VecDeque < char > = input. chars ( ) . collect ( ) ;
64
66
let mut source_index = dfa. initial_index ( ) ;
65
- while let Some ( character) = characters. pop ( ) {
67
+ while let Some ( character) = characters. pop_front ( ) {
66
68
if let Some ( transition_index) = dfa. contains ( & ( source_index, & character) ) {
67
69
let ( _, _, target_index) = dfa. at ( transition_index) ;
68
70
source_index = target_index;
@@ -72,16 +74,16 @@ pub fn lex<T: Clone + Ord>(input: &str, productions: &Map<RE, Option<T>>) -> Res
72
74
let token = tokens_iter. next ( ) . unwrap ( ) ;
73
75
while let Some ( current_token) = tokens_iter. next ( ) {
74
76
if current_token != token {
75
- return Err ( Error :: from ( ErrorKind :: InconsistentTokensInFinalState ) ) ;
77
+ return Err ( Error :: new ( ErrorKind :: InconsistentTokensInFinalState , format ! ( "{:?} != {:?}" , current_token , token ) ) ) ;
76
78
}
77
79
}
78
80
if let Some ( token) = token {
79
81
tokens. push ( token. clone ( ) ) ;
80
82
}
81
83
source_index = dfa. initial_index ( ) ;
82
- characters. push ( character) ;
84
+ characters. push_front ( character) ;
83
85
} else {
84
- return Err ( Error :: from ( ErrorKind :: FailedToReachFinalState ) ) ;
86
+ return Err ( Error :: new ( ErrorKind :: FailedToReachFinalState , format ! ( "{:?}" , dfa . at ( source_index ) ) ) ) ;
85
87
}
86
88
}
87
89
}
@@ -90,14 +92,14 @@ pub fn lex<T: Clone + Ord>(input: &str, productions: &Map<RE, Option<T>>) -> Res
90
92
let token = tokens_iter. next ( ) . unwrap ( ) ;
91
93
while let Some ( current_token) = tokens_iter. next ( ) {
92
94
if current_token != token {
93
- return Err ( Error :: from ( ErrorKind :: InconsistentTokensInFinalState ) ) ;
95
+ return Err ( Error :: new ( ErrorKind :: InconsistentTokensInFinalState , format ! ( "{:?} != {:?}" , current_token , token ) ) ) ;
94
96
}
95
97
}
96
98
if let Some ( token) = token {
97
99
tokens. push ( token. clone ( ) ) ;
98
100
}
99
101
} else {
100
- return Err ( Error :: from ( ErrorKind :: FailedToReachFinalState ) ) ;
102
+ return Err ( Error :: new ( ErrorKind :: FailedToReachFinalState , format ! ( "{:?}" , dfa . at ( source_index ) ) ) ) ;
101
103
}
102
104
Ok ( tokens)
103
105
}
@@ -108,24 +110,63 @@ pub fn productions<T>(_input: &str) -> Result<Map<RE, Option<T>>> {
108
110
109
111
#[cfg(test)]
mod tests {
    use regular_expression::{sym, rep, cat};

    use crate::{Result, lex};

    /// Lexes single-character tokens with whitespace mapped to `None`
    /// (i.e. matched but dropped from the output stream).
    #[test]
    fn test_1() -> Result<()> {
        #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
        enum Token {
            ZERO,
            ONE,
        }
        let productions = map![
            sym!('0') => Some(Token::ZERO),
            sym!('1') => Some(Token::ONE),
            rep!(sym!(' ')) => None
        ];
        // Trailing space exercises ending the input on a dropped token.
        let tokens = lex("0 1 0 ", &productions)?;
        assert_eq!(vec![Token::ZERO, Token::ONE, Token::ZERO], tokens);
        Ok(())
    }

    /// Lexes repetition productions: each maximal run of a character
    /// yields one token, so "1111 1111" produces two ONE_REP tokens.
    #[test]
    fn test_2() -> Result<()> {
        #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
        #[allow(non_camel_case_types)]
        enum Token {
            ZERO_REP,
            ONE_REP,
        }
        let productions = map![
            rep!(sym!('0')) => Some(Token::ZERO_REP),
            rep!(sym!('1')) => Some(Token::ONE_REP),
            rep!(sym!(' ')) => None
        ];
        let tokens = lex("00000001111 1111", &productions)?;
        assert_eq!(vec![Token::ZERO_REP, Token::ONE_REP, Token::ONE_REP], tokens);
        Ok(())
    }

    /// Lexes with overlapping productions: maximal munch should prefer
    /// the two-character ZERO_ONE over ZERO, then fall back to ONE.
    #[test]
    fn test_3() -> Result<()> {
        #[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
        #[allow(non_camel_case_types)]
        enum Token {
            ZERO,
            ZERO_ONE,
            ONE_ONE,
            ONE,
        }
        let productions = map![
            sym!('0') => Some(Token::ZERO),
            cat![sym!('0'), sym!('1')] => Some(Token::ZERO_ONE),
            cat![sym!('1'), sym!('1')] => Some(Token::ONE_ONE),
            sym!('1') => Some(Token::ONE)
        ];
        let tokens = lex("011", &productions)?;
        assert_eq!(vec![Token::ZERO_ONE, Token::ONE], tokens);
        Ok(())
    }
}
0 commit comments