@@ -20,6 +20,7 @@ use std::fmt;
2020use std:: str;
2121use std:: cmp:: { PartialEq , Ordering , PartialOrd , Ord } ;
2222use std:: hash:: { Hash , Hasher } ;
23+ use std:: num:: NonZeroU32 ;
2324
2425use hygiene:: SyntaxContext ;
2526use { Span , DUMMY_SP , GLOBALS } ;
@@ -143,9 +144,10 @@ impl Decodable for Ident {
143144 }
144145}
145146
146- /// A symbol is an interned or gensymed string.
147+ /// A symbol is an interned or gensymed string. It's a `NonZeroU32` so that
148+ /// `Option<Symbol>` only takes up 4 bytes.
147149#[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
148- pub struct Symbol ( u32 ) ;
150+ pub struct Symbol ( NonZeroU32 ) ;
149151
150152// The interner is pointed to by a thread local value which is only set on the main thread
151153// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -188,8 +190,9 @@ impl Symbol {
188190 } )
189191 }
190192
/// Returns the raw `u32` stored in this symbol. With the `NonZeroU32`
/// representation the returned value is never zero.
193+ #[ inline( always) ]
191194 pub fn as_u32 ( self ) -> u32 {
192- self . 0
195+ self . 0 . get ( )
193196 }
194197}
195198
@@ -228,12 +231,36 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
228231 }
229232}
230233
234+ /// Symbols (which are 1-indexed) index into this (which is 0-indexed
235+ /// internally). The methods handle the index conversions.
236+ #[ derive( Default ) ]
237+ pub struct SymbolVec ( Vec < & ' static str > ) ;
238+
239+ impl SymbolVec {
240+ #[ inline]
241+ fn new_symbol ( & mut self , s : & ' static str ) -> Symbol {
242+ self . 0 . push ( s) ;
243+ // self.0.len() cannot be zero because of the push above.
244+ Symbol ( unsafe { NonZeroU32 :: new_unchecked ( self . 0 . len ( ) as u32 ) } )
245+ }
246+
247+ #[ inline]
248+ fn get ( & self , sym : Symbol ) -> Option < & & ' static str > {
249+ self . 0 . get ( sym. 0 . get ( ) as usize - 1 )
250+ }
251+
252+ #[ inline]
253+ fn contains ( & self , sym : Symbol ) -> bool {
254+ sym. 0 . get ( ) as usize <= self . 0 . len ( )
255+ }
256+ }
257+
// String interner: hands out a `Symbol` for each distinct string and maps a
// `Symbol` back to its text.
231258// The `&'static str`s in this type actually point into the arena.
232259#[ derive( Default ) ]
233260pub struct Interner {
    // Backing storage that the `&'static str`s in the other fields point into.
234261 arena : DroplessArena ,
    // Maps each interned string to the symbol that was assigned to it.
235262 names : FxHashMap < & ' static str , Symbol > ,
    // The interned strings, looked up by symbol.
236- strings : Vec < & ' static str > ,
263+ strings : SymbolVec ,
    // For each gensym, the symbol it was created from; the entry for gensym
    // `g` lives at index `!0 - g.as_u32()`.
237264 gensyms : Vec < Symbol > ,
238265}
239266
@@ -243,9 +270,8 @@ impl Interner {
243270 for & string in init {
244271 if string == "" {
245272 // We can't allocate empty strings in the arena, so handle this here.
246- let name = Symbol ( this. strings . len ( ) as u32 ) ;
273+ let name = this. strings . new_symbol ( "" ) ;
247274 this. names . insert ( "" , name) ;
248- this. strings . push ( "" ) ;
249275 } else {
250276 this. intern ( string) ;
251277 }
@@ -258,8 +284,6 @@ impl Interner {
258284 return name;
259285 }
260286
261- let name = Symbol ( self . strings . len ( ) as u32 ) ;
262-
263287 // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264288 // UTF-8.
265289 let string: & str = unsafe {
@@ -270,16 +294,17 @@ impl Interner {
270294 let string: & ' static str = unsafe {
271295 & * ( string as * const str )
272296 } ;
273- self . strings . push ( string) ;
297+
298+ let name = self . strings . new_symbol ( string) ;
274299 self . names . insert ( string, name) ;
275300 name
276301 }
277302
/// Resolves `symbol` to an interned symbol.
///
/// Returns `symbol` unchanged when it refers to an entry in `strings`.
/// Otherwise it is treated as a gensym: the symbol it was created from is
/// looked up in `gensyms` and resolved recursively.
///
/// Panics if the computed `gensyms` index is out of bounds.
278303 pub fn interned ( & self , symbol : Symbol ) -> Symbol {
279- if ( symbol . 0 as usize ) < self . strings . len ( ) {
304+ if self . strings . contains ( symbol ) {
280305 symbol
281306 } else {
282- self . interned ( self . gensyms [ ( !0 - symbol. 0 ) as usize ] )
307+ self . interned ( self . gensyms [ ( !0 - symbol. as_u32 ( ) ) as usize ] )
283308 }
284309 }
285310
@@ -290,17 +315,17 @@ impl Interner {
290315
/// Records `symbol` as the origin of a fresh gensym and returns that gensym.
///
/// Gensym values count down from `!0` (`u32::MAX`): the entry pushed at
/// index `n` of `gensyms` gets the value `!0 - n`, which is the inverse of
/// the `!0 - symbol.as_u32()` lookup used when resolving a gensym.
///
/// NOTE(review): `len() as u32` truncates silently, and nothing here checks
/// that gensym values stay disjoint from the interned range — confirm that
/// both are unreachable in practice.
291316 fn gensymed ( & mut self , symbol : Symbol ) -> Symbol {
292317 self . gensyms . push ( symbol) ;
293- Symbol ( !0 - self . gensyms . len ( ) as u32 + 1 )
318+ Symbol ( NonZeroU32 :: new ( !0 - self . gensyms . len ( ) as u32 + 1 ) . unwrap ( ) )
294319 }
295320
/// Returns `true` if `symbol` does not refer to an entry in `strings`,
/// i.e. it is treated as a gensym.
///
/// NOTE(review): the body only reads `self`; `&self` looks sufficient here —
/// confirm against callers before changing the receiver.
296321 fn is_gensymed ( & mut self , symbol : Symbol ) -> bool {
297- symbol . 0 as usize >= self . strings . len ( )
322+ ! self . strings . contains ( symbol )
298323 }
299324
/// Returns the string that `symbol` stands for.
///
/// If `symbol` refers to an entry in `strings`, that string is returned
/// directly. Otherwise `symbol` is treated as a gensym and the string of the
/// symbol it was created from is returned (resolved recursively).
///
/// Panics if the computed `gensyms` index is out of bounds.
300325 pub fn get ( & self , symbol : Symbol ) -> & str {
301- match self . strings . get ( symbol. 0 as usize ) {
326+ match self . strings . get ( symbol) {
302327 Some ( string) => string,
303- None => self . get ( self . gensyms [ ( !0 - symbol. 0 ) as usize ] ) ,
328+ None => self . get ( self . gensyms [ ( !0 - symbol. as_u32 ( ) ) as usize ] ) ,
304329 }
305330 }
306331}
@@ -313,6 +338,8 @@ macro_rules! declare_keywords {(
313338) => {
314339 pub mod keywords {
315340 use super :: { Symbol , Ident } ;
341+ use std:: num:: NonZeroU32 ;
342+
316343 #[ derive( Clone , Copy , PartialEq , Eq ) ]
317344 pub struct Keyword {
318345 ident: Ident ,
@@ -321,10 +348,17 @@ macro_rules! declare_keywords {(
321348 #[ inline] pub fn ident( self ) -> Ident { self . ident }
322349 #[ inline] pub fn name( self ) -> Symbol { self . ident. name }
323350 }
351+ // We must use `NonZeroU32::new_unchecked` below because it's `const`
352+ // and `NonZeroU32::new` is not. So we static_assert the non-zeroness
353+ // here.
354+ mod asserts {
355+ $( static_assert!( $konst: $index > 0u32 ) ; ) *
356+ }
324357 $(
325358 #[ allow( non_upper_case_globals) ]
326359 pub const $konst: Keyword = Keyword {
327- ident: Ident :: with_empty_ctxt( super :: Symbol ( $index) )
360+ ident: Ident :: with_empty_ctxt(
361+ super :: Symbol ( unsafe { NonZeroU32 :: new_unchecked( $index) } ) )
328362 } ;
329363 ) *
330364
@@ -355,79 +389,80 @@ macro_rules! declare_keywords {(
355389declare_keywords ! {
356390 // Special reserved identifiers used internally for elided lifetimes,
357391 // unnamed method parameters, crate root module, error recovery etc.
358- ( 0 , Invalid , "" )
359- ( 1 , PathRoot , "{{root}}" )
360- ( 2 , DollarCrate , "$crate" )
361- ( 3 , Underscore , "_" )
392+ // (0 cannot be used because Symbol uses NonZeroU32)
393+ ( 1 , Invalid , "" )
394+ ( 2 , PathRoot , "{{root}}" )
395+ ( 3 , DollarCrate , "$crate" )
396+ ( 4 , Underscore , "_" )
362397
363398 // Keywords that are used in stable Rust.
364- ( 4 , As , "as" )
365- ( 5 , Box , "box" )
366- ( 6 , Break , "break" )
367- ( 7 , Const , "const" )
368- ( 8 , Continue , "continue" )
369- ( 9 , Crate , "crate" )
370- ( 10 , Else , "else" )
371- ( 11 , Enum , "enum" )
372- ( 12 , Extern , "extern" )
373- ( 13 , False , "false" )
374- ( 14 , Fn , "fn" )
375- ( 15 , For , "for" )
376- ( 16 , If , "if" )
377- ( 17 , Impl , "impl" )
378- ( 18 , In , "in" )
379- ( 19 , Let , "let" )
380- ( 20 , Loop , "loop" )
381- ( 21 , Match , "match" )
382- ( 22 , Mod , "mod" )
383- ( 23 , Move , "move" )
384- ( 24 , Mut , "mut" )
385- ( 25 , Pub , "pub" )
386- ( 26 , Ref , "ref" )
387- ( 27 , Return , "return" )
388- ( 28 , SelfLower , "self" )
389- ( 29 , SelfUpper , "Self" )
390- ( 30 , Static , "static" )
391- ( 31 , Struct , "struct" )
392- ( 32 , Super , "super" )
393- ( 33 , Trait , "trait" )
394- ( 34 , True , "true" )
395- ( 35 , Type , "type" )
396- ( 36 , Unsafe , "unsafe" )
397- ( 37 , Use , "use" )
398- ( 38 , Where , "where" )
399- ( 39 , While , "while" )
399+ ( 5 , As , "as" )
400+ ( 6 , Box , "box" )
401+ ( 7 , Break , "break" )
402+ ( 8 , Const , "const" )
403+ ( 9 , Continue , "continue" )
404+ ( 10 , Crate , "crate" )
405+ ( 11 , Else , "else" )
406+ ( 12 , Enum , "enum" )
407+ ( 13 , Extern , "extern" )
408+ ( 14 , False , "false" )
409+ ( 15 , Fn , "fn" )
410+ ( 16 , For , "for" )
411+ ( 17 , If , "if" )
412+ ( 18 , Impl , "impl" )
413+ ( 19 , In , "in" )
414+ ( 20 , Let , "let" )
415+ ( 21 , Loop , "loop" )
416+ ( 22 , Match , "match" )
417+ ( 23 , Mod , "mod" )
418+ ( 24 , Move , "move" )
419+ ( 25 , Mut , "mut" )
420+ ( 26 , Pub , "pub" )
421+ ( 27 , Ref , "ref" )
422+ ( 28 , Return , "return" )
423+ ( 29 , SelfLower , "self" )
424+ ( 30 , SelfUpper , "Self" )
425+ ( 31 , Static , "static" )
426+ ( 32 , Struct , "struct" )
427+ ( 33 , Super , "super" )
428+ ( 34 , Trait , "trait" )
429+ ( 35 , True , "true" )
430+ ( 36 , Type , "type" )
431+ ( 37 , Unsafe , "unsafe" )
432+ ( 38 , Use , "use" )
433+ ( 39 , Where , "where" )
434+ ( 40 , While , "while" )
400435
401436 // Keywords that are used in unstable Rust or reserved for future use.
402- ( 40 , Abstract , "abstract" )
403- ( 41 , Become , "become" )
404- ( 42 , Do , "do" )
405- ( 43 , Final , "final" )
406- ( 44 , Macro , "macro" )
407- ( 45 , Override , "override" )
408- ( 46 , Priv , "priv" )
409- ( 47 , Typeof , "typeof" )
410- ( 48 , Unsized , "unsized" )
411- ( 49 , Virtual , "virtual" )
412- ( 50 , Yield , "yield" )
437+ ( 41 , Abstract , "abstract" )
438+ ( 42 , Become , "become" )
439+ ( 43 , Do , "do" )
440+ ( 44 , Final , "final" )
441+ ( 45 , Macro , "macro" )
442+ ( 46 , Override , "override" )
443+ ( 47 , Priv , "priv" )
444+ ( 48 , Typeof , "typeof" )
445+ ( 49 , Unsized , "unsized" )
446+ ( 50 , Virtual , "virtual" )
447+ ( 51 , Yield , "yield" )
413448
414449 // Edition-specific keywords that are used in stable Rust.
415- ( 51 , Dyn , "dyn" ) // >= 2018 Edition only
450+ ( 52 , Dyn , "dyn" ) // >= 2018 Edition only
416451
417452 // Edition-specific keywords that are used in unstable Rust or reserved for future use.
418- ( 52 , Async , "async" ) // >= 2018 Edition only
419- ( 53 , Try , "try" ) // >= 2018 Edition only
453+ ( 53 , Async , "async" ) // >= 2018 Edition only
454+ ( 54 , Try , "try" ) // >= 2018 Edition only
420455
421456 // Special lifetime names
422- ( 54 , UnderscoreLifetime , "'_" )
423- ( 55 , StaticLifetime , "'static" )
457+ ( 55 , UnderscoreLifetime , "'_" )
458+ ( 56 , StaticLifetime , "'static" )
424459
425460 // Weak keywords, have special meaning only in specific contexts.
426- ( 56 , Auto , "auto" )
427- ( 57 , Catch , "catch" )
428- ( 58 , Default , "default" )
429- ( 59 , Existential , "existential" )
430- ( 60 , Union , "union" )
461+ ( 57 , Auto , "auto" )
462+ ( 58 , Catch , "catch" )
463+ ( 59 , Default , "default" )
464+ ( 60 , Existential , "existential" )
465+ ( 61 , Union , "union" )
431466}
432467
433468impl Symbol {
@@ -708,20 +743,22 @@ mod tests {
// Checks the numbering scheme of a fresh `Interner`: interned strings are
// numbered upwards from 1 in insertion order, and gensyms are numbered
// downwards from `u32::MAX` (4294967295).
708743 #[ test]
709744 fn interner_tests ( ) {
710745 let mut i: Interner = Interner :: default ( ) ;
711- // first one is zero:
712- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
746+ let nz = |n| NonZeroU32 :: new ( n) . unwrap ( ) ;
747+
748+ // first one is 1:
749+ assert_eq ! ( i. intern( "dog" ) , Symbol ( nz( 1 ) ) ) ;
713750 // re-use gets the same entry:
714- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
715- // different string gets a different # :
716- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
717- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
718- // dog is still at zero
719- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
720- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967295 ) ) ;
721- // gensym of same string gets new number :
722- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967294 ) ) ;
751+ assert_eq ! ( i. intern( "dog" ) , Symbol ( nz ( 1 ) ) ) ;
752+ // different string gets a different number :
753+ assert_eq ! ( i. intern( "cat" ) , Symbol ( nz ( 2 ) ) ) ;
754+ assert_eq ! ( i. intern( "cat" ) , Symbol ( nz ( 2 ) ) ) ;
755+ // dog is still at 1
756+ assert_eq ! ( i. intern( "dog" ) , Symbol ( nz ( 1 ) ) ) ;
// the first gensym takes the largest `u32` value:
757+ assert_eq ! ( i. gensym( "zebra" ) , Symbol ( nz ( 4294967295 ) ) ) ;
758+ // gensym of same string gets new number:
759+ assert_eq ! ( i. gensym( "zebra" ) , Symbol ( nz ( 4294967294 ) ) ) ;
723760 // gensym of *existing* string gets new number:
724- assert_eq ! ( i. gensym( "dog" ) , Symbol ( 4294967293 ) ) ;
761+ assert_eq ! ( i. gensym( "dog" ) , Symbol ( nz ( 4294967293 ) ) ) ;
725762 }
726763
727764 #[ test]
0 commit comments