1- use crate :: mem :: ManuallyDrop ;
1+ use crate :: cell :: UnsafeCell ;
22use crate :: ptr;
3- use crate :: sync:: atomic:: AtomicPtr ;
4- use crate :: sync:: atomic:: Ordering :: SeqCst ;
3+ use crate :: sync:: atomic:: {
4+ AtomicPtr , AtomicU32 ,
5+ Ordering :: { AcqRel , Acquire , Relaxed , Release } ,
6+ } ;
57use crate :: sys:: c;
68
7- pub type Key = c:: DWORD ;
8- pub type Dtor = unsafe extern "C" fn ( * mut u8 ) ;
9+ #[ cfg( test) ]
10+ mod tests;
11+
12+ type Key = c:: DWORD ;
13+ type Dtor = unsafe extern "C" fn ( * mut u8 ) ;
914
1015// Turns out, like pretty much everything, Windows is pretty close to the
1116// functionality that Unix provides, but slightly different! In the case of
@@ -22,60 +27,109 @@ pub type Dtor = unsafe extern "C" fn(*mut u8);
2227// To accomplish this feat, we perform a number of tricks, all contained
2328// within this module:
2429//
25- // * All TLS destructors are tracked by *us*, not the windows runtime. This
30+ // * All TLS destructors are tracked by *us*, not the Windows runtime. This
2631// means that we have a global list of destructors for each TLS key that
2732// we know about.
2833// * When a thread exits, we run over the entire list and run dtors for all
2934// non-null keys. This attempts to match Unix semantics in this regard.
3035//
31- // This ends up having the overhead of using a global list, having some
32- // locks here and there, and in general just adding some more code bloat. We
33- // attempt to optimize runtime by forgetting keys that don't have
34- // destructors, but this only gets us so far.
35- //
3636// For more details and nitty-gritty, see the code sections below!
3737//
3838// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
39- // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
40- // /threading/thread_local_storage_win.cc#L42
39+ // [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
4140
42- // -------------------------------------------------------------------------
43- // Native bindings
44- //
45- // This section is just raw bindings to the native functions that Windows
46- // provides, There's a few extra calls to deal with destructors.
41+ pub struct StaticKey {
42+ /// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
43+ /// is not a valid key value, this allows us to use zero as sentinel value
44+ /// without risking overflow.
45+ key : AtomicU32 ,
46+ dtor : Option < Dtor > ,
47+ next : AtomicPtr < StaticKey > ,
48+ /// Currently, destructors cannot be unregistered, so we cannot use racy
49+ /// initialization for keys. Instead, we need to synchronize initialization.
50+ /// Use the Windows-provided `Once` since it does not require TLS.
51+ once : UnsafeCell < c:: INIT_ONCE > ,
52+ }
4753
48- #[ inline]
49- pub unsafe fn create ( dtor : Option < Dtor > ) -> Key {
50- let key = c:: TlsAlloc ( ) ;
51- assert ! ( key != c:: TLS_OUT_OF_INDEXES ) ;
52- if let Some ( f) = dtor {
53- register_dtor ( key, f) ;
54+ impl StaticKey {
55+ #[ inline]
56+ pub const fn new ( dtor : Option < Dtor > ) -> StaticKey {
57+ StaticKey {
58+ key : AtomicU32 :: new ( 0 ) ,
59+ dtor,
60+ next : AtomicPtr :: new ( ptr:: null_mut ( ) ) ,
61+ once : UnsafeCell :: new ( c:: INIT_ONCE_STATIC_INIT ) ,
62+ }
5463 }
55- key
56- }
5764
58- #[ inline]
59- pub unsafe fn set ( key : Key , value : * mut u8 ) {
60- let r = c:: TlsSetValue ( key, value as c :: LPVOID ) ;
61- debug_assert ! ( r != 0 ) ;
62- }
65+ #[ inline]
66+ pub unsafe fn set ( & ' static self , val : * mut u8 ) {
67+ let r = c:: TlsSetValue ( self . key ( ) , val . cast ( ) ) ;
68+ debug_assert_eq ! ( r, c :: TRUE ) ;
69+ }
6370
64- #[ inline]
65- pub unsafe fn get ( key : Key ) -> * mut u8 {
66- c:: TlsGetValue ( key) as * mut u8
67- }
71+ #[ inline]
72+ pub unsafe fn get ( & ' static self ) -> * mut u8 {
73+ c:: TlsGetValue ( self . key ( ) ) . cast ( )
74+ }
6875
69- #[ inline]
70- pub unsafe fn destroy ( _key : Key ) {
71- rtabort ! ( "can't destroy tls keys on windows" )
72- }
76+ #[ inline]
77+ unsafe fn key ( & ' static self ) -> Key {
78+ match self . key . load ( Acquire ) {
79+ 0 => self . init ( ) ,
80+ key => key - 1 ,
81+ }
82+ }
83+
84+ #[ cold]
85+ unsafe fn init ( & ' static self ) -> Key {
86+ if self . dtor . is_some ( ) {
87+ let mut pending = c:: FALSE ;
88+ let r = c:: InitOnceBeginInitialize ( self . once . get ( ) , 0 , & mut pending, ptr:: null_mut ( ) ) ;
89+ assert_eq ! ( r, c:: TRUE ) ;
7390
74- #[ inline]
75- pub fn requires_synchronized_create ( ) -> bool {
76- true
91+ if pending == c:: FALSE {
92+ // Some other thread initialized the key, load it.
93+ self . key . load ( Relaxed ) - 1
94+ } else {
95+ let key = c:: TlsAlloc ( ) ;
96+ if key == c:: TLS_OUT_OF_INDEXES {
97+ // Wake up the waiting threads before panicking to avoid deadlock.
98+ c:: InitOnceComplete ( self . once . get ( ) , c:: INIT_ONCE_INIT_FAILED , ptr:: null_mut ( ) ) ;
99+ panic ! ( "out of TLS indexes" ) ;
100+ }
101+
102+ self . key . store ( key + 1 , Release ) ;
103+ register_dtor ( self ) ;
104+
105+ let r = c:: InitOnceComplete ( self . once . get ( ) , 0 , ptr:: null_mut ( ) ) ;
106+ debug_assert_eq ! ( r, c:: TRUE ) ;
107+
108+ key
109+ }
110+ } else {
111+ // If there is no destructor to clean up, we can use racy initialization.
112+
113+ let key = c:: TlsAlloc ( ) ;
114+ assert_ne ! ( key, c:: TLS_OUT_OF_INDEXES , "out of TLS indexes" ) ;
115+
116+ match self . key . compare_exchange ( 0 , key + 1 , AcqRel , Acquire ) {
117+ Ok ( _) => key,
118+ Err ( new) => {
119+ // Some other thread completed initialization first, so destroy
120+ // our key and use theirs.
121+ let r = c:: TlsFree ( key) ;
122+ debug_assert_eq ! ( r, c:: TRUE ) ;
123+ new - 1
124+ }
125+ }
126+ }
127+ }
77128}
78129
130+ unsafe impl Send for StaticKey { }
131+ unsafe impl Sync for StaticKey { }
132+
79133// -------------------------------------------------------------------------
80134// Dtor registration
81135//
@@ -96,29 +150,21 @@ pub fn requires_synchronized_create() -> bool {
96150// Typically processes have a statically known set of TLS keys which is pretty
97151// small, and we'd want to keep this memory alive for the whole process anyway
98152// really.
99- //
100- // Perhaps one day we can fold the `Box` here into a static allocation,
101- // expanding the `StaticKey` structure to contain not only a slot for the TLS
102- // key but also a slot for the destructor queue on windows. An optimization for
103- // another day!
104-
105- static DTORS : AtomicPtr < Node > = AtomicPtr :: new ( ptr:: null_mut ( ) ) ;
106-
107- struct Node {
108- dtor : Dtor ,
109- key : Key ,
110- next : * mut Node ,
111- }
112153
113- unsafe fn register_dtor ( key : Key , dtor : Dtor ) {
114- let mut node = ManuallyDrop :: new ( Box :: new ( Node { key, dtor, next : ptr:: null_mut ( ) } ) ) ;
154+ static DTORS : AtomicPtr < StaticKey > = AtomicPtr :: new ( ptr:: null_mut ( ) ) ;
115155
116- let mut head = DTORS . load ( SeqCst ) ;
156+ /// Should only be called once per key, otherwise loops or breaks may occur in
157+ /// the linked list.
158+ unsafe fn register_dtor ( key : & ' static StaticKey ) {
159+ let this = <* const StaticKey >:: cast_mut ( key) ;
160+ // Use acquire ordering to pass along the changes done by the previously
161+ // registered keys when we store the new head with release ordering.
162+ let mut head = DTORS . load ( Acquire ) ;
117163 loop {
118- node . next = head;
119- match DTORS . compare_exchange ( head, & mut * * node , SeqCst , SeqCst ) {
120- Ok ( _) => return , // nothing to drop, we successfully added the node to the list
121- Err ( cur ) => head = cur ,
164+ key . next . store ( head, Relaxed ) ;
165+ match DTORS . compare_exchange_weak ( head, this , Release , Acquire ) {
166+ Ok ( _) => break ,
167+ Err ( new ) => head = new ,
122168 }
123169 }
124170}
@@ -214,25 +260,29 @@ unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv:
214260 unsafe fn reference_tls_used ( ) { }
215261}
216262
217- #[ allow( dead_code) ] // actually called above
263+ #[ allow( dead_code) ] // actually called below
218264unsafe fn run_dtors ( ) {
219- let mut any_run = true ;
220265 for _ in 0 ..5 {
221- if !any_run {
222- break ;
223- }
224- any_run = false ;
225- let mut cur = DTORS . load ( SeqCst ) ;
266+ let mut any_run = false ;
267+
268+ // Use acquire ordering to observe key initialization.
269+ let mut cur = DTORS . load ( Acquire ) ;
226270 while !cur. is_null ( ) {
227- let ptr = c:: TlsGetValue ( ( * cur) . key ) ;
271+ let key = ( * cur) . key . load ( Relaxed ) - 1 ;
272+ let dtor = ( * cur) . dtor . unwrap ( ) ;
228273
274+ let ptr = c:: TlsGetValue ( key) ;
229275 if !ptr. is_null ( ) {
230- c:: TlsSetValue ( ( * cur ) . key , ptr:: null_mut ( ) ) ;
231- ( ( * cur ) . dtor ) ( ptr as * mut _ ) ;
276+ c:: TlsSetValue ( key, ptr:: null_mut ( ) ) ;
277+ dtor ( ptr as * mut _ ) ;
232278 any_run = true ;
233279 }
234280
235- cur = ( * cur) . next ;
281+ cur = ( * cur) . next . load ( Relaxed ) ;
282+ }
283+
284+ if !any_run {
285+ break ;
236286 }
237287 }
238288}
0 commit comments