@@ -122,12 +122,18 @@ lck_attr_t *pthread_lck_attr;
#define C_32_STK_ALIGN 16
#define C_64_STK_ALIGN 16
- #define C_64_REDZONE_LEN 128

// WORKQ use the largest alignment any platform needs
#define C_WORKQ_STK_ALIGN 16

+ #if defined(__arm64__)
+ /* Pull the pthread_t into the same page as the top of the stack so we dirty one less page.
+  * <rdar://problem/19941744> The _pthread struct at the top of the stack shouldn't be page-aligned
+  */
+ #define PTHREAD_T_OFFSET (12*1024)
+ #else
#define PTHREAD_T_OFFSET 0
+ #endif

/*
 * Flags field passed to bsdthread_create and back in pthread_start
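The PTHREAD_T_OFFSET change above is pure page math: on arm64 hardware with 16KB pages, a pthread_t whose address is page-aligned sits in a page that the stack frames below it never touch, so faulting it in dirties one extra page per thread. Pulling it 12KB down places it in the same page as the topmost stack bytes. A minimal userspace sketch of that arithmetic; the page size and address are illustrative assumptions, not values from this diff:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_16K    (16 * 1024)
#define PTHREAD_T_OFFSET (12 * 1024)

int main(void)
{
    /* Hypothetical, page-aligned top of a thread's stack region. */
    uint64_t stack_top = 0x170000000ULL;

    /* Page used by the topmost stack bytes, just below stack_top. */
    uint64_t top_stack_page = (stack_top - 16) / PAGE_SIZE_16K;
    /* Page used by a page-aligned pthread_t, versus one pulled 12KB down. */
    uint64_t aligned_struct_page = stack_top / PAGE_SIZE_16K;
    uint64_t offset_struct_page = (stack_top - PTHREAD_T_OFFSET) / PAGE_SIZE_16K;

    printf("top stack bytes:   page %llu\n", (unsigned long long)top_stack_page);
    printf("aligned pthread_t: page %llu (one extra dirty page)\n",
        (unsigned long long)aligned_struct_page);
    printf("offset pthread_t:  page %llu (shares the stack's page)\n",
        (unsigned long long)offset_struct_page);
    return 0;
}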
@@ -169,11 +175,13 @@ stack_addr_hint(proc_t p, vm_map_t vmap)
    mach_vm_offset_t stackaddr;
    mach_vm_offset_t aslr_offset;
    bool proc64bit = proc_is64bit(p);
+     bool proc64bit_data = proc_is64bit_data(p);

    // We can't safely take random values % something unless it's a power-of-two
    _Static_assert(powerof2(PTH_DEFAULT_STACKSIZE), "PTH_DEFAULT_STACKSIZE is a power-of-two");

#if defined(__i386__) || defined(__x86_64__)
+     (void)proc64bit_data;
    if (proc64bit) {
        // Matches vm_map_get_max_aslr_slide_pages's image shift in xnu
        aslr_offset = random() % (1 << 28); // about 512 stacks
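The power-of-two comment above is the standard modulo-bias argument: reducing a uniform random value modulo N is only uniform when N divides the size of the source's range, which for a power-of-two range means N must itself be a power of two. A tiny self-contained illustration, using a 4-bit source (values 0..15) as a stand-in for random():

#include <stdio.h>

int main(void)
{
    int counts_pow2[4] = {0};
    int counts_non[6] = {0};

    for (int r = 0; r < 16; r++) {
        counts_pow2[r % 4]++;   /* power-of-two modulus: perfectly uniform */
        counts_non[r % 6]++;    /* non-power-of-two: low residues over-represented */
    }

    for (int i = 0; i < 4; i++)
        printf("r %% 4 == %d: %d times\n", i, counts_pow2[i]);
    for (int i = 0; i < 6; i++)
        printf("r %% 6 == %d: %d times\n", i, counts_non[i]);
    return 0;
}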
@@ -211,7 +219,13 @@ stack_addr_hint(proc_t p, vm_map_t vmap)
        stackaddr = SHARED_REGION_BASE_ARM64 - 64 * PTH_DEFAULT_STACKSIZE - aslr_offset;
    } else {
        // If you try to slide down from this point, you risk ending up in memory consumed by malloc
-         stackaddr = SHARED_REGION_BASE_ARM - 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
+         if (proc64bit_data) {
+             stackaddr = SHARED_REGION_BASE_ARM64_32;
+         } else {
+             stackaddr = SHARED_REGION_BASE_ARM;
+         }
+
+         stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
    }
}
#else
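The 32-bit arm change above first selects the shared-region base by data ABI (arm64_32 versus classic arm), then subtracts the stack reservation and the ASLR slide in one step. A minimal sketch of the resulting hint computation; the base and stack-size constants here are made-up stand-ins, not the real values from xnu's shared-region headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the real <mach/shared_region.h> constants. */
#define SHARED_REGION_BASE_ARM64_32 0x1B000000ULL
#define SHARED_REGION_BASE_ARM      0x1A000000ULL
#define PTH_DEFAULT_STACKSIZE       (512 * 1024) /* assumed default stack size */

static uint64_t
stack_hint_32(bool proc64bit_data, uint64_t aslr_offset)
{
    uint64_t stackaddr = proc64bit_data ? SHARED_REGION_BASE_ARM64_32
                                        : SHARED_REGION_BASE_ARM;
    /* Leave room for 32 default-sized stacks, then slide down by the
     * ASLR offset so the hint stays below the chosen base. */
    stackaddr -= 32 * PTH_DEFAULT_STACKSIZE + aslr_offset;
    return stackaddr;
}

int main(void)
{
    printf("arm64_32 hint: 0x%llx\n",
        (unsigned long long)stack_hint_32(true, 0x4000));
    printf("arm hint:      0x%llx\n",
        (unsigned long long)stack_hint_32(false, 0x4000));
    return 0;
}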
@@ -308,7 +322,7 @@ _bsdthread_create(struct proc *p,
        .r8 = (uint64_t)user_stack, /* golang wants this */
        .r9 = (uint64_t)flags,

-         .rsp = (uint64_t)(user_stack - C_64_REDZONE_LEN)
+         .rsp = (uint64_t)user_stack,
    };

    (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
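The dropped C_64_REDZONE_LEN accounted for the System V x86-64 ABI's red zone, the 128 bytes below %rsp that leaf functions may use without adjusting the stack pointer; after this change the initial rsp is user_stack itself. A sketch of the two computations, with an illustrative stack address (assumed, as in the other sketches here):

#include <stdint.h>
#include <stdio.h>

#define C_64_REDZONE_LEN 128 /* SysV AMD64 red zone, as in the removed define */

int main(void)
{
    uint64_t user_stack = 0x00007f0000100000ULL; /* hypothetical, 16-byte aligned */

    uint64_t old_rsp = user_stack - C_64_REDZONE_LEN; /* before this change */
    uint64_t new_rsp = user_stack;                    /* after this change  */

    /* Both stay 16-byte aligned: the red zone length is a multiple of 16. */
    printf("old rsp 0x%llx (mod 16 = %llu)\n",
        (unsigned long long)old_rsp, (unsigned long long)(old_rsp % 16));
    printf("new rsp 0x%llx (mod 16 = %llu)\n",
        (unsigned long long)new_rsp, (unsigned long long)(new_rsp % 16));
    return 0;
}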
@@ -322,7 +336,41 @@ _bsdthread_create(struct proc *p,
        .edi = (uint32_t)user_stack, /* golang wants this */
        .esi = (uint32_t)flags,

-         .esp = (int)((vm_offset_t)(user_stack - C_32_STK_ALIGN))
+         .esp = (uint32_t)user_stack,
+     };
+
+     (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+     }
+ #elif defined(__arm__) || defined(__arm64__)
+     if (proc_is64bit_data(p)) {
+ #ifdef __arm64__
+         arm_thread_state64_t state = {
+             .pc = (uint64_t)pthread_kern->proc_get_threadstart(p),
+             .x[0] = (uint64_t)user_pthread,
+             .x[1] = (uint64_t)th_thport,
+             .x[2] = (uint64_t)user_func,    /* golang wants this */
+             .x[3] = (uint64_t)user_funcarg, /* golang wants this */
+             .x[4] = (uint64_t)user_stack,   /* golang wants this */
+             .x[5] = (uint64_t)flags,
+
+             .sp = (uint64_t)user_stack,
+         };
+
+         (void)pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
+ #else
+         panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
+ #endif // defined(__arm64__)
+     } else {
+         arm_thread_state_t state = {
+             .pc = (uint32_t)pthread_kern->proc_get_threadstart(p),
+             .r[0] = (uint32_t)user_pthread,
+             .r[1] = (uint32_t)th_thport,
+             .r[2] = (uint32_t)user_func,    /* golang wants this */
+             .r[3] = (uint32_t)user_funcarg, /* golang wants this */
+             .r[4] = (uint32_t)user_stack,   /* golang wants this */
+             .r[5] = (uint32_t)flags,
+
+             .sp = (uint32_t)user_stack,
        };

    (void)pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
@@ -755,63 +803,77 @@ workq_set_register_state(proc_t p, thread_t th,
            panic(__func__ ": thread_set_wq_state failed: %d", error);
        }
    }
+ #elif defined(__arm__) || defined(__arm64__)
+     if (!proc_is64bit_data(p)) {
+         arm_thread_state_t state = {
+             .pc = (int)wqstart_fnptr,
+             .r[0] = (unsigned int)addrs->self,
+             .r[1] = (unsigned int)kport,
+             .r[2] = (unsigned int)addrs->stack_bottom,
+             .r[3] = (unsigned int)kevent_list,
+             // will be pushed onto the stack as arg4/5
+             .r[4] = (unsigned int)upcall_flags,
+             .r[5] = (unsigned int)kevent_count,
+
+             .sp = (int)(addrs->stack_top)
+         };
+
+         int error = pthread_kern->thread_set_wq_state32(th, (thread_state_t)&state);
+         if (error != KERN_SUCCESS) {
+             panic(__func__ ": thread_set_wq_state failed: %d", error);
+         }
+     } else {
+ #if defined(__arm64__)
+         arm_thread_state64_t state = {
+             .pc = (uint64_t)wqstart_fnptr,
+             .x[0] = (uint64_t)addrs->self,
+             .x[1] = (uint64_t)kport,
+             .x[2] = (uint64_t)addrs->stack_bottom,
+             .x[3] = (uint64_t)kevent_list,
+             .x[4] = (uint64_t)upcall_flags,
+             .x[5] = (uint64_t)kevent_count,
+
+             .sp = (uint64_t)((vm_offset_t)addrs->stack_top),
+         };
+
+         int error = pthread_kern->thread_set_wq_state64(th, (thread_state_t)&state);
+         if (error != KERN_SUCCESS) {
+             panic(__func__ ": thread_set_wq_state failed: %d", error);
+         }
+ #else /* defined(__arm64__) */
+         panic("Shouldn't have a 64-bit thread on a 32-bit kernel...");
+ #endif /* defined(__arm64__) */
+     }
#else
#error setup_wqthread not defined for this architecture
#endif
}

- static int
- workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
+ static inline int
+ workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs,
        user_addr_t eventlist, int nevents, int kevent_flags,
        user_addr_t *kevent_list_out, int *kevent_count_out)
{
-     bool workloop = upcall_flags & WQ_FLAG_THREAD_WORKLOOP;
-     int kevent_count = WQ_KEVENT_LIST_LEN;
-     user_addr_t kevent_list = th_addrs->self - WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
-     user_addr_t kevent_id_addr = kevent_list;
-     kqueue_id_t kevent_id = -1;
    int ret;

-     if (workloop) {
-         /*
-          * The kevent ID goes just below the kevent list. Sufficiently new
-          * userspace will know to look there. Old userspace will just
-          * ignore it.
-          */
-         kevent_id_addr -= sizeof(kqueue_id_t);
-     }
+     user_addr_t kevent_list = th_addrs->self -
+             WQ_KEVENT_LIST_LEN * sizeof(struct kevent_qos_s);
+     user_addr_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;
+     user_size_t data_available = WQ_KEVENT_DATA_SIZE;

-     user_addr_t kevent_data_buf = kevent_id_addr - WQ_KEVENT_DATA_SIZE;
-     user_size_t kevent_data_available = WQ_KEVENT_DATA_SIZE;
-
-     if (workloop) {
-         kevent_flags |= KEVENT_FLAG_WORKLOOP;
-         ret = kevent_id_internal(p, &kevent_id,
-                 eventlist, nevents, kevent_list, kevent_count,
-                 kevent_data_buf, &kevent_data_available,
-                 kevent_flags, &kevent_count);
-         copyout(&kevent_id, kevent_id_addr, sizeof(kevent_id));
-     } else {
-         kevent_flags |= KEVENT_FLAG_WORKQ;
-         ret = kevent_qos_internal(p, -1, eventlist, nevents, kevent_list,
-                 kevent_count, kevent_data_buf, &kevent_data_available,
-                 kevent_flags, &kevent_count);
-     }
+     ret = pthread_kern->kevent_workq_internal(p, eventlist, nevents,
+             kevent_list, WQ_KEVENT_LIST_LEN,
+             data_buf, &data_available,
+             kevent_flags, kevent_count_out);

    // squash any errors into just empty output
-     if (ret != 0 || kevent_count == -1) {
+     if (ret != 0 || *kevent_count_out == -1) {
        *kevent_list_out = NULL;
        *kevent_count_out = 0;
        return ret;
    }

-     if (kevent_data_available == WQ_KEVENT_DATA_SIZE) {
-         workq_thread_set_top_addr(th_addrs, kevent_id_addr);
-     } else {
-         workq_thread_set_top_addr(th_addrs,
-                 kevent_data_buf + kevent_data_available);
-     }
-     *kevent_count_out = kevent_count;
+     workq_thread_set_top_addr(th_addrs, data_buf + data_available);
    *kevent_list_out = kevent_list;
    return ret;
}
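With the workloop branch gone, the scratch area below the pthread_t is just the kevent list followed by the data buffer, and whatever data space the call reports as still available becomes usable stack. A userspace sketch of that layout arithmetic; the sizes and the address are illustrative assumptions standing in for the WQ_KEVENT_* constants defined elsewhere in this file:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the real constants in this file. */
#define WQ_KEVENT_LIST_LEN  16
#define WQ_KEVENT_DATA_SIZE (32 * 1024)
#define KEVENT_QOS_SZ       64 /* sizeof(struct kevent_qos_s), assumed */

int main(void)
{
    uint64_t self = 0x170000000ULL; /* hypothetical pthread_t address */

    uint64_t kevent_list = self - WQ_KEVENT_LIST_LEN * KEVENT_QOS_SZ;
    uint64_t data_buf = kevent_list - WQ_KEVENT_DATA_SIZE;

    /* If 8KB of the data buffer is reported still available, the thread's
     * stack top moves to data_buf + data_available, mirroring the
     * workq_thread_set_top_addr() call in the diff above. */
    uint64_t data_available = 8 * 1024;
    uint64_t new_stack_top = data_buf + data_available;

    printf("kevent list at 0x%llx\n", (unsigned long long)kevent_list);
    printf("data buffer at 0x%llx\n", (unsigned long long)data_buf);
    printf("stack top   at 0x%llx\n", (unsigned long long)new_stack_top);
    return 0;
}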
@@ -833,7 +895,7 @@ workq_kevent(proc_t p, struct workq_thread_addrs *th_addrs, int upcall_flags,
 * |pthread_t  | th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_STACK_OFFSET
 * |kevent list| optionally - at most WQ_KEVENT_LIST_LEN events
 * |kevent data| optionally - at most WQ_KEVENT_DATA_SIZE bytes
- * |stack gap  | bottom aligned to 16 bytes, and at least as big as stack_gap_min
+ * |stack gap  | bottom aligned to 16 bytes
 * |   STACK   |
 * |     ⇓     |
 * |           |
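Per the diagram, the pthread_t and the optional kevent area hang below th_stackaddr + DEFAULT_STACKSIZE + guardsize + PTHREAD_T_OFFSET, and the stack gap's bottom is then rounded down to the 16-byte workqueue alignment. A sketch of that final alignment step, with a hypothetical unaligned input:

#include <stdint.h>
#include <stdio.h>

#define C_WORKQ_STK_ALIGN 16

/* Round an address down to the workqueue stack alignment, as the layout
 * above requires for the bottom of the stack gap. */
static uint64_t
align_stack_down(uint64_t addr)
{
    return addr & ~(uint64_t)(C_WORKQ_STK_ALIGN - 1);
}

int main(void)
{
    /* Hypothetical top address after carving out pthread_t and kevent space. */
    uint64_t top_after_kevents = 0x16fffffb3ULL;

    uint64_t stack_top = align_stack_down(top_after_kevents);
    printf("unaligned 0x%llx -> stack top 0x%llx\n",
        (unsigned long long)top_after_kevents,
        (unsigned long long)stack_top);
    return 0;
}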
@@ -880,8 +942,7 @@ workq_setup_thread(proc_t p, thread_t th, vm_map_t map, user_addr_t stackaddr,
        kevent_count = WORKQ_EXIT_THREAD_NKEVENT;
    } else if (upcall_flags & WQ_FLAG_THREAD_KEVENT) {
        unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE;
-         workq_kevent(p, &th_addrs, upcall_flags, NULL, 0, flags,
-                 &kevent_list, &kevent_count);
+         workq_kevent(p, &th_addrs, NULL, 0, flags, &kevent_list, &kevent_count);
    }

    workq_set_register_state(p, th, &th_addrs, kport,
@@ -909,7 +970,7 @@ workq_handle_stack_events(proc_t p, thread_t th, vm_map_t map,
    unsigned int flags = KEVENT_FLAG_STACK_DATA | KEVENT_FLAG_IMMEDIATE |
            KEVENT_FLAG_PARKING;
-     error = workq_kevent(p, &th_addrs, upcall_flags, events, nevents, flags,
+     error = workq_kevent(p, &th_addrs, events, nevents, flags,
            &kevent_list, &kevent_count);

    if (error || kevent_count == 0) {