@@ -948,6 +948,196 @@ _PyMem_Strdup(const char *str)
    return copy;
}

/***********************************************/
/* Delayed freeing support for Py_GIL_DISABLED */
/***********************************************/

// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
#define WORK_ITEMS_PER_CHUNK 254

// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
struct _mem_work_item {
    void *ptr;
    uint64_t qsbr_goal;
};

// A fixed-size buffer of pointers to be freed
struct _mem_work_chunk {
    // Linked list node of chunks in queue
    struct llist_node node;

    Py_ssize_t rd_idx;  // index of next item to read
    Py_ssize_t wr_idx;  // index of next item to write
    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
};
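// Worked check of the sizing comment above (added illustration, not part of
// the original change): on LP64 platforms the chunk header is 32 bytes (two
// llist_node pointers plus two 8-byte indices) and each _mem_work_item is
// 16 bytes (pointer plus 64-bit goal), so 32 + 254 * 16 == 4096, exactly one
// 4 KiB page. Assuming a C11 toolchain, a static assertion could enforce it:
//
//     _Static_assert(sizeof(struct _mem_work_chunk) == 4096,
//                    "_mem_work_chunk should fill one 4 KiB page");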
973+
void
_PyMem_FreeDelayed(void *ptr)
{
#ifndef Py_GIL_DISABLED
    PyMem_Free(ptr);
#else
    if (_PyRuntime.stoptheworld.world_stopped) {
        // Free immediately if the world is stopped, including during
        // interpreter shutdown.
        PyMem_Free(ptr);
        return;
    }

    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
    struct llist_node *head = &tstate->mem_free_queue;

    struct _mem_work_chunk *buf = NULL;
    if (!llist_empty(head)) {
        // Try to re-use the last buffer
        buf = llist_data(head->prev, struct _mem_work_chunk, node);
        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
            // already full
            buf = NULL;
        }
    }

    if (buf == NULL) {
        buf = PyMem_Calloc(1, sizeof(*buf));
        if (buf != NULL) {
            llist_insert_tail(head, &buf->node);
        }
    }

    if (buf == NULL) {
        // failed to allocate a buffer, free immediately
        _PyEval_StopTheWorld(tstate->base.interp);
        PyMem_Free(ptr);
        _PyEval_StartTheWorld(tstate->base.interp);
        return;
    }

    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
    uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
    buf->array[buf->wr_idx].ptr = ptr;
    buf->array[buf->wr_idx].qsbr_goal = seq;
    buf->wr_idx++;

    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
        _PyMem_ProcessDelayed((PyThreadState *)tstate);
    }
#endif
}
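// Usage sketch (hypothetical caller, not from this change): a writer that
// replaces a shared buffer still visible to lock-free readers routes the old
// pointer through the delayed path instead of calling PyMem_Free() directly.
// The names "table" and "new_entries" are invented for illustration:
//
//     void *old = table->entries;
//     _Py_atomic_store_ptr_release(&table->entries, new_entries);
//     _PyMem_FreeDelayed(old);   // reclaimed only after every thread
//                                // passes a quiescent state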
1026+
static struct _mem_work_chunk *
work_queue_first(struct llist_node *head)
{
    return llist_data(head->next, struct _mem_work_chunk, node);
}
1032+
static void
process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
              bool keep_empty)
{
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        while (buf->rd_idx < buf->wr_idx) {
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
                return;
            }

            PyMem_Free(item->ptr);
            buf->rd_idx++;
        }

        assert(buf->rd_idx == buf->wr_idx);
        if (keep_empty && buf->node.next == head) {
            // Keep the last buffer in the queue to reduce re-allocations
            buf->rd_idx = buf->wr_idx = 0;
            return;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}
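// Note on the early return above (added commentary): items are appended to a
// chunk with non-decreasing QSBR goals, so once _Py_qsbr_poll() fails for the
// item at rd_idx, no later item in that chunk can be safe yet. Returning
// early never loses work; unsatisfied items simply stay queued for the next
// call.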
1061+
static void
process_interp_queue(struct _Py_mem_interp_free_queue *queue,
                     struct _qsbr_thread_state *qsbr)
{
    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
        return;
    }

    // Try to acquire the lock, but don't block if it's already held.
    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
        process_queue(&queue->head, qsbr, false);

        int more_work = !llist_empty(&queue->head);
        _Py_atomic_store_int_relaxed(&queue->has_work, more_work);

        PyMutex_Unlock(&queue->mutex);
    }
}
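// Design note (added commentary): the zero timeout turns the lock attempt
// into a try-lock. If another thread holds the mutex, it is already draining
// the shared queue, so blocking here would only serialize threads on cleanup
// that is in progress; the has_work flag ensures the queue is revisited.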
1080+
void
_PyMem_ProcessDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;

    // Process thread-local work
    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);

    // Process shared interpreter work
    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
}
1093+
void
_PyMem_AbandonDelayed(PyThreadState *tstate)
{
    PyInterpreterState *interp = tstate->interp;
    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;

    if (llist_empty(queue)) {
        return;
    }

    // Check if the queue contains one empty buffer
    struct _mem_work_chunk *buf = work_queue_first(queue);
    if (buf->rd_idx == buf->wr_idx) {
        llist_remove(&buf->node);
        PyMem_Free(buf);
        assert(llist_empty(queue));
        return;
    }

    // Merge the thread's work queue into the interpreter's work queue.
    PyMutex_Lock(&interp->mem_free_queue.mutex);
    llist_concat(&interp->mem_free_queue.head, queue);
    _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1);
    PyMutex_Unlock(&interp->mem_free_queue.mutex);

    assert(llist_empty(queue));  // the thread's queue is now empty
}
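// Rationale (added commentary): a detaching thread can no longer poll its
// own QSBR state, so any frees still pending in its local queue would
// otherwise be stranded. Handing the chunks to the interpreter-wide queue
// lets whichever thread next runs _PyMem_ProcessDelayed() finish the work.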
1121+
void
_PyMem_FiniDelayed(PyInterpreterState *interp)
{
    struct llist_node *head = &interp->mem_free_queue.head;
    while (!llist_empty(head)) {
        struct _mem_work_chunk *buf = work_queue_first(head);

        while (buf->rd_idx < buf->wr_idx) {
            // Free the remaining items immediately. There should be no other
            // threads accessing the memory at this point during shutdown.
            struct _mem_work_item *item = &buf->array[buf->rd_idx];
            PyMem_Free(item->ptr);
            buf->rd_idx++;
        }

        llist_remove(&buf->node);
        PyMem_Free(buf);
    }
}

/**************************/
/* the "object" allocator */