Skip to content

Commit dd01c8e

Browse files
author
Anselm Kruis
committed
Stackless issue python#81 and python#89: Fix the thread and interpreter shutdown code
Fix the implementation to work as documented if a thread dies. Now Stackless kills only tasklets with a nesting level > 0. During interpreter shutdown stackless additionally kills daemon threads, if they execute Python code or switch tasklets. This prevents access violations after clearing the thread states. Add a 1ms sleep for each daemon thread. This way the thread gets a better chance to run. Thanks to Kristján Valur Jónsson for suggesting this improvement. Add a test case for a C assertion violation during thread shutdown. Add some explanatory comments to the shutdown test case. Thanks to Christian Tismer for the sugestion. https://bitbucket.org/stackless-dev/stackless/issues/81 https://bitbucket.org/stackless-dev/stackless/issues/89 (grafted from 2cc41427a347a1ebec4eedc3db06a3664e67f798, 1388a2003957, 6140f5aaca2c and edc9b92ec457)
1 parent 7714aa4 commit dd01c8e

File tree

6 files changed

+916
-77
lines changed

6 files changed

+916
-77
lines changed

Doc/library/stackless/threads.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ Threads that end really should make sure that they finish whatever worker
4949
tasklets they have going by manually killing (``tasklet.kill()``) or
5050
unbinding (``tasklet.bind(None)``) them, but that is up to application code.
5151

52+
During interpreter shutdown Stackless kills other daemon threads (non-daemon
53+
are already dead at this point), if they execute Python code or switch
54+
tasklets. This way Stackless tries to avoid access violations, that might
55+
happen later after clearing the thread state structures.
56+
5257
----------------------
5358
A scheduler per thread
5459
----------------------

Stackless/changelog.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,15 @@ What's New in Stackless 3.X.X?
1010

1111
*Release date: 20XX-XX-XX*
1212

13-
- https://bitbucket.org/stackless-dev/stackless/issues/92
13+
- https://bitbucket.org/stackless-dev/stackless/issues/89
14+
- https://bitbucket.org/stackless-dev/stackless/issues/81
15+
Fix (crash) bugs during thread and interpreter shutdown.
16+
If a thread dies, Stackless now really kills tasklets with C-state. During
17+
interpreter shutdown, Stackless also kills daemon threads, if they execute
18+
Python code or switch tasklets. This change avoids crashes during a later
19+
shutdown stage.
20+
21+
- https://bitbucket.org/stackless-dev/stackless/issues/92
1422
If you run Stackless with the option '-v' (or set the environment variable
1523
PYTHONVERBOSE), Stackless prints a warning message, if it deallocates a
1624
tasklet, that has C-state.

Stackless/core/stacklesseval.c

Lines changed: 222 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -335,100 +335,251 @@ slp_eval_frame(PyFrameObject *f)
335335
return slp_frame_dispatch(f, fprev, 0, Py_None);
336336
}
337337

338-
/* a thread (or threads) is exiting. After this call, no tasklet may
339-
* refer to the current thread's tstate
340-
*/
341-
void slp_kill_tasks_with_stacks(PyThreadState *target_ts)
338+
static void
339+
kill_pending_current_main_and_watchdogs(PyThreadState *ts)
342340
{
343-
PyThreadState *ts = PyThreadState_GET();
344-
int count = 0;
341+
PyTaskletObject *t;
342+
343+
assert(ts != PyThreadState_GET()); /* don't kill ourself */
344+
345+
/* kill watchdogs */
346+
if (ts->st.watchdogs && PyList_CheckExact(ts->st.watchdogs)) {
347+
Py_ssize_t i;
348+
/* we don't kill the "intterupt" slot, number 0 */
349+
for(i = PyList_GET_SIZE(ts->st.watchdogs) - 1; i > 0; i--) {
350+
PyObject * item = PyList_GET_ITEM(ts->st.watchdogs, i);
351+
assert(item && PyTasklet_Check(item));
352+
t = (PyTaskletObject *) item;
353+
Py_INCREF(t); /* it is a borrowed ref */
354+
PyTasklet_KillEx(t, 1);
355+
PyErr_Clear();
356+
Py_DECREF(t);
357+
}
358+
}
359+
/* kill main */
360+
t = ts->st.main;
361+
if (t != NULL) {
362+
Py_INCREF(t); /* it is a borrowed ref */
363+
PyTasklet_KillEx(t, 1);
364+
PyErr_Clear();
365+
Py_DECREF(t);
366+
}
367+
/* kill current */
368+
t = ts->st.current;
369+
if (t != NULL) {
370+
Py_INCREF(t); /* it is a borrowed ref */
371+
PyTasklet_KillEx(t, 1);
372+
PyErr_Clear();
373+
Py_DECREF(t);
374+
}
375+
}
345376

346-
/* a loop to kill tasklets on the local thread */
347-
while (1) {
348-
PyCStackObject *csfirst = slp_cstack_chain, *cs;
349-
PyTaskletObject *t;
377+
static void
378+
run_other_threads(PyObject **sleep, int count)
379+
{
380+
if (count == 0) {
381+
/* shortcut */
382+
return;
383+
}
350384

351-
if (csfirst == NULL)
352-
break;
353-
for (cs = csfirst; ; cs = cs->next) {
354-
if (count && cs == csfirst) {
355-
/* nothing found */
356-
return;
385+
assert(sleep != NULL);
386+
if (*sleep == NULL) {
387+
/* get a reference to time.sleep() */
388+
PyObject *mod_time;
389+
assert(Py_IsInitialized());
390+
mod_time = PyImport_ImportModule("time");
391+
if (mod_time != NULL) {
392+
*sleep = PyObject_GetAttrString(mod_time, "sleep");
393+
Py_DECREF(mod_time);
394+
}
395+
if (*sleep == NULL) {
396+
PyErr_Clear();
397+
}
398+
}
399+
while(count-- > 0) {
400+
if (*sleep == NULL) {
401+
Py_BEGIN_ALLOW_THREADS
402+
Py_END_ALLOW_THREADS
403+
} else {
404+
PyObject *res = PyObject_CallFunction(*sleep, "(f)", (float)0.001);
405+
if (res != NULL) {
406+
Py_DECREF(res);
407+
} else {
408+
PyErr_Clear();
357409
}
358-
++count;
359-
/* has tstate already been cleared or is it a foreign thread? */
360-
if (cs->tstate != ts)
361-
continue;
410+
}
411+
}
412+
}
362413

363-
if (cs->task == NULL) {
364-
cs->tstate = NULL;
365-
continue;
366-
}
367-
/* is it already dead? */
368-
if (cs->task->f.frame == NULL) {
369-
cs->tstate = NULL;
370-
continue;
414+
/* a thread (or threads) is exiting. After this call, no tasklet may
415+
* refer to target_ts, if target_ts != NULL.
416+
* Also inactivate all other threads during interpreter shut down (target_ts == NULL).
417+
* Later in the shutdown sequence Python clears the tstate structure. This
418+
* causes access violations, if another thread is still active.
419+
*/
420+
void slp_kill_tasks_with_stacks(PyThreadState *target_ts)
421+
{
422+
PyThreadState *cts = PyThreadState_GET();
423+
int in_loop = 0;
424+
425+
if (target_ts == NULL || target_ts == cts) {
426+
/* a loop to kill tasklets on the local thread */
427+
while (1) {
428+
PyCStackObject *csfirst = slp_cstack_chain, *cs;
429+
PyTaskletObject *t;
430+
431+
if (csfirst == NULL)
432+
goto other_threads;
433+
for (cs = csfirst; ; cs = cs->next) {
434+
if (in_loop && cs == csfirst) {
435+
/* nothing found */
436+
goto other_threads;
437+
}
438+
in_loop = 1;
439+
/* has tstate already been cleared or is it a foreign thread? */
440+
if (cs->tstate != cts)
441+
continue;
442+
443+
if (cs->task == NULL) {
444+
cs->tstate = NULL;
445+
continue;
446+
}
447+
/* is it already dead? */
448+
if (cs->task->f.frame == NULL) {
449+
cs->tstate = NULL;
450+
continue;
451+
}
452+
break;
371453
}
372-
break;
373-
}
374-
count = 0;
375-
t = cs->task;
376-
Py_INCREF(t); /* cs->task is a borrowed ref */
377-
378-
/* We need to ensure that the tasklet 't' is in the scheduler
379-
* tasklet chain before this one (our main). This ensures
380-
* that this one is directly switched back to after 't' is
381-
* killed. The reason we do this this is because if another
382-
* tasklet is switched to, this is of course it being scheduled
383-
* and run. Why we do not need to do this for tasklets blocked
384-
* on channels is that when they are scheduled to be run and
385-
* killed, they will be implicitly placed before this one,
386-
* leaving it to run next.
387-
*/
388-
if (!t->flags.blocked && t != cs->tstate->st.current) {
389-
/* unlink from runnable queue if it wasn't previously remove()'d */
390-
if (t->next) {
454+
in_loop = 0;
455+
t = cs->task;
456+
Py_INCREF(t); /* cs->task is a borrowed ref */
457+
assert(t->cstate == cs);
458+
459+
/* If a thread ends, the thread no longer has a main tasklet and
460+
* the thread is not in a valid state. tstate->st.current is
461+
* undefined. It may point to a tasklet, but the other fields in
462+
* tstate have wrong values.
463+
*
464+
* Therefore we need to ensure, that t is not tstate->st.current.
465+
* Convert t into a free floating tasklet. PyTasklet_Kill works
466+
* for floating tasklets too.
467+
*/
468+
if (t->next && !t->flags.blocked) {
391469
assert(t->prev);
392470
slp_current_remove_tasklet(t);
393-
assert(t->cstate->tstate == ts);
394-
} else
395-
Py_INCREF(t); /* a new reference for the runnable queue */
396-
/* insert into the 'current' chain without modifying 'current' */
397-
slp_current_insert(t);
398-
}
399-
400-
PyTasklet_Kill(t);
401-
PyErr_Clear();
471+
assert(Py_REFCNT(t) > 1);
472+
Py_DECREF(t);
473+
assert(t->next == NULL);
474+
assert(t->prev == NULL);
475+
}
476+
assert(t != cs->tstate->st.current);
477+
478+
/* has the tasklet nesting_level > 0? The Stackles documentation
479+
* specifies: "When a thread dies, only tasklets with a C-state are actively killed.
480+
* Soft-switched tasklets simply stop."
481+
*/
482+
if ((cts->st.current == cs->task ? cts->st.nesting_level : cs->nesting_level) > 0) {
483+
/* Is is hard switched. */
484+
PyTasklet_Kill(t);
485+
PyErr_Clear();
486+
}
402487

403-
/* must clear the tstate */
404-
t->cstate->tstate = NULL;
405-
Py_DECREF(t);
406-
}
488+
/* must clear the tstate */
489+
t->cstate->tstate = NULL;
490+
Py_DECREF(t);
491+
} /* while(1) */
492+
} /* if(...) */
407493

494+
other_threads:
408495
/* and a separate simple loop to kill tasklets on foreign threads.
409496
* Since foreign tasklets are scheduled in their own good time,
410497
* there is no guarantee that they are actually dead when we
411498
* exit this function. Therefore, we also can't clear their thread
412499
* states. That will hopefully happen when their threads exit.
413500
*/
414501
{
415-
PyCStackObject *csfirst = slp_cstack_chain, *cs;
502+
PyCStackObject *csfirst, *cs;
416503
PyTaskletObject *t;
417-
418-
if (csfirst == NULL)
504+
PyObject *sleepfunc = NULL;
505+
int count;
506+
507+
/* other threads, first pass: kill (pending) current, main and watchdog tasklets */
508+
if (target_ts == NULL) {
509+
PyThreadState *ts;
510+
count = 0;
511+
for (ts = cts->interp->tstate_head; ts != NULL; ts = ts->next) {
512+
if (ts != cts) {
513+
/* Inactivate thread ts. In case the thread is active,
514+
* it will be killed. If the thread is sleping, it
515+
* continues to sleep.
516+
*/
517+
count++;
518+
kill_pending_current_main_and_watchdogs(ts);
519+
/* It helps to inactivate threads reliably */
520+
if (PyExc_TaskletExit)
521+
PyThreadState_SetAsyncExc(ts->thread_id, PyExc_TaskletExit);
522+
}
523+
}
524+
/* We must not release the GIL while we might hold the HEAD-lock.
525+
* Otherwise another thread (usually the thread of the killed tasklet)
526+
* could try to get the HEAD lock. The result would be a wonderful dead lock.
527+
* If target_ts is NULL, we know for sure, that we don't hold the HEAD-lock.
528+
*/
529+
run_other_threads(&sleepfunc, count);
530+
/* The other threads might have modified the thread state chain, but fortunately we
531+
* are done with it.
532+
*/
533+
} else if (target_ts != cts) {
534+
kill_pending_current_main_and_watchdogs(target_ts);
535+
/* Here it is not safe to release the GIL. */
536+
}
537+
538+
/* other threads, second pass: kill tasklets with nesting-level > 0 and
539+
* clear tstate if target_ts != NULL && target_ts != cts. */
540+
csfirst = slp_cstack_chain;
541+
if (csfirst == NULL) {
542+
Py_XDECREF(sleepfunc);
419543
return;
544+
}
545+
420546
count = 0;
421-
for (cs = csfirst; ; cs = cs->next) {
422-
if (count && cs == csfirst) {
423-
return;
424-
}
425-
count++;
547+
in_loop = 0;
548+
for (cs = csfirst; !(in_loop && cs == csfirst); cs = cs->next) {
549+
in_loop = 1;
426550
t = cs->task;
427-
Py_INCREF(t); /* cs->task is a borrowed ref */
428-
PyTasklet_Kill(t);
429-
PyErr_Clear();
551+
if (t == NULL)
552+
continue;
553+
Py_INCREF(t); /* cs->task is a borrowed ref */
554+
assert(t->cstate == cs);
555+
if (cs->tstate == cts) {
556+
Py_DECREF(t);
557+
continue; /* already handled */
558+
}
559+
if (target_ts != NULL && cs->tstate != target_ts) {
560+
Py_DECREF(t);
561+
continue; /* we are not interested in this thread */
562+
}
563+
if (((cs->tstate && cs->tstate->st.current == t) ? cs->tstate->st.nesting_level : cs->nesting_level) > 0) {
564+
/* Kill only tasklets with nesting level > 0 */
565+
count++;
566+
PyTasklet_Kill(t);
567+
PyErr_Clear();
568+
}
430569
Py_DECREF(t);
570+
if (target_ts != NULL) {
571+
cs->tstate = NULL;
572+
}
573+
}
574+
if (target_ts == NULL) {
575+
/* We must not release the GIL while we might hold the HEAD-lock.
576+
* Otherwise another thread (usually the thread of the killed tasklet)
577+
* could try to get the HEAD lock. The result would be a wonderful dead lock.
578+
* If target_ts is NULL, we know for sure, that we don't hold the HEAD-lock.
579+
*/
580+
run_other_threads(&sleepfunc, count);
431581
}
582+
Py_XDECREF(sleepfunc);
432583
}
433584
}
434585

0 commit comments

Comments
 (0)