From 6a4c86bcf01049ddcea0ff1ba0aec7e48f243ad9 Mon Sep 17 00:00:00 2001 From: Anselm Kruis Date: Fri, 9 Jul 2021 16:12:14 +0200 Subject: [PATCH] Stackless issue #283: concentrate platform dependent code in slp_transfer.c and eliminate pycore_slp_platformselect.h Extract the platform specific stack handling from "slp_eval_frame" into a new function "slp_cstack_set_base_and_goodgap" in slp_transfer.c. Move the definition of SLP_DO_NOT_OPTIMIZE_AWAY from pycore_stackless.h to slp_transfer.c, because it is only used there. Integrate include "pycore_slp_platformselect.h" into slp_transfer.c and remove this header file. Move "slp_transfer.c" from "Stackless/core" to "Stackless/platf", because it depends on the platform. Move all pycore_slp_switch_*.h header files from Include/internal to Stackless/platf. These headers are included by "slp_transfer.c" only. --- Include/internal/pycore_slp_platformselect.h | 134 ------- Include/internal/pycore_slp_pystate.h | 6 +- Include/internal/pycore_stackless.h | 12 + Makefile.pre.in | 31 +- Modules/_pickle.c | 4 +- PCbuild/pythoncore.vcxproj | 7 +- PCbuild/pythoncore.vcxproj.filters | 11 +- Python/ceval.c | 1 - Stackless/changelog.txt | 5 + Stackless/core/slp_transfer.c | 200 ---------- Stackless/core/stacklesseval.c | 44 +-- Stackless/module/scheduling.c | 1 - Stackless/module/stacklessmodule.c | 1 - Stackless/pickling/prickelpit.c | 3 - Stackless/pickling/safe_pickle.c | 5 +- Stackless/platf/slp_transfer.c | 367 ++++++++++++++++++ .../platf/switch_amd64_unix.h | 0 .../platf/switch_arm32_gcc.h | 0 .../platf/switch_arm_thumb_gcc.h | 0 .../platf/switch_mips_unix.h | 0 .../platf/switch_ppc_macosx.h | 0 .../platf/switch_ppc_unix.h | 0 .../platf/switch_ps3_SNTools.h | 0 .../platf/switch_s390_unix.h | 0 .../platf/switch_sparc_sun_gcc.h | 0 .../platf/switch_x64_msvc.h | 0 .../platf/switch_x86_msvc.h | 0 .../platf/switch_x86_unix.h | 0 28 files changed, 420 insertions(+), 412 deletions(-) delete mode 100644 
Include/internal/pycore_slp_platformselect.h delete mode 100644 Stackless/core/slp_transfer.c create mode 100644 Stackless/platf/slp_transfer.c rename Include/internal/pycore_slp_switch_amd64_unix.h => Stackless/platf/switch_amd64_unix.h (100%) rename Include/internal/pycore_slp_switch_arm32_gcc.h => Stackless/platf/switch_arm32_gcc.h (100%) rename Include/internal/pycore_slp_switch_arm_thumb_gcc.h => Stackless/platf/switch_arm_thumb_gcc.h (100%) rename Include/internal/pycore_slp_switch_mips_unix.h => Stackless/platf/switch_mips_unix.h (100%) rename Include/internal/pycore_slp_switch_ppc_macosx.h => Stackless/platf/switch_ppc_macosx.h (100%) rename Include/internal/pycore_slp_switch_ppc_unix.h => Stackless/platf/switch_ppc_unix.h (100%) rename Include/internal/pycore_slp_switch_ps3_SNTools.h => Stackless/platf/switch_ps3_SNTools.h (100%) rename Include/internal/pycore_slp_switch_s390_unix.h => Stackless/platf/switch_s390_unix.h (100%) rename Include/internal/pycore_slp_switch_sparc_sun_gcc.h => Stackless/platf/switch_sparc_sun_gcc.h (100%) rename Include/internal/pycore_slp_switch_x64_msvc.h => Stackless/platf/switch_x64_msvc.h (100%) rename Include/internal/pycore_slp_switch_x86_msvc.h => Stackless/platf/switch_x86_msvc.h (100%) rename Include/internal/pycore_slp_switch_x86_unix.h => Stackless/platf/switch_x86_unix.h (100%) diff --git a/Include/internal/pycore_slp_platformselect.h b/Include/internal/pycore_slp_platformselect.h deleted file mode 100644 index 8108b493a882d8..00000000000000 --- a/Include/internal/pycore_slp_platformselect.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef PYCORE_SLP_PLATFORMSELECT_H -#define PYCORE_SLP_PLATFORMSELECT_H - -/* - * Platform Selection for Stackless - */ - -#if defined(MS_WIN32) && !defined(MS_WIN64) && defined(_M_IX86) -#include "pycore_slp_switch_x86_msvc.h" /* MS Visual Studio on X86 */ -#elif defined(MS_WIN64) && defined(_M_X64) -#include "pycore_slp_switch_x64_msvc.h" /* MS Visual Studio on X64 */ -#elif defined(__GNUC__) 
&& defined(__i386__) -#include "pycore_slp_switch_x86_unix.h" /* gcc on X86 */ -#elif defined(__GNUC__) && defined(__amd64__) -#include "pycore_slp_switch_amd64_unix.h" /* gcc on amd64 */ -#elif defined(__GNUC__) && defined(__PPC__) && defined(__linux__) -#include "pycore_slp_switch_ppc_unix.h" /* gcc on PowerPC */ -#elif defined(__GNUC__) && defined(__ppc__) && defined(__APPLE__) -#include "pycore_slp_switch_ppc_macosx.h" /* Apple MacOS X on PowerPC */ -#elif defined(__GNUC__) && defined(sparc) && defined(sun) -#include "pycore_slp_switch_sparc_sun_gcc.h" /* SunOS sparc with gcc */ -#elif defined(__GNUC__) && defined(__s390__) && defined(__linux__) -#include "pycore_slp_switch_s390_unix.h" /* Linux/S390 */ -#elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__) -#include "pycore_slp_switch_s390_unix.h" /* Linux/S390 zSeries (identical) */ -#elif defined(__GNUC__) && defined(__arm__) && defined(__thumb__) -#include "pycore_slp_switch_arm_thumb_gcc.h" /* gcc using arm thumb */ -#elif defined(__GNUC__) && defined(__arm32__) -#include "pycore_slp_switch_arm32_gcc.h" /* gcc using arm32 */ -#elif defined(__GNUC__) && defined(__mips__) && defined(__linux__) -#include "pycore_slp_switch_mips_unix.h" /* MIPS */ -#elif defined(SN_TARGET_PS3) -#include "pycore_slp_switch_ps3_SNTools.h" /* Sony PS3 */ -#endif - -/* default definitions if not defined in above files */ - -/* - * Call SLP_DO_NOT_OPTIMIZE_AWAY(pointer) to ensure that pointer will be - * computed even post-optimization. Use it for pointers that are computed but - * otherwise are useless. The compiler tends to do a good job at eliminating - * unused variables, and this macro fools it into thinking var is in fact - * needed. - */ - -#ifndef SLP_DO_NOT_OPTIMIZE_AWAY - -/* Code is based on Facebook folly - * https://github.com/facebook/folly/blob/master/folly/Benchmark.h, - * which has an Apache 2 license. 
- */ -#ifdef _MSC_VER - -#pragma optimize("", off) - -static inline void doNotOptimizeDependencySink(const void* p) {} - -#pragma optimize("", on) - -#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) doNotOptimizeDependencySink(pointer) -#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */ - -#elif (defined(__GNUC__) || defined(__clang__)) -/* - * The "r" constraint forces the compiler to make datum available - * in a register to the asm block, which means that it must have - * computed/loaded it. - */ -#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \ - do {__asm__ volatile("" ::"r"(pointer));} while(0) -#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */ -#else -/* - * Unknown compiler - */ -#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \ - do { slp_do_not_opimize_away_sink = ((void*)(pointer)); } while(0) -extern uint8_t* volatile slp_do_not_opimize_away_sink; -#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS uint8_t* volatile slp_do_not_opimize_away_sink; -#endif -#endif - -/* adjust slots to typical size of a few recursions on your system */ - -#ifndef SLP_CSTACK_SLOTS -#define SLP_CSTACK_SLOTS 1024 -#endif - -/* how many cstacks to cache at all */ - -#ifndef SLP_CSTACK_MAXCACHE -#define SLP_CSTACK_MAXCACHE 100 -#endif - -/* a good estimate how much the cstack level differs between - initialisation and main C-Python(r) code. Not critical, but saves time. - Note that this will vanish with the greenlet approach. */ - -#ifndef SLP_CSTACK_GOODGAP -#define SLP_CSTACK_GOODGAP 4096 -#endif - -/* stack size in pointer to trigger stack spilling */ - -#ifndef SLP_CSTACK_WATERMARK -#define SLP_CSTACK_WATERMARK 16384 -#endif - -/* define direction of stack growth */ - -#ifndef SLP_CSTACK_DOWNWARDS -#define SLP_CSTACK_DOWNWARDS 1 /* 0 for upwards */ -#endif - -/************************************************************** - - Don't change definitions below, please. 
- - **************************************************************/ - -#if SLP_CSTACK_DOWNWARDS == 1 -#define SLP_CSTACK_COMPARE(a, b) (a) < (b) -#define SLP_CSTACK_SUBTRACT(a, b) (a) - (b) -#else -#define SLP_CSTACK_COMPARE(a, b) (a) > (b) -#define SLP_CSTACK_SUBTRACT(a, b) (b) - (a) -#endif - -#define SLP_CSTACK_SAVE_NOW(tstate, stackvar) \ - ((tstate)->st.cstack_root != NULL ? \ - SLP_CSTACK_SUBTRACT((tstate)->st.cstack_root, \ - (intptr_t*)&(stackvar)) > SLP_CSTACK_WATERMARK : 1) - -#endif /* !STACKLESS_SLP_PLATFORM_SELECT_H */ diff --git a/Include/internal/pycore_slp_pystate.h b/Include/internal/pycore_slp_pystate.h index 935646a0a891f3..4173d12cdf1dc2 100644 --- a/Include/internal/pycore_slp_pystate.h +++ b/Include/internal/pycore_slp_pystate.h @@ -16,7 +16,11 @@ /* This include file is included from pycore_pystate.h only */ -#include "pycore_slp_platformselect.h" /* for SLP_CSTACK_SLOTS */ + +/* adjust slots to typical size of a few recursions on your system */ +#ifndef SLP_CSTACK_SLOTS +#define SLP_CSTACK_SLOTS 1024 +#endif /* * Stackless runtime state diff --git a/Include/internal/pycore_stackless.h b/Include/internal/pycore_stackless.h index a9844ada69a392..aff5e11d677dcf 100644 --- a/Include/internal/pycore_stackless.h +++ b/Include/internal/pycore_stackless.h @@ -823,6 +823,18 @@ long slp_parse_thread_id(PyObject *thread_id, unsigned long *id); (frame_)->f_executing <= SLP_FRAME_EXECUTING_YIELD_FROM) +/* Defined in slp_transfer.c */ +int +slp_cstack_save_now(const PyThreadState *tstate, const void * pstackvar); +#define SLP_CSTACK_SAVE_NOW(tstate, stackvar) slp_cstack_save_now((tstate), &(stackvar)) +void +slp_cstack_set_root(PyThreadState *tstate, const void * pstackvar); +#define SLP_CSTACK_SET_ROOT(tstate, stackvar) slp_cstack_set_root((tstate), &(stackvar)) +PyObject * +slp_cstack_set_base_and_goodgap(PyThreadState *tstate, const void * pstackvar, PyFrameObject *f); + + + #endif /* #ifdef SLP_BUILD_CORE */ #else /* STACKLESS */ diff --git 
a/Makefile.pre.in b/Makefile.pre.in index 7596042b18337e..851a5707625c23 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -339,7 +339,7 @@ PYTHON_OBJS= \ Python/bltinmodule.o \ Python/ceval.o \ Stackless/core/cframeobject.o \ - Stackless/core/slp_transfer.o \ + Stackless/platf/slp_transfer.o \ Stackless/core/stacklesseval.o \ Stackless/core/stackless_util.o \ Stackless/module/channelobject.o \ @@ -989,8 +989,8 @@ regen-typeslots: $(srcdir)/Objects/typeslots.inc.new $(UPDATE_FILE) $(srcdir)/Objects/typeslots.inc $(srcdir)/Objects/typeslots.inc.new -Stackless/core/slp_transfer.o: $(srcdir)/Stackless/core/slp_transfer.c - $(CC) -c $(PY_CORE_CFLAGS) $(SLPFLAGS) -o $@ $(srcdir)/Stackless/core/slp_transfer.c +Stackless/platf/slp_transfer.o: $(srcdir)/Stackless/platf/slp_transfer.c + $(CC) -c $(PY_CORE_CFLAGS) $(SLPFLAGS) -o $@ $(srcdir)/Stackless/platf/slp_transfer.c ############################################################################ # Header files @@ -1093,19 +1093,18 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_stackless.h \ $(srcdir)/Include/internal/pycore_slp_pystate.h \ $(srcdir)/Include/internal/pycore_slp_prickelpit.h \ - $(srcdir)/Include/internal/pycore_slp_platformselect.h \ - $(srcdir)/Include/internal/pycore_slp_switch_amd64_unix.h \ - $(srcdir)/Include/internal/pycore_slp_switch_arm32_gcc.h \ - $(srcdir)/Include/internal/pycore_slp_switch_arm_thumb_gcc.h \ - $(srcdir)/Include/internal/pycore_slp_switch_mips_unix.h \ - $(srcdir)/Include/internal/pycore_slp_switch_ppc_macosx.h \ - $(srcdir)/Include/internal/pycore_slp_switch_ppc_unix.h \ - $(srcdir)/Include/internal/pycore_slp_switch_ps3_SNTools.h \ - $(srcdir)/Include/internal/pycore_slp_switch_s390_unix.h \ - $(srcdir)/Include/internal/pycore_slp_switch_sparc_sun_gcc.h \ - $(srcdir)/Include/internal/pycore_slp_switch_x64_msvc.h \ - $(srcdir)/Include/internal/pycore_slp_switch_x86_msvc.h \ - $(srcdir)/Include/internal/pycore_slp_switch_x86_unix.h \ + 
$(srcdir)/Stackless/platf/switch_amd64_unix.h \ + $(srcdir)/Stackless/platf/switch_arm32_gcc.h \ + $(srcdir)/Stackless/platf/switch_arm_thumb_gcc.h \ + $(srcdir)/Stackless/platf/switch_mips_unix.h \ + $(srcdir)/Stackless/platf/switch_ppc_macosx.h \ + $(srcdir)/Stackless/platf/switch_ppc_unix.h \ + $(srcdir)/Stackless/platf/switch_ps3_SNTools.h \ + $(srcdir)/Stackless/platf/switch_s390_unix.h \ + $(srcdir)/Stackless/platf/switch_sparc_sun_gcc.h \ + $(srcdir)/Stackless/platf/switch_x64_msvc.h \ + $(srcdir)/Stackless/platf/switch_x86_msvc.h \ + $(srcdir)/Stackless/platf/switch_x86_unix.h \ \ pyconfig.h \ $(PARSER_HEADERS) \ diff --git a/Modules/_pickle.c b/Modules/_pickle.c index ac98f0ef39651c..8485db85cdf749 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -12,7 +12,7 @@ #define SLP_BUILD_CORE #include "stackless_api.h" #include "pycore_slp_prickelpit.h" -#include "pycore_slp_platformselect.h" /* for stack saving */ +#include "pycore_stackless.h" #endif PyDoc_STRVAR(pickle_module_doc, @@ -3962,7 +3962,7 @@ save(PicklerObject *self, PyObject *obj, int pers_save) #ifdef STACKLESS /* but we save the stack after a fixed watermark */ { - /* use a variable, because SLP_CSTACK_SAVE_NOW evaluates ts several times. */ + /* use a variable, because SLP_CSTACK_SAVE_NOW may evaluate ts several times. 
*/ PyThreadState *ts = PyThreadState_GET(); if (SLP_CSTACK_SAVE_NOW(ts, self)) { int res; diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 46863913aeaa06..1cab4c0cef4224 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -238,9 +238,8 @@ - - - + + @@ -439,7 +438,7 @@ - + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 5a36316dea618b..880e47b1e2ab09 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -525,13 +525,10 @@ Include - + Include - - Include - - + Include @@ -1163,8 +1160,8 @@ Stackless\core - - Stackless\core + + Stackless\platf Stackless\core diff --git a/Python/ceval.c b/Python/ceval.c index 7c9ce5a0564dc7..1fb5d3d9cc5f91 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -22,7 +22,6 @@ #include "setobject.h" #include "structmember.h" #include "pycore_stackless.h" -#include "pycore_slp_platformselect.h" /* for stack saving */ #include diff --git a/Stackless/changelog.txt b/Stackless/changelog.txt index 0e4d74d2b4cf16..50e8ea4b2268e3 100644 --- a/Stackless/changelog.txt +++ b/Stackless/changelog.txt @@ -9,6 +9,11 @@ What's New in Stackless 3.X.X? *Release date: 20XX-XX-XX* +- https://github.com/stackless-dev/stackless/issues/283 + Platform dependent source code is now completely in "Stackless/platf". The + API header files do not depend on the platform any longer and the header + "pycore_slp_platformselect.h" does not exist any more. + - https://github.com/stackless-dev/stackless/issues/281 Per tasklet profiling and tracing state information is now stored in the tasklet itself. 
This increases the size of tasklet objects a bit, but saves a diff --git a/Stackless/core/slp_transfer.c b/Stackless/core/slp_transfer.c deleted file mode 100644 index c9c7a78da25a48..00000000000000 --- a/Stackless/core/slp_transfer.c +++ /dev/null @@ -1,200 +0,0 @@ -#include "Python.h" -#include /* For ptrdiff_t */ - -#ifdef STACKLESS - -#include "pycore_stackless.h" - -/* - * the following macros are spliced into the OS/compiler - * specific code, in order to simplify maintenance. - */ - -#define _cstprev (_PyRuntime.st.transfer.cstprev) -#define _cst (_PyRuntime.st.transfer.cst) -#define _prev (_PyRuntime.st.transfer.prev) - - -#define __return(x) return (x) - -#define SLP_SAVE_STATE(stackref, stsizediff) \ - intptr_t stsizeb; \ - stackref += SLP_STACK_MAGIC; \ - if (_cstprev != NULL) { \ - if (slp_cstack_new(_cstprev, (intptr_t *)stackref, _prev) == NULL) __return(-1); \ - stsizeb = slp_cstack_save(*_cstprev); \ - } \ - else \ - stsizeb = (_cst->startaddr - (intptr_t *)stackref) * sizeof(intptr_t); \ - if (_cst == NULL) __return(0); \ - stsizediff = stsizeb - (Py_SIZE(_cst) * sizeof(intptr_t)); - -#define SLP_RESTORE_STATE() \ - if (_cst != NULL) { \ - slp_cstack_restore(_cst); \ - } - -/* - * Include pycore_slp_platformselect.h with SLP_EVAL defined. - * If the macro SLP_EVAL is defined, pycore_slp_platformselect.h defines - * the static function int slp_switch(void). - */ -#define SLP_EVAL -#ifdef PYCORE_SLP_PLATFORMSELECT_H -#undef PYCORE_SLP_PLATFORMSELECT_H -#endif -#include "pycore_slp_platformselect.h" - -#ifndef STACKLESS -********** -If you see this error message, -your operating system is not supported yet. -Please provide an implementation of the switch_XXX.h -or disable the STACKLESS flag. -********** -#endif - -SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS - -#ifdef SLP_EXTERNAL_ASM -/* CCP addition: Make these functions, to be called from assembler. 
- * The token include file for the given platform should enable the - * SLP_EXTERNAL_ASM define so that this is included. - */ - -/* There are two cases where slp_save_state would return 0, the - * first where there is no difference in where the stack pointer - * should be from where it is now. And the second where - * SLP_SAVE_STATE returns without restoring because we are only - * here to save. The assembler routine needs to differentiate - * between these, which is why we override the returns and flag - * the low bit of the return value on early exit. - */ -#undef __return -#define __return(x) { exitcode = x; goto exit; } - -intptr_t slp_save_state(intptr_t *stack){ - intptr_t exitcode; - intptr_t diff; - SLP_SAVE_STATE(stack, diff); - return diff; -exit: - /* hack: flag a problem by setting the value to odd */ - return exitcode | 1; -} - -void slp_restore_state(void){ - SLP_RESTORE_STATE(); -} - -extern int slp_switch(void); - -#endif - -static int -climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst, - PyTaskletObject *prev) -{ - /* - * there are cases where we have been initialized - * in some deep stack recursion, but later on we - * need to switch from a higher stacklevel, and the - * needed stack size becomes *negative* :-)) - */ - PyThreadState *ts = _PyThreadState_GET(); - intptr_t probe; - register ptrdiff_t needed = &probe - ts->st.cstack_base; - /* in rare cases, the need might have vanished due to the recursion */ - if (needed > 0) { - register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t)); - if (stack_ptr_tmp == NULL) - return -1; - /* hinder the compiler to optimise away - stack_ptr_tmp and the alloca call. 
- This happens with gcc 4.7.x and -O2 */ - SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp); - } - return slp_transfer(cstprev, cst, prev); -} - -/* This function returns -1 on error, 1 if a switch occurred and 0 - * if only a stack save was performed - */ -int -slp_transfer(PyCStackObject **cstprev, PyCStackObject *cst, - PyTaskletObject *prev) -{ - PyThreadState *ts = _PyThreadState_GET(); - int result; - /* Use a volatile pointer to prevent inlining of slp_switch(). - * See Stackless issue 183 - */ - static int (*volatile slp_switch_ptr)(void) = slp_switch; - - /* since we change the stack we must assure that the protocol was met */ - STACKLESS_ASSERT(); - SLP_ASSERT_FRAME_IN_TRANSFER(ts); - - if ((intptr_t *) &ts > ts->st.cstack_base) - return climb_stack_and_transfer(cstprev, cst, prev); - if (cst == NULL || Py_SIZE(cst) == 0) - cst = ts->st.initial_stub; - if (cst != NULL) { - if (cst->tstate != ts) { - PyErr_SetString(PyExc_SystemError, - "bad thread state in transfer"); - return -1; - } - if (ts->st.cstack_base != cst->startaddr) { - PyErr_SetString(PyExc_SystemError, - "bad stack reference in transfer"); - return -1; - } - /* - * if stacks are same and refcount==1, it must be the same - * task. In this case, we would destroy the target before - * switching. Therefore, we simply don't switch, just save. - */ - if (cstprev && *cstprev == cst && Py_REFCNT(cst) == 1) - cst = NULL; - } - _cstprev = cstprev; - _cst = cst; - _prev = prev; - result = slp_switch_ptr(); - SLP_ASSERT_FRAME_IN_TRANSFER(ts); - if (!result) { - if (_cst) { - /* record the context of the target stack. Can't do it before the switch because - * when saving the stack, the serial number is taken from serial_last_jump - */ - ts->st.serial_last_jump = _cst->serial; - - /* release any objects that needed to wait until after the switch. - * Note that it is important that this does not mess with the - * current tasklet's "tempval". We store it here to be - * absolutely sure. 
- */ - if (ts->st.del_post_switch) { - PyObject *tmp; - TASKLET_CLAIMVAL(ts->st.current, &tmp); - Py_CLEAR(ts->st.del_post_switch); - TASKLET_SETVAL_OWN(ts->st.current, tmp); - } - result = 1; - } else - result = 0; - } else - result = -1; - return result; -} - -#ifdef Py_DEBUG -int -slp_transfer_return(PyCStackObject *cst) -{ - return slp_transfer(NULL, cst, NULL); -} -#endif - -#endif diff --git a/Stackless/core/stacklesseval.c b/Stackless/core/stacklesseval.c index 93d8daa29dd6c3..73db0fa5a06d6e 100644 --- a/Stackless/core/stacklesseval.c +++ b/Stackless/core/stacklesseval.c @@ -8,9 +8,6 @@ #include "pycore_stackless.h" #include "pycore_slp_prickelpit.h" -/* platform specific constants */ -#include "pycore_slp_platformselect.h" - /* Stackless extension for ceval.c */ @@ -243,32 +240,6 @@ make_initial_stub(void) return result; } -static PyObject * -climb_stack_and_eval_frame(PyFrameObject *f) -{ - /* - * a similar case to climb_stack_and_transfer, - * but here we need to incorporate a gap in the - * stack into main and keep this gap on the stack. - * This way, initial_stub is always valid to be - * used to return to the main c stack. - */ - PyThreadState *ts = _PyThreadState_GET(); - intptr_t probe; - ptrdiff_t needed = &probe - ts->st.cstack_base; - /* in rare cases, the need might have vanished due to the recursion */ - if (needed > 0) { - register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t)); - if (stack_ptr_tmp == NULL) - return NULL; - /* hinder the compiler to optimise away - stack_ptr_tmp and the alloca call. 
- This happens with gcc 4.7.x and -O2 */ - SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp); - } - return slp_eval_frame(f); -} - static PyObject * slp_frame_dispatch_top(PyObject *retval); static PyObject * @@ -316,7 +287,6 @@ slp_eval_frame(PyFrameObject *f) { PyThreadState *ts = _PyThreadState_GET(); PyFrameObject *fprev = f->f_back; - intptr_t * stackref; int set_cstack_base; PyObject *retval; @@ -345,13 +315,9 @@ slp_eval_frame(PyFrameObject *f) } /* mark the stack base */ - stackref = SLP_STACK_REFPLUS + (intptr_t *) &f; set_cstack_base = ts->st.cstack_base == NULL; - if (set_cstack_base) - ts->st.cstack_base = stackref - SLP_CSTACK_GOODGAP; - if (stackref > ts->st.cstack_base) { - retval = climb_stack_and_eval_frame(f); /* recursively calls slp_eval_frame(f) */ - } else { + retval = slp_cstack_set_base_and_goodgap(ts, &f, f); + if (retval == (void*)1) { assert(SLP_CURRENT_FRAME(ts) == NULL); /* else we would change the current frame */ SLP_STORE_NEXT_FRAME(ts, f); returning = make_initial_stub(); @@ -873,7 +839,7 @@ eval_frame_callback(PyCFrameObject *cf, int exc, PyObject *retval) * ourselves in an infinite loop of stack spilling. */ saved_base = ts->st.cstack_root; - ts->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *) &f; + SLP_CSTACK_SET_ROOT(ts, f); /* pull in the right retval and tempval from the arguments */ Py_SETREF(retval, cf->ob1); @@ -936,14 +902,14 @@ slp_eval_frame_newstack(PyFrameObject *f, int exc, PyObject *retval) * magic here will clear that exception. */ intptr_t *old = ts->st.cstack_root; - ts->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *) &f; + SLP_CSTACK_SET_ROOT(ts, f); retval = PyEval_EvalFrameEx_slp(f, exc, retval); ts->st.cstack_root = old; return retval; } if (ts->st.cstack_root == NULL) { /* this is a toplevel call. Store the root of stack spilling */ - ts->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *) &f; + SLP_CSTACK_SET_ROOT(ts, f); retval = PyEval_EvalFrameEx_slp(f, exc, retval); /* and reset it. 
We may reenter stackless at a completely different * depth later diff --git a/Stackless/module/scheduling.c b/Stackless/module/scheduling.c index d93045680582da..3d7034c61901ea 100644 --- a/Stackless/module/scheduling.c +++ b/Stackless/module/scheduling.c @@ -6,7 +6,6 @@ #ifdef STACKLESS #include "pycore_stackless.h" - /****************************************************** The Bomb object -- making exceptions convenient diff --git a/Stackless/module/stacklessmodule.c b/Stackless/module/stacklessmodule.c index 16e6470d8eac9a..21ebd701c2a044 100644 --- a/Stackless/module/stacklessmodule.c +++ b/Stackless/module/stacklessmodule.c @@ -7,7 +7,6 @@ #include "pycore_stackless.h" #define IMPLEMENT_STACKLESSMODULE -#include "pycore_slp_platformselect.h" #include "pycore_slp_prickelpit.h" #include /* for offsetof() */ diff --git a/Stackless/pickling/prickelpit.c b/Stackless/pickling/prickelpit.c index f52633b82a7d64..6c0ee17d0ad633 100644 --- a/Stackless/pickling/prickelpit.c +++ b/Stackless/pickling/prickelpit.c @@ -7,9 +7,6 @@ #include "pycore_stackless.h" #include "pycore_slp_prickelpit.h" -/* platform specific constants */ -#include "pycore_slp_platformselect.h" - /****************************************************** type template and support for pickle helper types diff --git a/Stackless/pickling/safe_pickle.c b/Stackless/pickling/safe_pickle.c index d8a0ac76a5afd9..241937bc343c43 100644 --- a/Stackless/pickling/safe_pickle.c +++ b/Stackless/pickling/safe_pickle.c @@ -5,7 +5,6 @@ #define SLP_BUILD_CORE #include "pycore_stackless.h" -#include "pycore_slp_platformselect.h" /* safe pickling */ @@ -31,7 +30,7 @@ pickle_callback(PyCFrameObject *cf, int exc, PyObject *retval) * ourselves in an infinite loop of stack spilling. 
*/ saved_base = ts->st.cstack_root; - ts->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *) &f; + SLP_CSTACK_SET_ROOT(ts, f); if (retval) { Py_DECREF(retval); cf->i = cPickle_save(cf->ob1, cf->ob2, cf->n); @@ -128,7 +127,7 @@ pickle_M(PyObject *self, PyObject *args, int pers_save) _self = self; _args = args; _pers_save = pers_save; - ts->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *) &self; + SLP_CSTACK_SET_ROOT(ts, self); ret = slp_int_wrapper(slp_eval_frame((PyFrameObject *)cf)); return ret; } diff --git a/Stackless/platf/slp_transfer.c b/Stackless/platf/slp_transfer.c new file mode 100644 index 00000000000000..522d602d4f4d34 --- /dev/null +++ b/Stackless/platf/slp_transfer.c @@ -0,0 +1,367 @@ +#include "Python.h" +#include /* For ptrdiff_t */ + +#ifdef STACKLESS + +#include "pycore_stackless.h" + +/* + * the following macros are spliced into the OS/compiler + * specific code, in order to simplify maintenance. + */ + +#define _cstprev (_PyRuntime.st.transfer.cstprev) +#define _cst (_PyRuntime.st.transfer.cst) +#define _prev (_PyRuntime.st.transfer.prev) + + +#define __return(x) return (x) + +#define SLP_SAVE_STATE(stackref, stsizediff) \ + intptr_t stsizeb; \ + stackref += SLP_STACK_MAGIC; \ + if (_cstprev != NULL) { \ + if (slp_cstack_new(_cstprev, (intptr_t *)stackref, _prev) == NULL) __return(-1); \ + stsizeb = slp_cstack_save(*_cstprev); \ + } \ + else \ + stsizeb = (_cst->startaddr - (intptr_t *)stackref) * sizeof(intptr_t); \ + if (_cst == NULL) __return(0); \ + stsizediff = stsizeb - (Py_SIZE(_cst) * sizeof(intptr_t)); + +#define SLP_RESTORE_STATE() \ + if (_cst != NULL) { \ + slp_cstack_restore(_cst); \ + } + +/* + * Platform Selection for Stackless + */ +#define SLP_EVAL /* enable code generation in the included header */ + +#if defined(MS_WIN32) && !defined(MS_WIN64) && defined(_M_IX86) +#include "switch_x86_msvc.h" /* MS Visual Studio on X86 */ +#elif defined(MS_WIN64) && defined(_M_X64) +#include "switch_x64_msvc.h" /* MS Visual Studio on X64 */ 
+#elif defined(__GNUC__) && defined(__i386__) +#include "switch_x86_unix.h" /* gcc on X86 */ +#elif defined(__GNUC__) && defined(__amd64__) +#include "switch_amd64_unix.h" /* gcc on amd64 */ +#elif defined(__GNUC__) && defined(__PPC__) && defined(__linux__) +#include "switch_ppc_unix.h" /* gcc on PowerPC */ +#elif defined(__GNUC__) && defined(__ppc__) && defined(__APPLE__) +#include "switch_ppc_macosx.h" /* Apple MacOS X on PowerPC */ +#elif defined(__GNUC__) && defined(sparc) && defined(sun) +#include "switch_sparc_sun_gcc.h" /* SunOS sparc with gcc */ +#elif defined(__GNUC__) && defined(__s390__) && defined(__linux__) +#include "switch_s390_unix.h" /* Linux/S390 */ +#elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__) +#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */ +#elif defined(__GNUC__) && defined(__arm__) && defined(__thumb__) +#include "switch_arm_thumb_gcc.h" /* gcc using arm thumb */ +#elif defined(__GNUC__) && defined(__arm32__) +#include "switch_arm32_gcc.h" /* gcc using arm32 */ +#elif defined(__GNUC__) && defined(__mips__) && defined(__linux__) +#include "switch_mips_unix.h" /* MIPS */ +#elif defined(SN_TARGET_PS3) +#include "switch_ps3_SNTools.h" /* Sony PS3 */ +#endif +#ifndef STACKLESS +********** +If you see this error message, +your operating system is not supported yet. +Please provide an implementation of the switch_XXX.h +or disable the STACKLESS flag. +********** +#endif + +/* default definitions if not defined in above files */ + + +/* a good estimate how much the cstack level differs between + initialisation and main C-Python(r) code. Not critical, but saves time. + Note that this will vanish with the greenlet approach. 
*/ + +#ifndef SLP_CSTACK_GOODGAP +#define SLP_CSTACK_GOODGAP 4096 +#endif + +/* stack size in pointer to trigger stack spilling */ + +#ifndef SLP_CSTACK_WATERMARK +#define SLP_CSTACK_WATERMARK 16384 +#endif + +/* define direction of stack growth */ + +#ifndef SLP_CSTACK_DOWNWARDS +#define SLP_CSTACK_DOWNWARDS 1 /* 0 for upwards */ +#endif + + +/* + * Call SLP_DO_NOT_OPTIMIZE_AWAY(pointer) to ensure that pointer will be + * computed even post-optimization. Use it for pointers that are computed but + * otherwise are useless. The compiler tends to do a good job at eliminating + * unused variables, and this macro fools it into thinking var is in fact + * needed. + * + * A platform specific include may provide its own definition of + * SLP_DO_NOT_OPTIMIZE_AWAY and SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS. + */ +#ifndef SLP_DO_NOT_OPTIMIZE_AWAY + +/* Code is based on Facebook folly + * https://github.com/facebook/folly/blob/master/folly/Benchmark.h, + * which has an Apache 2 license. + */ +#ifdef _MSC_VER + +#pragma optimize("", off) + +static inline void doNotOptimizeDependencySink(const void* p) {} + +#pragma optimize("", on) + +#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) doNotOptimizeDependencySink(pointer) +#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */ + +#elif (defined(__GNUC__) || defined(__clang__)) +/* + * The "r" constraint forces the compiler to make datum available + * in a register to the asm block, which means that it must have + * computed/loaded it. 
+ */ +#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \ + do {__asm__ volatile("" ::"r"(pointer));} while(0) +#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */ +#else +/* + * Unknown compiler + */ +#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \ + do { slp_do_not_opimize_away_sink = ((void*)(pointer)); } while(0) +extern uint8_t* volatile slp_do_not_opimize_away_sink; +#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS uint8_t* volatile slp_do_not_opimize_away_sink; +#endif +#endif /* #ifndef SLP_DO_NOT_OPTIMIZE_AWAY */ + +/************************************************************** + + Don't change definitions below, please. + + **************************************************************/ + +#if SLP_CSTACK_DOWNWARDS == 1 +#define SLP_CSTACK_COMPARE(a, b) (a) < (b) +#define SLP_CSTACK_SUBTRACT(a, b) (a) - (b) +#else +#define SLP_CSTACK_COMPARE(a, b) (a) > (b) +#define SLP_CSTACK_SUBTRACT(a, b) (b) - (a) +#endif + +/************************************************************** + * End of definitions + ***************************************************************/ + +SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS + +#ifdef SLP_EXTERNAL_ASM +/* CCP addition: Make these functions, to be called from assembler. + * The token include file for the given platform should enable the + * SLP_EXTERNAL_ASM define so that this is included. + */ + +/* There are two cases where slp_save_state would return 0, the + * first where there is no difference in where the stack pointer + * should be from where it is now. And the second where + * SLP_SAVE_STATE returns without restoring because we are only + * here to save. The assembler routine needs to differentiate + * between these, which is why we override the returns and flag + * the low bit of the return value on early exit. 
+ */
+#undef __return
+#define __return(x) { exitcode = x; goto exit; }
+
+/* Expands SLP_SAVE_STATE for the stack pointer passed in by the
+ * assembler code.  On normal completion the value that SLP_SAVE_STATE
+ * stored in diff is returned.  If SLP_SAVE_STATE leaves through
+ * __return(x), x is returned with its low bit set.
+ */
+intptr_t slp_save_state(intptr_t *stack){
+    intptr_t exitcode;
+    intptr_t diff;
+    SLP_SAVE_STATE(stack, diff);
+    return diff;
+exit:
+    /* hack: flag a problem by setting the value to odd */
+    return exitcode | 1;
+}
+
+/* Function wrapper around the SLP_RESTORE_STATE macro. */
+void slp_restore_state(void){
+    SLP_RESTORE_STATE();
+}
+
+/* NOTE(review): presumably implemented by the platform's external
+ * assembler source - confirm. */
+extern int slp_switch(void);
+
+#endif
+
+/* Helper for slp_transfer(): if the current frame lies above
+ * ts->st.cstack_base, reserve the difference with alloca() and then
+ * call slp_transfer() again with the same arguments.
+ *
+ * Returns -1 if alloca() yields NULL, otherwise the result of
+ * slp_transfer().
+ *
+ * NOTE(review): the plain pointer difference (instead of
+ * SLP_CSTACK_SUBTRACT) looks like it assumes a downward growing
+ * stack - confirm.
+ */
+static int
+climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
+                         PyTaskletObject *prev)
+{
+    /*
+     * there are cases where we have been initialized
+     * in some deep stack recursion, but later on we
+     * need to switch from a higher stacklevel, and the
+     * needed stack size becomes *negative* :-))
+     */
+    PyThreadState *ts = _PyThreadState_GET();
+    intptr_t probe;
+    /* distance from the base to this frame, counted in intptr_t slots */
+    register ptrdiff_t needed = &probe - ts->st.cstack_base;
+    /* in rare cases, the need might have vanished due to the recursion */
+    if (needed > 0) {
+        register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
+        if (stack_ptr_tmp == NULL)
+            return -1;
+        /* prevent the compiler from optimising away
+           stack_ptr_tmp and the alloca call.
+           This happens with gcc 4.7.x and -O2 */
+        SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
+    }
+    return slp_transfer(cstprev, cst, prev);
+}
+
+/* Helper for slp_cstack_set_base_and_goodgap(): reserves stack space in
+ * the same way as climb_stack_and_transfer() and then calls
+ * slp_eval_frame(f).
+ *
+ * Returns NULL if alloca() yields NULL, otherwise the result of
+ * slp_eval_frame(f).
+ */
+static PyObject *
+climb_stack_and_eval_frame(PyFrameObject *f)
+{
+    /*
+     * a similar case to climb_stack_and_transfer,
+     * but here we need to incorporate a gap in the
+     * stack into main and keep this gap on the stack.
+     * This way, initial_stub is always valid to be
+     * used to return to the main c stack.
+     */
+    PyThreadState *ts = _PyThreadState_GET();
+    intptr_t probe;
+    /* distance from the base to this frame, counted in intptr_t slots */
+    ptrdiff_t needed = &probe - ts->st.cstack_base;
+    /* in rare cases, the need might have vanished due to the recursion */
+    if (needed > 0) {
+        register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
+        if (stack_ptr_tmp == NULL)
+            return NULL;
+        /* prevent the compiler from optimising away
+           stack_ptr_tmp and the alloca call.
+           This happens with gcc 4.7.x and -O2 */
+        SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
+    }
+    return slp_eval_frame(f);
+}
+
+/* This function returns -1 on error, 1 if a switch occurred and 0
+ * if only a stack save was performed.
+ *
+ * cstprev, cst and prev are handed to slp_switch() through the
+ * variables _cstprev, _cst and _prev.  If cst is NULL or has size 0,
+ * ts->st.initial_stub is used as the target instead.
+ *
+ * A SystemError is set and -1 is returned if the target stack belongs
+ * to another thread state or does not start at ts->st.cstack_base.
+ */
+int
+slp_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
+             PyTaskletObject *prev)
+{
+    PyThreadState *ts = _PyThreadState_GET();
+    int result;
+    /* Use a volatile pointer to prevent inlining of slp_switch().
+     * See Stackless issue 183
+     */
+    static int (*volatile slp_switch_ptr)(void) = slp_switch;
+
+    /* since we change the stack we must assure that the protocol was met */
+    STACKLESS_ASSERT();
+    SLP_ASSERT_FRAME_IN_TRANSFER(ts);
+
+    /* this frame lies above the stack base: reserve stack space first;
+     * the helper calls slp_transfer() again */
+    if ((intptr_t *) &ts > ts->st.cstack_base)
+        return climb_stack_and_transfer(cstprev, cst, prev);
+    if (cst == NULL || Py_SIZE(cst) == 0)
+        cst = ts->st.initial_stub;
+    if (cst != NULL) {
+        if (cst->tstate != ts) {
+            PyErr_SetString(PyExc_SystemError,
+                "bad thread state in transfer");
+            return -1;
+        }
+        if (ts->st.cstack_base != cst->startaddr) {
+            PyErr_SetString(PyExc_SystemError,
+                "bad stack reference in transfer");
+            return -1;
+        }
+        /*
+         * if stacks are same and refcount==1, it must be the same
+         * task. In this case, we would destroy the target before
+         * switching. Therefore, we simply don't switch, just save.
+         */
+        if (cstprev && *cstprev == cst && Py_REFCNT(cst) == 1)
+            cst = NULL;
+    }
+    _cstprev = cstprev;
+    _cst = cst;
+    _prev = prev;
+    result = slp_switch_ptr();
+    SLP_ASSERT_FRAME_IN_TRANSFER(ts);
+    /* map the slp_switch() result: non-zero -> -1, zero -> 1 if there
+     * was a target stack, 0 otherwise */
+    if (!result) {
+        if (_cst) {
+            /* record the context of the target stack.  Can't do it before the switch because
+             * when saving the stack, the serial number is taken from serial_last_jump
+             */
+            ts->st.serial_last_jump = _cst->serial;
+
+            /* release any objects that needed to wait until after the switch.
+             * Note that it is important that this does not mess with the
+             * current tasklet's "tempval".  We store it here to be
+             * absolutely sure.
+             */
+            if (ts->st.del_post_switch) {
+                PyObject *tmp;
+                TASKLET_CLAIMVAL(ts->st.current, &tmp);
+                Py_CLEAR(ts->st.del_post_switch);
+                TASKLET_SETVAL_OWN(ts->st.current, tmp);
+            }
+            result = 1;
+        } else
+            result = 0;
+    } else
+        result = -1;
+    return result;
+}
+
+#ifdef Py_DEBUG
+/* Debug builds only: calls slp_transfer(NULL, cst, NULL), i.e. passes
+ * no previous stack and no previous tasklet. */
+int
+slp_transfer_return(PyCStackObject *cst)
+{
+    return slp_transfer(NULL, cst, NULL);
+}
+#endif
+
+/* Returns non-zero if tstate has no cstack_root yet, or if the distance
+ * between cstack_root and pstackvar (in intptr_t slots) exceeds
+ * SLP_CSTACK_WATERMARK; returns 0 otherwise.
+ */
+int
+slp_cstack_save_now(const PyThreadState *tstate, const void * pstackvar)
+{
+    assert(tstate);
+    assert(pstackvar);
+    if (tstate->st.cstack_root == NULL)
+        return 1;
+    return SLP_CSTACK_SUBTRACT(tstate->st.cstack_root, (const intptr_t*)pstackvar) > SLP_CSTACK_WATERMARK;
+}
+
+/* Sets tstate->st.cstack_root to pstackvar offset by SLP_STACK_REFPLUS
+ * intptr_t slots. */
+void
+slp_cstack_set_root(PyThreadState *tstate, const void * pstackvar) {
+    assert(tstate);
+    assert(pstackvar);
+    tstate->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *)pstackvar;
+}
+
+/* Sets cstack_root like slp_cstack_set_root() and, if cstack_base is
+ * still NULL, places it SLP_CSTACK_GOODGAP slots below the stack
+ * reference.
+ *
+ * If the stack reference lies above cstack_base, the frame is evaluated
+ * through climb_stack_and_eval_frame(f) and that result is returned.
+ * Otherwise the non-NULL marker (void *)1 is returned, which is not a
+ * real object.
+ * NOTE(review): presumably the marker tells the caller to go on
+ * evaluating f itself - confirm against slp_eval_frame.
+ *
+ * NOTE(review): the plain "-" and ">" (instead of the SLP_CSTACK_*
+ * macros) look like they assume a downward growing stack - confirm.
+ */
+PyObject *
+slp_cstack_set_base_and_goodgap(PyThreadState *tstate, const void * pstackvar, PyFrameObject *f) {
+    intptr_t * stackref;
+    assert(tstate);
+    assert(pstackvar);
+    tstate->st.cstack_root = SLP_STACK_REFPLUS + (intptr_t *)pstackvar;
+    stackref = SLP_STACK_REFPLUS + (intptr_t *)pstackvar;
+    if (tstate->st.cstack_base == NULL)
+        tstate->st.cstack_base = stackref - SLP_CSTACK_GOODGAP;
+    if (stackref > tstate->st.cstack_base)
+        return climb_stack_and_eval_frame(f); /* recursively calls slp_eval_frame(f) */
+    return (void *)1;
+}
+
+#endif
diff --git a/Include/internal/pycore_slp_switch_amd64_unix.h b/Stackless/platf/switch_amd64_unix.h
similarity index 100%
rename from Include/internal/pycore_slp_switch_amd64_unix.h
rename to Stackless/platf/switch_amd64_unix.h
diff --git a/Include/internal/pycore_slp_switch_arm32_gcc.h b/Stackless/platf/switch_arm32_gcc.h
similarity index 100%
rename from Include/internal/pycore_slp_switch_arm32_gcc.h
rename to Stackless/platf/switch_arm32_gcc.h
diff --git a/Include/internal/pycore_slp_switch_arm_thumb_gcc.h b/Stackless/platf/switch_arm_thumb_gcc.h
similarity index 100%
rename from
Include/internal/pycore_slp_switch_arm_thumb_gcc.h rename to Stackless/platf/switch_arm_thumb_gcc.h diff --git a/Include/internal/pycore_slp_switch_mips_unix.h b/Stackless/platf/switch_mips_unix.h similarity index 100% rename from Include/internal/pycore_slp_switch_mips_unix.h rename to Stackless/platf/switch_mips_unix.h diff --git a/Include/internal/pycore_slp_switch_ppc_macosx.h b/Stackless/platf/switch_ppc_macosx.h similarity index 100% rename from Include/internal/pycore_slp_switch_ppc_macosx.h rename to Stackless/platf/switch_ppc_macosx.h diff --git a/Include/internal/pycore_slp_switch_ppc_unix.h b/Stackless/platf/switch_ppc_unix.h similarity index 100% rename from Include/internal/pycore_slp_switch_ppc_unix.h rename to Stackless/platf/switch_ppc_unix.h diff --git a/Include/internal/pycore_slp_switch_ps3_SNTools.h b/Stackless/platf/switch_ps3_SNTools.h similarity index 100% rename from Include/internal/pycore_slp_switch_ps3_SNTools.h rename to Stackless/platf/switch_ps3_SNTools.h diff --git a/Include/internal/pycore_slp_switch_s390_unix.h b/Stackless/platf/switch_s390_unix.h similarity index 100% rename from Include/internal/pycore_slp_switch_s390_unix.h rename to Stackless/platf/switch_s390_unix.h diff --git a/Include/internal/pycore_slp_switch_sparc_sun_gcc.h b/Stackless/platf/switch_sparc_sun_gcc.h similarity index 100% rename from Include/internal/pycore_slp_switch_sparc_sun_gcc.h rename to Stackless/platf/switch_sparc_sun_gcc.h diff --git a/Include/internal/pycore_slp_switch_x64_msvc.h b/Stackless/platf/switch_x64_msvc.h similarity index 100% rename from Include/internal/pycore_slp_switch_x64_msvc.h rename to Stackless/platf/switch_x64_msvc.h diff --git a/Include/internal/pycore_slp_switch_x86_msvc.h b/Stackless/platf/switch_x86_msvc.h similarity index 100% rename from Include/internal/pycore_slp_switch_x86_msvc.h rename to Stackless/platf/switch_x86_msvc.h diff --git a/Include/internal/pycore_slp_switch_x86_unix.h b/Stackless/platf/switch_x86_unix.h 
similarity index 100% rename from Include/internal/pycore_slp_switch_x86_unix.h rename to Stackless/platf/switch_x86_unix.h