-
Notifications
You must be signed in to change notification settings - Fork 555
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RFC/WIP replace Perl stack with self allocating/expanding (C-style) stack on Win32 #13860
Comments
From @bulk88Created by @bulk88I decided to try something novel. Replacing the Perl stack, which is a -address space exhaustion on 32 bits, op/threads.t trys to launch 100 There are some API design problems of how get the code integrated in a We have SvLEN == 0 for foreign PV buffers. But what is the equivalent Should PERL_FLEXIBLE_EXCEPTIONS come back since I'm use MSC's In pp_mapwhile there was code starting at a very high address in the # define EXTEND(p,n) (void)(UNLIKELY( (4096/sizeof(SV*)) < which will constant fold away by C compiler for all const "n"s but won't Currently I set the maximum VM size of Perl stack to 32 MB, which is ( 62000000[maximum # of SVs I could push() into a @array in PP before For comparison, the Win32 C stack's default (and all Win32 Perls ever) How Windows/ReactOS self-allocates a thread's C stack The rest of this the result of a "nmake test" on self allocating stack --------------------------------------------------------------- Test Summary Report \|/Heap corruption, pp_split swaps Perl stack (PL_stack_sp) with random AV \|/pp_split+Error #6: UNADDRESSABLE ACCESS: writing \|/ creates 100 threads in 1 process, address space exhaustion \|/Heap corruption, pp_split swaps Perl stack (PL_stack_sp) with random AV \|/Address space exhaustion at \|/Same as above, address space exhaustion C:\perl519\src\win32> Perl Info
|
From @bulk88commit fc3093b45eb9ff0790ca25118825401fb7e1c7e4 WIP replace Perl stack with C style stack on Win32 Inline Patchcommit fc3093b45eb9ff0790ca25118825401fb7e1c7e4
Author: Daniel Dragan <bulk88@hotmail.com>
Date: Mon May 19 23:07:26 2014 -0400
WIP replace Perl stack with C style stack on Win32
diff --git a/dump.c b/dump.c
index 354cd57..ddbf65d 100644
--- a/dump.c
+++ b/dump.c
@@ -2385,19 +2385,50 @@ Perl_sv_dump(pTHX_ SV *sv)
PERL_ARGS_ASSERT_SV_DUMP;
if (SvROK(sv))
do_sv_dump(0, Perl_debug_log, sv, 0, 4, 0, 0);
else
do_sv_dump(0, Perl_debug_log, sv, 0, 0, 0, 0);
}
+
+static
+DWORD
+S_fix_stack(LPEXCEPTION_POINTERS exceptions) {
+ dTHX;
+ MEMORY_BASIC_INFORMATION mbi;
+ DWORD_PTR newalloc;
+ //this is inefficient, these things should be stored somewhere in interp struct
+ if(VirtualQuery(PL_stack_base,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ newalloc = (DWORD_PTR)mbi.AllocationBase+(DWORD_PTR)mbi.RegionSize;
+ if(!VirtualAlloc(newalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+
+ return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+
+
int
Perl_runops_debug(pTHX)
{
+__try
+{
dVAR;
if (!PL_op) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING), "NULL OP IN RUN");
return 0;
}
DEBUG_l(Perl_deb(aTHX_ "Entering new RUNOPS level\n"));
do {
@@ -2425,16 +2456,21 @@ Perl_runops_debug(pTHX)
}
OP_ENTRY_PROBE(OP_NAME(PL_op));
} while ((PL_op = PL_op->op_ppaddr(aTHX)));
DEBUG_l(Perl_deb(aTHX_ "leaving RUNOPS level\n"));
PERL_ASYNC_CHECK();
TAINT_NOT;
+}
+//needs bounds checks to make sure the STATUS_GUARD_PAGE_VIOLATION is for Perl stack and not some other c lib doing the same thing
+__except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION ? S_fix_stack(GetExceptionInformation()) : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+}
return 0;
}
I32
Perl_debop(pTHX_ const OP *o)
{
dVAR;
diff --git a/perl.h b/perl.h
index 6da39f3..42ad440 100644
--- a/perl.h
+++ b/perl.h
@@ -197,16 +197,17 @@
#define _CPERLarg
#define PERL_OBJECT_THIS
#define _PERL_OBJECT_THIS
#define PERL_OBJECT_THIS_
#define CALL_FPTR(fptr) (*fptr)
#define MEMBER_TO_FPTR(name) name
#endif /* !PERL_CORE */
+//put call to func that has __try/__catch, then calls PL_runops here?
#define CALLRUNOPS PL_runops
#define CALLREGCOMP(sv, flags) Perl_pregcomp(aTHX_ (sv),(flags))
#define CALLREGCOMP_ENG(prog, sv, flags) (prog)->comp(aTHX_ sv, flags)
#define CALLREGEXEC(prog,stringarg,strend,strbeg,minend,sv,data,flags) \
RX_ENGINE(prog)->exec(aTHX_ (prog),(stringarg),(strend), \
(strbeg),(minend),(sv),(data),(flags))
diff --git a/pp.h b/pp.h
index 97738c2..0d72d2f 100644
--- a/pp.h
+++ b/pp.h
@@ -275,22 +275,22 @@ Does not use C<TARG>. See also C<XPUSHu>, C<mPUSHu> and C<PUSHu>.
/* Same thing, but update mark register too. */
# define MEXTEND(p,n) STMT_START { \
const int markoff = mark - PL_stack_base; \
sp = stack_grow(sp,p,(SSize_t) (n)); \
mark = PL_stack_base + markoff; \
} STMT_END
#else
# define EXTEND(p,n) (void)(UNLIKELY(PL_stack_max - p < (SSize_t)(n)) && \
- (sp = stack_grow(sp,p, (SSize_t) (n))))
+ (sp = sp))
/* Same thing, but update mark register too. */
# define MEXTEND(p,n) STMT_START {if (UNLIKELY(PL_stack_max - p < (int)(n))) {\
const int markoff = mark - PL_stack_base; \
- sp = stack_grow(sp,p,(SSize_t) (n)); \
+ sp = sp; \
mark = PL_stack_base + markoff; \
} } STMT_END
#endif
#define PUSHs(s) (*++sp = (s))
#define PUSHTARG STMT_START { SvSETMAGIC(TARG); PUSHs(TARG); } STMT_END
#define PUSHp(p,l) STMT_START { sv_setpvn(TARG, (p), (l)); PUSHTARG; } STMT_END
#define PUSHn(n) STMT_START { sv_setnv(TARG, (NV)(n)); PUSHTARG; } STMT_END
diff --git a/pp_ctl.c b/pp_ctl.c
index 380a7fe..2d4f8d2 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -996,18 +996,24 @@ PP(pp_mapwhile)
if (shift < count)
shift = count; /* Avoid shifting too often --Ben Tilly */
EXTEND(SP,shift);
src = SP;
dst = (SP += shift);
PL_markstack_ptr[-1] += shift;
*PL_markstack_ptr += shift;
- while (count--)
- *dst-- = *src--;
+ //copy upwards not downwards
+ if(count) {
+ SV** dst1 = dst;
+ SV** src1 = src;
+ dst1 -= (count-1);
+ src1 -= (count-1);
+ memcpy(dst1, src1, sizeof(SV**)*count);
+ }
}
/* copy the new items down to the destination list */
dst = PL_stack_base + (PL_markstack_ptr[-2] += items) - 1;
if (gimme == G_ARRAY) {
/* add returned items to the collection (making mortal copies
* if necessary), then clear the current temps stack frame
* *except* for those items. We do this splicing the items
* into the start of the tmps frame (so some items may be on
diff --git a/run.c b/run.c
index ff3bc93..f5343d1 100644
--- a/run.c
+++ b/run.c
@@ -28,28 +28,64 @@
/*
* 'Away now, Shadowfax! Run, greatheart, run as you have never run before!
* Now we are come to the lands where you were foaled, and every stone you
* know. Run now! Hope is in speed!' --Gandalf
*
* [p.600 of _The Lord of the Rings_, III/xi: "The Palantír"]
*/
+
+static
+DWORD
+S_fix_stack(LPEXCEPTION_POINTERS exceptions) {
+ dTHX;
+ MEMORY_BASIC_INFORMATION mbi;
+ DWORD_PTR newalloc;
+ //this is inefficient, these things should be stored somewhere in interp struct
+ if(VirtualQuery(PL_stack_base,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ newalloc = (DWORD_PTR)mbi.AllocationBase+(DWORD_PTR)mbi.RegionSize;
+ if(!VirtualAlloc(newalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+
+ return EXCEPTION_CONTINUE_EXECUTION;
+}
+
+
+
int
Perl_runops_standard(pTHX)
{
+__try
+{
dVAR;
OP *op = PL_op;
OP_ENTRY_PROBE(OP_NAME(op));
while ((PL_op = op = op->op_ppaddr(aTHX))) {
OP_ENTRY_PROBE(OP_NAME(op));
}
PERL_ASYNC_CHECK();
TAINT_NOT;
+}
+//needs bounds checks to make sure the STATUS_GUARD_PAGE_VIOLATION is for Perl stack and not some other c lib doing the same thing
+__except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION ? S_fix_stack(GetExceptionInformation()) : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+}
return 0;
}
/*
* Local variables:
* c-indentation-style: bsd
* c-basic-offset: 4
* indent-tabs-mode: nil
diff --git a/scope.c b/scope.c
index 07f24b7..b0768b9 100644
--- a/scope.c
+++ b/scope.c
@@ -48,20 +48,64 @@ Perl_stack_grow(pTHX_ SV **sp, SV **p, SSize_t n)
#define GROW(old) ((old) + 1)
#endif
PERL_SI *
Perl_new_stackinfo(pTHX_ I32 stitems, I32 cxitems)
{
dVAR;
PERL_SI *si;
+ void *avarr;
+ void * avarr2;
+ void * toalloc;
Newx(si, 1, PERL_SI);
si->si_stack = newAV();
AvREAL_off(si->si_stack);
- av_extend(si->si_stack, stitems > 0 ? stitems-1 : 0);
+ //av_extend(si->si_stack, stitems > 0 ? stitems-1 : 0);
+ Safefree(AvALLOC(si->si_stack));
+ AvALLOC(si->si_stack) = NULL;
+ AvARRAY(si->si_stack) = NULL;
+ AvMAX(si->si_stack) = SSize_t_MAX/sizeof(SV*);
+ fprintf(stderr, "stack alloc NSI av=%x\n", si->si_stack);
+ avarr = VirtualAlloc(
+ NULL,
+ 33554432, //2^25 32 MB
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ DWORD e = GetLastError();
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ //4096 (page size) should be constant or runtime lookup from Win32 API, for
+ //constant research 32 and 64 bit behavior and meaning of "large pages"
+ if(! (avarr2 = VirtualAlloc(avarr,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ (DWORD_PTR)toalloc = (DWORD_PTR)avarr+(DWORD_PTR)4096;
+ if(!VirtualAlloc(toalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ AvALLOC(si->si_stack) = (SV**)avarr;
+ AvARRAY(si->si_stack) = (SV**)avarr;
+
AvALLOC(si->si_stack)[0] = &PL_sv_undef;
AvFILLp(si->si_stack) = 0;
si->si_prev = 0;
si->si_next = 0;
si->si_cxmax = cxitems - 1;
si->si_cxix = -1;
si->si_type = PERLSI_UNDEF;
Newx(si->si_cxstack, cxitems, PERL_CONTEXT);
diff --git a/sv.c b/sv.c
index b43fadf..7301b24 100644
--- a/sv.c
+++ b/sv.c
@@ -6427,17 +6427,30 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
if (AvREAL(av) && AvFILLp(av) > -1) {
next_sv = AvARRAY(av)[AvFILLp(av)--];
/* save old iter_sv in top-most slot of AV,
* and pray that it doesn't get wiped in the meantime */
AvARRAY(av)[AvMAX(av)] = iter_sv;
iter_sv = sv;
goto get_next_sv; /* process this new sv */
}
- Safefree(AvALLOC(av));
+ if(!AvREAL((const AV *)av) && AvMAX((const AV *)av) == SSize_t_MAX/sizeof(SV*)) {
+ fprintf(stderr, "stack dealloc av=%x\n", av);
+ if(!VirtualFree(
+ AvALLOC(av),
+ 0,
+ MEM_RELEASE
+ )) {
+ fprintf(stderr, "VF failed %u\n", GetLastError());
+ exit(1);
+ }
+ }
+ else {
+ Safefree(AvALLOC(av));
+ }
}
break;
case SVt_PVLV:
if (LvTYPE(sv) == 'T') { /* for tie: return HE to pool */
SvREFCNT_dec(HeKEY_sv((HE*)LvTARG(sv)));
HeNEXT((HE*)LvTARG(sv)) = PL_hv_fetch_ent_mh;
PL_hv_fetch_ent_mh = (HE*)LvTARG(sv);
@@ -6771,23 +6784,28 @@ Perl_sv_free(pTHX_ SV *const sv)
{
SvREFCNT_dec(sv);
}
/* Private helper function for SvREFCNT_dec().
* Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */
+SV * watch_sv;
+
void
Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
{
dVAR;
PERL_ARGS_ASSERT_SV_FREE2;
+ if( sv == watch_sv) {
+ DebugBreak();
+ }
if (LIKELY( rc == 1 )) {
/* normal case */
SvREFCNT(sv) = 0;
#ifdef DEBUGGING
if (SvTEMP(sv)) {
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEBUGGING),
"Attempt to free temp prematurely: SV 0x%"UVxf
@@ -12675,32 +12693,74 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
break;
case SVt_PVAV:
/* avoid cloning an empty array */
if (AvARRAY((const AV *)sstr) && AvFILLp((const AV *)sstr) >= 0) {
SV **dst_ary, **src_ary;
SSize_t items = AvFILLp((const AV *)sstr) + 1;
src_ary = AvARRAY((const AV *)sstr);
- Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
+ if(!AvREAL((const AV *)sstr) && AvMAX((const AV *)sstr) == SSize_t_MAX/sizeof(SV*)) {
+ MEMORY_BASIC_INFORMATION mbi;
+ void * avarr;
+ DWORD_PTR toalloc;
+ void * avarr2;
+ fprintf(stderr, "stack alloc S_sv_dup_common src av=%x dst av=%x\n", sstr, dstr);
+ if(VirtualQuery(src_ary,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ avarr = VirtualAlloc(
+ NULL,
+ 33554432, //2^25 32 MB
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ if(! (avarr2 = VirtualAlloc(avarr,
+ mbi.RegionSize,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ toalloc = (DWORD_PTR) avarr + mbi.RegionSize;
+ if(!VirtualAlloc(toalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ dst_ary = avarr;
+ } else {
+ Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
+ }
ptr_table_store(PL_ptr_table, src_ary, dst_ary);
AvARRAY(MUTABLE_AV(dstr)) = dst_ary;
AvALLOC((const AV *)dstr) = dst_ary;
if (AvREAL((const AV *)sstr)) {
dst_ary = sv_dup_inc_multiple(src_ary, dst_ary, items,
param);
}
else {
while (items-- > 0)
*dst_ary++ = sv_dup(*src_ary++, param);
}
- items = AvMAX((const AV *)sstr) - AvFILLp((const AV *)sstr);
- while (items-- > 0) {
- *dst_ary++ = &PL_sv_undef;
- }
+ //is this really needed? This is uninit space I think
+ // items = AvMAX((const AV *)sstr) - AvFILLp((const AV *)sstr);
+ // while (items-- > 0) {
+ // *dst_ary++ = &PL_sv_undef;
+ // }
}
else {
AvARRAY(MUTABLE_AV(dstr)) = NULL;
AvALLOC((const AV *)dstr) = (SV**)NULL;
AvMAX( (const AV *)dstr) = -1;
AvFILLp((const AV *)dstr) = -1;
}
break; |
From @bulk88tied_hash_deep_c_recursion_overflow.txt
|
From @tonycozOn Mon May 19 21:52:25 2014, bulk88 wrote:
Wouldn't it be simpler to just use an anonymous file mapping? Without SEC_COMMIT you're just allocating address space, similar to using MAP_NORESERVE for anonymous mmap() on Linux (and Solaris.) Unfortunately MAP_NORESERVE isn't standardized and doesn't seem to be available on the BSDs (anonymous mapping also isn't standardized, but appears to be more commonly implemented.) The main problem I see is one you've touched on - address space exhaustion on 32-bit platforms. The current mechanism allows for a large stack in a single thread, many threads with small stacks, a small stack with large SVs and other variations I'm not going to enumerate. Allocating large sections of address space would limit the way that a perl prcoess could use[1] memory too much. Tony [1] over-use, as many would say :) |
The RT System itself - Status changed from 'new' to 'open' |
From @bulk88I already wrote a response and posted it a week ago, but somehow it never made it, so I am retyping it from memory. (logged out?/cookies) On Tue May 20 22:30:53 2014, tonyc wrote:
No. Using a file mapping means having to keep track of a kernel file handle inaddition to the pointer to the memory block. Anon file mappings are for inter-process shared memory. Perl has no reason to share the perl stack with another process. Also file mappings are used for sliding memory "windows" into a larger file (for example, for files too large to memory map entirely). Again that has no use for perl. Both VirtualAlloc and null/invalid file handle CreateFileMapping memory is subject to standard ejection from physical memory to paging file rules. CreateFileMapping is just alot more complicated for no benefit. -- |
From @bulk88I thought about the self allocating concept on *nix. Is an sbrk and/or mmap region of pages, when initially allocated and NULL filled, COWed to a single inter-process shared page of NULLs until it is written to, making the page "dirty" and then being added to the non-shared private pool of the process? If this is true, the idea on *nix, this idea in this commit, can just allocates a large region of pages, and not "reserve" address space (which later would require a mprotect call in the segv handler to allocate), but keeps the pages allocated from the moment of address space allocation creation? Thus removing the need for a segv handler. Of course this is highly specific to the design of the *nix kernel I think. Since if it can't COW/map together new NULL filled pages until they are first written to, this idea will burn physical memory like crazy. After some more work on the concept, I've given up on trying to extract from pp_split, the swap the perl stack with a GV AV or pad AV, then PUSHs onto the AvREAL (I think) AV trick from Perl 3.0 from http://perl5.git.perl.org/perl.git/commitdiff/a687059cbaf2c6fdccb5e0fae2aee80ec15625a8 . Here is a diff of where this swap perl stack with a random AV code came from. If you look in the 3.0 patch, the only part of the code that is recognizable to blead today is the comment "/* temporarily switch stacks */". I'm guessing this trick was a memory optimization. --- C:\perl521\src\do_splitold.txt
+++ C:\perl521\src\do_splitnew.txt
@@ -1,94 +1,189 @@
int
-do_split(spat,retary,sarg,ptrmaxsarg,sargoff,cushion)
+do_split(str,spat,limit,gimme,arglast)
+STR *str;
register SPAT *spat;
-STR ***retary;
-register STR **sarg;
-int *ptrmaxsarg;
-int sargoff;
-int cushion;
+register int limit;
+int gimme;
+int *arglast;
{
- register char *s = str_get(sarg[1]);
- char *strend = s + sarg[1]->str_cur;
+ register ARRAY *ary = stack;
+ STR **st = ary->ary_array;
+ register int sp = arglast[0] + 1;
+ register char *s = str_get(st[sp]);
+ char *strend = s + st[sp--]->str_cur;
register STR *dstr;
register char *m;
- register ARRAY *ary;
- static ARRAY *myarray = Null(ARRAY*);
int iters = 0;
int i;
+ char *orig;
+ int origlimit = limit;
+ int realarray = 0;
if (!spat || !s)
fatal("panic: do_split");
else if (spat->spat_runtime) {
- m = str_get(eval(spat->spat_runtime,Null(STR***),-1));
- if (!*m || (*m == ' ' && !m[1])) {
- m = "\\s+";
+ nointrp = "|)";
+ sp = eval(spat->spat_runtime,G_SCALAR,sp);
+ st = stack->ary_array;
+ m = str_get(dstr = st[sp--]);
+ nointrp = "";
+ if (!dstr->str_cur || (*m == ' ' && dstr->str_cur == 1)) {
+ str_set(dstr,"\\s+");
+ m = dstr->str_ptr;
spat->spat_flags |= SPAT_SKIPWHITE;
}
- if (spat->spat_runtime->arg_type == O_ITEM &&
- spat->spat_runtime[1].arg_type == A_SINGLE) {
+ if (spat->spat_regexp)
+ regfree(spat->spat_regexp);
+ spat->spat_regexp = regcomp(m,m+dstr->str_cur,
+ spat->spat_flags & SPAT_FOLD,1);
+ if (spat->spat_flags & SPAT_KEEP ||
+ (spat->spat_runtime->arg_type == O_ITEM &&
+ (spat->spat_runtime[1].arg_type & A_MASK) == A_SINGLE) ) {
arg_free(spat->spat_runtime); /* it won't change, so */
spat->spat_runtime = Nullarg; /* no point compiling again */
}
- spat->spat_regexp = regcomp(m,spat->spat_flags & SPAT_FOLD,1);
}
#ifdef DEBUGGING
if (debug & 8) {
deb("2.SPAT /%s/\n",spat->spat_regexp->precomp);
}
#endif
- if (retary)
- ary = myarray;
+ ary = stab_xarray(spat->spat_repl[1].arg_ptr.arg_stab);
+ if (ary && ((ary->ary_flags & ARF_REAL) || gimme != G_ARRAY)) {
+ ary->ary_flags |= ARF_REAL;
+ realarray = 1;
+ ary->ary_fill = -1;
+ sp = -1; /* temporarily switch stacks */
+ }
else
- ary = spat->spat_repl[1].arg_ptr.arg_stab->stab_array;
- if (!ary)
- myarray = ary = anew(Nullstab);
- ary->ary_fill = -1;
+ ary = stack;
+ orig = s;
if (spat->spat_flags & SPAT_SKIPWHITE) {
while (isspace(*s))
s++;
}
+ if (!limit)
+ limit = 10001;
if (spat->spat_short) {
i = spat->spat_short->str_cur;
- while (*s && (m = fbminstr(s, strend, spat->spat_short))) {
- dstr = str_new(m-s);
- str_nset(dstr,s,m-s);
- astore(ary, iters++, dstr);
- if (iters > 10000)
- fatal("Substitution loop");
- s = m + i;
+ if (i == 1) {
+ i = *spat->spat_short->str_ptr;
+ while (--limit) {
+ for (m = s; m < strend && *m != i; m++) ;
+ if (m >= strend)
+ break;
+ if (realarray)
+ dstr = Str_new(30,m-s);
+ else
+ dstr = str_static(&str_undef);
+ str_nset(dstr,s,m-s);
+ (void)astore(ary, ++sp, dstr);
+ s = m + 1;
+ }
+ }
+ else {
+#ifndef lint
+ while (s < strend && --limit &&
+ (m=fbminstr((unsigned char*)s, (unsigned char*)strend,
+ spat->spat_short)) )
+#endif
+ {
+ if (realarray)
+ dstr = Str_new(31,m-s);
+ else
+ dstr = str_static(&str_undef);
+ str_nset(dstr,s,m-s);
+ (void)astore(ary, ++sp, dstr);
+ s = m + i;
+ }
}
}
else {
- while (*s && regexec(spat->spat_regexp, s, strend, (iters == 0), 1,
- Nullstr)) {
+ while (s < strend && --limit &&
+ regexec(spat->spat_regexp, s, strend, orig, 1, Nullstr, TRUE) ) {
+ if (spat->spat_regexp->subbase
+ && spat->spat_regexp->subbase != orig) {
+ m = s;
+ s = orig;
+ orig = spat->spat_regexp->subbase;
+ s = orig + (m - s);
+ strend = s + (strend - m);
+ }
m = spat->spat_regexp->startp[0];
- if (spat->spat_regexp->subbase)
- s = spat->spat_regexp->subbase;
- dstr = str_new(m-s);
+ if (realarray)
+ dstr = Str_new(32,m-s);
+ else
+ dstr = str_static(&str_undef);
str_nset(dstr,s,m-s);
- astore(ary, iters++, dstr);
- if (iters > 10000)
- fatal("Substitution loop");
+ (void)astore(ary, ++sp, dstr);
+ if (spat->spat_regexp->nparens) {
+ for (i = 1; i <= spat->spat_regexp->nparens; i++) {
+ s = spat->spat_regexp->startp[i];
+ m = spat->spat_regexp->endp[i];
+ if (realarray)
+ dstr = Str_new(33,m-s);
+ else
+ dstr = str_static(&str_undef);
+ str_nset(dstr,s,m-s);
+ (void)astore(ary, ++sp, dstr);
+ }
+ }
s = spat->spat_regexp->endp[0];
}
}
- if (*s) { /* ignore field after final "whitespace" */
- dstr = str_new(0); /* if they interpolate, it's null anyway */
- str_set(dstr,s);
- astore(ary, iters++, dstr);
+ if (realarray)
+ iters = sp + 1;
+ else
+ iters = sp - arglast[0];
+ if (iters > 9999)
+ fatal("Split loop");
+ if (s < strend || origlimit) { /* keep field after final delim? */
+ if (realarray)
+ dstr = Str_new(34,strend-s);
+ else
+ dstr = str_static(&str_undef);
+ str_nset(dstr,s,strend-s);
+ (void)astore(ary, ++sp, dstr);
+ iters++;
}
else {
- while (iters > 0 && !*str_get(afetch(ary,iters-1)))
- iters--;
+#ifndef I286
+ while (iters > 0 && ary->ary_array[sp]->str_cur == 0)
+ iters--,sp--;
+#else
+ char *zaps;
+ int zapb;
+
+ if (iters > 0) {
+ zaps = str_get(afetch(ary,sp,FALSE));
+ zapb = (int) *zaps;
+ }
+
+ while (iters > 0 && (!zapb)) {
+ iters--,sp--;
+ if (iters > 0) {
+ zaps = str_get(afetch(ary,iters-1,FALSE));
+ zapb = (int) *zaps;
+ }
+ }
+#endif
}
- if (retary) {
- *ptrmaxsarg = iters + sargoff;
- sarg = (STR**)saferealloc((char*)(sarg - sargoff),
- (iters+2+cushion+sargoff)*sizeof(STR*)) + sargoff;
-
- for (i = 1; i <= iters; i++)
- sarg[i] = afetch(ary,i-1);
- *retary = sarg;
+ if (realarray) {
+ ary->ary_fill = sp;
+ if (gimme == G_ARRAY) {
+ sp++;
+ astore(stack, arglast[0] + 1 + sp, Nullstr);
+ Copy(ary->ary_array, stack->ary_array + arglast[0] + 1, sp, STR*);
+ return arglast[0] + sp;
+ }
}
- return iters;
+ else {
+ if (gimme == G_ARRAY)
+ return sp;
+ }
+ sp = arglast[0] + 1;
+ str_numset(str,(double)iters);
+ STABSET(str);
+ st[sp] = str;
+ return sp;
} My next step after the REXTEND workaround (R=real), is to benchmark this since its stable enough to run now. -- |
From @bulk880001-vm-stacks.patchFrom f689583c0fae18a7b64f4b41db36c14a1c9d39e6 Mon Sep 17 00:00:00 2001
From: Daniel Dragan <bulk88@hotmail.com>
Date: Tue, 3 Jun 2014 01:34:45 -0400
Subject: [PATCH] vm stacks
---
embed.fnc | 4 +++
embed.h | 4 +++
perl.h | 8 +++++-
pp.c | 18 +++++++-------
pp.h | 29 ++++++++++++++++++----
pp_ctl.c | 10 ++++++-
proto.h | 4 +++
run.c | 16 ++++++++++++
scope.c | 48 +++++++++++++++++++++++++++++++++++++
sv.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++----
win32/Makefile | 6 ++--
win32/win32.c | 46 +++++++++++++++++++++++++++++++++++
win32/win32.h | 17 +++++++++++++
13 files changed, 256 insertions(+), 26 deletions(-)
diff --git a/embed.fnc b/embed.fnc
index fc3ed95..1a99170 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1678,6 +1678,10 @@ Ap |struct perl_vars *|GetVars
Ap |struct perl_vars*|init_global_struct
Ap |void |free_global_struct|NN struct perl_vars *plvarsp
#endif
+#if defined(PERL_ALT_STACKS)
+pX |int |call_runops|
+pn |DWORD |fix_win32stacks|LPEXCEPTION_POINTERS exceptions
+#endif
Ap |int |runops_standard
Ap |int |runops_debug
Afpd |void |sv_catpvf_mg |NN SV *const sv|NN const char *const pat|...
diff --git a/embed.h b/embed.h
index b7a0290..bfe61e9 100644
--- a/embed.h
+++ b/embed.h
@@ -1355,6 +1355,10 @@
#define malloc_good_size Perl_malloc_good_size
#define malloced_size Perl_malloced_size
# endif
+# if defined(PERL_ALT_STACKS)
+#define call_runops() Perl_call_runops(aTHX)
+#define fix_win32stacks Perl_fix_win32stacks
+# endif
# if defined(PERL_CORE)
#define opslab_force_free(a) Perl_opslab_force_free(aTHX_ a)
#define opslab_free(a) Perl_opslab_free(aTHX_ a)
diff --git a/perl.h b/perl.h
index 6da39f3..5522a36 100644
--- a/perl.h
+++ b/perl.h
@@ -202,7 +202,13 @@
#define MEMBER_TO_FPTR(name) name
#endif /* !PERL_CORE */
-#define CALLRUNOPS PL_runops
+//put call to func that has __try/__catch, then calls PL_runops here?
+#ifdef PERL_ALT_STACKS
+# define CALLRUNOPS(x) Perl_call_runops(x)
+#else
+#error bad
+# define CALLRUNOPS PL_runops
+#endif
#define CALLREGCOMP(sv, flags) Perl_pregcomp(aTHX_ (sv),(flags))
diff --git a/pp.c b/pp.c
index 04c1f29..4d0f95c 100644
--- a/pp.c
+++ b/pp.c
@@ -5543,7 +5543,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* skip the whitespace found last */
@@ -5584,7 +5584,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
s = m;
}
@@ -5601,9 +5601,9 @@ PP(pp_split)
if (!gimme_scalar) {
const U32 items = limit - 1;
if (items < slen)
- EXTEND(SP, items);
+ REXTEND(SP, items);
else
- EXTEND(SP, slen);
+ REXTEND(SP, slen);
}
if (do_utf8) {
@@ -5671,7 +5671,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
@@ -5695,7 +5695,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
@@ -5732,7 +5732,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
if (RX_NPARENS(rx)) {
I32 i;
@@ -5757,7 +5757,7 @@ PP(pp_split)
}
else
dstr = &PL_sv_undef; /* undef, not "" */
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
}
@@ -5777,7 +5777,7 @@ PP(pp_split)
if (!gimme_scalar) {
const STRLEN l = strend - s;
dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
iters++;
}
diff --git a/pp.h b/pp.h
index 97738c2..bfc5b6e 100644
--- a/pp.h
+++ b/pp.h
@@ -270,24 +270,42 @@ Does not use C<TARG>. See also C<XPUSHu>, C<mPUSHu> and C<PUSHu>.
=cut
*/
-#ifdef STRESS_REALLOC
-# define EXTEND(p,n) (void)(sp = stack_grow(sp,p, (SSize_t)(n)))
+#ifndef PERL_ALT_STACKS
+# ifdef STRESS_REALLOC
+# define EXTEND(p,n) (void)(sp = stack_grow(sp,p, (SSize_t)(n)))
/* Same thing, but update mark register too. */
-# define MEXTEND(p,n) STMT_START { \
+# define MEXTEND(p,n) STMT_START { \
const int markoff = mark - PL_stack_base; \
sp = stack_grow(sp,p,(SSize_t) (n)); \
mark = PL_stack_base + markoff; \
} STMT_END
+# else
+# define EXTEND(p,n) (void)(UNLIKELY(PL_stack_max - p < (SSize_t)(n)) && \
+ (sp = stack_grow(sp,p, (SSize_t) (n))))
+
+/* Same thing, but update mark register too. */
+# define MEXTEND(p,n) STMT_START {if (UNLIKELY(PL_stack_max - p < (int)(n))) {\
+ const int markoff = mark - PL_stack_base; \
+ sp = stack_grow(sp,p,(SSize_t) (n)); \
+ mark = PL_stack_base + markoff; \
+ } } STMT_END
+# endif
#else
-# define EXTEND(p,n) (void)(UNLIKELY(PL_stack_max - p < (SSize_t)(n)) && \
+# ifdef STRESS_REALLOC
+# error STRESS_REALLOC and PERL_ALT_STACKS not implemented
+# else
+# define EXTEND(p,n) NOOP
+# define MEXTEND(p,n) NOOP
+# define REXTEND(p,n) (void)(UNLIKELY(PL_stack_max - p < (SSize_t)(n)) && \
(sp = stack_grow(sp,p, (SSize_t) (n))))
/* Same thing, but update mark register too. */
-# define MEXTEND(p,n) STMT_START {if (UNLIKELY(PL_stack_max - p < (int)(n))) {\
+# define RMEXTEND(p,n) STMT_START {if (UNLIKELY(PL_stack_max - p < (int)(n))) {\
const int markoff = mark - PL_stack_base; \
sp = stack_grow(sp,p,(SSize_t) (n)); \
mark = PL_stack_base + markoff; \
} } STMT_END
+# endif
#endif
#define PUSHs(s) (*++sp = (s))
@@ -298,6 +316,7 @@ Does not use C<TARG>. See also C<XPUSHu>, C<mPUSHu> and C<PUSHu>.
#define PUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); PUSHTARG; } STMT_END
#define XPUSHs(s) (EXTEND(sp,1), *++sp = (s))
+#define RXPUSHs(s) (REXTEND(sp,1), *++sp = (s))
#define XPUSHTARG STMT_START { SvSETMAGIC(TARG); XPUSHs(TARG); } STMT_END
#define XPUSHp(p,l) STMT_START { sv_setpvn(TARG, (p), (l)); XPUSHTARG; } STMT_END
#define XPUSHn(n) STMT_START { sv_setnv(TARG, (NV)(n)); XPUSHTARG; } STMT_END
diff --git a/pp_ctl.c b/pp_ctl.c
index 380a7fe..2d4f8d2 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -1001,8 +1001,14 @@ PP(pp_mapwhile)
dst = (SP += shift);
PL_markstack_ptr[-1] += shift;
*PL_markstack_ptr += shift;
- while (count--)
- *dst-- = *src--;
+ //copy upwards not downwards
+ if(count) {
+ SV** dst1 = dst;
+ SV** src1 = src;
+ dst1 -= (count-1);
+ src1 -= (count-1);
+ memcpy(dst1, src1, sizeof(SV**)*count);
+ }
}
/* copy the new items down to the destination list */
dst = PL_stack_base + (PL_markstack_ptr[-2] += items) - 1;
diff --git a/proto.h b/proto.h
index f7716b5..f65f80c 100644
--- a/proto.h
+++ b/proto.h
@@ -5472,6 +5472,10 @@ PERL_CALLCONV MEM_SIZE Perl_malloced_size(void *p)
#if defined(NO_MATHOMS)
/* PERL_CALLCONV void Perl_sv_nounlocking(pTHX_ SV *sv); */
#endif
+#if defined(PERL_ALT_STACKS)
+PERL_CALLCONV int Perl_call_runops(pTHX);
+PERL_CALLCONV DWORD Perl_fix_win32stacks(LPEXCEPTION_POINTERS exceptions);
+#endif
#if defined(PERL_ANY_COW)
PERL_CALLCONV SV* Perl_sv_setsv_cow(pTHX_ SV* dstr, SV* sstr)
__attribute__nonnull__(pTHX_2);
diff --git a/run.c b/run.c
index ff3bc93..04f2673 100644
--- a/run.c
+++ b/run.c
@@ -48,6 +48,22 @@ Perl_runops_standard(pTHX)
return 0;
}
+#ifdef PERL_ALT_STACKS
+int
+Perl_call_runops(pTHX)
+{
+ __try {
+ return PL_runops(aTHX);
+ }
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+ croak_no_mem();
+}
+#endif
+
/*
* Local variables:
* c-indentation-style: bsd
diff --git a/scope.c b/scope.c
index 07f24b7..06b54c3 100644
--- a/scope.c
+++ b/scope.c
@@ -56,7 +56,55 @@ Perl_new_stackinfo(pTHX_ I32 stitems, I32 cxitems)
Newx(si, 1, PERL_SI);
si->si_stack = newAV();
AvREAL_off(si->si_stack);
+#ifndef PERL_ALT_STACKS
av_extend(si->si_stack, stitems > 0 ? stitems-1 : 0);
+#else
+ {
+ void *avarr;
+ void * avarr2;
+ void * toalloc;
+ Safefree(AvALLOC(si->si_stack));
+ AvALLOC(si->si_stack) = NULL;
+ AvARRAY(si->si_stack) = NULL;
+ AvMAX(si->si_stack) = SSize_t_MAX/sizeof(SV*);
+ //fprintf(stderr, "stack alloc NSI av=%x\n", si->si_stack);
+ avarr = VirtualAlloc(
+ NULL,
+ STACKMAX,
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ DWORD e = GetLastError();
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ //4096 (page size) should be constant or runtime lookup from Win32 API, for
+ //4096 on 32 and x64, 8K on ia64 http://blogs.msdn.com/b/oldnewthing/archive/2004/09/08/226797.aspx
+ if(! (avarr2 = VirtualAlloc(avarr,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ (DWORD_PTR)toalloc = (DWORD_PTR)avarr+PERL_PAGESIZE;
+ if(!VirtualAlloc(toalloc,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ AvALLOC(si->si_stack) = (SV**)avarr;
+ AvARRAY(si->si_stack) = (SV**)avarr;
+ }
+#endif
AvALLOC(si->si_stack)[0] = &PL_sv_undef;
AvFILLp(si->si_stack) = 0;
si->si_prev = 0;
diff --git a/sv.c b/sv.c
index d748d56..4baafdf 100644
--- a/sv.c
+++ b/sv.c
@@ -6430,7 +6430,20 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
iter_sv = sv;
goto get_next_sv; /* process this new sv */
}
- Safefree(AvALLOC(av));
+ if(!AvREAL((const AV *)av) && AvMAX((const AV *)av) == SSize_t_MAX/sizeof(SV*)) {
+ //fprintf(stderr, "stack dealloc av=%x\n", av);
+ if(!VirtualFree(
+ AvALLOC(av),
+ 0,
+ MEM_RELEASE
+ )) {
+ fprintf(stderr, "VF failed %u\n", GetLastError());
+ exit(1);
+ }
+ }
+ else {
+ Safefree(AvALLOC(av));
+ }
}
break;
@@ -6774,6 +6787,8 @@ Perl_sv_free(pTHX_ SV *const sv)
/* Private helper function for SvREFCNT_dec().
* Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */
+SV * watch_sv;
+
void
Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
{
@@ -6781,6 +6796,9 @@ Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
PERL_ARGS_ASSERT_SV_FREE2;
+ if( sv == watch_sv) {
+ DebugBreak();
+ }
if (LIKELY( rc == 1 )) {
/* normal case */
SvREFCNT(sv) = 0;
@@ -12678,7 +12696,48 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
SSize_t items = AvFILLp((const AV *)sstr) + 1;
src_ary = AvARRAY((const AV *)sstr);
- Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
+ if(!AvREAL((const AV *)sstr) && AvMAX((const AV *)sstr) == SSize_t_MAX/sizeof(SV*)) {
+ MEMORY_BASIC_INFORMATION mbi;
+ void * avarr;
+ DWORD_PTR toalloc;
+ void * avarr2;
+ //fprintf(stderr, "stack alloc S_sv_dup_common src av=%x dst av=%x\n", sstr, dstr);
+ if(VirtualQuery(src_ary,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ avarr = VirtualAlloc(
+ NULL,
+ 33554432, //2^25 32 MB
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ if(! (avarr2 = VirtualAlloc(avarr,
+ mbi.RegionSize,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ toalloc = (DWORD_PTR) avarr + mbi.RegionSize;
+ if(!VirtualAlloc(toalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ dst_ary = avarr;
+ } else {
+ Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
+ }
ptr_table_store(PL_ptr_table, src_ary, dst_ary);
AvARRAY(MUTABLE_AV(dstr)) = dst_ary;
AvALLOC((const AV *)dstr) = dst_ary;
@@ -12690,10 +12749,11 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
while (items-- > 0)
*dst_ary++ = sv_dup(*src_ary++, param);
}
- items = AvMAX((const AV *)sstr) - AvFILLp((const AV *)sstr);
- while (items-- > 0) {
- *dst_ary++ = &PL_sv_undef;
- }
+ //is this really needed? This is uninit space I think
+ // items = AvMAX((const AV *)sstr) - AvFILLp((const AV *)sstr);
+ // while (items-- > 0) {
+ // *dst_ary++ = &PL_sv_undef;
+ // }
}
else {
AvARRAY(MUTABLE_AV(dstr)) = NULL;
diff --git a/win32/Makefile b/win32/Makefile
index 8b1847c..8097e1e 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -220,7 +220,7 @@ BUILDOPT = $(BUILDOPTEXTRA)
# mode script reading (and break some DATA filehandle functionality)
# please check first if an updated ByteLoader isn't available on CPAN.
#
-BUILDOPT = $(BUILDOPT) -DPERL_TEXTMODE_SCRIPTS -DPERL_HASH_FUNC_ONE_AT_A_TIME
+BUILDOPT = $(BUILDOPT) -DPERL_TEXTMODE_SCRIPTS -DPERL_HASH_FUNC_ONE_AT_A_TIME -DPERL_ALT_STACKS
#
# specify semicolon-separated list of extra directories that modules will
@@ -1004,10 +1004,10 @@ $(MINIDIR) :
if not exist "$(MINIDIR)" mkdir "$(MINIDIR)"
$(MINICORE_OBJ) : $(CORE_NOCFG_H)
- $(CC) -c $(CFLAGS) -DPERL_EXTERNAL_GLOB -DPERL_IS_MINIPERL $(OBJOUT_FLAG)$@ ..\$(*F).c
+ $(CC) -c $(CFLAGS) -DPERL_EXTERNAL_GLOB -DPERL_IS_MINIPERL -DPERL_ALT_STACKS $(OBJOUT_FLAG)$@ ..\$(*F).c
$(MINIWIN32_OBJ) : $(CORE_NOCFG_H)
- $(CC) -c $(CFLAGS) -DPERL_IS_MINIPERL $(OBJOUT_FLAG)$@ $(*F).c
+ $(CC) -c $(CFLAGS) -DPERL_IS_MINIPERL -DPERL_ALT_STACKS $(OBJOUT_FLAG)$@ $(*F).c
# -DPERL_IMPLICIT_SYS needs C++ for perllib.c
# This is the only file that depends on perlhost.h, vmem.h, and vdir.h
diff --git a/win32/win32.c b/win32/win32.c
index cd594ca..191fcc3 100644
--- a/win32/win32.c
+++ b/win32/win32.c
@@ -4560,6 +4560,52 @@ win32_create_message_window(void)
0, 0, 0, 0, 0, HWND_MESSAGE, NULL, NULL, NULL);
}
+#ifdef PERL_ALT_STACKS
+DWORD
+Perl_fix_win32stacks(LPEXCEPTION_POINTERS exceptions)
+{
+ dTHX;
+ MEMORY_BASIC_INFORMATION mbi;
+ DWORD_PTR newalloc;
+ assert(exceptions->ExceptionRecord->ExceptionCode == STATUS_GUARD_PAGE_VIOLATION
+ && exceptions->ExceptionRecord->ExceptionFlags == 0);
+//needs bounds checks to make sure the STATUS_GUARD_PAGE_VIOLATION is for Perl stack and not some other c lib doing the same thing
+ /* ExceptionAddress is EIP/ * to machine code where fault happened, its not
+ interesting */
+ /* ExceptionFlags has to be zero, or the exception is not resumable, so there
+ is no point in checking it */
+ if(exceptions->ExceptionRecord->NumberParameters == 2
+ && (exceptions->ExceptionRecord->ExceptionInformation[0] == 0 /* read fault */
+ || exceptions->ExceptionRecord->ExceptionInformation[0] == 1) /* write failt */
+ && exceptions->ExceptionRecord->ExceptionInformation[1] >= (ULONG_PTR)PL_stack_base && /* fault addr >= stack bottom */
+//should this catch something beyond the alloc or let it pass through, and catch how far beyond the alloc?
+ exceptions->ExceptionRecord->ExceptionInformation[1] <= (ULONG_PTR)PL_stack_base + STACKMAX) {
+ //this is inefficient, these things should be stored somewhere in interp struct
+ if(VirtualQuery(PL_stack_base,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ assert(PL_stack_base == PL_stack_base && PL_stack_base == AvARRAY(PL_curstack));
+ newalloc = (DWORD_PTR)mbi.AllocationBase+(DWORD_PTR)mbi.RegionSize;
+ if(!VirtualAlloc(newalloc,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ else { /* fault address and exception isn't from Perl */
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+}
+#endif
+
#ifdef HAVE_INTERP_INTERN
static void
diff --git a/win32/win32.h b/win32/win32.h
index bfb276f..d61f089 100644
--- a/win32/win32.h
+++ b/win32/win32.h
@@ -225,6 +225,23 @@ struct utsname {
# define WNOHANG 1
#endif
+#ifndef PERL_PAGESIZE
+# if defined(PAGE_SIZE)
+# define PERL_PAGESIZE PAGE_SIZE
+# elif defined(_M_IA64) || defined(__IA64__)
+/* http://blogs.msdn.com/b/oldnewthing/archive/2004/09/08/226797.aspx */
+# define PERL_PAGESIZE 8192
+# elif defined(_M_IX86) || defined(_M_X64) || defined (__i386__) || defined(__x86_64__)
+# define PERL_PAGESIZE 4096
+# else
+# error Unknown page size (ARM ?)
+# endif
+#endif
+
+#ifdef PERL_ALT_STACKS
+# define STACKMAX 0x1F00000
+#endif
+
#define PERL_GET_CONTEXT_DEFINED
/* Compiler-specific stuff. */
--
1.7.9.msysgit.0
|
From @bulk88On Mon Jun 02 22:54:57 2014, bulk88 wrote:
noextend (self allocating stack) perl is reliable 2% faster on perlbench ext noext ext2 noext2 AVERAGE 100 102 100 102 Results saved in file:///C|/sources/perlbench/benchres-004/index.html C:\sources\perlbench>perl perlbench-run -c 100000 noext=C:\perl521\noextend\bin\ Now to reverse the order of the perls, to squeeze out outside effects noext ext noext2 ext2 AVERAGE 100 98 100 98 Results saved in file:///C|/sources/perlbench/benchres-005/index.html C:\sources\perlbench> the largest difference was string/index-var at 26% faster. Many of the tests are noise, since they are less than 2% faster or upto 2% slower, and testing regular perl (regular malloc stack) against regular perlshows it can be 1-2% faster, which means that is noise. The largest perf decrease with no extend perl was 5% slower on string/index-const and string/ipol. This means more optimizing since I am looking things up with syscalls instead of caching them. Or a 4096 byte initial stack is too small and needs to be 8k. Or EXTEND() needs to be similar to win32 alloca and do something special if the extend request is more than 4096/sizeof(SV*) to do just 1 VM syscall to get multiple pages instead of address faulting and SEH exception dispatch and VM syscalls numerous times (once for each page). -- |
From @bulk88On Tue Jun 03 17:46:19 2014, bulk88 wrote:
With some changes, string/index-const is now faster, but some other things are slower and more jittery. "hash/get" and "hash/copy" was 5% faster on the 2nd run of the same binary than on the first run. ext noext ext2 noext2 AVERAGE 100 102 100 101 Results saved in file:///C|/sources/perlbench/benchres-006/index.html noext ext noext2 ext2 AVERAGE 100 99 99 99 I moved the try/catch to a more global location, thinking the overhead up setting up the Win32 OS exception handler and removing it was causing overhead. Apparently, runops_standard is entered and left 100s-1000s of times in a typical process's life. perl521.dll!Perl_runops_standard(interpreter * my_perl=0x00364694) Line 42 C perl521.dll!Perl_runops_standard(interpreter * my_perl=0x00364694) Line 42 C perl521.dll!Perl_runops_standard(interpreter * my_perl=0x00364694) Line 42 C perl521.dll!Perl_runops_standard(interpreter * my_perl=0x00364694) Line 38 C perl521.dll!Perl_runops_standard(interpreter * my_perl=0x00364694) Line 38 C And after all that painful stuff above, runops is called for one final time as perl521.dll!Perl_pp_leavesub(interpreter * my_perl=0x00364694) Line 2463 + 0x1 C But it might not be the source of all slowness, since some things improved, and some got worse. -- |
From @bulk880001-vm-stacks.patchFrom 7c72fda8dd4884b792056663444bdfaa8227a30b Mon Sep 17 00:00:00 2001
From: Daniel Dragan <bulk88@hotmail.com>
Date: Thu, 5 Jun 2014 18:35:54 -0400
Subject: [PATCH] vm stacks
moved try catch to a more global location
---
dist/threads/threads.xs | 13 ++++++++++
embed.fnc | 8 +++++-
embed.h | 8 ++++++
makedef.pl | 6 ++++
miniperlmain.c | 13 ++++++++++
perl.h | 10 +++++++
pp.c | 18 +++++++-------
pp.h | 38 ++++++++++++++++++++++++----
pp_ctl.c | 10 ++++++-
proto.h | 8 ++++++
run.c | 18 +++++++++++++
scope.c | 48 ++++++++++++++++++++++++++++++++++++
sv.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++
t/op/threads.t | 2 +-
win32/Makefile | 19 ++++++++++++++
win32/perlhost.h | 13 ++++++++++
win32/perllib.c | 13 ++++++++++
win32/win32.c | 46 ++++++++++++++++++++++++++++++++++
win32/win32.h | 20 +++++++++++++++
19 files changed, 354 insertions(+), 19 deletions(-)
diff --git a/dist/threads/threads.xs b/dist/threads/threads.xs
index 6175ba7..f31c27b 100644
--- a/dist/threads/threads.xs
+++ b/dist/threads/threads.xs
@@ -468,6 +468,9 @@ STATIC void *
S_ithread_run(void * arg)
#endif
{
+#ifdef PERL_ALT_STACKS
+__try {
+#endif
ithread *thread = (ithread *)arg;
int jmp_rc = 0;
I32 oldscope;
@@ -637,6 +640,16 @@ S_ithread_run(void * arg)
#else
return (0);
#endif
+#ifdef PERL_ALT_STACKS
+}
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+Perl_croak_no_mem();
+#endif
+
}
diff --git a/embed.fnc b/embed.fnc
index c820457..e1a14e3 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -273,7 +273,7 @@ Aprd |void |vcroak |NULLOK const char* pat|NULLOK va_list* args
Anprd |void |croak_no_modify
Anprd |void |croak_xs_usage |NN const CV *const cv \
|NN const char *const params
-npr |void |croak_no_mem
+nprX |void |croak_no_mem
nprX |void |croak_popstack
#if defined(WIN32)
norx |void |win32_croak_not_implemented|NN const char * fname
@@ -1686,6 +1686,12 @@ Ap |struct perl_vars *|GetVars
Ap |struct perl_vars*|init_global_struct
Ap |void |free_global_struct|NN struct perl_vars *plvarsp
#endif
+#if defined(PERL_ALT_STACKS)
+#if 0
+pX |int |call_runops|
+#endif
+pnX |DWORD |fix_win32stacks|LPEXCEPTION_POINTERS exceptions
+#endif
Ap |int |runops_standard
Ap |int |runops_debug
Afpd |void |sv_catpvf_mg |NN SV *const sv|NN const char *const pat|...
diff --git a/embed.h b/embed.h
index ca1b91b..614267d 100644
--- a/embed.h
+++ b/embed.h
@@ -1330,6 +1330,11 @@
# if !defined(WIN32)
#define do_exec3(a,b,c) Perl_do_exec3(aTHX_ a,b,c)
# endif
+# if 0
+# if defined(PERL_ALT_STACKS)
+#define call_runops() Perl_call_runops(aTHX)
+# endif
+# endif
# if defined(DEBUGGING)
#define get_debug_opts(a,b) Perl_get_debug_opts(aTHX_ a,b)
# if defined(PERL_IN_PAD_C)
@@ -1361,6 +1366,9 @@
#define malloc_good_size Perl_malloc_good_size
#define malloced_size Perl_malloced_size
# endif
+# if defined(PERL_ALT_STACKS)
+#define fix_win32stacks Perl_fix_win32stacks
+# endif
# if defined(PERL_CORE)
#define opslab_force_free(a) Perl_opslab_force_free(aTHX_ a)
#define opslab_free(a) Perl_opslab_free(aTHX_ a)
diff --git a/makedef.pl b/makedef.pl
index 8b972a4..8d32206 100644
--- a/makedef.pl
+++ b/makedef.pl
@@ -535,6 +535,12 @@ if ($define{HAS_SIGNBIT}) {
++$skip{Perl_signbit};
}
+unless ($define{PERL_ALT_STACKS}) {
+ ++$skip{Perl_call_runops};
+ ++$skip{Perl_fix_win32stacks};
+}
+++$skip{Perl_call_runops}; #obsolete
+
if ($define{'PERL_GLOBAL_STRUCT'}) {
readvar('perlvars.h', \%skip);
# This seems like the least ugly way to cope with the fact that PL_sh_path
diff --git a/miniperlmain.c b/miniperlmain.c
index f22dcbb..0a1da2c 100644
--- a/miniperlmain.c
+++ b/miniperlmain.c
@@ -92,6 +92,9 @@ main(int argc, char **argv, char **env)
PERL_SYS_INIT3(&argc,&argv,&env);
#endif
+#ifdef PERL_ALT_STACKS
+__try {
+#endif
#if defined(USE_ITHREADS)
/* XXX Ideally, this should really be happening in perl_alloc() or
* perl_construct() to keep libperl.a transparently fork()-safe.
@@ -163,6 +166,16 @@ main(int argc, char **argv, char **env)
exit(exitstatus);
return exitstatus;
+#ifdef PERL_ALT_STACKS
+}
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+croak_no_mem();
+#endif
+
}
/* Register any extra external extensions */
diff --git a/perl.h b/perl.h
index 7338f61..df77f5e 100644
--- a/perl.h
+++ b/perl.h
@@ -202,6 +202,13 @@
#define MEMBER_TO_FPTR(name) name
#endif /* !PERL_CORE */
+//put call to func that has __try/__catch, then calls PL_runops here?
+//#ifdef PERL_ALT_STACKS
+//# define CALLRUNOPS(x) Perl_call_runops(x)
+//#else
+//# define CALLRUNOPS PL_runops
+//#endif
+
#define CALLRUNOPS PL_runops
#define CALLREGCOMP(sv, flags) Perl_pregcomp(aTHX_ (sv),(flags))
@@ -4624,6 +4631,9 @@ EXTCONST char PL_bincompat_options[] =
# ifdef PERLIO_LAYERS
" PERLIO_LAYERS"
# endif
+# ifdef PERL_ALT_STACKS
+ " PERL_ALT_STACKS"
+# endif
# ifdef PERL_DEBUG_READONLY_COW
" PERL_DEBUG_READONLY_COW"
# endif
diff --git a/pp.c b/pp.c
index 11119a2..141ec09 100644
--- a/pp.c
+++ b/pp.c
@@ -5547,7 +5547,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* skip the whitespace found last */
@@ -5588,7 +5588,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
s = m;
}
@@ -5605,9 +5605,9 @@ PP(pp_split)
if (!gimme_scalar) {
const U32 items = limit - 1;
if (items < slen)
- EXTEND(SP, items);
+ REXTEND(SP, items);
else
- EXTEND(SP, slen);
+ REXTEND(SP, slen);
}
if (do_utf8) {
@@ -5675,7 +5675,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
@@ -5699,7 +5699,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
/* The rx->minlen is in characters but we want to step
* s ahead by bytes. */
@@ -5736,7 +5736,7 @@ PP(pp_split)
} else {
dstr = newSVpvn_flags(s, m-s,
(do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
if (RX_NPARENS(rx)) {
I32 i;
@@ -5761,7 +5761,7 @@ PP(pp_split)
}
else
dstr = &PL_sv_undef; /* undef, not "" */
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
}
@@ -5781,7 +5781,7 @@ PP(pp_split)
if (!gimme_scalar) {
const STRLEN l = strend - s;
dstr = newSVpvn_flags(s, l, (do_utf8 ? SVf_UTF8 : 0) | make_mortal);
- XPUSHs(dstr);
+ RXPUSHs(dstr);
}
iters++;
}
diff --git a/pp.h b/pp.h
index a7e936c..ff90fce 100644
--- a/pp.h
+++ b/pp.h
@@ -271,33 +271,58 @@ Does not use C<TARG>. See also C<XPUSHu>, C<mPUSHu> and C<PUSHu>.
=cut
*/
-#ifdef STRESS_REALLOC
-# define EXTEND(p,n) STMT_START { \
+#ifndef PERL_ALT_STACKS
+# ifdef STRESS_REALLOC
+# define EXTEND(p,n) STMT_START { \
sp = stack_grow(sp,p,(SSize_t) (n)); \
PERL_UNUSED_VAR(sp); \
} STMT_END
/* Same thing, but update mark register too. */
-# define MEXTEND(p,n) STMT_START { \
+# define MEXTEND(p,n) STMT_START { \
const int markoff = mark - PL_stack_base; \
sp = stack_grow(sp,p,(SSize_t) (n)); \
mark = PL_stack_base + markoff; \
PERL_UNUSED_VAR(sp); \
} STMT_END
+# else
+# define EXTEND(p,n) STMT_START { \
+ if (UNLIKELY(PL_stack_max - p < (int)(n))) { \
+ sp = stack_grow(sp,p,(SSize_t) (n)); \
+ PERL_UNUSED_VAR(sp); \
+ } } STMT_END
+/* Same thing, but update mark register too. */
+# define MEXTEND(p,n) STMT_START { \
+ if (UNLIKELY(PL_stack_max - p < (int)(n))) { \
+ const int markoff = mark - PL_stack_base; \
+ sp = stack_grow(sp,p,(SSize_t) (n)); \
+ mark = PL_stack_base + markoff; \
+ PERL_UNUSED_VAR(sp); \
+ } } STMT_END
+# endif /* STRESS_REALLOC */
+# define REXTEND(p,n) EXTEND(p,n)
+# define RMEXTEND(p,n) MEXTEND(p,n)
#else
-# define EXTEND(p,n) STMT_START { \
+# ifdef STRESS_REALLOC
+# error STRESS_REALLOC and PERL_ALT_STACKS not implemented
+# else
+# define EXTEND(p,n) NOOP
+# define MEXTEND(p,n) NOOP
+# define REXTEND(p,n) STMT_START { \
if (UNLIKELY(PL_stack_max - p < (int)(n))) { \
sp = stack_grow(sp,p,(SSize_t) (n)); \
PERL_UNUSED_VAR(sp); \
} } STMT_END
+
/* Same thing, but update mark register too. */
-# define MEXTEND(p,n) STMT_START { \
+# define RMEXTEND(p,n) STMT_START { \
if (UNLIKELY(PL_stack_max - p < (int)(n))) { \
const int markoff = mark - PL_stack_base; \
sp = stack_grow(sp,p,(SSize_t) (n)); \
mark = PL_stack_base + markoff; \
PERL_UNUSED_VAR(sp); \
} } STMT_END
-#endif
+# endif /* STRESS_REALLOC */
+#endif /* PERL_ALT_STACKS */
#define PUSHs(s) (*++sp = (s))
#define PUSHTARG STMT_START { SvSETMAGIC(TARG); PUSHs(TARG); } STMT_END
@@ -307,6 +332,7 @@ Does not use C<TARG>. See also C<XPUSHu>, C<mPUSHu> and C<PUSHu>.
#define PUSHu(u) STMT_START { sv_setuv(TARG, (UV)(u)); PUSHTARG; } STMT_END
#define XPUSHs(s) STMT_START { EXTEND(sp,1); *++sp = (s); } STMT_END
+#define RXPUSHs(s) STMT_START { REXTEND(sp,1); *++sp = (s); } STMT_END
#define XPUSHTARG STMT_START { SvSETMAGIC(TARG); XPUSHs(TARG); } STMT_END
#define XPUSHp(p,l) STMT_START { sv_setpvn(TARG, (p), (l)); XPUSHTARG; } STMT_END
#define XPUSHn(n) STMT_START { sv_setnv(TARG, (NV)(n)); XPUSHTARG; } STMT_END
diff --git a/pp_ctl.c b/pp_ctl.c
index 0260a87..9ff0247 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -1002,8 +1002,14 @@ PP(pp_mapwhile)
dst = (SP += shift);
PL_markstack_ptr[-1] += shift;
*PL_markstack_ptr += shift;
- while (count--)
- *dst-- = *src--;
+ //copy upwards not downwards
+ if(count) {
+ SV** dst1 = dst;
+ SV** src1 = src;
+ dst1 -= (count-1);
+ src1 -= (count-1);
+ memcpy(dst1, src1, sizeof(SV**)*count);
+ }
}
/* copy the new items down to the destination list */
dst = PL_stack_base + (PL_markstack_ptr[-2] += items) - 1;
diff --git a/proto.h b/proto.h
index fb040c9..77dbf72 100644
--- a/proto.h
+++ b/proto.h
@@ -5292,6 +5292,11 @@ PERL_CALLCONV char* Perl_my_bcopy(const char* from, char* to, I32 len)
assert(from); assert(to)
#endif
+#if 0
+# if defined(PERL_ALT_STACKS)
+PERL_CALLCONV int Perl_call_runops(pTHX);
+# endif
+#endif
#if defined(DEBUGGING)
PERL_CALLCONV int Perl_get_debug_opts(pTHX_ const char **s, bool givehelp)
__attribute__warn_unused_result__
@@ -5497,6 +5502,9 @@ PERL_CALLCONV MEM_SIZE Perl_malloced_size(void *p)
#if defined(NO_MATHOMS)
/* PERL_CALLCONV void Perl_sv_nounlocking(pTHX_ SV *sv); */
#endif
+#if defined(PERL_ALT_STACKS)
+PERL_CALLCONV DWORD Perl_fix_win32stacks(LPEXCEPTION_POINTERS exceptions);
+#endif
#if defined(PERL_ANY_COW)
PERL_CALLCONV SV* Perl_sv_setsv_cow(pTHX_ SV* dstr, SV* sstr)
__attribute__nonnull__(pTHX_2);
diff --git a/run.c b/run.c
index ff3bc93..bc23cae 100644
--- a/run.c
+++ b/run.c
@@ -48,6 +48,24 @@ Perl_runops_standard(pTHX)
return 0;
}
+#if 0
+#ifdef PERL_ALT_STACKS
+int
+Perl_call_runops(pTHX)
+{
+ __try {
+ return PL_runops(aTHX);
+ }
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+ croak_no_mem();
+}
+#endif
+#endif
+
/*
* Local variables:
* c-indentation-style: bsd
diff --git a/scope.c b/scope.c
index 76e023a..6a004c6 100644
--- a/scope.c
+++ b/scope.c
@@ -56,7 +56,55 @@ Perl_new_stackinfo(pTHX_ I32 stitems, I32 cxitems)
Newx(si, 1, PERL_SI);
si->si_stack = newAV();
AvREAL_off(si->si_stack);
+#ifndef PERL_ALT_STACKS
av_extend(si->si_stack, stitems > 0 ? stitems-1 : 0);
+#else
+ {
+ void *avarr;
+ void * avarr2;
+ void * toalloc;
+ Safefree(AvALLOC(si->si_stack));
+ AvALLOC(si->si_stack) = NULL;
+ AvARRAY(si->si_stack) = NULL;
+ AvMAX(si->si_stack) = SSize_t_MAX/sizeof(SV*);
+ //fprintf(stderr, "stack alloc NSI av=%x\n", si->si_stack);
+ avarr = VirtualAlloc(
+ NULL,
+ STACKMAX,
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ DWORD e = GetLastError();
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ //4096 (page size) should be constant or runtime lookup from Win32 API, for
+ //4096 on 32 and x64, 8K on ia64 http://blogs.msdn.com/b/oldnewthing/archive/2004/09/08/226797.aspx
+ if(! (avarr2 = VirtualAlloc(avarr,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ (DWORD_PTR)toalloc = (DWORD_PTR)avarr+PERL_PAGESIZE;
+ if(!VirtualAlloc(toalloc,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ AvALLOC(si->si_stack) = (SV**)avarr;
+ AvARRAY(si->si_stack) = (SV**)avarr;
+ }
+#endif
AvALLOC(si->si_stack)[0] = &PL_sv_undef;
AvFILLp(si->si_stack) = 0;
si->si_prev = 0;
diff --git a/sv.c b/sv.c
index 13ea53c..d627c0c 100644
--- a/sv.c
+++ b/sv.c
@@ -6433,6 +6433,19 @@ Perl_sv_clear(pTHX_ SV *const orig_sv)
iter_sv = sv;
goto get_next_sv; /* process this new sv */
}
+#ifdef PERL_ALT_STACKS
+ if(!AvREAL((const AV *)av) && AvMAX((const AV *)av) == SSize_t_MAX/sizeof(SV*)) {
+ //fprintf(stderr, "stack dealloc av=%x\n", av);
+ if(!VirtualFree(
+ AvALLOC(av),
+ 0,
+ MEM_RELEASE
+ )) {
+ fprintf(stderr, "VF failed %u\n", GetLastError());
+ exit(1);
+ }
+ } else
+#endif
Safefree(AvALLOC(av));
}
@@ -6777,6 +6790,8 @@ Perl_sv_free(pTHX_ SV *const sv)
/* Private helper function for SvREFCNT_dec().
* Called with rc set to original SvREFCNT(sv), where rc == 0 or 1 */
+SV * watch_sv;
+
void
Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
{
@@ -6784,6 +6799,9 @@ Perl_sv_free2(pTHX_ SV *const sv, const U32 rc)
PERL_ARGS_ASSERT_SV_FREE2;
+ if( sv == watch_sv) {
+ DebugBreak();
+ }
if (LIKELY( rc == 1 )) {
/* normal case */
SvREFCNT(sv) = 0;
@@ -12683,6 +12701,48 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
SSize_t items = AvFILLp((const AV *)sstr) + 1;
src_ary = AvARRAY((const AV *)sstr);
+#ifdef PERL_ALT_STACKS
+ if(!AvREAL((const AV *)sstr) && AvMAX((const AV *)sstr) == SSize_t_MAX/sizeof(SV*)) {
+ MEMORY_BASIC_INFORMATION mbi;
+ void * avarr;
+ DWORD_PTR toalloc;
+ void * avarr2;
+ //fprintf(stderr, "stack alloc S_sv_dup_common src av=%x dst av=%x\n", sstr, dstr);
+ if(VirtualQuery(src_ary,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ avarr = VirtualAlloc(
+ NULL,
+ 33554432, //2^25 32 MB
+ MEM_RESERVE,
+ PAGE_NOACCESS
+ );
+ if(!avarr) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ if(! (avarr2 = VirtualAlloc(avarr,
+ mbi.RegionSize,
+ MEM_COMMIT,
+ PAGE_READWRITE
+ ))) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ toalloc = (DWORD_PTR) avarr + mbi.RegionSize;
+ if(!VirtualAlloc(toalloc,
+ 4096,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+ dst_ary = avarr;
+ } else
+#endif
Newxz(dst_ary, AvMAX((const AV *)sstr)+1, SV*);
ptr_table_store(PL_ptr_table, src_ary, dst_ary);
AvARRAY(MUTABLE_AV(dstr)) = dst_ary;
@@ -12695,10 +12755,12 @@ S_sv_dup_common(pTHX_ const SV *const sstr, CLONE_PARAMS *const param)
while (items-- > 0)
*dst_ary++ = sv_dup(*src_ary++, param);
}
+ /*is this really needed? This is uninit space I think
items = AvMAX((const AV *)sstr) - AvFILLp((const AV *)sstr);
while (items-- > 0) {
*dst_ary++ = &PL_sv_undef;
}
+ */
}
else {
AvARRAY(MUTABLE_AV(dstr)) = NULL;
diff --git a/t/op/threads.t b/t/op/threads.t
index 6fb2410..90314fa 100644
--- a/t/op/threads.t
+++ b/t/op/threads.t
@@ -139,7 +139,7 @@ watchdog(180, "process");
{
local $SIG{__WARN__} = sub {}; # Ignore any thread creation failure warnings
my @t;
- for (1..100) {
+ for (1..15) { #originally 100, but that caused OOM on self allocating stacks Perl on 32 bits
my $thr = threads->create( sub { require IO });
last if !defined($thr); # Probably ran out of memory
push(@t, $thr);
diff --git a/win32/Makefile b/win32/Makefile
index 4462930..ad55193 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -1,3 +1,6 @@
+!IF "$(USE_ALT_STACKS)" == "define"
+BUILDOPT = $(BUILDOPT) -DPERL_ALT_STACKS
+!ENDIF
#
# Makefile to build perl on Windows using Microsoft NMAKE.
# Supported compilers:
@@ -23,6 +26,10 @@
INST_DRV = c:
INST_TOP = $(INST_DRV)\perl521
+# Use OS specific Virtual Memory APIs to speed up Perl stack manipulation
+
+USE_ALT_STACKS = define
+
#
# Uncomment if you want to build a 32-bit Perl using a 32-bit compiler
# on a 64-bit version of Windows.
@@ -306,6 +313,10 @@ BUILDOPT = $(BUILDOPT) -DPERL_IMPLICIT_CONTEXT
BUILDOPT = $(BUILDOPT) -DPERL_IMPLICIT_SYS
!ENDIF
+!IF "$(USE_ALT_STACKS)" == "define"
+BUILDOPT = $(BUILDOPT) -DPERL_ALT_STACKS
+!ENDIF
+
!IF "$(PROCESSOR_ARCHITECTURE)" == ""
PROCESSOR_ARCHITECTURE = x86
!ENDIF
@@ -992,10 +1003,18 @@ $(MINIDIR) :
if not exist "$(MINIDIR)" mkdir "$(MINIDIR)"
$(MINICORE_OBJ) : $(CORE_NOCFG_H)
+!IF "$(USE_ALT_STACKS)" == "define"
+ $(CC) -c $(CFLAGS) -DPERL_EXTERNAL_GLOB -DPERL_IS_MINIPERL -DPERL_ALT_STACKS $(OBJOUT_FLAG)$@ ..\$(*F).c
+!ELSE
$(CC) -c $(CFLAGS) -DPERL_EXTERNAL_GLOB -DPERL_IS_MINIPERL $(OBJOUT_FLAG)$@ ..\$(*F).c
+!ENDIF
$(MINIWIN32_OBJ) : $(CORE_NOCFG_H)
+!IF "$(USE_ALT_STACKS)" == "define"
+ $(CC) -c $(CFLAGS) -DPERL_IS_MINIPERL -DPERL_ALT_STACKS $(OBJOUT_FLAG)$@ $(*F).c
+!ELSE
$(CC) -c $(CFLAGS) -DPERL_IS_MINIPERL $(OBJOUT_FLAG)$@ $(*F).c
+!ENDIF
# -DPERL_IMPLICIT_SYS needs C++ for perllib.c
# This is the only file that depends on perlhost.h, vmem.h, and vdir.h
diff --git a/win32/perlhost.h b/win32/perlhost.h
index 265328b..a7308b9 100644
--- a/win32/perlhost.h
+++ b/win32/perlhost.h
@@ -1691,6 +1691,9 @@ PerlProcGetTimeOfDay(struct IPerlProc* piPerl, struct timeval *t, void *z)
static THREAD_RET_TYPE
win32_start_child(LPVOID arg)
{
+#ifdef PERL_ALT_STACKS
+__try {
+#endif
PerlInterpreter *my_perl = (PerlInterpreter*)arg;
int status;
HWND parent_message_hwnd;
@@ -1801,6 +1804,16 @@ restart:
#else
return (DWORD)status;
#endif
+#ifdef PERL_ALT_STACKS
+}
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+croak_no_mem();
+#endif
+
}
#endif /* USE_ITHREADS */
diff --git a/win32/perllib.c b/win32/perllib.c
index 0e44a24..abb370d 100644
--- a/win32/perllib.c
+++ b/win32/perllib.c
@@ -233,6 +233,9 @@ RunPerl(int argc, char **argv, char **env)
PERL_SYS_INIT(&argc,&argv);
+#ifdef PERL_ALT_STACKS
+__try {
+#endif
if (!(my_perl = perl_alloc()))
return (1);
perl_construct(my_perl);
@@ -277,6 +280,16 @@ RunPerl(int argc, char **argv, char **env)
PERL_SYS_TERM();
return (exitstatus);
+
+#ifdef PERL_ALT_STACKS
+}
+ __except(GetExceptionCode() == STATUS_GUARD_PAGE_VIOLATION
+ ? Perl_fix_win32stacks(GetExceptionInformation())
+ : EXCEPTION_CONTINUE_SEARCH) {
+ NOOP;
+ }
+croak_no_mem();
+#endif
}
EXTERN_C void
diff --git a/win32/win32.c b/win32/win32.c
index cd594ca..191fcc3 100644
--- a/win32/win32.c
+++ b/win32/win32.c
@@ -4560,6 +4560,52 @@ win32_create_message_window(void)
0, 0, 0, 0, 0, HWND_MESSAGE, NULL, NULL, NULL);
}
+#ifdef PERL_ALT_STACKS
+DWORD
+Perl_fix_win32stacks(LPEXCEPTION_POINTERS exceptions)
+{
+ dTHX;
+ MEMORY_BASIC_INFORMATION mbi;
+ DWORD_PTR newalloc;
+ assert(exceptions->ExceptionRecord->ExceptionCode == STATUS_GUARD_PAGE_VIOLATION
+ && exceptions->ExceptionRecord->ExceptionFlags == 0);
+//needs bounds checks to make sure the STATUS_GUARD_PAGE_VIOLATION is for Perl stack and not some other c lib doing the same thing
+ /* ExceptionAddress is EIP/ * to machine code where fault happened, its not
+ interesting */
+ /* ExceptionFlags has to be zero, or the exception is not resumable, so there
+ is no point in checking it */
+ if(exceptions->ExceptionRecord->NumberParameters == 2
+ && (exceptions->ExceptionRecord->ExceptionInformation[0] == 0 /* read fault */
+ || exceptions->ExceptionRecord->ExceptionInformation[0] == 1) /* write failt */
+ && exceptions->ExceptionRecord->ExceptionInformation[1] >= (ULONG_PTR)PL_stack_base && /* fault addr >= stack bottom */
+//should this catch something beyond the alloc or let it pass through, and catch how far beyond the alloc?
+ exceptions->ExceptionRecord->ExceptionInformation[1] <= (ULONG_PTR)PL_stack_base + STACKMAX) {
+ //this is inefficient, these things should be stored somewhere in interp struct
+ if(VirtualQuery(PL_stack_base,&mbi,sizeof(mbi)) != sizeof(mbi)){
+ DebugBreak();
+ fprintf(stderr, "VQ failed %u\n", GetLastError());
+ exit(1);
+ }
+ assert(PL_stack_base == PL_stack_base && PL_stack_base == AvARRAY(PL_curstack));
+ newalloc = (DWORD_PTR)mbi.AllocationBase+(DWORD_PTR)mbi.RegionSize;
+ if(!VirtualAlloc(newalloc,
+ PERL_PAGESIZE,
+ MEM_COMMIT,
+ PAGE_READWRITE|PAGE_GUARD
+ )) {
+ DebugBreak();
+ fprintf(stderr, "VA failed %u\n", GetLastError());
+ exit(1);
+ }
+
+ return EXCEPTION_CONTINUE_EXECUTION;
+ }
+ else { /* fault address and exception isn't from Perl */
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+}
+#endif
+
#ifdef HAVE_INTERP_INTERN
static void
diff --git a/win32/win32.h b/win32/win32.h
index bfb276f..0b07b7c 100644
--- a/win32/win32.h
+++ b/win32/win32.h
@@ -225,6 +225,26 @@ struct utsname {
# define WNOHANG 1
#endif
+#ifndef PERL_PAGESIZE
+# if defined(PAGE_SIZE)
+# define PERL_PAGESIZE PAGE_SIZE
+# elif defined(_M_IA64) || defined(__IA64__)
+/* http://blogs.msdn.com/b/oldnewthing/archive/2004/09/08/226797.aspx */
+# define PERL_PAGESIZE 8192
+# elif defined(_M_IX86) || defined(_M_X64) || defined (__i386__) || defined(__x86_64__)
+# define PERL_PAGESIZE 4096
+# else
+# error Unknown page size (ARM ?)
+# endif
+#endif
+
+#ifdef PERL_ALT_STACKS
+# define STACKMAX 0x1F00000
+# define PERL_ALT_STACKS_EXPR(x) x
+#else
+# define PERL_ALT_STACKS_EXPR(x)
+#endif
+
#define PERL_GET_CONTEXT_DEFINED
/* Compiler-specific stuff. */
--
1.7.9.msysgit.0
|
From @bulk88I created a version of self allocating stacks, where EXTEND still has the normal overhead but instead of malloc mem, it uses OS VM directly. Only 3 functions in the binary are different (create the stack AV, dup it, free it and RunPerl). After taking about an hour of time to make the benchmarks below, it shows, that using a VM block instead of an malloc block is usually "slower", and in some cases faster. I wonder if what I am testing is function alignment, or is the 0x1000 alignment of OS VM causing cache associativity collisions and cache misses. ext extvm ext2 extvm2 AVERAGE 100 99 100 98 Results saved in file:///C|/sources/perlbench/benchres-009/index.html C:\sources\perlbench> -- |
From @tonycozOn Mon Jun 02 22:54:57 2014, bulk88 wrote:
That would be implementation dependent. POSIX doesn't discuss anonymous mappings. POSIX 2008 doesn't discuss brk()/sbrk() (they've been removed) Tony |
This work seems to have died without a resolution. Barring further submissions from @bulk88 (PR please!) I suggest closing this case. |
Migrated from rt.perl.org#121923 (status was 'open')
Searchable as RT121923$
The text was updated successfully, but these errors were encountered: