Skip to content

Commit 2336097

Browse files
Replaced the regcppush/regcppop loops with memcpy
Profiling with Intel VTune showed that this improves the performance of a Perl regex-matching program with multiple capture groups and recursive patterns
1 parent fd4fc0f commit 2336097

File tree

2 files changed

+59
-41
lines changed

2 files changed

+59
-41
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ Alexander D'Archangel <darksuji@gmail.com>
6767
Alexander Foken
6868
Alexander Gernler <alexander_gernler@genua.de>
6969
Alexander Gough <alex-p5p@earth.li>
70+
Alexander Nikolov <sasho648@gmail.com>
7071
Alexander Hartmaier <abraxxa@cpan.org>
7172
Alexander Klimov <ask@wisdom.weizmann.ac.il>
7273
Alexander Smishlajev <als@turnhere.com>

regexec.c

Lines changed: 58 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ static void S_setup_eval_state(pTHX_ regmatch_info *const reginfo);
218218
static void S_cleanup_regmatch_info_aux(pTHX_ void *arg);
219219
static regmatch_state * S_push_slab(pTHX);
220220

221-
#define REGCP_PAREN_ELEMS 3
222221
#define REGCP_OTHER_ELEMS 3
223222
#define REGCP_FRAME_ELEMS 1
224223
/* REGCP_FRAME_ELEMS are not part of the REGCP_OTHER_ELEMS and
@@ -228,8 +227,11 @@ STATIC CHECKPOINT
228227
S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
229228
{
230229
const int retval = PL_savestack_ix;
231-
const int paren_elems_to_push =
232-
(maxopenparen - parenfloor) * REGCP_PAREN_ELEMS;
230+
/* Number of bytes about to be stored in the stack */
231+
const SSize_t paren_bytes_to_push = sizeof(*rex->offs) * (maxopenparen - parenfloor);
232+
/* Number of savestack[] entries to be filled by the paren data */
233+
/* Round up in case the byte count is not a whole number of entries */
234+
const int paren_elems_to_push = (paren_bytes_to_push + sizeof(*PL_savestack) - 1) / sizeof(*PL_savestack);
233235
const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
234236
const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
235237
I32 p;
@@ -238,9 +240,9 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
238240
PERL_ARGS_ASSERT_REGCPPUSH;
239241

240242
if (paren_elems_to_push < 0)
241-
Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i REGCP_PAREN_ELEMS: %u",
243+
Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i",
242244
(int)paren_elems_to_push, (int)maxopenparen,
243-
(int)parenfloor, (unsigned)REGCP_PAREN_ELEMS);
245+
(int)parenfloor);
244246

245247
if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
246248
Perl_croak(aTHX_ "panic: paren_elems_to_push offset %" UVuf
@@ -249,31 +251,35 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen _pDEPTH)
249251
(unsigned long)maxopenparen,
250252
(long)parenfloor);
251253

252-
SSGROW(total_elems + REGCP_FRAME_ELEMS);
253-
254254
DEBUG_BUFFERS_r(
255255
if ((int)maxopenparen > (int)parenfloor)
256-
Perl_re_exec_indentf( aTHX_
256+
Perl_re_exec_indentf(aTHX_
257257
"rex=0x%" UVxf " offs=0x%" UVxf ": saving capture indices:\n",
258258
depth,
259259
PTR2UV(rex),
260260
PTR2UV(rex->offs)
261261
);
262262
);
263-
for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
264-
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
265-
SSPUSHIV(rex->offs[p].end);
266-
SSPUSHIV(rex->offs[p].start);
267-
SSPUSHINT(rex->offs[p].start_tmp);
268-
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
269-
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
270-
depth,
271-
(UV)p,
272-
(IV)rex->offs[p].start,
273-
(IV)rex->offs[p].start_tmp,
274-
(IV)rex->offs[p].end
275-
));
276-
}
263+
264+
SSGROW(total_elems + REGCP_FRAME_ELEMS);
265+
266+
/* memcpy the offs onto the stack - it's faster than a for loop */
267+
memcpy(&PL_savestack[PL_savestack_ix], rex->offs + parenfloor + 1, paren_bytes_to_push);
268+
PL_savestack_ix += paren_elems_to_push;
269+
270+
DEBUG_BUFFERS_r(
271+
for (p = parenfloor + 1; p <= (I32)maxopenparen; p++) {
272+
Perl_re_exec_indentf(aTHX_
273+
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "\n",
274+
depth,
275+
(UV)p,
276+
(IV)rex->offs[p].start,
277+
(IV)rex->offs[p].start_tmp,
278+
(IV)rex->offs[p].end
279+
);
280+
}
281+
);
282+
277283
/* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
278284
SSPUSHINT(maxopenparen);
279285
SSPUSHINT(rex->lastparen);
@@ -366,25 +372,36 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p _pDEPTH)
366372
PTR2UV(rex->offs)
367373
);
368374
);
369-
paren = *maxopenparen_p;
370-
for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
371-
SSize_t tmps;
372-
rex->offs[paren].start_tmp = SSPOPINT;
373-
rex->offs[paren].start = SSPOPIV;
374-
tmps = SSPOPIV;
375-
if (paren <= rex->lastparen)
376-
rex->offs[paren].end = tmps;
377-
DEBUG_BUFFERS_r( Perl_re_exec_indentf( aTHX_
378-
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "%s\n",
379-
depth,
380-
(UV)paren,
381-
(IV)rex->offs[paren].start,
382-
(IV)rex->offs[paren].start_tmp,
383-
(IV)rex->offs[paren].end,
384-
(paren > rex->lastparen ? "(skipped)" : ""));
385-
);
386-
paren--;
387-
}
375+
/* subtract the remaining elements from the stack */
376+
PL_savestack_ix -= i;
377+
378+
/* static assert that the offs struct size is not less than the stack elem size */
379+
STATIC_ASSERT_STMT(sizeof(*rex->offs) >= sizeof(*PL_savestack));
380+
381+
/* calculate actual number of offs/capture groups stored */
382+
/* by doing integer division (leaving potential alignment aside) */
383+
i = (i * sizeof(*PL_savestack)) / sizeof(*rex->offs);
384+
385+
/* calculate paren starting point */
386+
/* i is our number of entries which we are subtracting from *maxopenparen_p */
387+
/* and then adding 1 to get the index of the first saved paren */
388+
paren = *maxopenparen_p - i + 1;
389+
390+
/* restore them */
391+
memcpy(rex->offs + paren, &PL_savestack[PL_savestack_ix], i * sizeof(*rex->offs));
392+
393+
DEBUG_BUFFERS_r(
394+
for (; paren <= *maxopenparen_p; ++paren) {
395+
Perl_re_exec_indentf(aTHX_
396+
" \\%" UVuf ": %" IVdf "(%" IVdf ")..%" IVdf "%s\n",
397+
depth,
398+
(UV)paren,
399+
(IV)rex->offs[paren].start,
400+
(IV)rex->offs[paren].start_tmp,
401+
(IV)rex->offs[paren].end,
402+
(paren > rex->lastparen ? "(skipped)" : ""));
403+
}
404+
);
388405
#if 1
389406
/* It would seem that the similar code in regtry()
390407
* already takes care of this, and in fact it is in

0 commit comments

Comments
 (0)