Skip to content

bpo-23689: re module, allocate SRE_REPEAT in a memory pool #12160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
re module: fix memory leak when a match is terminated by a signal or memory
allocation failure. Patch by Ma Lin.
95 changes: 94 additions & 1 deletion Modules/_sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,89 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
return 0;
}


/* Memory pool functions for SRE_REPEAT. Using the pool avoids a memory
leak when the SRE(match) function terminates abruptly.
state->pool_used_repeats is a doubly linked list, so that we
can remove a SRE_REPEAT node from it.
state->pool_unused_repeats is a singly linked list; we put/get
nodes at the head. */

/* Obtain a SRE_REPEAT node for `state`.

   A node is taken from the head of state->pool_unused_repeats when that
   list is non-empty; otherwise a new one is requested from
   PyObject_MALLOC.  The node is then pushed onto the head of
   state->pool_used_repeats.

   Returns the node, or NULL if PyObject_MALLOC fails (in which case
   neither list is modified).  Only mem_prev and mem_next are written
   here; the remaining fields are left for the caller to fill in. */
static SRE_REPEAT*
mempool_repeat_malloc(SRE_STATE *state)
{
    SRE_REPEAT *node = state->pool_unused_repeats;
    SRE_REPEAT *used_head;

    if (node != NULL) {
        /* recycle the first unused node: pop it off the unused list */
        state->pool_unused_repeats = node->mem_next;
    }
    else {
        /* nothing to recycle, allocate a fresh node */
        node = PyObject_MALLOC(sizeof(SRE_REPEAT));
        if (node == NULL) {
            return NULL;
        }
    }

    /* link the node in as the new head of the used list */
    used_head = state->pool_used_repeats;
    node->mem_prev = NULL;
    node->mem_next = used_head;
    if (used_head != NULL) {
        used_head->mem_prev = node;
    }
    state->pool_used_repeats = node;

    return node;
}

/* Hand `repeat` back to the pool of `state`.

   The node is unlinked from state->pool_used_repeats (using its
   mem_prev/mem_next links) and pushed onto the head of
   state->pool_unused_repeats.  No memory is released here; the node
   stays available for a later mempool_repeat_malloc() call. */
static void
mempool_repeat_free(SRE_STATE *state, SRE_REPEAT *repeat)
{
    SRE_REPEAT *before = repeat->mem_prev;
    SRE_REPEAT *after = repeat->mem_next;

    /* unlink from the used list */
    if (after != NULL) {
        after->mem_prev = before;
    }
    if (before == NULL) {
        /* the node was the head of the used list */
        state->pool_used_repeats = after;
    }
    else {
        before->mem_next = after;
    }

    /* push onto the unused list; only mem_next is meaningful there */
    repeat->mem_next = state->pool_unused_repeats;
    state->pool_unused_repeats = repeat;
}

/* Release every SRE_REPEAT node owned by the pool of `state`.

   Both state->pool_used_repeats and state->pool_unused_repeats are
   walked through their mem_next links, each node is passed to
   PyObject_FREE, and both list heads are reset to NULL. */
static void
mempool_repeat_clear(SRE_STATE *state)
{
    SRE_REPEAT *node, *following;

    /* used list: nodes still handed out when the pool is torn down */
    for (node = state->pool_used_repeats; node != NULL; node = following) {
        /* read the link before the node is freed */
        following = node->mem_next;
        PyObject_FREE(node);
    }
    state->pool_used_repeats = NULL;

    /* unused list: nodes that were returned and kept for reuse */
    for (node = state->pool_unused_repeats; node != NULL; node = following) {
        following = node->mem_next;
        PyObject_FREE(node);
    }
    state->pool_unused_repeats = NULL;
}


/* generate 8-bit version */

#define SRE_CHAR Py_UCS1
Expand Down Expand Up @@ -348,7 +431,12 @@ state_reset(SRE_STATE* state)

state->repeat = NULL;

data_stack_dealloc(state);
/* reuse stack if stack_size <= 16 KiB,
to avoid frequent memory alloc/free. */
if (state->data_stack_size <= 16*1024)
state->data_stack_base = 0;
else
data_stack_dealloc(state);
}

static void*
Expand Down Expand Up @@ -442,6 +530,10 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->match_all = 0;
state->must_advance = 0;

state->repeat = NULL;
state->pool_used_repeats = NULL;
state->pool_unused_repeats = NULL;

state->beginning = ptr;

state->start = (void*) ((char*) ptr + start * state->charsize);
Expand Down Expand Up @@ -470,6 +562,7 @@ state_fini(SRE_STATE* state)
data_stack_dealloc(state);
PyMem_Del(state->mark);
state->mark = NULL;
mempool_repeat_clear(state);
}

/* calculate offset from start of string */
Expand Down
6 changes: 6 additions & 0 deletions Modules/sre.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ typedef struct SRE_REPEAT_T {
SRE_CODE* pattern; /* points to REPEAT operator arguments */
void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
/* for memory pool */
struct SRE_REPEAT_T *mem_prev;
struct SRE_REPEAT_T *mem_next;
} SRE_REPEAT;

typedef struct {
Expand All @@ -83,6 +86,9 @@ typedef struct {
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
/* memory pool for SRE_REPEAT object */
SRE_REPEAT *pool_used_repeats;
SRE_REPEAT *pool_unused_repeats;
} SRE_STATE;

typedef struct {
Expand Down
4 changes: 2 additions & 2 deletions Modules/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
ctx->pattern[1], ctx->pattern[2]));

/* install new repeat context */
ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
ctx->u.rep = mempool_repeat_malloc(state);
if (!ctx->u.rep) {
PyErr_NoMemory();
RETURN_FAILURE;
Expand All @@ -1002,7 +1002,7 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
state->ptr = ctx->ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
state->repeat = ctx->u.rep->prev;
PyObject_FREE(ctx->u.rep);
mempool_repeat_free(state, ctx->u.rep);

if (ret) {
RETURN_ON_ERROR(ret);
Expand Down