Skip to content

Commit e170116

Browse files
committed
Make marshal stable
marshal used refcnt() to decide use FLAG_REF or not. Even when one object is used only once in marshled object, FLAG_REF could be used. This commit makes marshal two-pass. It counts refs at first pass. In second pass, mashal objects and use FLAG_REF only when the object is used more than twice.
1 parent 5fe7c98 commit e170116

File tree

2 files changed

+165
-29
lines changed

2 files changed

+165
-29
lines changed

Lib/test/test_marshal.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,25 @@ def test_eof(self):
324324
for i in range(len(data)):
325325
self.assertRaises(EOFError, marshal.loads, data[0: i])
326326

327+
def test_stable_refs(self):
328+
"""FLAG_REF must be used regardless refcnt"""
329+
x = 0x42
330+
y = (x,)
331+
z = [y, y]
332+
dummy = x # refcnt of x must be >1
333+
334+
data = marshal.dumps(x)
335+
# x is used once, FLAG_REF must not be set.
336+
self.assertEqual(b"i\x42\x00\x00\x00", data)
337+
338+
data = marshal.dumps(z)
339+
# y is used twice, but x is used once because y is reused.
340+
self.assertEqual(b"[\x02\x00\x00\x00" + # list(size=2)i\x42\x00\x00\x00", data)
341+
b"\xa9\x01" + # small tuple(size=1) | FLAG_REF
342+
b"i\x42\x00\x00\x00" + # int(42)
343+
b"r\x00\x00\x00\x00", # ref(0)
344+
data)
345+
327346
LARGE_SIZE = 2**31
328347
pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4
329348

Python/marshal.c

Lines changed: 146 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ typedef struct {
8686
char *end;
8787
char *buf;
8888
_Py_hashtable_t *hashtable;
89+
int last_index;
8990
int version;
9091
} WFILE;
9192

@@ -276,37 +277,35 @@ w_ref(PyObject *v, char *flag, WFILE *p)
276277
return 0; /* not writing object references */
277278

278279
/* if it has only one reference, it definitely isn't shared */
279-
if (Py_REFCNT(v) == 1)
280+
if (Py_REFCNT(v) == 1) {
280281
return 0;
282+
}
281283

282284
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
283-
if (entry != NULL) {
284-
/* write the reference index to the stream */
285-
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
285+
if (entry == NULL) {
286+
return 0;
287+
}
288+
289+
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
290+
// w >= 0: index written by previous w_ref()
291+
// w < 0 : refcnt counted by w_count_refs()
292+
if (w == -1) {
293+
// This object is used only once.
294+
return 0;
295+
}
296+
297+
if (w >= 0) {
286298
/* we don't store "long" indices in the dict */
287299
assert(0 <= w && w <= 0x7fffffff);
288300
w_byte(TYPE_REF, p);
289301
w_long(w, p);
290302
return 1;
291303
} else {
292-
size_t s = p->hashtable->entries;
293-
/* we don't support long indices */
294-
if (s >= 0x7fffffff) {
295-
PyErr_SetString(PyExc_ValueError, "too many objects");
296-
goto err;
297-
}
298-
w = (int)s;
299-
Py_INCREF(v);
300-
if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
301-
Py_DECREF(v);
302-
goto err;
303-
}
304+
w = p->last_index++;
305+
_Py_HASHTABLE_ENTRY_WRITE_DATA(p->hashtable, entry, w);
304306
*flag |= FLAG_REF;
305307
return 0;
306308
}
307-
err:
308-
p->error = WFERR_UNMARSHALLABLE;
309-
return 1;
310309
}
311310

312311
static void
@@ -584,18 +583,135 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
584583
}
585584

586585
static int
587-
w_init_refs(WFILE *wf, int version)
586+
w_count_refs(PyObject *v, WFILE *p)
588587
{
589-
if (version >= 3) {
590-
wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
591-
_Py_hashtable_hash_ptr,
592-
_Py_hashtable_compare_direct);
593-
if (wf->hashtable == NULL) {
594-
PyErr_NoMemory();
595-
return -1;
588+
if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
589+
PyErr_SetString(PyExc_ValueError,
590+
"object too deeply nested to marshal");
591+
goto err;
592+
}
593+
594+
if (v == NULL ||
595+
v == Py_None ||
596+
v == PyExc_StopIteration ||
597+
v == Py_Ellipsis ||
598+
v == Py_False ||
599+
v == Py_True) {
600+
return 0;
601+
}
602+
603+
/* if it has only one reference, it definitely isn't shared */
604+
if (Py_REFCNT(v) > 1) {
605+
// Use negative number to count refs
606+
_Py_hashtable_entry_t *entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
607+
if (entry != NULL) {
608+
int w;
609+
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, w);
610+
assert(w < 0);
611+
w--;
612+
_Py_HASHTABLE_ENTRY_WRITE_DATA(p->hashtable, entry, w);
613+
return 0;
614+
}
615+
else {
616+
size_t s = p->hashtable->entries;
617+
/* we don't support long indices */
618+
if (s >= 0x7fffffff) {
619+
PyErr_SetString(PyExc_ValueError, "too many objects");
620+
goto err;
621+
}
622+
int w = -1;
623+
Py_INCREF(v);
624+
if (_Py_HASHTABLE_SET(p->hashtable, v, w) < 0) {
625+
Py_DECREF(v);
626+
goto err;
627+
}
628+
}
629+
}
630+
631+
// These logic should be same to w_object()
632+
p->depth++;
633+
634+
Py_ssize_t i, n;
635+
if (PyTuple_CheckExact(v)) {
636+
n = PyTuple_Size(v);
637+
for (i = 0; i < n; i++) {
638+
w_count_refs(PyTuple_GET_ITEM(v, i), p);
639+
}
640+
}
641+
else if (PyList_CheckExact(v)) {
642+
n = PyList_GET_SIZE(v);
643+
for (i = 0; i < n; i++) {
644+
w_count_refs(PyList_GET_ITEM(v, i), p);
596645
}
597646
}
647+
else if (PyDict_CheckExact(v)) {
648+
PyObject *key, *value;
649+
i = 0;
650+
while (PyDict_Next(v, &i, &key, &value)) {
651+
w_count_refs(key, p);
652+
w_count_refs(value, p);
653+
}
654+
}
655+
else if (PyAnySet_CheckExact(v)) {
656+
PyObject *value, *it;
657+
658+
it = PyObject_GetIter(v);
659+
if (it == NULL) {
660+
p->depth--;
661+
goto err;
662+
}
663+
while ((value = PyIter_Next(it)) != NULL) {
664+
w_count_refs(value, p);
665+
Py_DECREF(value);
666+
}
667+
Py_DECREF(it);
668+
if (PyErr_Occurred()) {
669+
p->depth--;
670+
goto err;
671+
}
672+
}
673+
else if (PyCode_Check(v)) {
674+
PyCodeObject *co = (PyCodeObject *)v;
675+
w_count_refs(co->co_code, p);
676+
w_count_refs(co->co_consts, p);
677+
w_count_refs(co->co_names, p);
678+
w_count_refs(co->co_varnames, p);
679+
w_count_refs(co->co_freevars, p);
680+
w_count_refs(co->co_cellvars, p);
681+
w_count_refs(co->co_filename, p);
682+
w_count_refs(co->co_name, p);
683+
w_count_refs(co->co_lnotab, p);
684+
}
685+
686+
p->depth--;
687+
688+
if (p->error == WFERR_UNMARSHALLABLE) {
689+
return 1;
690+
}
598691
return 0;
692+
693+
err:
694+
p->error = WFERR_UNMARSHALLABLE;
695+
return 1;
696+
}
697+
698+
static int
699+
w_init_refs(WFILE *wf, int version, PyObject *x)
700+
{
701+
if (version < 3) {
702+
return 0;
703+
}
704+
705+
wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
706+
_Py_hashtable_hash_ptr,
707+
_Py_hashtable_compare_direct);
708+
if (wf->hashtable == NULL) {
709+
PyErr_NoMemory();
710+
return -1;
711+
}
712+
wf->last_index = 0;
713+
714+
return w_count_refs(x, wf);
599715
}
600716

601717
static int
@@ -645,8 +761,9 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
645761
wf.end = wf.ptr + sizeof(buf);
646762
wf.error = WFERR_OK;
647763
wf.version = version;
648-
if (w_init_refs(&wf, version))
764+
if (w_init_refs(&wf, version, x)) {
649765
return; /* caller mush check PyErr_Occurred() */
766+
}
650767
w_object(x, &wf);
651768
w_clear_refs(&wf);
652769
w_flush(&wf);
@@ -1621,7 +1738,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
16211738
wf.end = wf.ptr + PyBytes_Size(wf.str);
16221739
wf.error = WFERR_OK;
16231740
wf.version = version;
1624-
if (w_init_refs(&wf, version)) {
1741+
if (w_init_refs(&wf, version, x)) {
16251742
Py_DECREF(wf.str);
16261743
return NULL;
16271744
}

0 commit comments

Comments
 (0)