Skip to content

bpo-33312: Fix clang ubsan out of bounds warnings in dict. #6537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed clang ubsan (undefined behavior sanitizer) warnings in dictobject.c by
adjusting how the internal struct _dictkeysobject shared keys structure is
declared.
11 changes: 2 additions & 9 deletions Objects/dict-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,8 @@ struct _dictkeysobject {
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
- 8 bytes otherwise (int64_t*)

Dynamically sized, 8 is minimum. */
union {
int8_t as_1[8];
int16_t as_2[4];
int32_t as_4[2];
#if SIZEOF_VOID_P > 4
int64_t as_8[1];
#endif
} dk_indices;
Dynamically sized, SIZEOF_VOID_P is minimum. */
char dk_indices[]; /* char is required to avoid strict aliasing. */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't it be unsigned char?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DKIX_EMPTY constant is -1, and all of the types this is replacing are signed (as are the types we cast it to everywhere), so sticking with char made sense.

I'd prefer to say int8_t, but given that the references I've found only mention char and unsigned char in relation to strict aliasing, I'm being conservative and matching that exactly.


/* "PyDictKeyEntry dk_entries[dk_usable];" array follows:
see the DK_ENTRIES() macro */
Expand Down
27 changes: 12 additions & 15 deletions Objects/dictobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ PyDict_Fini(void)
2 : sizeof(int32_t))
#endif
#define DK_ENTRIES(dk) \
((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)]))
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the following expression look a tiny bit clearer to you?

((PyDictKeyEntry*)((int8_t*)((dk)->dk_indices) + DK_SIZE(dk) * DK_IXSIZE(dk)))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe, but I'll still toss another pair of ()s in there for clarity:

((PyDictKeyEntry*)(((int8_t*)((dk)->dk_indices)) + DK_SIZE(dk) * DK_IXSIZE(dk)))

even though I believe those are equivalent (the cast is applied before the +, because a cast binds more tightly than binary +).


#define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
#define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
Expand All @@ -316,21 +316,21 @@ dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
Py_ssize_t ix;

if (s <= 0xff) {
int8_t *indices = keys->dk_indices.as_1;
int8_t *indices = (int8_t*)(keys->dk_indices);
ix = indices[i];
}
else if (s <= 0xffff) {
int16_t *indices = keys->dk_indices.as_2;
int16_t *indices = (int16_t*)(keys->dk_indices);
ix = indices[i];
}
#if SIZEOF_VOID_P > 4
else if (s > 0xffffffff) {
int64_t *indices = keys->dk_indices.as_8;
int64_t *indices = (int64_t*)(keys->dk_indices);
ix = indices[i];
}
#endif
else {
int32_t *indices = keys->dk_indices.as_4;
int32_t *indices = (int32_t*)(keys->dk_indices);
ix = indices[i];
}
assert(ix >= DKIX_DUMMY);
Expand All @@ -346,23 +346,23 @@ dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
assert(ix >= DKIX_DUMMY);

if (s <= 0xff) {
int8_t *indices = keys->dk_indices.as_1;
int8_t *indices = (int8_t*)(keys->dk_indices);
assert(ix <= 0x7f);
indices[i] = (char)ix;
}
else if (s <= 0xffff) {
int16_t *indices = keys->dk_indices.as_2;
int16_t *indices = (int16_t*)(keys->dk_indices);
assert(ix <= 0x7fff);
indices[i] = (int16_t)ix;
}
#if SIZEOF_VOID_P > 4
else if (s > 0xffffffff) {
int64_t *indices = keys->dk_indices.as_8;
int64_t *indices = (int64_t*)(keys->dk_indices);
indices[i] = ix;
}
#endif
else {
int32_t *indices = keys->dk_indices.as_4;
int32_t *indices = (int32_t*)(keys->dk_indices);
assert(ix <= 0x7fffffff);
indices[i] = (int32_t)ix;
}
Expand Down Expand Up @@ -421,8 +421,8 @@ static PyDictKeysObject empty_keys_struct = {
lookdict_split, /* dk_lookup */
0, /* dk_usable (immutable) */
0, /* dk_nentries */
.dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}},
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the size of the dk_indices field?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

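This is a static initializer. It's my understanding that statically initializing a flexible array member (strictly speaking, dk_indices[] is a flexible array member rather than a VLA) has the compiler allocate space for however many elements you provide. Note that this is a compiler extension (e.g. GNU C) rather than standard C.

Analogous example with an ordinary array of unspecified size: char foo[] = "hello"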

};

static PyObject *empty_values[1] = { NULL };
Expand Down Expand Up @@ -530,7 +530,6 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size)
}
else {
dk = PyObject_MALLOC(sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ es * size
+ sizeof(PyDictKeyEntry) * usable);
if (dk == NULL) {
Expand All @@ -543,7 +542,7 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size)
dk->dk_usable = usable;
dk->dk_lookup = lookdict_unicode_nodummy;
dk->dk_nentries = 0;
memset(&dk->dk_indices.as_1[0], 0xff, es * size);
memset(&dk->dk_indices[0], 0xff, es * size);
memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
return dk;
}
Expand Down Expand Up @@ -3007,7 +3006,6 @@ _PyDict_SizeOf(PyDictObject *mp)
in the type object. */
if (mp->ma_keys->dk_refcnt == 1)
res += (sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ DK_IXSIZE(mp->ma_keys) * size
+ sizeof(PyDictKeyEntry) * usable);
return res;
Expand All @@ -3017,7 +3015,6 @@ Py_ssize_t
_PyDict_KeysSize(PyDictKeysObject *keys)
{
return (sizeof(PyDictKeysObject)
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices)
+ DK_IXSIZE(keys) * DK_SIZE(keys)
+ USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry));
}
Expand Down