-
-
Notifications
You must be signed in to change notification settings - Fork 32.2k
bpo-33312: Fix clang ubsan out of bounds warnings in dict. #6537
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
Fixed clang ubsan (undefined behavior sanitizer) warnings in dictobject.c by | ||
adjusting how the internal struct _dictkeysobject shared keys structure is | ||
declared. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -298,7 +298,7 @@ PyDict_Fini(void) | |
2 : sizeof(int32_t)) | ||
#endif | ||
#define DK_ENTRIES(dk) \ | ||
((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)])) | ||
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)])) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does the following expression look a tiny bit clearer to you? `((PyDictKeyEntry*)((int8_t*)((dk)->dk_indices) + DK_SIZE(dk) * DK_IXSIZE(dk)))` There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe, but I'll still toss another pair of ()s in there for clarity,
even though I believe those are equivalent (the cast happens before the +?) |
||
|
||
#define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA | ||
#define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA | ||
|
@@ -316,21 +316,21 @@ dk_get_index(PyDictKeysObject *keys, Py_ssize_t i) | |
Py_ssize_t ix; | ||
|
||
if (s <= 0xff) { | ||
int8_t *indices = keys->dk_indices.as_1; | ||
int8_t *indices = (int8_t*)(keys->dk_indices); | ||
ix = indices[i]; | ||
} | ||
else if (s <= 0xffff) { | ||
int16_t *indices = keys->dk_indices.as_2; | ||
int16_t *indices = (int16_t*)(keys->dk_indices); | ||
ix = indices[i]; | ||
} | ||
#if SIZEOF_VOID_P > 4 | ||
else if (s > 0xffffffff) { | ||
int64_t *indices = keys->dk_indices.as_8; | ||
int64_t *indices = (int64_t*)(keys->dk_indices); | ||
ix = indices[i]; | ||
} | ||
#endif | ||
else { | ||
int32_t *indices = keys->dk_indices.as_4; | ||
int32_t *indices = (int32_t*)(keys->dk_indices); | ||
ix = indices[i]; | ||
} | ||
assert(ix >= DKIX_DUMMY); | ||
|
@@ -346,23 +346,23 @@ dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix) | |
assert(ix >= DKIX_DUMMY); | ||
|
||
if (s <= 0xff) { | ||
int8_t *indices = keys->dk_indices.as_1; | ||
int8_t *indices = (int8_t*)(keys->dk_indices); | ||
assert(ix <= 0x7f); | ||
indices[i] = (char)ix; | ||
} | ||
else if (s <= 0xffff) { | ||
int16_t *indices = keys->dk_indices.as_2; | ||
int16_t *indices = (int16_t*)(keys->dk_indices); | ||
assert(ix <= 0x7fff); | ||
indices[i] = (int16_t)ix; | ||
} | ||
#if SIZEOF_VOID_P > 4 | ||
else if (s > 0xffffffff) { | ||
int64_t *indices = keys->dk_indices.as_8; | ||
int64_t *indices = (int64_t*)(keys->dk_indices); | ||
indices[i] = ix; | ||
} | ||
#endif | ||
else { | ||
int32_t *indices = keys->dk_indices.as_4; | ||
int32_t *indices = (int32_t*)(keys->dk_indices); | ||
assert(ix <= 0x7fffffff); | ||
indices[i] = (int32_t)ix; | ||
} | ||
|
@@ -421,8 +421,8 @@ static PyDictKeysObject empty_keys_struct = { | |
lookdict_split, /* dk_lookup */ | ||
0, /* dk_usable (immutable) */ | ||
0, /* dk_nentries */ | ||
.dk_indices = { .as_1 = {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, | ||
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}}, | ||
{DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, | ||
DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is the size of the …? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a static initializer. It's my understanding that statically initializing a flexible array member (strictly speaking not a VLA) has the compiler allocate space for however many elements you enter. Example: |
||
}; | ||
|
||
static PyObject *empty_values[1] = { NULL }; | ||
|
@@ -530,7 +530,6 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size) | |
} | ||
else { | ||
dk = PyObject_MALLOC(sizeof(PyDictKeysObject) | ||
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices) | ||
+ es * size | ||
+ sizeof(PyDictKeyEntry) * usable); | ||
if (dk == NULL) { | ||
|
@@ -543,7 +542,7 @@ static PyDictKeysObject *new_keys_object(Py_ssize_t size) | |
dk->dk_usable = usable; | ||
dk->dk_lookup = lookdict_unicode_nodummy; | ||
dk->dk_nentries = 0; | ||
memset(&dk->dk_indices.as_1[0], 0xff, es * size); | ||
memset(&dk->dk_indices[0], 0xff, es * size); | ||
memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable); | ||
return dk; | ||
} | ||
|
@@ -3007,7 +3006,6 @@ _PyDict_SizeOf(PyDictObject *mp) | |
in the type object. */ | ||
if (mp->ma_keys->dk_refcnt == 1) | ||
res += (sizeof(PyDictKeysObject) | ||
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices) | ||
+ DK_IXSIZE(mp->ma_keys) * size | ||
+ sizeof(PyDictKeyEntry) * usable); | ||
return res; | ||
|
@@ -3017,7 +3015,6 @@ Py_ssize_t | |
_PyDict_KeysSize(PyDictKeysObject *keys) | ||
{ | ||
return (sizeof(PyDictKeysObject) | ||
- Py_MEMBER_SIZE(PyDictKeysObject, dk_indices) | ||
+ DK_IXSIZE(keys) * DK_SIZE(keys) | ||
+ USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry)); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't it be `unsigned char`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The DKIX_EMPTY constant is -1, and all of the types this was replacing are signed (as are the types we cast it to everywhere), so sticking with char made sense.
I'd prefer to say int8_t, but given that the references I've found only mention char and unsigned char in relation to strict aliasing, I'm being conservative and matching that exactly.