diff --git a/py/map.c b/py/map.c index 6411f8adeab1..933b5f8e2945 100644 --- a/py/map.c +++ b/py/map.c @@ -83,7 +83,7 @@ STATIC void mp_map_rehash(mp_map_t *map) { map->all_keys_are_qstrs = 1; map->table = m_new0(mp_map_elem_t, map->alloc); for (int i = 0; i < old_alloc; i++) { - if (old_table[i].key != NULL) { + if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) { mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value; } } @@ -106,8 +106,6 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t // map is a hash table (not a fixed array), so do a hash lookup - machine_uint_t hash; - hash = mp_obj_hash(index); if (map->alloc == 0) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { mp_map_rehash(map); @@ -116,54 +114,79 @@ mp_map_elem_t* mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t } } + machine_uint_t hash = mp_obj_hash(index); uint pos = hash % map->alloc; + uint start_pos = pos; + mp_map_elem_t *avail_slot = NULL; for (;;) { - mp_map_elem_t *elem = &map->table[pos]; - if (elem->key == NULL) { - // not in table + mp_map_elem_t *slot = &map->table[pos]; + if (slot->key == MP_OBJ_NULL) { + // found NULL slot, so index is not in table if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { - if (map->used + 1 >= map->alloc) { - // not enough room in table, rehash it - mp_map_rehash(map); - // restart the search for the new element - pos = hash % map->alloc; - continue; - } else { - map->used += 1; - elem->key = index; - elem->value = NULL; - if (!MP_OBJ_IS_QSTR(index)) { - map->all_keys_are_qstrs = 0; - } - return elem; + map->used += 1; + if (avail_slot == NULL) { + avail_slot = slot; + } + slot->key = index; + slot->value = MP_OBJ_NULL; + if (!MP_OBJ_IS_QSTR(index)) { + map->all_keys_are_qstrs = 0; } - } else if (elem->value == NULL) { - return NULL; + return slot; + } else { + return MP_OBJ_NULL; } - // Otherwise it's just entry marked as deleted, so continue with next one - } else if (elem->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(elem->key, index))) { - // found it - /* it seems CPython does not replace the index; try x={True:'true'};x[1]='one';x - if (add_if_not_found) { - elem->key = index; + } else if (slot->key == MP_OBJ_SENTINEL) { + // found deleted slot, remember for later + if (avail_slot == NULL) { + avail_slot = slot; } - */ + } else if (slot->key == index || (!map->all_keys_are_qstrs && mp_obj_equal(slot->key, index))) { + // found index + // Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { - map->used--; // this leaks this memory (but see dict_get_helper) mp_map_elem_t *retval = m_new(mp_map_elem_t, 1); - retval->key = elem->key; - retval->value = elem->value; - elem->key = NULL; - // elem->key = NULL && elem->value != NULL means "marked deleted" - // assume value indeed never NULL + retval->key = slot->key; + retval->value = slot->value; + // delete element in this slot + map->used--; + if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) { + // optimisation if next slot is empty + slot->key = MP_OBJ_NULL; + } else { + slot->key = MP_OBJ_SENTINEL; + } return retval; } - return elem; + return slot; } // not yet found, keep searching in this table pos = (pos + 1) % map->alloc; + + if (pos == start_pos) { + // search got back to starting position, so index is not in table + if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { + if (avail_slot != NULL) { + // there was an available slot, so use that + map->used++; + avail_slot->key = index; + avail_slot->value = MP_OBJ_NULL; + if (!MP_OBJ_IS_QSTR(index)) { + map->all_keys_are_qstrs = 0; + } + return avail_slot; + } else { + // not enough room in table, rehash it + mp_map_rehash(map); + // restart the search for the new element + start_pos = pos = hash % map->alloc; + } + } else { + return MP_OBJ_NULL; + } + } } } @@ -183,16 +206,14 @@ STATIC void mp_set_rehash(mp_set_t *set) { set->used = 0; set->table = m_new0(mp_obj_t, set->alloc); for (int i = 0; i < old_alloc; i++) { - if (old_table[i] != NULL) { - mp_set_lookup(set, old_table[i], true); + if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) { + mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND); } } m_del(mp_obj_t, old_table, old_alloc); } mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) { - int hash; - int pos; if (set->alloc == 0) { if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { mp_set_rehash(set); @@ -200,45 +221,84 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku return NULL; } } - if (lookup_kind & MP_MAP_LOOKUP_FIRST) { - hash = 0; - pos = 0; - } else { - hash = mp_obj_hash(index);; - pos = hash % set->alloc; - } + machine_uint_t hash = mp_obj_hash(index); + uint pos = hash % set->alloc; + uint start_pos = pos; + mp_obj_t *avail_slot = NULL; for (;;) { mp_obj_t elem = set->table[pos]; if (elem == MP_OBJ_NULL) { - // not in table + // found NULL slot, so index is not in table if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { - if (set->used + 1 >= set->alloc) { + if (avail_slot == NULL) { + avail_slot = &set->table[pos]; + } + set->used++; + *avail_slot = index; + return index; + } else { + return MP_OBJ_NULL; + } + } else if (elem == MP_OBJ_SENTINEL) { + // found deleted slot, remember for later + if (avail_slot == NULL) { + avail_slot = &set->table[pos]; + } + } else if (mp_obj_equal(elem, index)) { + // found index + if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { + // delete element + set->used--; + if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) { + // optimisation if next slot is empty + set->table[pos] = MP_OBJ_NULL; + } else { + set->table[pos] = MP_OBJ_SENTINEL; + } + } + return elem; + } + + // not yet found, keep searching in this table + pos = (pos + 1) % set->alloc; + + if (pos == start_pos) { + // search got back to starting position, so index is not in table + if (lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { + if (avail_slot != NULL) { + // there was an available slot, so use that + set->used++; + *avail_slot = index; + return index; + } else { // not enough room in table, rehash it mp_set_rehash(set); // restart the search for the new element - pos = hash % set->alloc; - } else { - set->used += 1; - set->table[pos] = index; - return index; + start_pos = pos = hash % set->alloc; } - } else if (lookup_kind & MP_MAP_LOOKUP_FIRST) { - pos++; } else { return MP_OBJ_NULL; } - } else if ((lookup_kind & MP_MAP_LOOKUP_FIRST) || mp_obj_equal(elem, index)) { - // found it - if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) { - set->used--; - set->table[pos] = NULL; + } + } +} + +mp_obj_t mp_set_remove_first(mp_set_t *set) { + for (uint pos = 0; pos < set->alloc; pos++) { + if (set->table[pos] != MP_OBJ_NULL && set->table[pos] != MP_OBJ_SENTINEL) { + mp_obj_t elem = set->table[pos]; + // delete element + set->used--; + if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) { + // optimisation if next slot is empty + set->table[pos] = MP_OBJ_NULL; + } else { + set->table[pos] = MP_OBJ_SENTINEL; } return elem; - } else { - // not yet found, keep searching in this table - pos = (pos + 1) % set->alloc; } } + return MP_OBJ_NULL; } void mp_set_clear(mp_set_t *set) { diff --git a/py/obj.h b/py/obj.h index 79efc4c4f2d2..eb447f916c06 100644 --- a/py/obj.h +++ b/py/obj.h @@ -23,6 +23,11 @@ typedef struct _mp_obj_base_t mp_obj_base_t; #define MP_OBJ_NULL ((mp_obj_t)NULL) +// The SENTINEL object is used for various internal purposes where one needs +// an object which is unique from all other objects, including MP_OBJ_NULL. + +#define MP_OBJ_SENTINEL ((mp_obj_t)8) + // These macros check for small int, qstr or object, and access small int and qstr values // - xxxx...xxx1: a small int, bits 1 and above are the value // - xxxx...xx10: a qstr, bits 2 and above are the value @@ -103,11 +108,11 @@ typedef struct _mp_map_t { mp_map_elem_t *table; } mp_map_t; +// These can be or'd together typedef enum _mp_map_lookup_kind_t { MP_MAP_LOOKUP, // 0 MP_MAP_LOOKUP_ADD_IF_NOT_FOUND, // 1 MP_MAP_LOOKUP_REMOVE_IF_FOUND, // 2 - MP_MAP_LOOKUP_FIRST = 4, } mp_map_lookup_kind_t; void mp_map_init(mp_map_t *map, int n); @@ -129,6 +134,7 @@ typedef struct _mp_set_t { void mp_set_init(mp_set_t *set, int n); mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind); +mp_obj_t mp_set_remove_first(mp_set_t *set); void mp_set_clear(mp_set_t *set); // Type definitions for methods diff --git a/py/objdict.c b/py/objdict.c index 7c9d8d74df7a..f6070a0b4045 100644 --- a/py/objdict.c +++ b/py/objdict.c @@ -103,7 +103,7 @@ STATIC mp_map_elem_t *dict_it_iternext_elem(mp_obj_t self_in) { mp_map_elem_t *table = self->dict->map.table; for (int i = self->cur; i < max; i++) { - if (table[i].key != NULL) { + if (table[i].key != MP_OBJ_NULL && table[i].key != MP_OBJ_SENTINEL) { self->cur = i + 1; return &(table[i]); } diff --git a/py/objset.c b/py/objset.c index 439c6e96e622..222f76e405dc 100644 --- a/py/objset.c +++ b/py/objset.c @@ -32,7 +32,7 @@ STATIC void set_print(void (*print)(void *env, const char *fmt, ...), void *env, bool first = true; print(env, "{"); for (int i = 0; i < self->set.alloc; i++) { - if (self->set.table[i] != MP_OBJ_NULL) { + if (self->set.table[i] != MP_OBJ_NULL && self->set.table[i] != MP_OBJ_SENTINEL) { if (!first) { print(env, ", "); } @@ -83,7 +83,7 @@ STATIC mp_obj_t set_it_iternext(mp_obj_t self_in) { mp_obj_t *table = self->set->set.table; for (machine_uint_t i = self->cur; i < max; i++) { - if (table[i] != NULL) { + if (table[i] != MP_OBJ_NULL && table[i] != MP_OBJ_SENTINEL) { self->cur = i + 1; return table[i]; } @@ -307,12 +307,10 @@ STATIC mp_obj_t set_equal(mp_obj_t self_in, mp_obj_t other_in) { STATIC mp_obj_t set_pop(mp_obj_t self_in) { assert(MP_OBJ_IS_TYPE(self_in, &mp_type_set)); mp_obj_set_t *self = self_in; - - if (self->set.used == 0) { + mp_obj_t obj = mp_set_remove_first(&self->set); + if (obj == MP_OBJ_NULL) { nlr_jump(mp_obj_new_exception_msg(&mp_type_KeyError, "pop from an empty set")); } - mp_obj_t obj = mp_set_lookup(&self->set, NULL, - MP_MAP_LOOKUP_REMOVE_IF_FOUND | MP_MAP_LOOKUP_FIRST); return obj; } STATIC MP_DEFINE_CONST_FUN_OBJ_1(set_pop_obj, set_pop); @@ -375,6 +373,14 @@ STATIC mp_obj_t set_union(mp_obj_t self_in, mp_obj_t other_in) { } STATIC MP_DEFINE_CONST_FUN_OBJ_2(set_union_obj, set_union); +STATIC mp_obj_t set_unary_op(int op, mp_obj_t self_in) { + mp_obj_set_t *self = self_in; + switch (op) { + case MP_UNARY_OP_BOOL: return MP_BOOL(self->set.used != 0); + case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT((machine_int_t)self->set.used); + default: return MP_OBJ_NULL; // op not supported for None + } +} STATIC mp_obj_t set_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) { mp_obj_t args[] = {lhs, rhs}; @@ -450,6 +456,7 @@ const mp_obj_type_t mp_type_set = { .name = MP_QSTR_set, .print = set_print, .make_new = set_make_new, + .unary_op = set_unary_op, .binary_op = set_binary_op, .getiter = set_getiter, .locals_dict = (mp_obj_t)&set_locals_dict, diff --git a/tests/basics/dict-del.py b/tests/basics/dict-del.py index 012ed0caea3c..d908fea424f0 100644 --- a/tests/basics/dict-del.py +++ b/tests/basics/dict-del.py @@ -1,8 +1,21 @@ -for i in range(100): - d = dict() - for j in range(100): - d[j] = j - del d[i] - for j in range(100): - if j not in d: - print(j, 'not in d') +for n in range(20): + print('testing dict with {} items'.format(n)) + for i in range(n): + # create dict + d = dict() + for j in range(n): + d[str(j)] = j + print(len(d)) + + # delete an item + del d[str(i)] + print(len(d)) + + # check items + for j in range(n): + if str(j) in d: + if j == i: + print(j, 'in d, but it should not be') + else: + if j != i: + print(j, 'not in d, but it should be') diff --git a/tests/basics/set_remove.py b/tests/basics/set_remove.py index 208ab137f3f0..5627516c43c8 100644 --- a/tests/basics/set_remove.py +++ b/tests/basics/set_remove.py @@ -1,3 +1,4 @@ +# basic test s = {1} print(s.remove(1)) print(list(s)) @@ -7,3 +8,26 @@ pass else: print("failed to raise KeyError") + +# test sets of varying size +for n in range(20): + print('testing set with {} items'.format(n)) + for i in range(n): + # create set + s = set() + for j in range(n): + s.add(str(j)) + print(len(s)) + + # delete an item + s.remove(str(i)) + print(len(s)) + + # check items + for j in range(n): + if str(j) in s: + if j == i: + print(j, 'in s, but it should not be') + else: + if j != i: + print(j, 'not in s, but it should be')