Skip to content

Commit

Permalink
py: Convert hash API to use MP_UNARY_OP_HASH instead of ad-hoc function.
Browse files Browse the repository at this point in the history
Hashing is now done using the mp_unary_op function with MP_UNARY_OP_HASH as
the operator argument.  Hashing for int, str and bytes still goes via a
fast path in mp_unary_op, since they are the most common objects that
need to be hashed.

This led to quite a bit of code cleanup, and should be more efficient
if anything.  It saves 176 bytes of code space on Thumb2, and 360 bytes on
x86.

The only loss is that the error message "unhashable type" is now the
more generic "unsupported type for __hash__".
  • Loading branch information
dpgeorge committed May 12, 2015
1 parent 6738c1d commit c2a4e4e
Show file tree
Hide file tree
Showing 20 changed files with 85 additions and 112 deletions.
4 changes: 2 additions & 2 deletions py/bc0.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
#define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64)
#define MP_BC_LOAD_FAST_MULTI (0xb0) // + N(16)
#define MP_BC_STORE_FAST_MULTI (0xc0) // + N(16)
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(5)
#define MP_BC_BINARY_OP_MULTI (0xd5) // + op(35)
#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(6)
#define MP_BC_BINARY_OP_MULTI (0xd6) // + op(35)

#endif // __MICROPY_INCLUDED_PY_BC0_H__
7 changes: 4 additions & 3 deletions py/map.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@

#include "py/mpconfig.h"
#include "py/misc.h"
#include "py/obj.h"
#include "py/runtime0.h"
#include "py/runtime.h"

// Fixed empty map. Useful when you need to call kw-receiving functions
// without any keywords from C, etc.
Expand Down Expand Up @@ -200,7 +201,7 @@ mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t
}
}

mp_uint_t hash = mp_obj_hash(index);
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % map->alloc;
mp_uint_t start_pos = pos;
mp_map_elem_t *avail_slot = NULL;
Expand Down Expand Up @@ -308,7 +309,7 @@ mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t looku
return NULL;
}
}
mp_uint_t hash = mp_obj_hash(index);
mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % set->alloc;
mp_uint_t start_pos = pos;
mp_obj_t *avail_slot = NULL;
Expand Down
4 changes: 2 additions & 2 deletions py/modbuiltins.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ STATIC mp_obj_t mp_builtin_divmod(mp_obj_t o1_in, mp_obj_t o2_in) {
MP_DEFINE_CONST_FUN_OBJ_2(mp_builtin_divmod_obj, mp_builtin_divmod);

STATIC mp_obj_t mp_builtin_hash(mp_obj_t o_in) {
// TODO hash will generally overflow small integer; can we safely truncate it?
return mp_obj_new_int(mp_obj_hash(o_in));
// result is guaranteed to be a (small) int
return mp_unary_op(MP_UNARY_OP_HASH, o_in);
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_hash_obj, mp_builtin_hash);

Expand Down
62 changes: 7 additions & 55 deletions py/obj.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,61 +147,6 @@ bool mp_obj_is_callable(mp_obj_t o_in) {
return mp_obj_instance_is_callable(o_in);
}

mp_int_t mp_obj_hash(mp_obj_t o_in) {
if (o_in == mp_const_false) {
return 0; // needs to hash to same as the integer 0, since False==0
} else if (o_in == mp_const_true) {
return 1; // needs to hash to same as the integer 1, since True==1
} else if (MP_OBJ_IS_SMALL_INT(o_in)) {
return MP_OBJ_SMALL_INT_VALUE(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_int)) {
return mp_obj_int_hash(o_in);
} else if (MP_OBJ_IS_STR(o_in) || MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) {
return mp_obj_str_get_hash(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_NoneType)) {
return (mp_int_t)o_in;
} else if (MP_OBJ_IS_FUN(o_in)) {
return (mp_int_t)o_in;
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_tuple)) {
return mp_obj_tuple_hash(o_in);
} else if (MP_OBJ_IS_TYPE(o_in, &mp_type_type)) {
return (mp_int_t)o_in;
} else if (mp_obj_is_instance_type(mp_obj_get_type(o_in))) {
// if a valid __hash__ method exists, use it
mp_obj_t method[2];
mp_load_method_maybe(o_in, MP_QSTR___hash__, method);
if (method[0] != MP_OBJ_NULL) {
mp_obj_t hash_val = mp_call_method_n_kw(0, 0, method);
if (MP_OBJ_IS_INT(hash_val)) {
return mp_obj_int_get_truncated(hash_val);
}
goto error;
}

mp_load_method_maybe(o_in, MP_QSTR___eq__, method);
if (method[0] == MP_OBJ_NULL) {
// https://docs.python.org/3/reference/datamodel.html#object.__hash__
// "User-defined classes have __eq__() and __hash__() methods by default;
// with them, all objects compare unequal (except with themselves) and
// x.__hash__() returns an appropriate value such that x == y implies
// both that x is y and hash(x) == hash(y)."
return (mp_int_t)o_in;
}
// "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
// When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
}

// TODO hash classes

error:
if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unhashable type"));
} else {
nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
"unhashable type: '%s'", mp_obj_get_type_str(o_in)));
}
}

// This function implements the '==' operator (and so the inverse of '!=').
//
// From the Python language reference:
Expand Down Expand Up @@ -540,3 +485,10 @@ void mp_get_buffer_raise(mp_obj_t obj, mp_buffer_info_t *bufinfo, mp_uint_t flag
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "object with buffer protocol required"));
}
}

// Generic unary-operator handler.
//
// The only operator handled is MP_UNARY_OP_HASH, for which the result is a
// small int built from the value of the object handle o_in itself (cast to
// mp_uint_t).  Every other operator yields MP_OBJ_NULL, signalling to the
// caller that the operation is not supported.
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in) {
    if (op == MP_UNARY_OP_HASH) {
        // hash is derived from the object handle value
        return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
    }

    // op not supported
    return MP_OBJ_NULL;
}
2 changes: 1 addition & 1 deletion py/obj.h
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,6 @@ void mp_obj_print_exception(const mp_print_t *print, mp_obj_t exc);

bool mp_obj_is_true(mp_obj_t arg);
bool mp_obj_is_callable(mp_obj_t o_in);
mp_int_t mp_obj_hash(mp_obj_t o_in);
bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2);

mp_int_t mp_obj_get_int(mp_const_obj_t arg);
Expand All @@ -525,6 +524,7 @@ mp_obj_t mp_obj_id(mp_obj_t o_in);
mp_obj_t mp_obj_len(mp_obj_t o_in);
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); // may return MP_OBJ_NULL
mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t val);
mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in);

// bool
// TODO make lower case when it has proven itself
Expand Down
5 changes: 4 additions & 1 deletion py/objbool.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,21 @@ STATIC mp_obj_t bool_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_int_t value = ((mp_obj_bool_t*)o_in)->value;
switch (op) {
case MP_UNARY_OP_BOOL: return o_in;
// needs to hash to the same value as if converting to an integer
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_POSITIVE: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_NEGATIVE: return MP_OBJ_NEW_SMALL_INT(-value);
case MP_UNARY_OP_INVERT: return MP_OBJ_NEW_SMALL_INT(~value);

// only bool needs to implement MP_UNARY_OP_NOT
case MP_UNARY_OP_NOT:
default: // no other cases
if (value) {
return mp_const_false;
} else {
return mp_const_true;
}

default: return MP_OBJ_NULL; // op not supported
}
}

Expand Down
5 changes: 5 additions & 0 deletions py/objfun.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ const mp_obj_type_t mp_type_fun_builtin = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_builtin_call,
.unary_op = mp_generic_unary_op,
};

/******************************************************************************/
Expand Down Expand Up @@ -314,6 +315,7 @@ const mp_obj_type_t mp_type_fun_bc = {
.print = fun_bc_print,
#endif
.call = fun_bc_call,
.unary_op = mp_generic_unary_op,
#if MICROPY_PY_FUNCTION_ATTRS
.attr = fun_bc_attr,
#endif
Expand Down Expand Up @@ -366,6 +368,7 @@ STATIC const mp_obj_type_t mp_type_fun_native = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_native_call,
.unary_op = mp_generic_unary_op,
};

mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) {
Expand Down Expand Up @@ -421,6 +424,7 @@ STATIC const mp_obj_type_t mp_type_fun_viper = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_viper_call,
.unary_op = mp_generic_unary_op,
};

mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
Expand Down Expand Up @@ -533,6 +537,7 @@ STATIC const mp_obj_type_t mp_type_fun_asm = {
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_asm_call,
.unary_op = mp_generic_unary_op,
};

mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data) {
Expand Down
4 changes: 0 additions & 4 deletions py/objint.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,6 @@ char *mp_obj_int_formatted(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_size,

#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE

mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}

bool mp_obj_int_is_positive(mp_obj_t self_in) {
return mp_obj_get_int(self_in) >= 0;
}
Expand Down
15 changes: 5 additions & 10 deletions py/objint_longlong.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,6 @@
const mp_obj_int_t mp_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
#endif

mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
mp_obj_int_t *self = self_in;
// truncate value to fit in mp_int_t, which gives the same hash as
// small int if the value fits without truncation
return self->val;
}

void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
Expand Down Expand Up @@ -117,6 +107,11 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(o->val != 0);

// truncate value to fit in mp_int_t, which gives the same hash as
// small int if the value fits without truncation
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);

case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: return mp_obj_new_int_from_ll(-o->val);
case MP_UNARY_OP_INVERT: return mp_obj_new_int_from_ll(~o->val);
Expand Down
9 changes: 1 addition & 8 deletions py/objint_mpz.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,6 @@ char *mp_obj_int_formatted_impl(char **buf, mp_uint_t *buf_size, mp_uint_t *fmt_
return str;
}

mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
if (MP_OBJ_IS_SMALL_INT(self_in)) {
return MP_OBJ_SMALL_INT_VALUE(self_in);
}
mp_obj_int_t *self = self_in;
return mpz_hash(&self->mpz);
}

void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
Expand Down Expand Up @@ -143,6 +135,7 @@ mp_obj_t mp_obj_int_unary_op(mp_uint_t op, mp_obj_t o_in) {
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(!mpz_is_zero(&o->mpz));
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mpz_hash(&o->mpz));
case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_neg_inpl(&o2->mpz, &o->mpz); return o2; }
case MP_UNARY_OP_INVERT: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_not_inpl(&o2->mpz, &o->mpz); return o2; }
Expand Down
1 change: 1 addition & 0 deletions py/objnone.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ STATIC mp_obj_t none_unary_op(mp_uint_t op, mp_obj_t o_in) {
(void)o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return mp_const_false;
case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
default: return MP_OBJ_NULL; // op not supported
}
}
Expand Down
10 changes: 0 additions & 10 deletions py/objstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1982,16 +1982,6 @@ STATIC void bad_implicit_conversion(mp_obj_t self_in) {
}
}

mp_uint_t mp_obj_str_get_hash(mp_obj_t self_in) {
// TODO: This has too big overhead for hash accessor
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
GET_STR_HASH(self_in, h);
return h;
} else {
bad_implicit_conversion(self_in);
}
}

mp_uint_t mp_obj_str_get_len(mp_obj_t self_in) {
// TODO This has a double check for the type, one in obj.c and one here
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
Expand Down
19 changes: 8 additions & 11 deletions py/objtuple.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,14 @@ mp_obj_t mp_obj_tuple_unary_op(mp_uint_t op, mp_obj_t self_in) {
mp_obj_tuple_t *self = self_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(self->len != 0);
case MP_UNARY_OP_HASH: {
// start hash with pointer to empty tuple, to make it fairly unique
mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
for (mp_uint_t i = 0; i < self->len; i++) {
hash += MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, self->items[i]));
}
return MP_OBJ_NEW_SMALL_INT(hash);
}
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT(self->len);
default: return MP_OBJ_NULL; // op not supported
}
Expand Down Expand Up @@ -258,17 +266,6 @@ void mp_obj_tuple_del(mp_obj_t self_in) {
m_del_var(mp_obj_tuple_t, mp_obj_t, self->len, self);
}

mp_int_t mp_obj_tuple_hash(mp_obj_t self_in) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_tuple));
mp_obj_tuple_t *self = self_in;
// start hash with pointer to empty tuple, to make it fairly unique
mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
for (mp_uint_t i = 0; i < self->len; i++) {
hash += mp_obj_hash(self->items[i]);
}
return hash;
}

/******************************************************************************/
/* tuple iterator */

Expand Down
24 changes: 23 additions & 1 deletion py/objtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ mp_obj_t mp_obj_instance_make_new(mp_obj_t self_in, mp_uint_t n_args, mp_uint_t
const qstr mp_unary_op_method_name[] = {
[MP_UNARY_OP_BOOL] = MP_QSTR___bool__,
[MP_UNARY_OP_LEN] = MP_QSTR___len__,
[MP_UNARY_OP_HASH] = MP_QSTR___hash__,
#if MICROPY_PY_ALL_SPECIAL_METHODS
[MP_UNARY_OP_POSITIVE] = MP_QSTR___pos__,
[MP_UNARY_OP_NEGATIVE] = MP_QSTR___neg__,
Expand Down Expand Up @@ -355,8 +356,28 @@ STATIC mp_obj_t instance_unary_op(mp_uint_t op, mp_obj_t self_in) {
if (member[0] == MP_OBJ_SENTINEL) {
return mp_unary_op(op, self->subobj[0]);
} else if (member[0] != MP_OBJ_NULL) {
return mp_call_function_1(member[0], self_in);
mp_obj_t val = mp_call_function_1(member[0], self_in);
// __hash__ must return a small int
if (op == MP_UNARY_OP_HASH) {
val = MP_OBJ_NEW_SMALL_INT(mp_obj_int_get_truncated(val));
}
return val;
} else {
if (op == MP_UNARY_OP_HASH) {
lookup.attr = MP_QSTR___eq__;
mp_obj_class_lookup(&lookup, self->base.type);
if (member[0] == MP_OBJ_NULL) {
// https://docs.python.org/3/reference/datamodel.html#object.__hash__
// "User-defined classes have __eq__() and __hash__() methods by default;
// with them, all objects compare unequal (except with themselves) and
// x.__hash__() returns an appropriate value such that x == y implies
// both that x is y and hash(x) == hash(y)."
return MP_OBJ_NEW_SMALL_INT((mp_uint_t)self_in);
}
// "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
// When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
}

return MP_OBJ_NULL; // op not supported
}
}
Expand Down Expand Up @@ -835,6 +856,7 @@ const mp_obj_type_t mp_type_type = {
.print = type_print,
.make_new = type_make_new,
.call = type_call,
.unary_op = mp_generic_unary_op,
.attr = type_attr,
};

Expand Down
7 changes: 7 additions & 0 deletions py/runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "py/nlr.h"
#include "py/parsenum.h"
#include "py/compile.h"
#include "py/objstr.h"
#include "py/objtuple.h"
#include "py/objlist.h"
#include "py/objmodule.h"
Expand Down Expand Up @@ -200,6 +201,8 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
switch (op) {
case MP_UNARY_OP_BOOL:
return MP_BOOL(val != 0);
case MP_UNARY_OP_HASH:
return arg;
case MP_UNARY_OP_POSITIVE:
return arg;
case MP_UNARY_OP_NEGATIVE:
Expand All @@ -215,6 +218,10 @@ mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
assert(0);
return arg;
}
} else if (op == MP_UNARY_OP_HASH && MP_OBJ_IS_STR_OR_BYTES(arg)) {
// fast path for hashing str/bytes
GET_STR_HASH(arg, h);
return MP_OBJ_NEW_SMALL_INT(h);
} else {
mp_obj_type_t *type = mp_obj_get_type(arg);
if (type->unary_op != NULL) {
Expand Down
Loading

0 comments on commit c2a4e4e

Please sign in to comment.