Skip to content

Added new function to hash any PostgreSQL data type #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged into from
Jun 13, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions hll--1.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,13 @@ CREATE FUNCTION hll_hash_text(text, integer default 0)
AS 'MODULE_PATHNAME', 'hll_hash_varlena'
LANGUAGE C STRICT IMMUTABLE;

-- Hash a value of any data type.
--
-- The first argument is the value to hash; the second is an optional
-- integer seed, 0 when omitted. The function is STRICT, so a NULL
-- argument yields NULL without invoking the C routine.
--
CREATE FUNCTION hll_hash_any(anyelement, integer DEFAULT 0)
    RETURNS hll_hashval
    LANGUAGE C
    IMMUTABLE STRICT
    AS 'MODULE_PATHNAME', 'hll_hash_any';


-- ----------------------------------------------------------------
-- Operators
Expand Down
65 changes: 65 additions & 0 deletions hll.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "utils/array.h"
#include "utils/bytea.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "catalog/pg_type.h"

Expand Down Expand Up @@ -2571,6 +2572,70 @@ hll_hash_varlena(PG_FUNCTION_ARGS)
PG_RETURN_INT64(out[0]);
}


// Hash any scalar data type.
//
PG_FUNCTION_INFO_V1(hll_hash_any);
Datum hll_hash_any(PG_FUNCTION_ARGS);
Datum
hll_hash_any(PG_FUNCTION_ARGS)
{
Datum keyDatum = PG_GETARG_DATUM(0);
Datum seedDatum = PG_GETARG_DATUM(1);
Datum hashResultDatum = 0;

Oid keyTypeId = get_fn_expr_argtype(fcinfo->flinfo, 0);
int16 keyTypeLength = get_typlen(keyTypeId);

/* dispatch to corresponding hash function for key type */
switch (keyTypeLength)
{
case 1:
hashResultDatum = DirectFunctionCall2(hll_hash_1byte, keyDatum, seedDatum);
break;

case 2:
hashResultDatum = DirectFunctionCall2(hll_hash_2byte, keyDatum, seedDatum);
break;

case 4:
hashResultDatum = DirectFunctionCall2(hll_hash_4byte, keyDatum, seedDatum);
break;

case 8:
hashResultDatum = DirectFunctionCall2(hll_hash_8byte, keyDatum, seedDatum);
break;

case -1:
case -2:
hashResultDatum = DirectFunctionCall2(hll_hash_varlena, keyDatum, seedDatum);
break;

default:
{
/*
* We have a fixed-size type such as char(10), macaddr, circle, etc. We
* first convert this type to its variable-length binary representation
* and then dispatch to the variable-length hashing function.
*/
Oid keyTypeSendFunction = InvalidOid;
bool keyTypeVarlena = false;
Datum keyBinaryDatum = 0;

/* no need to worry about SPI for these types' output functions */
getTypeBinaryOutputInfo(keyTypeId, &keyTypeSendFunction, &keyTypeVarlena);
keyBinaryDatum = OidFunctionCall1(keyTypeSendFunction, keyDatum);

hashResultDatum = DirectFunctionCall2(hll_hash_varlena, keyBinaryDatum,
seedDatum);
break;
}
}

PG_RETURN_INT64(hashResultDatum);
}


PG_FUNCTION_INFO_V1(hll_eq);
Datum hll_eq(PG_FUNCTION_ARGS);
Datum
Expand Down
1 change: 1 addition & 0 deletions regress/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ SQL = \
typmod.sql \
typmod_insert.sql \
hash.sql \
hash_any.sql \
murmur_bigint.sql \
murmur_bytea.sql \
equal.sql \
Expand Down
175 changes: 175 additions & 0 deletions regress/hash_any.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
-- ----------------------------------------------------------------
-- Misc Tests on hash_any Function
-- ----------------------------------------------------------------
SELECT hll_set_output_version(1);
hll_set_output_version
------------------------
1
(1 row)

-- ---------------- Check hash and hash_any function results match
SELECT hll_hash_boolean(FALSE) = hll_hash_any(FALSE);
?column?
----------
t
(1 row)

SELECT hll_hash_boolean(TRUE) = hll_hash_any(TRUE);
?column?
----------
t
(1 row)

SELECT hll_hash_smallint(0::smallint) = hll_hash_any(0::smallint);
?column?
----------
t
(1 row)

SELECT hll_hash_smallint(100::smallint) = hll_hash_any(100::smallint);
?column?
----------
t
(1 row)

SELECT hll_hash_smallint(-100::smallint) = hll_hash_any(-100::smallint);
?column?
----------
t
(1 row)

SELECT hll_hash_integer(0) = hll_hash_any(0);
?column?
----------
t
(1 row)

SELECT hll_hash_integer(100) = hll_hash_any(100);
?column?
----------
t
(1 row)

SELECT hll_hash_integer(-100) = hll_hash_any(-100);
?column?
----------
t
(1 row)

SELECT hll_hash_bigint(0) = hll_hash_any(0::bigint);
?column?
----------
t
(1 row)

SELECT hll_hash_bigint(100) = hll_hash_any(100::bigint);
?column?
----------
t
(1 row)

SELECT hll_hash_bigint(-100) = hll_hash_any(-100::bigint);
?column?
----------
t
(1 row)

SELECT hll_hash_bytea(E'\\x') = hll_hash_any(E'\\x'::bytea);
?column?
----------
t
(1 row)

SELECT hll_hash_bytea(E'\\x41') = hll_hash_any(E'\\x41'::bytea);
?column?
----------
t
(1 row)

SELECT hll_hash_bytea(E'\\x42') = hll_hash_any(E'\\x42'::bytea);
?column?
----------
t
(1 row)

SELECT hll_hash_bytea(E'\\x4142') = hll_hash_any(E'\\x4142'::bytea);
?column?
----------
t
(1 row)

SELECT hll_hash_text('') = hll_hash_any(''::text);
?column?
----------
t
(1 row)

SELECT hll_hash_text('A') = hll_hash_any('A'::text);
?column?
----------
t
(1 row)

SELECT hll_hash_text('B') = hll_hash_any('B'::text);
?column?
----------
t
(1 row)

SELECT hll_hash_text('AB') = hll_hash_any('AB'::text);
?column?
----------
t
(1 row)

-- ---------------- Check several types not handled by default hash functions
-- ---------------- macaddr
SELECT hll_hash_any('08:00:2b:01:02:03'::macaddr);
hll_hash_any
----------------------
-4883882473551067169
(1 row)

SELECT hll_hash_any('08002b010203'::macaddr);
hll_hash_any
----------------------
-4883882473551067169
(1 row)

SELECT hll_hash_any('01-23-45-67-89-ab'::macaddr);
hll_hash_any
---------------------
3974616115244794976
(1 row)

SELECT hll_hash_any('012345-6789ab'::macaddr);
hll_hash_any
---------------------
3974616115244794976
(1 row)

-- ---------------- interval
SELECT hll_hash_any('1 year 2 months 3 days 4 hours 5 minutes 6seconds'::interval);
hll_hash_any
---------------------
1647734813508782007
(1 row)

SELECT hll_hash_any('P1Y2M3DT4H5M6S'::interval);
hll_hash_any
---------------------
1647734813508782007
(1 row)

SELECT hll_hash_any('1997-06 20 12:00:00'::interval);
hll_hash_any
---------------------
3706410791461549552
(1 row)

SELECT hll_hash_any('P1997-06-20T12:00:00'::interval);
hll_hash_any
---------------------
3706410791461549552
(1 row)

48 changes: 48 additions & 0 deletions regress/hash_any.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-- ----------------------------------------------------------------
-- Misc Tests on hash_any Function
-- ----------------------------------------------------------------

SELECT hll_set_output_version(1);

-- ---------------- Check hash and hash_any function results match

SELECT hll_hash_boolean(FALSE) = hll_hash_any(FALSE);
SELECT hll_hash_boolean(TRUE) = hll_hash_any(TRUE);

SELECT hll_hash_smallint(0::smallint) = hll_hash_any(0::smallint);
SELECT hll_hash_smallint(100::smallint) = hll_hash_any(100::smallint);
SELECT hll_hash_smallint(-100::smallint) = hll_hash_any(-100::smallint);

SELECT hll_hash_integer(0) = hll_hash_any(0);
SELECT hll_hash_integer(100) = hll_hash_any(100);
SELECT hll_hash_integer(-100) = hll_hash_any(-100);

SELECT hll_hash_bigint(0) = hll_hash_any(0::bigint);
SELECT hll_hash_bigint(100) = hll_hash_any(100::bigint);
SELECT hll_hash_bigint(-100) = hll_hash_any(-100::bigint);

SELECT hll_hash_bytea(E'\\x') = hll_hash_any(E'\\x'::bytea);
SELECT hll_hash_bytea(E'\\x41') = hll_hash_any(E'\\x41'::bytea);
SELECT hll_hash_bytea(E'\\x42') = hll_hash_any(E'\\x42'::bytea);
SELECT hll_hash_bytea(E'\\x4142') = hll_hash_any(E'\\x4142'::bytea);

SELECT hll_hash_text('') = hll_hash_any(''::text);
SELECT hll_hash_text('A') = hll_hash_any('A'::text);
SELECT hll_hash_text('B') = hll_hash_any('B'::text);
SELECT hll_hash_text('AB') = hll_hash_any('AB'::text);

-- ---------------- Check several types not handled by default hash functions

-- ---------------- macaddr

SELECT hll_hash_any('08:00:2b:01:02:03'::macaddr);
SELECT hll_hash_any('08002b010203'::macaddr);
SELECT hll_hash_any('01-23-45-67-89-ab'::macaddr);
SELECT hll_hash_any('012345-6789ab'::macaddr);

-- ---------------- interval

SELECT hll_hash_any('1 year 2 months 3 days 4 hours 5 minutes 6seconds'::interval);
SELECT hll_hash_any('P1Y2M3DT4H5M6S'::interval);
SELECT hll_hash_any('1997-06 20 12:00:00'::interval);
SELECT hll_hash_any('P1997-06-20T12:00:00'::interval);