Skip to content

Commit e512d00

Browse files
author
Timon Karnezos
committed
Merge pull request citusdata#10 from ozgune/master
Added new function to hash any PostgreSQL data type. (Closes citusdata#10)
2 parents ddc8c7c + 15d5ae6 commit e512d00

File tree

5 files changed

+296
-0
lines changed

5 files changed

+296
-0
lines changed

hll--1.0.sql

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,13 @@ CREATE FUNCTION hll_hash_text(text, integer default 0)
319319
AS 'MODULE_PATHNAME', 'hll_hash_varlena'
320320
LANGUAGE C STRICT IMMUTABLE;
321321

322+
-- Hash a value of any scalar data type. The optional second argument
-- is the integer hash seed and defaults to 0.
--
CREATE FUNCTION hll_hash_any(anyelement, integer DEFAULT 0)
    RETURNS hll_hashval
    LANGUAGE C
    IMMUTABLE
    STRICT
    AS 'MODULE_PATHNAME', 'hll_hash_any';
328+
322329

323330
-- ----------------------------------------------------------------
324331
-- Operators

hll.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "utils/array.h"
3131
#include "utils/bytea.h"
3232
#include "utils/int8.h"
33+
#include "utils/lsyscache.h"
3334
#include "utils/memutils.h"
3435
#include "catalog/pg_type.h"
3536

@@ -2571,6 +2572,70 @@ hll_hash_varlena(PG_FUNCTION_ARGS)
25712572
PG_RETURN_INT64(out[0]);
25722573
}
25732574

2575+
2576+
// Hash any scalar data type.
2577+
//
2578+
PG_FUNCTION_INFO_V1(hll_hash_any);
2579+
Datum hll_hash_any(PG_FUNCTION_ARGS);
2580+
Datum
2581+
hll_hash_any(PG_FUNCTION_ARGS)
2582+
{
2583+
Datum keyDatum = PG_GETARG_DATUM(0);
2584+
Datum seedDatum = PG_GETARG_DATUM(1);
2585+
Datum hashResultDatum = 0;
2586+
2587+
Oid keyTypeId = get_fn_expr_argtype(fcinfo->flinfo, 0);
2588+
int16 keyTypeLength = get_typlen(keyTypeId);
2589+
2590+
/* dispatch to corresponding hash function for key type */
2591+
switch (keyTypeLength)
2592+
{
2593+
case 1:
2594+
hashResultDatum = DirectFunctionCall2(hll_hash_1byte, keyDatum, seedDatum);
2595+
break;
2596+
2597+
case 2:
2598+
hashResultDatum = DirectFunctionCall2(hll_hash_2byte, keyDatum, seedDatum);
2599+
break;
2600+
2601+
case 4:
2602+
hashResultDatum = DirectFunctionCall2(hll_hash_4byte, keyDatum, seedDatum);
2603+
break;
2604+
2605+
case 8:
2606+
hashResultDatum = DirectFunctionCall2(hll_hash_8byte, keyDatum, seedDatum);
2607+
break;
2608+
2609+
case -1:
2610+
case -2:
2611+
hashResultDatum = DirectFunctionCall2(hll_hash_varlena, keyDatum, seedDatum);
2612+
break;
2613+
2614+
default:
2615+
{
2616+
/*
2617+
* We have a fixed-size type such as char(10), macaddr, circle, etc. We
2618+
* first convert this type to its variable-length binary representation
2619+
* and then dispatch to the variable-length hashing function.
2620+
*/
2621+
Oid keyTypeSendFunction = InvalidOid;
2622+
bool keyTypeVarlena = false;
2623+
Datum keyBinaryDatum = 0;
2624+
2625+
/* no need to worry about SPI for these types' output functions */
2626+
getTypeBinaryOutputInfo(keyTypeId, &keyTypeSendFunction, &keyTypeVarlena);
2627+
keyBinaryDatum = OidFunctionCall1(keyTypeSendFunction, keyDatum);
2628+
2629+
hashResultDatum = DirectFunctionCall2(hll_hash_varlena, keyBinaryDatum,
2630+
seedDatum);
2631+
break;
2632+
}
2633+
}
2634+
2635+
PG_RETURN_INT64(hashResultDatum);
2636+
}
2637+
2638+
25742639
PG_FUNCTION_INFO_V1(hll_eq);
25752640
Datum hll_eq(PG_FUNCTION_ARGS);
25762641
Datum

regress/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ SQL = \
3030
typmod.sql \
3131
typmod_insert.sql \
3232
hash.sql \
33+
hash_any.sql \
3334
murmur_bigint.sql \
3435
murmur_bytea.sql \
3536
equal.sql \

regress/hash_any.ref

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
-- ----------------------------------------------------------------
2+
-- Misc Tests on hash_any Function
3+
-- ----------------------------------------------------------------
4+
SELECT hll_set_output_version(1);
5+
hll_set_output_version
6+
------------------------
7+
1
8+
(1 row)
9+
10+
-- ---------------- Check hash and hash_any function results match
11+
SELECT hll_hash_boolean(FALSE) = hll_hash_any(FALSE);
12+
?column?
13+
----------
14+
t
15+
(1 row)
16+
17+
SELECT hll_hash_boolean(TRUE) = hll_hash_any(TRUE);
18+
?column?
19+
----------
20+
t
21+
(1 row)
22+
23+
SELECT hll_hash_smallint(0::smallint) = hll_hash_any(0::smallint);
24+
?column?
25+
----------
26+
t
27+
(1 row)
28+
29+
SELECT hll_hash_smallint(100::smallint) = hll_hash_any(100::smallint);
30+
?column?
31+
----------
32+
t
33+
(1 row)
34+
35+
SELECT hll_hash_smallint(-100::smallint) = hll_hash_any(-100::smallint);
36+
?column?
37+
----------
38+
t
39+
(1 row)
40+
41+
SELECT hll_hash_integer(0) = hll_hash_any(0);
42+
?column?
43+
----------
44+
t
45+
(1 row)
46+
47+
SELECT hll_hash_integer(100) = hll_hash_any(100);
48+
?column?
49+
----------
50+
t
51+
(1 row)
52+
53+
SELECT hll_hash_integer(-100) = hll_hash_any(-100);
54+
?column?
55+
----------
56+
t
57+
(1 row)
58+
59+
SELECT hll_hash_bigint(0) = hll_hash_any(0::bigint);
60+
?column?
61+
----------
62+
t
63+
(1 row)
64+
65+
SELECT hll_hash_bigint(100) = hll_hash_any(100::bigint);
66+
?column?
67+
----------
68+
t
69+
(1 row)
70+
71+
SELECT hll_hash_bigint(-100) = hll_hash_any(-100::bigint);
72+
?column?
73+
----------
74+
t
75+
(1 row)
76+
77+
SELECT hll_hash_bytea(E'\\x') = hll_hash_any(E'\\x'::bytea);
78+
?column?
79+
----------
80+
t
81+
(1 row)
82+
83+
SELECT hll_hash_bytea(E'\\x41') = hll_hash_any(E'\\x41'::bytea);
84+
?column?
85+
----------
86+
t
87+
(1 row)
88+
89+
SELECT hll_hash_bytea(E'\\x42') = hll_hash_any(E'\\x42'::bytea);
90+
?column?
91+
----------
92+
t
93+
(1 row)
94+
95+
SELECT hll_hash_bytea(E'\\x4142') = hll_hash_any(E'\\x4142'::bytea);
96+
?column?
97+
----------
98+
t
99+
(1 row)
100+
101+
SELECT hll_hash_text('') = hll_hash_any(''::text);
102+
?column?
103+
----------
104+
t
105+
(1 row)
106+
107+
SELECT hll_hash_text('A') = hll_hash_any('A'::text);
108+
?column?
109+
----------
110+
t
111+
(1 row)
112+
113+
SELECT hll_hash_text('B') = hll_hash_any('B'::text);
114+
?column?
115+
----------
116+
t
117+
(1 row)
118+
119+
SELECT hll_hash_text('AB') = hll_hash_any('AB'::text);
120+
?column?
121+
----------
122+
t
123+
(1 row)
124+
125+
-- ---------------- Check several types not handled by default hash functions
126+
-- ---------------- macaddr
127+
SELECT hll_hash_any('08:00:2b:01:02:03'::macaddr);
128+
hll_hash_any
129+
----------------------
130+
-4883882473551067169
131+
(1 row)
132+
133+
SELECT hll_hash_any('08002b010203'::macaddr);
134+
hll_hash_any
135+
----------------------
136+
-4883882473551067169
137+
(1 row)
138+
139+
SELECT hll_hash_any('01-23-45-67-89-ab'::macaddr);
140+
hll_hash_any
141+
---------------------
142+
3974616115244794976
143+
(1 row)
144+
145+
SELECT hll_hash_any('012345-6789ab'::macaddr);
146+
hll_hash_any
147+
---------------------
148+
3974616115244794976
149+
(1 row)
150+
151+
-- ---------------- interval
152+
SELECT hll_hash_any('1 year 2 months 3 days 4 hours 5 minutes 6seconds'::interval);
153+
hll_hash_any
154+
---------------------
155+
1647734813508782007
156+
(1 row)
157+
158+
SELECT hll_hash_any('P1Y2M3DT4H5M6S'::interval);
159+
hll_hash_any
160+
---------------------
161+
1647734813508782007
162+
(1 row)
163+
164+
SELECT hll_hash_any('1997-06 20 12:00:00'::interval);
165+
hll_hash_any
166+
---------------------
167+
3706410791461549552
168+
(1 row)
169+
170+
SELECT hll_hash_any('P1997-06-20T12:00:00'::interval);
171+
hll_hash_any
172+
---------------------
173+
3706410791461549552
174+
(1 row)
175+

regress/hash_any.sql

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
-- ----------------------------------------------------------------
2+
-- Misc Tests on hash_any Function
3+
-- ----------------------------------------------------------------
4+
5+
SELECT hll_set_output_version(1);
6+
7+
-- ---------------- Check hash and hash_any function results match
8+
9+
SELECT hll_hash_boolean(FALSE) = hll_hash_any(FALSE);
10+
SELECT hll_hash_boolean(TRUE) = hll_hash_any(TRUE);
11+
12+
SELECT hll_hash_smallint(0::smallint) = hll_hash_any(0::smallint);
13+
SELECT hll_hash_smallint(100::smallint) = hll_hash_any(100::smallint);
14+
SELECT hll_hash_smallint(-100::smallint) = hll_hash_any(-100::smallint);
15+
16+
SELECT hll_hash_integer(0) = hll_hash_any(0);
17+
SELECT hll_hash_integer(100) = hll_hash_any(100);
18+
SELECT hll_hash_integer(-100) = hll_hash_any(-100);
19+
20+
SELECT hll_hash_bigint(0) = hll_hash_any(0::bigint);
21+
SELECT hll_hash_bigint(100) = hll_hash_any(100::bigint);
22+
SELECT hll_hash_bigint(-100) = hll_hash_any(-100::bigint);
23+
24+
SELECT hll_hash_bytea(E'\\x') = hll_hash_any(E'\\x'::bytea);
25+
SELECT hll_hash_bytea(E'\\x41') = hll_hash_any(E'\\x41'::bytea);
26+
SELECT hll_hash_bytea(E'\\x42') = hll_hash_any(E'\\x42'::bytea);
27+
SELECT hll_hash_bytea(E'\\x4142') = hll_hash_any(E'\\x4142'::bytea);
28+
29+
SELECT hll_hash_text('') = hll_hash_any(''::text);
30+
SELECT hll_hash_text('A') = hll_hash_any('A'::text);
31+
SELECT hll_hash_text('B') = hll_hash_any('B'::text);
32+
SELECT hll_hash_text('AB') = hll_hash_any('AB'::text);
33+
34+
-- ---------------- Check several types not handled by default hash functions
35+
36+
-- ---------------- macaddr
37+
38+
SELECT hll_hash_any('08:00:2b:01:02:03'::macaddr);
39+
SELECT hll_hash_any('08002b010203'::macaddr);
40+
SELECT hll_hash_any('01-23-45-67-89-ab'::macaddr);
41+
SELECT hll_hash_any('012345-6789ab'::macaddr);
42+
43+
-- ---------------- interval
44+
45+
SELECT hll_hash_any('1 year 2 months 3 days 4 hours 5 minutes 6seconds'::interval);
46+
SELECT hll_hash_any('P1Y2M3DT4H5M6S'::interval);
47+
SELECT hll_hash_any('1997-06 20 12:00:00'::interval);
48+
SELECT hll_hash_any('P1997-06-20T12:00:00'::interval);

0 commit comments

Comments
 (0)