Skip to content

Commit

Permalink
Fix Bug in integer serialization for integers in GIN
Browse files Browse the repository at this point in the history
Fixed a bug where integers were not being serialized
correctly when stored in GIN indices.

Added more regression tests in light of the found issue
  • Loading branch information
JoshInnis committed Jul 15, 2022
1 parent 5ded59f commit 392a937
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 85 deletions.
87 changes: 66 additions & 21 deletions regress/expected/index.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SET search_path TO ag_catalog;
SET enable_mergejoin = ON;
SET enable_hashjoin = ON;
SET enable_nestloop = ON;
SET enable_seqscan = false;
SELECT create_graph('cypher_index');
NOTICE: graph "cypher_index" has been created
create_graph
Expand Down Expand Up @@ -227,19 +228,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
* Section 2: Graphid Indices to Improve Join Performance
*/
SELECT * FROM cypher('cypher_index', $$
CREATE (us:Country {name: "United States"}),
(ca:Country {name: "Canada"}),
(mx:Country {name: "Mexico"}),
(us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
(ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
(ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
(ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
(mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
(mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
(mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
(ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
(mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
(us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
(ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
(ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
(ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
(mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
(mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
(mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
$$) as (n agtype);
n
---
Expand Down Expand Up @@ -299,19 +300,63 @@ SET enable_nestloop = ON;
--
-- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
--
CREATE INDEX load_city_gid_idx
CREATE INDEX load_city_gin_idx
ON cypher_index."City" USING gin (properties);
SELECT COUNT(*) FROM cypher('cypher_index', $$
MATCH (c:City {country_code: "AD"})
CREATE INDEX load_country_gin_idx
ON cypher_index."Country" USING gin (properties);
SELECT * FROM cypher('cypher_index', $$
MATCH (c:City {city_id: 1})
RETURN c
$$) as (n agtype);
count
-------
0
n
------------------------------------------------------------------------------------------------------------------------------------------------
{"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
(1 row)

SELECT * FROM cypher('cypher_index', $$
MATCH (:Country {country_code: "US"})<-[]-(city:City)
RETURN city
$$) as (n agtype);
n
----------------------------------------------------------------------------------------------------------------------------------------------------
{"id": 1970324836974593, "label": "City", "properties": {"name": "New York", "city_id": 1, "west_coast": false, "country_code": "US"}}::vertex
{"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
{"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
{"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
(4 rows)

SELECT * FROM cypher('cypher_index', $$
MATCH (c:City {west_coast: true})
RETURN c
$$) as (n agtype);
n
----------------------------------------------------------------------------------------------------------------------------------------------------
{"id": 1970324836974594, "label": "City", "properties": {"name": "San Fransisco", "city_id": 2, "west_coast": true, "country_code": "US"}}::vertex
{"id": 1970324836974595, "label": "City", "properties": {"name": "Los Angeles", "city_id": 3, "west_coast": true, "country_code": "US"}}::vertex
{"id": 1970324836974596, "label": "City", "properties": {"name": "Seattle", "city_id": 4, "west_coast": true, "country_code": "US"}}::vertex
{"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex
(4 rows)

SELECT * FROM cypher('cypher_index', $$
MATCH (c:Country {life_expectancy: 82.05})
RETURN c
$$) as (n agtype);
n
---------------------------------------------------------------------------------------------------------------------------------------------------------------
{"id": 1407374883553282, "label": "Country", "properties": {"gdp": 1.643::numeric, "name": "Canada", "country_code": "CA", "life_expectancy": 82.05}}::vertex
(1 row)

SELECT * FROM cypher('cypher_index', $$
MATCH (c:Country {gdp: 20.94::numeric})
RETURN c
$$) as (n agtype);
n
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
{"id": 1407374883553281, "label": "Country", "properties": {"gdp": 20.94::numeric, "name": "United States", "country_code": "US", "life_expectancy": 78.79}}::vertex
(1 row)

DROP INDEX load_city_gid_idx;
ERROR: index "load_city_gid_idx" does not exist
DROP INDEX cypher_index.load_city_gin_idx;
DROP INDEX cypher_index.load_country_gin_idx;
--
-- Section 4: Index use with WHERE clause
--
Expand Down
59 changes: 42 additions & 17 deletions regress/sql/index.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ SET search_path TO ag_catalog;
SET enable_mergejoin = ON;
SET enable_hashjoin = ON;
SET enable_nestloop = ON;
SET enable_seqscan = false;

SELECT create_graph('cypher_index');

Expand Down Expand Up @@ -131,19 +132,19 @@ SELECT * FROM cypher('cypher_index', $$ MATCH(n) DETACH DELETE n $$) AS (a agtyp
* Section 2: Graphid Indices to Improve Join Performance
*/
SELECT * FROM cypher('cypher_index', $$
CREATE (us:Country {name: "United States"}),
(ca:Country {name: "Canada"}),
(mx:Country {name: "Mexico"}),
(us)<-[:has_city]-(:City {name:"New York", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"San Fransisco", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"Los Angeles", country_code:"US"}),
(us)<-[:has_city]-(:City {name:"Seattle", country_code:"US"}),
(ca)<-[:has_city]-(:City {name:"Vancouver", country_code:"CA"}),
(ca)<-[:has_city]-(:City {name:"Toroto", country_code:"CA"}),
(ca)<-[:has_city]-(:City {name:"Montreal", country_code:"CA"}),
(mx)<-[:has_city]-(:City {name:"Mexico City", country_code:"MX"}),
(mx)<-[:has_city]-(:City {name:"Monterrey", country_code:"MX"}),
(mx)<-[:has_city]-(:City {name:"Tijuana", country_code:"MX"})
CREATE (us:Country {name: "United States", country_code: "US", life_expectancy: 78.79, gdp: 20.94::numeric}),
(ca:Country {name: "Canada", country_code: "CA", life_expectancy: 82.05, gdp: 1.643::numeric}),
(mx:Country {name: "Mexico", country_code: "MX", life_expectancy: 75.05, gdp: 1.076::numeric}),
(us)<-[:has_city]-(:City {city_id: 1, name:"New York", west_coast: false, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 2, name:"San Fransisco", west_coast: true, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 3, name:"Los Angeles", west_coast: true, country_code:"US"}),
(us)<-[:has_city]-(:City {city_id: 4, name:"Seattle", west_coast: true, country_code:"US"}),
(ca)<-[:has_city]-(:City {city_id: 5, name:"Vancouver", west_coast: true, country_code:"CA"}),
(ca)<-[:has_city]-(:City {city_id: 6, name:"Toroto", west_coast: false, country_code:"CA"}),
(ca)<-[:has_city]-(:City {city_id: 7, name:"Montreal", west_coast: false, country_code:"CA"}),
(mx)<-[:has_city]-(:City {city_id: 8, name:"Mexico City", west_coast: false, country_code:"MX"}),
(mx)<-[:has_city]-(:City {city_id: 9, name:"Monterrey", west_coast: false, country_code:"MX"}),
(mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"})
$$) as (n agtype);

ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id);
Expand Down Expand Up @@ -201,16 +202,40 @@ SET enable_nestloop = ON;
--
-- Section 3: Agtype GIN Indices to Improve WHERE clause Performance
--
CREATE INDEX load_city_gid_idx
CREATE INDEX load_city_gin_idx
ON cypher_index."City" USING gin (properties);

SELECT COUNT(*) FROM cypher('cypher_index', $$
MATCH (c:City {country_code: "AD"})
CREATE INDEX load_country_gin_idx
ON cypher_index."Country" USING gin (properties);


SELECT * FROM cypher('cypher_index', $$
MATCH (c:City {city_id: 1})
RETURN c
$$) as (n agtype);

SELECT * FROM cypher('cypher_index', $$
MATCH (:Country {country_code: "US"})<-[]-(city:City)
RETURN city
$$) as (n agtype);

SELECT * FROM cypher('cypher_index', $$
MATCH (c:City {west_coast: true})
RETURN c
$$) as (n agtype);

DROP INDEX load_city_gid_idx;
SELECT * FROM cypher('cypher_index', $$
MATCH (c:Country {life_expectancy: 82.05})
RETURN c
$$) as (n agtype);

SELECT * FROM cypher('cypher_index', $$
MATCH (c:Country {gdp: 20.94::numeric})
RETURN c
$$) as (n agtype);

DROP INDEX cypher_index.load_city_gin_idx;
DROP INDEX cypher_index.load_country_gin_idx;
--
-- Section 4: Index use with WHERE clause
--
Expand Down
102 changes: 55 additions & 47 deletions src/backend/utils/adt/agtype_gin.c
Original file line number Diff line number Diff line change
Expand Up @@ -500,53 +500,61 @@ static Datum make_scalar_key(const agtype_value *scalarVal, bool is_key)
char buf[MAXINT8LEN + 1];
switch (scalarVal->type)
{
case AGTV_NULL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
break;
case AGTV_INTEGER:
Assert(!is_key);
pg_lltoa(scalarVal->val.int_value, buf);
item = make_text_key(AGT_GIN_FLAG_NUM, buf, MAXINT8LEN + 1);
break;
case AGTV_FLOAT:
Assert(!is_key);
cstr = float8out_internal(scalarVal->val.float_value);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
break;
case AGTV_BOOL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_BOOL,
scalarVal->val.boolean ? "t" : "f", 1);
break;
case AGTV_NUMERIC:
Assert(!is_key);
/*
* A normalized textual representation, free of trailing zeroes,
* is required so that numerically equal values will produce equal
* strings.
*
* It isn't ideal that numerics are stored in a relatively bulky
* textual format. However, it's a notationally convenient way of
* storing a "union" type in the GIN B-Tree, and indexing Jsonb
* strings takes precedence.
*/
cstr = numeric_normalize(scalarVal->val.numeric);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
pfree(cstr);
break;
case AGTV_STRING:
item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
scalarVal->val.string.val,
scalarVal->val.string.len);
break;
case AGTV_VERTEX:
case AGTV_EDGE:
case AGTV_PATH:
elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
default:
elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
break;
case AGTV_NULL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_NULL, "", 0);
break;
case AGTV_INTEGER:
{
char *result;

Assert(!is_key);

pg_lltoa(scalarVal->val.int_value, buf);

result = pstrdup(buf);
item = make_text_key(AGT_GIN_FLAG_NUM, result, strlen(result));
break;
}
case AGTV_FLOAT:
Assert(!is_key);
cstr = float8out_internal(scalarVal->val.float_value);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
break;
case AGTV_BOOL:
Assert(!is_key);
item = make_text_key(AGT_GIN_FLAG_BOOL,
scalarVal->val.boolean ? "t" : "f", 1);
break;
case AGTV_NUMERIC:
Assert(!is_key);
/*
* A normalized textual representation, free of trailing zeroes,
* is required so that numerically equal values will produce equal
* strings.
*
* It isn't ideal that numerics are stored in a relatively bulky
* textual format. However, it's a notationally convenient way of
* storing a "union" type in the GIN B-Tree, and indexing Jsonb
* strings takes precedence.
*/
cstr = numeric_normalize(scalarVal->val.numeric);
item = make_text_key(AGT_GIN_FLAG_NUM, cstr, strlen(cstr));
pfree(cstr);
break;
case AGTV_STRING:
item = make_text_key(is_key ? AGT_GIN_FLAG_KEY : AGT_GIN_FLAG_STR,
scalarVal->val.string.val,
scalarVal->val.string.len);
break;
case AGTV_VERTEX:
case AGTV_EDGE:
case AGTV_PATH:
elog(ERROR, "agtype type: %d is not a scalar", scalarVal->type);
break;
default:
elog(ERROR, "unrecognized agtype type: %d", scalarVal->type);
break;
}

return item;
Expand Down

0 comments on commit 392a937

Please sign in to comment.