Skip to content

Commit

Permalink
[#10510] YSQL: Improve performance of concurrent refreshes for matviews
Browse files Browse the repository at this point in the history
Summary:
In YB mode, we restrict matviews from having rows with all null values when performing concurrent refreshes. Previously, we performed a full scan of the data to check for completely null rows, which leads to a performance hit. This diff performs the check for completely null rows on the computed diff table instead (which is a much smaller table).

Performance test:
Tested on centOS 7 machine with Intel Broadwell CPU.
```
CREATE TABLE base ... ; // 100 MB dataset
CREATE MATERIALIZED VIEW test AS SELECT * FROM base;
CREATE UNIQUE INDEX ON test ... // Unique index required for concurrent refreshes
INSERT a few rows INTO base;
REFRESH MATERIALIZED VIEW CONCURRENTLY base;
```
The above snippet gets stuck on the scan of the temporary table with the new data for at least 25 minutes in the current version of the code.
With this diff, it takes about 3 minutes for the refresh to execute.

Test Plan:
Run yb_feature_matview in TestPgRegressFeature

Jenkins: urgent

Reviewers: mihnea

Reviewed By: mihnea

Subscribers: zyu, smishra, yql

Differential Revision: https://phabricator.dev.yugabyte.com/D14596
  • Loading branch information
Fizaa Luthra authored and Fizaa Luthra committed Jan 6, 2022
1 parent a5e2d82 commit 67e0467
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 47 deletions.
106 changes: 62 additions & 44 deletions src/postgres/src/backend/commands/matview.c
Original file line number Diff line number Diff line change
Expand Up @@ -634,44 +634,16 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
* that in a way that allows showing the first duplicated row found. Even
* after we pass this test, a unique index on the materialized view may
* find a duplicate key problem.
* In YB mode, we also restrict the data from having rows with all null
* values, because we can't correctly compare fully-null rows to compute
* an accurate diff table.
*/
resetStringInfo(&querybuf);
if (IsYugaByteEnabled()) {
appendStringInfo(&querybuf, "SELECT newdata FROM %s newdata WHERE ", tempname);
TupleDesc tuple_desc = RelationGetDescr(matviewRel);

for (int i = 1; i <= tuple_desc->natts; i++)
{
Form_pg_attribute attribute = TupleDescAttr(tuple_desc, i - 1);
char *attribute_name = NameStr(attribute->attname);
appendStringInfo(&querybuf, "(newdata).%s IS NULL ", attribute_name);
if (i < tuple_desc->natts)
appendStringInfo(&querybuf, "AND ");

}

appendStringInfo(&querybuf,
"OR newdata IS NOT NULL AND EXISTS "
"(SELECT 1 FROM %s newdata2 WHERE newdata2 IS NOT NULL "
"AND newdata2 OPERATOR(pg_catalog.*=) newdata "
"AND newdata2.ctid OPERATOR(pg_catalog.<>) "
"newdata.ctid)",
tempname);

}
else {
appendStringInfo(&querybuf,
"SELECT newdata FROM %s newdata "
"WHERE newdata IS NOT NULL AND EXISTS "
"(SELECT 1 FROM %s newdata2 WHERE newdata2 IS NOT NULL "
"AND newdata2 OPERATOR(pg_catalog.*=) newdata "
"AND newdata2.ctid OPERATOR(pg_catalog.<>) "
"newdata.ctid)",
tempname, tempname);
}
appendStringInfo(&querybuf,
"SELECT newdata FROM %s newdata "
"WHERE newdata IS NOT NULL AND EXISTS "
"(SELECT 1 FROM %s newdata2 WHERE newdata2 IS NOT NULL "
"AND newdata2 OPERATOR(pg_catalog.*=) newdata "
"AND newdata2.ctid OPERATOR(pg_catalog.<>) "
"newdata.ctid)",
tempname, tempname);
if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
elog(ERROR, "SPI_exec failed: %s", querybuf.data);
if (SPI_processed > 0)
Expand All @@ -685,8 +657,7 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
*/
ereport(ERROR,
(errcode(ERRCODE_CARDINALITY_VIOLATION),
errmsg("new data for materialized view \"%s\" contains duplicate rows without any null columns, "
"or contains rows with all null values",
errmsg("new data for materialized view \"%s\" contains duplicate rows without any null columns",
RelationGetRelationName(matviewRel)),
errdetail("Row: %s",
SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1))));
Expand All @@ -697,19 +668,23 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,

/* Start building the query for creating the diff table. */
resetStringInfo(&querybuf);
if (IsYugaByteEnabled()) {
if (IsYugaByteEnabled())
{
appendStringInfo(&querybuf,
"CREATE TEMP TABLE %s AS "
"SELECT mv, newdata "
"FROM %s mv FULL JOIN %s newdata ON (",
diffname, matviewname, tempname);
} else {
}
else
{
appendStringInfo(&querybuf,
"CREATE TEMP TABLE %s AS "
"SELECT mv.ctid AS tid, newdata "
"FROM %s mv FULL JOIN %s newdata ON (",
diffname, matviewname, tempname);
}

/*
* Get the list of index OIDs for the table from the relcache, and look up
* each one in the pg_index syscache. We will test for equality on all
Expand Down Expand Up @@ -830,12 +805,15 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
*/
Assert(foundUniqueIndex);

if (IsYugaByteEnabled()) {
if (IsYugaByteEnabled())
{
/* Can't use TID in YB mode */
appendStringInfoString(&querybuf,
" AND newdata OPERATOR(pg_catalog.*=) mv) "
"WHERE newdata IS NULL OR mv IS NULL ");
} else {
}
else
{
appendStringInfoString(&querybuf,
" AND newdata OPERATOR(pg_catalog.*=) mv) "
"WHERE newdata IS NULL OR mv IS NULL "
Expand All @@ -862,6 +840,41 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,

OpenMatViewIncrementalMaintenance();

/*
* In YB mode, we also restrict the data from having rows with all null
* values, because we can't correctly compare fully-null rows to compute
* an accurate diff table.
*/
if (IsYugaByteEnabled())
{
resetStringInfo(&querybuf);
appendStringInfo(&querybuf, "SELECT newdata, mv FROM %s WHERE ", diffname);
TupleDesc tuple_desc = RelationGetDescr(matviewRel);

for (int i = 1; i <= tuple_desc->natts; i++)
{
Form_pg_attribute attribute = TupleDescAttr(tuple_desc, i - 1);
char *attribute_name = NameStr(attribute->attname);
appendStringInfo(&querybuf, "(newdata).%s IS NULL AND (mv).%s IS NULL ",
attribute_name, attribute_name);
if (i < tuple_desc->natts)
appendStringInfo(&querybuf, "AND ");

}

if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
elog(ERROR, "SPI_exec failed: %s", querybuf.data);
if (SPI_processed > 0)
{
ereport(ERROR,
(errcode(ERRCODE_CARDINALITY_VIOLATION),
errmsg("new data for materialized view \"%s\" contains rows with all null values",
RelationGetRelationName(matviewRel)),
errdetail("Row: %s",
SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1))));
}
}

/* Deletes must come before inserts; do them first. */
resetStringInfo(&querybuf);
if (IsYugaByteEnabled())
Expand Down Expand Up @@ -892,22 +905,27 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
"AND diff.newdata IS NULL)",
matviewname, diffname);
}

if (SPI_exec(querybuf.data, 0) != SPI_OK_DELETE)
elog(ERROR, "SPI_exec failed: %s", querybuf.data);

/* Inserts go last. */
resetStringInfo(&querybuf);
if (IsYugaByteEnabled()) {
if (IsYugaByteEnabled())
{
appendStringInfo(&querybuf,
"INSERT INTO %s SELECT (diff.newdata).* "
"FROM %s diff WHERE mv IS NULL",
matviewname, diffname);
} else {
}
else
{
appendStringInfo(&querybuf,
"INSERT INTO %s SELECT (diff.newdata).* "
"FROM %s diff WHERE tid IS NULL",
matviewname, diffname);
}

if (SPI_exec(querybuf.data, 0) != SPI_OK_INSERT)
elog(ERROR, "SPI_exec failed: %s", querybuf.data);

Expand Down
6 changes: 3 additions & 3 deletions src/postgres/src/test/regress/expected/yb_feature_matview.out
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ INSERT INTO mvtest_foo SELECT * FROM mvtest_foo;
REFRESH MATERIALIZED VIEW mvtest_mv;
ERROR: duplicate key value violates unique constraint "mvtest_mv_a_idx"
REFRESH MATERIALIZED VIEW CONCURRENTLY mvtest_mv;
ERROR: new data for materialized view "mvtest_mv" contains duplicate rows without any null columns, or contains rows with all null values
ERROR: new data for materialized view "mvtest_mv" contains duplicate rows without any null columns
DETAIL: Row: (1,10)
DROP TABLE mvtest_foo CASCADE;
NOTICE: drop cascades to materialized view mvtest_mv
Expand Down Expand Up @@ -548,8 +548,8 @@ INSERT INTO test_yb VALUES (null);
CREATE MATERIALIZED VIEW mtest_yb AS SELECT * FROM test_yb;
CREATE UNIQUE INDEX ON mtest_yb(col);
REFRESH MATERIALIZED VIEW CONCURRENTLY mtest_yb; -- should fail
ERROR: new data for materialized view "mtest_yb" contains duplicate rows without any null columns, or contains rows with all null values
DETAIL: Row: ()
ERROR: new data for materialized view "mtest_yb" contains rows with all null values
DETAIL: Row: (null)
CREATE TABLE pg_temp_foo (col int);
INSERT INTO pg_temp_foo values (1);
SELECT * FROM pg_temp_foo;
Expand Down

0 comments on commit 67e0467

Please sign in to comment.