Skip to content

Commit 062dc59

Browse files
committed
Try out the dot-subscripting patch
There is a patch on the Postgres mailinglist that allows https://commitfest.postgresql.org/patch/5214/
1 parent 9ecb7e4 commit 062dc59

File tree

11 files changed

+158
-41
lines changed

11 files changed

+158
-41
lines changed

.github/workflows/build_and_test.yaml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,25 @@ jobs:
4040
matrix:
4141
include:
4242
- version: REL_14_STABLE
43+
repo: postgres/postgres
4344
type: Release
4445
- version: REL_15_STABLE
46+
repo: postgres/postgres
4547
type: Release
4648
- version: REL_16_STABLE
49+
repo: postgres/postgres
4750
type: Release
4851
# For PG17 we build DuckDB as a static library. There's nothing special
4952
# about the static library, nor PG17. This is only done so that we have
5053
# CI coverage for our logic to build a static library version.
5154
- version: REL_17_STABLE
55+
repo: postgres/postgres
5256
type: ReleaseStatic
5357
- version: REL_17_STABLE
58+
repo: postgres/postgres
5459
type: Debug
55-
- version: master
60+
- version: cf/5214
61+
repo: postgresql-cfbot/postgresql
5662
type: Release
5763

5864
# Not enabled for now (waiting for April 2025 code freeze)
@@ -89,7 +95,7 @@ jobs:
8995
- name: Checkout PostgreSQL code
9096
run: |
9197
rm -rf postgres
92-
git clone --branch ${{ matrix.version }} --single-branch --depth 1 https://github.com/postgres/postgres.git
98+
git clone --branch ${{ matrix.version }} --single-branch --depth 1 https://github.com/${{ matrix.repo }}.git postgres
9399
94100
- name: Compute Version SHAs
95101
id: versions

src/pg/pgduckdb_subscript.cpp

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,35 @@ AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, A_In
8989
}
9090
}
9191

92+
bool
93+
AddSubscriptExpressions(SubscriptingRef *sbsref, struct ParseState *pstate, Node *subscript, bool is_slice) {
94+
if (IsA(subscript, A_Indices)) {
95+
// If the subscript is an A_Indices node, we can add the expressions directly
96+
AddSubscriptExpressions(sbsref, pstate, castNode(A_Indices, subscript), is_slice);
97+
return true;
98+
}
99+
100+
if (IsA(subscript, String)) {
101+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, subscript);
102+
return true;
103+
}
104+
105+
if (IsA(subscript, A_Star)) {
106+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr, NULL);
107+
return true;
108+
}
109+
110+
return false;
111+
}
112+
92113
/*
93114
* DuckdbSubscriptTransform is called by the parser when a subscripting
94115
* operation is performed on a duckdb type that can be indexed by arbitrary
95116
* expressions. All this does is parse those expressions and make sure the
96117
* subscript returns an an duckdb.unresolved_type again.
97118
*/
98119
void
99-
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
120+
DuckdbSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
100121
bool is_assignment, const char *type_name) {
101122
/*
102123
* We need to populate our cache for some of the code below. Normally this
@@ -111,18 +132,22 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
111132
elog(ERROR, "Assignment to %s is not supported", type_name);
112133
}
113134

114-
if (indirection == NIL) {
135+
if (*indirection == NIL) {
115136
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
116137
}
117138

118139
// Transform each subscript expression
119-
foreach_node(A_Indices, subscript, indirection) {
120-
AddSubscriptExpressions(sbsref, pstate, subscript, is_slice);
140+
foreach_ptr(Node, subscript, *indirection) {
141+
if (!AddSubscriptExpressions(sbsref, pstate, subscript, is_slice)) {
142+
break;
143+
}
121144
}
122145

123146
// Set the result type of the subscripting operation
124147
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
125148
sbsref->reftypmod = -1;
149+
150+
*indirection = list_delete_first_n(*indirection, list_length(sbsref->refupperindexpr));
126151
}
127152

128153
/*
@@ -136,7 +161,7 @@ DuckdbSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct Pars
136161
* Currently this is used for duckdb.row and duckdb.struct types.
137162
*/
138163
void
139-
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
164+
DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
140165
bool is_assignment, const char *type_name) {
141166
/*
142167
* We need to populate our cache for some of the code below. Normally this
@@ -151,33 +176,40 @@ DuckdbTextSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct
151176
elog(ERROR, "Assignment to %s is not supported", type_name);
152177
}
153178

154-
if (indirection == NIL) {
179+
if (*indirection == NIL) {
155180
elog(ERROR, "Subscripting %s with an empty subscript is not supported", type_name);
156181
}
157182

158183
bool first = true;
159184

160185
// Transform each subscript expression
161-
foreach_node(A_Indices, subscript, indirection) {
162-
/* The first subscript needs to be a TEXT constant, since it should be
163-
* a column reference. But the subscripts after that can be anything,
164-
* DuckDB should interpret those. */
165-
if (first) {
166-
sbsref->refupperindexpr =
167-
lappend(sbsref->refupperindexpr, CoerceSubscriptToText(pstate, subscript, type_name));
186+
foreach_ptr(Node, subscript, *indirection) {
187+
/*
188+
* If the first subscript is an index expression then it needs to be
189+
* coerced to text, since it should be a column reference. But the
190+
* subscripts after that can be anything, DuckDB should interpret
191+
* those.
192+
*/
193+
if (first && IsA(subscript, A_Indices)) {
194+
sbsref->refupperindexpr = lappend(sbsref->refupperindexpr,
195+
CoerceSubscriptToText(pstate, castNode(A_Indices, subscript), type_name));
168196
if (is_slice) {
169197
sbsref->reflowerindexpr = lappend(sbsref->reflowerindexpr, NULL);
170198
}
171199
first = false;
172200
continue;
173201
}
174202

175-
AddSubscriptExpressions(sbsref, pstate, subscript, is_slice);
203+
if (!AddSubscriptExpressions(sbsref, pstate, subscript, is_slice)) {
204+
break;
205+
}
176206
}
177207

178208
// Set the result type of the subscripting operation
179209
sbsref->refrestype = pgduckdb::DuckdbUnresolvedTypeOid();
180210
sbsref->reftypmod = -1;
211+
212+
*indirection = list_delete_first_n(*indirection, list_length(sbsref->refupperindexpr));
181213
}
182214

183215
static bool
@@ -229,8 +261,14 @@ DuckdbSubscriptExecSetup(const SubscriptingRef * /*sbsref*/, SubscriptingRefStat
229261
}
230262

231263
void
232-
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
264+
#if PG_VERSION_NUM >= 180000
265+
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
233266
bool is_assignment) {
267+
#else
268+
DuckdbRowSubscriptTransform(SubscriptingRef *sbsref, List *indirection_, struct ParseState *pstate, bool is_slice,
269+
bool is_assignment) {
270+
List **indirection = &indirection_;
271+
#endif
234272
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.row");
235273
}
236274

@@ -249,8 +287,14 @@ static SubscriptRoutines duckdb_row_subscript_routines = {
249287
};
250288

251289
void
252-
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate,
290+
#if PG_VERSION_NUM >= 180000
291+
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate,
292+
bool is_slice, bool is_assignment) {
293+
#else
294+
DuckdbUnresolvedTypeSubscriptTransform(SubscriptingRef *sbsref, List *indirection_, struct ParseState *pstate,
253295
bool is_slice, bool is_assignment) {
296+
List **indirection = &indirection_;
297+
#endif
254298
DuckdbSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.unresolved_type");
255299
}
256300

@@ -269,8 +313,14 @@ static SubscriptRoutines duckdb_unresolved_type_subscript_routines = {
269313
};
270314

271315
void
272-
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
316+
#if PG_VERSION_NUM >= 180000
317+
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
273318
bool is_assignment) {
319+
#else
320+
DuckdbStructSubscriptTransform(SubscriptingRef *sbsref, List *indirection_, struct ParseState *pstate, bool is_slice,
321+
bool is_assignment) {
322+
List **indirection = &indirection_;
323+
#endif
274324
DuckdbTextSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.struct");
275325
}
276326

@@ -289,8 +339,14 @@ static SubscriptRoutines duckdb_struct_subscript_routines = {
289339
};
290340

291341
void
292-
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List *indirection, struct ParseState *pstate, bool is_slice,
342+
#if PG_VERSION_NUM >= 180000
343+
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List **indirection, struct ParseState *pstate, bool is_slice,
344+
bool is_assignment) {
345+
#else
346+
DuckdbMapSubscriptTransform(SubscriptingRef *sbsref, List *indirection_, struct ParseState *pstate, bool is_slice,
293347
bool is_assignment) {
348+
List **indirection = &indirection_;
349+
#endif
294350
DuckdbSubscriptTransform(sbsref, indirection, pstate, is_slice, is_assignment, "duckdb.map");
295351
}
296352

src/pgduckdb_ruleutils.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,14 @@ pgduckdb_subscript_has_custom_alias(Plan *plan, List *rtable, Var *subscript_var
350350
int varno;
351351
int varattno;
352352

353+
if (strcmp(colname, "?column?") == 0) {
354+
/*
355+
* If the column name is "?column?", then it means that Postgres
356+
* couldn't figure out a decent alias.
357+
*/
358+
return false;
359+
}
360+
353361
/*
354362
* If we have a syntactic referent for the Var, and we're working from a
355363
* parse tree, prefer to use the syntactic referent. Otherwise, fall back
@@ -388,6 +396,15 @@ pgduckdb_strip_first_subscript(SubscriptingRef *sbsref, StringInfo buf) {
388396
}
389397

390398
Assert(sbsref->refupperindexpr);
399+
400+
if (linitial(sbsref->refupperindexpr) == NULL) {
401+
return sbsref;
402+
}
403+
404+
if (IsA(linitial(sbsref->refupperindexpr), String)) {
405+
return sbsref;
406+
}
407+
391408
Oid typoutput;
392409
bool typIsVarlena;
393410
Const *constval = castNode(Const, linitial(sbsref->refupperindexpr));

src/vendor/pg_ruleutils_18.c

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13082,17 +13082,33 @@ printSubscripts(SubscriptingRef *sbsref, deparse_context *context)
1308213082
lowlist_item = list_head(sbsref->reflowerindexpr); /* could be NULL */
1308313083
foreach(uplist_item, sbsref->refupperindexpr)
1308413084
{
13085-
appendStringInfoChar(buf, '[');
13086-
if (lowlist_item)
13085+
Node *up = (Node *) lfirst(uplist_item);
13086+
13087+
if (!up)
13088+
{
13089+
appendStringInfoString(buf, ".*");
13090+
}
13091+
else if (IsA(up, String))
13092+
{
13093+
appendStringInfoChar(buf, '.');
13094+
appendStringInfoString(buf, quote_identifier(strVal(up)));
13095+
}
13096+
else
1308713097
{
13098+
appendStringInfoChar(buf, '[');
13099+
if (lowlist_item)
13100+
{
13101+
/* If subexpression is NULL, get_rule_expr prints nothing */
13102+
get_rule_expr((Node *) lfirst(lowlist_item), context, false);
13103+
appendStringInfoChar(buf, ':');
13104+
}
1308813105
/* If subexpression is NULL, get_rule_expr prints nothing */
13089-
get_rule_expr((Node *) lfirst(lowlist_item), context, false);
13090-
appendStringInfoChar(buf, ':');
13091-
lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item);
13106+
get_rule_expr((Node *) lfirst(uplist_item), context, false);
13107+
appendStringInfoChar(buf, ']');
1309213108
}
13093-
/* If subexpression is NULL, get_rule_expr prints nothing */
13094-
get_rule_expr((Node *) lfirst(uplist_item), context, false);
13095-
appendStringInfoChar(buf, ']');
13109+
13110+
if (lowlist_item)
13111+
lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item);
1309613112
}
1309713113
}
1309813114

test/pycheck/motherduck_test.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,29 +280,36 @@ def test_md_duckdb_only_types(md_cur: Cursor, ddb: Duckdb):
280280
ddb.sql("""
281281
CREATE TABLE t1(
282282
m MAP(INT, VARCHAR),
283+
ms MAP(VARCHAR, INT),
283284
s STRUCT(v VARCHAR, i INTEGER),
284285
u UNION(t time, d date),
285286
)""")
286287
ddb.sql("""
287288
INSERT INTO t1 VALUES (
288289
MAP{1: 'abc'},
290+
MAP{'abc': 1},
289291
{'v': 'struct abc', 'i': 123},
290292
'12:00'::time,
291293
), (
292294
MAP{2: 'def'},
295+
MAP{'def': 2},
293296
{'v': 'struct def', 'i': 456},
294297
'2023-10-01'::date,
295298
)
296299
""")
297300
md_cur.wait_until_table_exists("t1")
298301
assert md_cur.sql("""select * from t1""") == [
299-
("{1=abc}", "{'v': struct abc, 'i': 123}", "12:00:00"),
300-
("{2=def}", "{'v': struct def, 'i': 456}", "2023-10-01"),
302+
("{1=abc}", "{abc=1}", "{'v': struct abc, 'i': 123}", "12:00:00"),
303+
("{2=def}", "{def=2}", "{'v': struct def, 'i': 456}", "2023-10-01"),
301304
]
302305

303306
assert md_cur.sql("""select m[1] from t1""") == ["abc", None]
307+
assert md_cur.sql("""select ms['abc'] from t1""") == [1, None]
308+
assert md_cur.sql("""select (ms).abc from t1""") == [1, None]
304309
assert md_cur.sql("""select s['v'] from t1""") == ["struct abc", "struct def"]
305310
assert md_cur.sql("""select s['i'] from t1""") == [123, 456]
311+
assert md_cur.sql("""select (s).v from t1""") == ["struct abc", "struct def"]
312+
assert md_cur.sql("""select (s).i from t1""") == [123, 456]
306313
assert md_cur.sql("""select union_extract(u,'t') from t1""") == [
307314
datetime.time(12, 0),
308315
None,

test/regression/expected/json_functions_duckdb.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,15 @@ SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') F
343343
{'family': canidae, 'coolness': NULL}
344344
(2 rows)
345345

346+
SELECT (transformed).* FROM (
347+
SELECT public.json_transform(j, '{"family": "VARCHAR", "coolness": "DOUBLE"}') as transformed FROM example2
348+
) q;
349+
family | coolness
350+
----------+----------
351+
anatidae | 42.42
352+
canidae |
353+
(2 rows)
354+
346355
SELECT public.json_transform(j, '{"family": "TINYINT", "coolness": "DECIMAL(4, 2)"}') FROM example2;
347356
json_transform
348357
-------------------------------------

test/regression/expected/read_functions.out

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,20 +63,15 @@ SELECT arraycol[1:2] FROM (
6363
(1 row)
6464

6565
SELECT r['arraycol'][:] FROM read_parquet('../../data/indexable.parquet') r;
66-
r.arraycol[:]
67-
---------------
68-
{11,22,33}
69-
(1 row)
66+
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Parser Error: syntax error at or near "*"
7067

68+
LINE 1: SELECT r.arraycol.* FROM system.main.read_parquet('../../data/indexable.parquet...
69+
^
7170
SELECT arraycol[:] FROM (
7271
SELECT r['arraycol'] arraycol
7372
FROM read_parquet('../../data/indexable.parquet') r
7473
) q;
75-
arraycol
76-
------------
77-
{11,22,33}
78-
(1 row)
79-
74+
ERROR: (PGDuckDB/CreatePlan) Prepared query returned an error: 'Binder Error: Cannot extract field from expression "arraycol.*" because it is not a struct
8075
-- Subqueries correctly expand *, in case of multiple columns.
8176
SELECT * FROM (
8277
SELECT 'something' as prefix, *, 'something else' as postfix
@@ -506,7 +501,7 @@ SELECT COUNT(r['a']) FROM read_json('../../data/table.json') r WHERE r['c'] > 50
506501
51
507502
(1 row)
508503

509-
SELECT r['a'], r['b'], r['c'] FROM read_json('../../data/table.json') r WHERE r['c'] > 50.4 AND r['c'] < 51.2;
504+
SELECT (r).a, (r).b, (r).c FROM read_json('../../data/table.json') r WHERE (r).c > 50.4 AND (r).c < 51.2;
510505
a | b | c
511506
----+---------+------
512507
50 | json_50 | 50.5

test/regression/expected/unresolved_type.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,9 @@ select make_timestamptz(r['microseconds']) from duckdb.query($$ SELECT 168657000
203203
Mon Jun 12 04:40:00 2023 PDT
204204
(1 row)
205205

206+
SELECT (s).* FROM (select (r).s FROM duckdb.query($$ SELECT {'key1': 'value1', 'key2': 42} AS s $$) r);
207+
key1 | key2
208+
--------+------
209+
value1 | 42
210+
(1 row)
211+

0 commit comments

Comments
 (0)