Description
hey there -
as you know, SQLAlchemy relies upon knowing what the names of columns will be in result sets, and for asycnpg that forces us to use PreparedStatements for statements that we want to fetch rows from.
When the preparedstatement has a custom type like an ENUM inside of it, asyncpg does a giant query up front to cache information about the datatype, here it is from my test case below inside my PG SQL log:
2023-09-17 10:04:41.545 EDT [1728595] LOG: execute __asyncpg_stmt_3__: WITH RECURSIVE typeinfo_tree(
oid, ns, name, kind, basetype, elemtype, elemdelim,
range_subtype, attrtypoids, attrnames, depth)
AS (
SELECT
ti.oid, ti.ns, ti.name, ti.kind, ti.basetype,
ti.elemtype, ti.elemdelim, ti.range_subtype,
ti.attrtypoids, ti.attrnames, 0
FROM
(
SELECT
t.oid AS oid,
ns.nspname AS ns,
t.typname AS name,
t.typtype AS kind,
(CASE WHEN t.typtype = 'd' THEN
(WITH RECURSIVE typebases(oid, depth) AS (
SELECT
t2.typbasetype AS oid,
0 AS depth
FROM
pg_type t2
WHERE
t2.oid = t.oid
UNION ALL
SELECT
t2.typbasetype AS oid,
tb.depth + 1 AS depth
FROM
pg_type t2,
typebases tb
WHERE
tb.oid = t2.oid
AND t2.typbasetype != 0
) SELECT oid FROM typebases ORDER BY depth DESC LIMIT 1)
ELSE NULL
END) AS basetype,
t.typelem AS elemtype,
elem_t.typdelim AS elemdelim,
COALESCE(
range_t.rngsubtype,
multirange_t.rngsubtype) AS range_subtype,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.atttypid ORDER BY ia.attnum)
FROM
pg_attribute ia
INNER JOIN pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrtypoids,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.attname::text ORDER BY ia.attnum)
FROM
pg_attribute ia
INNER JOIN pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrnames
FROM
pg_catalog.pg_type AS t
INNER JOIN pg_catalog.pg_namespace ns ON (
ns.oid = t.typnamespace)
LEFT JOIN pg_type elem_t ON (
t.typlen = -1 AND
t.typelem != 0 AND
t.typelem = elem_t.oid
)
LEFT JOIN pg_range range_t ON (
t.oid = range_t.rngtypid
)
LEFT JOIN pg_range multirange_t ON (
t.oid = multirange_t.rngmultitypid
)
)
AS ti
WHERE
ti.oid = any($1::oid[])
UNION ALL
SELECT
ti.oid, ti.ns, ti.name, ti.kind, ti.basetype,
ti.elemtype, ti.elemdelim, ti.range_subtype,
ti.attrtypoids, ti.attrnames, tt.depth + 1
FROM
(
SELECT
t.oid AS oid,
ns.nspname AS ns,
t.typname AS name,
t.typtype AS kind,
(CASE WHEN t.typtype = 'd' THEN
(WITH RECURSIVE typebases(oid, depth) AS (
SELECT
t2.typbasetype AS oid,
0 AS depth
FROM
pg_type t2
WHERE
t2.oid = t.oid
UNION ALL
SELECT
t2.typbasetype AS oid,
tb.depth + 1 AS depth
FROM
pg_type t2,
typebases tb
WHERE
tb.oid = t2.oid
AND t2.typbasetype != 0
) SELECT oid FROM typebases ORDER BY depth DESC LIMIT 1)
ELSE NULL
END) AS basetype,
t.typelem AS elemtype,
elem_t.typdelim AS elemdelim,
COALESCE(
range_t.rngsubtype,
multirange_t.rngsubtype) AS range_subtype,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.atttypid ORDER BY ia.attnum)
FROM
pg_attribute ia
INNER JOIN pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrtypoids,
(CASE WHEN t.typtype = 'c' THEN
(SELECT
array_agg(ia.attname::text ORDER BY ia.attnum)
FROM
pg_attribute ia
INNER JOIN pg_class c
ON (ia.attrelid = c.oid)
WHERE
ia.attnum > 0 AND NOT ia.attisdropped
AND c.reltype = t.oid)
ELSE NULL
END) AS attrnames
FROM
pg_catalog.pg_type AS t
INNER JOIN pg_catalog.pg_namespace ns ON (
ns.oid = t.typnamespace)
LEFT JOIN pg_type elem_t ON (
t.typlen = -1 AND
t.typelem != 0 AND
t.typelem = elem_t.oid
)
LEFT JOIN pg_range range_t ON (
t.oid = range_t.rngtypid
)
LEFT JOIN pg_range multirange_t ON (
t.oid = multirange_t.rngmultitypid
)
)
ti,
typeinfo_tree tt
WHERE
(tt.elemtype IS NOT NULL AND ti.oid = tt.elemtype)
OR (tt.attrtypoids IS NOT NULL AND ti.oid = any(tt.attrtypoids))
OR (tt.range_subtype IS NOT NULL AND ti.oid = tt.range_subtype)
OR (tt.basetype IS NOT NULL AND ti.oid = tt.basetype)
)
SELECT DISTINCT
*,
basetype::regtype::text AS basetype_name,
elemtype::regtype::text AS elemtype_name,
range_subtype::regtype::text AS range_subtype_name
FROM
typeinfo_tree
ORDER BY
depth DESC
2023-09-17 10:04:41.545 EDT [1728595] DETAIL: parameters: $1 = '{2732440}'
This is I assume once per connection/type, but we still are getting performance concerns about it as we see in #10356. SQLAlchemy uses a connection pool by default however this is still an upfront cost and some folks don't use the pool.
I would assume the purpose of this query has to do with get_attributes()
having all the information about the oid for the type.
We don't actually need the "type" part of get_attributes(), just the names (we already know the types on our end). Is there a possibility asyncpg could have some kind of connection parameter, or prepared statement parameter, that is something to the effect use_varchar_for_custom_type
or omit_custom_type
or something such that this giant query on prepare can be skipped?
Demo that produces the query in question:
import asyncio
import asyncpg
async def main():
conn = await asyncpg.connect(
user="scott", password="tiger", database="test"
)
await conn.execute("DROP TABLE IF EXISTS mood_test")
await conn.execute(
"DROP TYPE IF EXISTS mood"
)
await conn.execute(
"CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy')"
)
await conn.execute(
"CREATE TABLE mood_test (id SERIAL primary key, current_mood mood)"
)
await conn.fetch("SELECT 'right before the prepare'")
pp = await conn.prepare("SELECT id, current_mood FROM mood_test")
await conn.fetch("SELECT 'right after the prepare'")
cols = pp.get_attributes()
await conn.fetch("SELECT 'right after the get_attributes'")
print(f"attributes: {cols}")
await conn.close()
asyncio.run(main())