Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cmd/dump/dump_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ func TestDumpCommand_Issue80IndexNameQuote(t *testing.T) {
runExactMatchTest(t, "issue_80_index_name_quote")
}

func TestDumpCommand_Issue82ViewLogicExpr(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
runExactMatchTest(t, "issue_82_view_logic_expr")
}

func TestDumpCommand_Issue83ExplicitConstraintName(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
Expand Down
104 changes: 102 additions & 2 deletions ir/formatter.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ func (f *postgreSQLFormatter) formatExpression(expr *pg_query.Node) {
f.formatCoalesceExpr(expr.GetCoalesceExpr())
case expr.GetNullTest() != nil:
f.formatNullTest(expr.GetNullTest())
case expr.GetScalarArrayOpExpr() != nil:
f.formatScalarArrayOpExpr(expr.GetScalarArrayOpExpr())
case expr.GetAArrayExpr() != nil:
f.formatAArrayExpr(expr.GetAArrayExpr())
default:
// Fallback to deparse for complex expressions
if deparseResult, err := f.deparseNode(expr); err == nil {
Expand Down Expand Up @@ -297,6 +301,29 @@ func (f *postgreSQLFormatter) formatAConst(constant *pg_query.A_Const) {

// formatAExpr formats an A_Expr (binary/unary expressions)
func (f *postgreSQLFormatter) formatAExpr(expr *pg_query.A_Expr) {
// Special case: Detect "column = ANY (ARRAY[...])" pattern and convert to "column IN (...)"
// This pattern appears when parsing view definitions from pg_get_viewdef()
if len(expr.Name) == 1 && expr.Rexpr != nil {
if str := expr.Name[0].GetString_(); str != nil && str.Sval == "=" {
if aArrayExpr := expr.Rexpr.GetAArrayExpr(); aArrayExpr != nil {
// Direct array comparison: column = ARRAY[...]
// Convert to IN syntax, stripping unnecessary type casts from constants
f.formatExpressionStripCast(expr.Lexpr)
f.buffer.WriteString(" IN (")
for i, elem := range aArrayExpr.Elements {
if i > 0 {
f.buffer.WriteString(", ")
}
// Strip type casts from constants in IN list
f.formatExpressionStripCast(elem)
}
f.buffer.WriteString(")")
return
}
}
}
Comment on lines +304 to +324
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This rewrites a plain '=' comparison against an array literal (e.g., 'col = ARRAY[...]') to 'IN (...)', which changes semantics. Unless this node represents the '= ANY' form (which should be handled by ScalarArrayOpExpr or A_Expr ANY/ALL kinds), this transformation is unsafe. Remove this branch or tighten it to only trigger for an explicit '= ANY (ARRAY[...])' construct.

Copilot uses AI. Check for mistakes.

// Default formatting for other A_Expr cases
// Format left operand
if expr.Lexpr != nil {
f.formatExpression(expr.Lexpr)
Expand Down Expand Up @@ -484,10 +511,10 @@ func (f *postgreSQLFormatter) formatCaseExpr(caseExpr *pg_query.CaseExpr) {
}
}

// Format ELSE clause
// Format ELSE clause, stripping unnecessary type casts from constants/NULL
if caseExpr.Defresult != nil {
f.buffer.WriteString(" ELSE ")
f.formatExpression(caseExpr.Defresult)
f.formatExpressionStripCast(caseExpr.Defresult)
}
Comment on lines +514 to 518
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Good improvement on ELSE formatting. Consider applying the same cast-stripping to WHEN result expressions as well (e.g., formatExpressionStripCast on CaseWhenExpr.Result) to ensure consistent formatting of constants in all CASE branches.

Copilot uses AI. Check for mistakes.

f.buffer.WriteString(" END")
Expand Down Expand Up @@ -568,3 +595,76 @@ func (f *postgreSQLFormatter) formatNullTest(nullTest *pg_query.NullTest) {
f.buffer.WriteString(" IS NOT NULL")
}
}

// formatExpressionStripCast formats an expression, stripping unnecessary type casts from constants and NULL
func (f *postgreSQLFormatter) formatExpressionStripCast(expr *pg_query.Node) {
// If this is a TypeCast of a constant or NULL, format just the value without the cast
if typeCast := expr.GetTypeCast(); typeCast != nil {
if typeCast.Arg != nil {
if aConst := typeCast.Arg.GetAConst(); aConst != nil {
// This is a typed constant, format just the constant value
f.formatAConst(aConst)
return
}
// For non-constant args, recursively strip casts
f.formatExpressionStripCast(typeCast.Arg)
return
}
}

// Otherwise, format normally
f.formatExpression(expr)
}

// formatAArrayExpr formats array expressions (ARRAY[...])
func (f *postgreSQLFormatter) formatAArrayExpr(arrayExpr *pg_query.A_ArrayExpr) {
f.buffer.WriteString("ARRAY[")
for i, elem := range arrayExpr.Elements {
if i > 0 {
f.buffer.WriteString(", ")
}
f.formatExpression(elem)
}
f.buffer.WriteString("]")
}

// formatScalarArrayOpExpr formats scalar array operations like "column = ANY (ARRAY[...])"
// and converts them to the simpler "column IN (...)" syntax
func (f *postgreSQLFormatter) formatScalarArrayOpExpr(arrayOp *pg_query.ScalarArrayOpExpr) {
// Check if this is a simple = ANY pattern that can be converted to IN
// UseOr means ANY (disjunction), !UseOr means ALL (conjunction)
isEqualAny := arrayOp.UseOr && len(arrayOp.Args) == 2

if isEqualAny {
// Args[0] is the left side (column), Args[1] is the right side (array)
// Format as "column IN (values)"
Comment on lines +635 to +640
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatScalarArrayOpExpr does not verify the operator is '=' and will convert other ANY/ALL operators (e.g., '<> ANY', '> ANY', or '= ALL') to IN, which is incorrect. Add an explicit operator check (e.g., via Opname if available, or a safe deparse check for ' = ANY') and only rewrite when the operator is equality with ANY; otherwise, fall back to deparse.

Copilot uses AI. Check for mistakes.
if len(arrayOp.Args) >= 2 {
// Format left side (the column)
f.formatExpression(arrayOp.Args[0])

f.buffer.WriteString(" IN (")

// Extract values from the array
if arrayExpr := arrayOp.Args[1].GetArrayExpr(); arrayExpr != nil {
// Format array elements as comma-separated list
for i, elem := range arrayExpr.Elements {
if i > 0 {
f.buffer.WriteString(", ")
}
f.formatExpression(elem)
}
} else {
// Fallback: format the right expression as-is
f.formatExpression(arrayOp.Args[1])
}

f.buffer.WriteString(")")
return
}
Comment on lines +641 to +663
Copy link

Copilot AI Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The RHS extraction uses GetArrayExpr and falls back to emitting the entire array as-is, which can produce invalid SQL like 'col IN (ARRAY[...]::type[])'. Additionally, the LHS retains type casts (e.g., 'status::text'). Unwrap TypeCast nodes on the RHS to reach A_ArrayExpr/ArrayExpr and iterate elements, formatting each via formatExpressionStripCast; also use formatExpressionStripCast on the LHS to drop superfluous casts. If the RHS cannot be unwrapped to an array literal, fall back to deparsing the whole ScalarArrayOpExpr rather than emitting an invalid IN.

Copilot uses AI. Check for mistakes.
}

// For other operations (like <> ALL) or malformed expressions, use deparse fallback
if deparseResult, err := f.deparseNode(&pg_query.Node{Node: &pg_query.Node_ScalarArrayOpExpr{ScalarArrayOpExpr: arrayOp}}); err == nil {
f.buffer.WriteString(deparseResult)
}
}
70 changes: 67 additions & 3 deletions ir/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,8 +226,11 @@ func normalizeViewDefinition(definition string) string {
normalized = definition
}

// Normalize ORDER BY clauses to match pg_get_viewdef format
normalized = normalizeOrderByInView(normalized)
// Apply all AST-based normalizations in one pass to avoid re-parsing
// This includes:
// 1. Converting PostgreSQL's "= ANY (ARRAY[...])" to "IN (...)"
// 2. Normalizing ORDER BY clauses to use aliases
normalized = normalizeViewWithAST(normalized)

return normalized
}
Expand Down Expand Up @@ -1058,8 +1061,68 @@ func convertAnyArrayToIn(expr string) string {
return fmt.Sprintf("%s IN (%s)", columnName, strings.Join(cleanValues, ", "))
}

// normalizeViewWithAST applies all AST-based normalizations in a single pass
// This includes converting "= ANY (ARRAY[...])" to "IN (...)" and normalizing ORDER BY
func normalizeViewWithAST(definition string) string {
if definition == "" {
return definition
}

// Parse the view definition
parseResult, err := pg_query.Parse(definition)
if err != nil {
return definition
}

if len(parseResult.Stmts) == 0 {
return definition
}

stmt := parseResult.Stmts[0]
selectStmt := stmt.Stmt.GetSelectStmt()
if selectStmt == nil {
return definition
}

// Step 1: Normalize ORDER BY clauses (modify AST if needed)
orderByModified := false
if len(selectStmt.SortClause) > 0 {
// Build reverse alias map (expression -> alias) from target list
exprToAliasMap := buildExpressionToAliasMap(selectStmt.TargetList)

// Transform ORDER BY clauses: replace complex expressions with aliases when possible
for _, sortItem := range selectStmt.SortClause {
if sortBy := sortItem.GetSortBy(); sortBy != nil {
if wasModified := normalizeOrderByExpressionToAlias(sortBy, exprToAliasMap); wasModified {
orderByModified = true
}
}
}
}

// Step 2: Check if we need to use custom formatter
// Use custom formatter if:
// a) The view definition contains "= ANY" (needs conversion to IN)
// b) ORDER BY was modified
needsCustomFormatter := strings.Contains(definition, "= ANY") || orderByModified

if needsCustomFormatter {
// Use custom formatter to format the entire query
// The formatter will handle:
// - Converting "= ANY (ARRAY[...])" to "IN (...)"
// - Proper formatting of all expressions
formatter := newPostgreSQLFormatter()
formatted := formatter.formatQueryNode(stmt.Stmt)
if formatted != "" {
return formatted
}
}

return definition
}

// normalizeOrderByInView normalizes ORDER BY clauses in view definitions
// This converts PostgreSQL's pg_get_viewdef format (with parentheses and expressions)
// This converts PostgreSQL's pg_get_viewdef format (with parentheses and expressions)
// back to parser format (using column aliases) for consistent comparison
// Uses AST manipulation for robustness
func normalizeOrderByInView(definition string) string {
Expand Down Expand Up @@ -1098,6 +1161,7 @@ func normalizeOrderByInView(definition string) string {
}

// If we made modifications, use PostgreSQL formatter to maintain formatting
// IMPORTANT: Use the custom formatter to preserve ANY->IN conversions done earlier
if modified {
formatter := newPostgreSQLFormatter()
formatted := formatter.formatQueryNode(stmt.Stmt)
Expand Down
9 changes: 9 additions & 0 deletions testdata/dump/issue_82_view_logic_expr/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"name": "issue_82_view_logic_expr",
"description": "Test case for view logical expressions dumping (GitHub issue #82)",
"source": "https://github.com/pgschema/pgschema/issues/82",
"notes": [
"Tests that views with CASE statements and IN clauses are correctly preserved when dumping",
"Verifies ORDER BY with aliased columns doesn't corrupt the CASE expression"
]
}
61 changes: 61 additions & 0 deletions testdata/dump/issue_82_view_logic_expr/pgdump.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
--
-- PostgreSQL database dump
--

-- Dumped from database version 17.5 (Debian 17.5-1.pgdg120+1)
-- Dumped by pg_dump version 17.5 (Homebrew)

SET statement_timeout = 0;
SET lock_timeout = 0;
SET idle_in_transaction_session_timeout = 0;
SET transaction_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
SET xmloption = content;
SET client_min_messages = warning;
SET row_security = off;

SET default_tablespace = '';

SET default_table_access_method = heap;

--
-- Name: orders; Type: TABLE; Schema: public; Owner: -
--

CREATE TABLE public.orders (
id integer NOT NULL,
status character varying(50) NOT NULL,
amount numeric(10,2)
);


--
-- Name: paid_orders; Type: VIEW; Schema: public; Owner: -
--

CREATE VIEW public.paid_orders AS
SELECT id AS order_id,
status,
CASE
WHEN ((status)::text = ANY ((ARRAY['paid'::character varying, 'completed'::character varying])::text[])) THEN amount
ELSE NULL::numeric
END AS paid_amount
FROM public.orders
ORDER BY id, status;


--
-- Name: orders orders_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--

ALTER TABLE ONLY public.orders
ADD CONSTRAINT orders_pkey PRIMARY KEY (id);


--
-- PostgreSQL database dump complete
--

31 changes: 31 additions & 0 deletions testdata/dump/issue_82_view_logic_expr/pgschema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
--
-- pgschema database dump
--

-- Dumped from database version PostgreSQL 17.5
-- Dumped by pgschema version 1.4.0


--
-- Name: orders; Type: TABLE; Schema: -; Owner: -
--

CREATE TABLE IF NOT EXISTS orders (
id integer,
status varchar(50) NOT NULL,
amount numeric(10,2),
CONSTRAINT orders_pkey PRIMARY KEY (id)
);

--
-- Name: paid_orders; Type: VIEW; Schema: -; Owner: -
--

CREATE OR REPLACE VIEW paid_orders AS
SELECT
id AS order_id,
status,
CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END AS paid_amount
FROM orders
ORDER BY order_id, status;

36 changes: 36 additions & 0 deletions testdata/dump/issue_82_view_logic_expr/raw.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
--
-- Test case for GitHub issue #82: View logical expressions dumping
--
-- This test case reproduces a bug where view definitions with CASE statements
-- containing IN clauses become syntactically incorrect when dumped.
--
-- The issue occurs when:
-- 1. A view has a CASE statement with an IN clause
-- 2. The view uses column aliases
-- 3. The view has an ORDER BY clause referencing the aliased columns
--
-- Original bug: CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END
-- Gets corrupted to: CASE WHEN status::text = ::text THEN amount ELSE NULL END
-- (The IN clause values disappear!)
--

--
-- Base table: orders with status tracking
--
CREATE TABLE orders (
id integer PRIMARY KEY,
status varchar(50) NOT NULL,
amount numeric(10,2)
);

--
-- Problematic view: Uses CASE with IN clause + column alias in ORDER BY
-- This specific combination triggers the bug
--
CREATE VIEW paid_orders AS
SELECT
id AS order_id,
status,
CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END AS paid_amount
FROM orders
ORDER BY order_id, status;