Skip to content

Commit 3207655

Browse files
committed
sql/analyzer: resolve columns from the projection down the tree
Fixes src-d/gitbase#241. This commit introduces 2 new analyzer rules: - `erase_projection`, which deletes redundant `Project` nodes. When a `Project`'s schema is exactly the same as the schema of its child, the `Project` node is removed. For example, `SELECT * FROM table` would be something like `Project -> Table`, which is converted to only `Table` now because the `Project` node is redundant. - `reorder_projection`, which moves projected columns down the tree so that appearances of those columns in sort and filter nodes may be resolved. The way we parse the queries makes the `Project` the topmost node of the tree most of the time, and `Filter` and `Sort` are always below `Project`. That makes it impossible for the analyzer to resolve the columns that are _created_ in the projection (aliases). Now, those columns are pushed down below the node that requires them in a `Project` node. There have also been some modifications to existing analyzer rules: - `qualify_columns` now does not error if the column can't be qualified. Since we're going to need resolution for aliases and aliases don't belong to any table, having columns that cannot be qualified is expected. - `resolve_columns` now does not error if it fails to resolve a column in its first pass. That's so other rules may have time to do some work on the tree so that this column can be resolved. Instead, in that first pass the column is wrapped with `maybeAlias`; then, in any subsequent pass, if we find a `maybeAlias` and still can't resolve it, it means the other analyzer rules weren't able to make changes in the tree to make this column resolvable, and it fails with an error. This is a way to defer the column resolution because some rules may need to be able to use the schema (which requires all the nodes with columns down the tree resolved) in order to do some work and make some columns resolvable (such as `reorder_projection`).
Other small changes: - `plan.Project` now also propagates the `Source` of the column if the expression had it, which it did not before. Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
1 parent f3a82f0 commit 3207655

File tree

7 files changed

+471
-180
lines changed

7 files changed

+471
-180
lines changed

engine_test.go

Lines changed: 114 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -17,209 +17,192 @@ import (
1717

1818
const driverName = "engine_tests"
1919

20-
func TestQueries(t *testing.T) {
21-
e := newEngine(t)
22-
23-
testQuery(t, e,
20+
var queries = []struct {
21+
query string
22+
expected []sql.Row
23+
}{
24+
{
2425
"SELECT i FROM mytable;",
25-
[][]interface{}{{int64(1)}, {int64(2)}, {int64(3)}},
26-
)
27-
28-
testQuery(t, e,
26+
[]sql.Row{{int64(1)}, {int64(2)}, {int64(3)}},
27+
},
28+
{
2929
"SELECT i FROM mytable WHERE i = 2;",
30-
[][]interface{}{{int64(2)}},
31-
)
32-
33-
testQuery(t, e,
30+
[]sql.Row{{int64(2)}},
31+
},
32+
{
3433
"SELECT i FROM mytable ORDER BY i DESC;",
35-
[][]interface{}{{int64(3)}, {int64(2)}, {int64(1)}},
36-
)
37-
38-
testQuery(t, e,
34+
[]sql.Row{{int64(3)}, {int64(2)}, {int64(1)}},
35+
},
36+
{
3937
"SELECT i FROM mytable WHERE s = 'first row' ORDER BY i DESC;",
40-
[][]interface{}{{int64(1)}},
41-
)
42-
43-
testQuery(t, e,
38+
[]sql.Row{{int64(1)}},
39+
},
40+
{
4441
"SELECT i FROM mytable WHERE s = 'first row' ORDER BY i DESC LIMIT 1;",
45-
[][]interface{}{{int64(1)}},
46-
)
47-
48-
testQuery(t, e,
42+
[]sql.Row{{int64(1)}},
43+
},
44+
{
4945
"SELECT COUNT(*) FROM mytable;",
50-
[][]interface{}{{int32(3)}},
51-
)
52-
53-
testQuery(t, e,
46+
[]sql.Row{{int32(3)}},
47+
},
48+
{
5449
"SELECT COUNT(*) FROM mytable LIMIT 1;",
55-
[][]interface{}{{int32(3)}},
56-
)
57-
58-
testQuery(t, e,
50+
[]sql.Row{{int32(3)}},
51+
},
52+
{
5953
"SELECT COUNT(*) AS c FROM mytable;",
60-
[][]interface{}{{int32(3)}},
61-
)
62-
63-
testQuery(t, e,
54+
[]sql.Row{{int32(3)}},
55+
},
56+
{
6457
"SELECT substring(s, 2, 3) FROM mytable",
65-
[][]interface{}{{"irs"}, {"eco"}, {"hir"}},
66-
)
67-
68-
testQuery(t, e,
58+
[]sql.Row{{"irs"}, {"eco"}, {"hir"}},
59+
},
60+
{
6961
"SELECT YEAR('2007-12-11') FROM mytable",
70-
[][]interface{}{{int32(2007)}, {int32(2007)}, {int32(2007)}},
71-
)
72-
73-
testQuery(t, e,
62+
[]sql.Row{{int32(2007)}, {int32(2007)}, {int32(2007)}},
63+
},
64+
{
7465
"SELECT MONTH('2007-12-11') FROM mytable",
75-
[][]interface{}{{int32(12)}, {int32(12)}, {int32(12)}},
76-
)
77-
78-
testQuery(t, e,
66+
[]sql.Row{{int32(12)}, {int32(12)}, {int32(12)}},
67+
},
68+
{
7969
"SELECT DAY('2007-12-11') FROM mytable",
80-
[][]interface{}{{int32(11)}, {int32(11)}, {int32(11)}},
81-
)
82-
83-
testQuery(t, e,
70+
[]sql.Row{{int32(11)}, {int32(11)}, {int32(11)}},
71+
},
72+
{
8473
"SELECT HOUR('2007-12-11 20:21:22') FROM mytable",
85-
[][]interface{}{{int32(20)}, {int32(20)}, {int32(20)}},
86-
)
87-
88-
testQuery(t, e,
74+
[]sql.Row{{int32(20)}, {int32(20)}, {int32(20)}},
75+
},
76+
{
8977
"SELECT MINUTE('2007-12-11 20:21:22') FROM mytable",
90-
[][]interface{}{{int32(21)}, {int32(21)}, {int32(21)}},
91-
)
92-
93-
testQuery(t, e,
78+
[]sql.Row{{int32(21)}, {int32(21)}, {int32(21)}},
79+
},
80+
{
9481
"SELECT SECOND('2007-12-11 20:21:22') FROM mytable",
95-
[][]interface{}{{int32(22)}, {int32(22)}, {int32(22)}},
96-
)
97-
98-
testQuery(t, e,
82+
[]sql.Row{{int32(22)}, {int32(22)}, {int32(22)}},
83+
},
84+
{
9985
"SELECT DAYOFYEAR('2007-12-11 20:21:22') FROM mytable",
100-
[][]interface{}{{int32(345)}, {int32(345)}, {int32(345)}},
101-
)
102-
103-
testQuery(t, e,
86+
[]sql.Row{{int32(345)}, {int32(345)}, {int32(345)}},
87+
},
88+
{
10489
"SELECT i FROM mytable WHERE i BETWEEN 1 AND 2",
105-
[][]interface{}{{int64(1)}, {int64(2)}},
106-
)
107-
108-
testQuery(t, e,
90+
[]sql.Row{{int64(1)}, {int64(2)}},
91+
},
92+
{
10993
"SELECT i FROM mytable WHERE i NOT BETWEEN 1 AND 2",
110-
[][]interface{}{{int64(3)}},
111-
)
112-
113-
testQuery(t, e,
94+
[]sql.Row{{int64(3)}},
95+
},
96+
{
11497
"SELECT i, i2, s2 FROM mytable INNER JOIN othertable ON i = i2",
115-
[][]interface{}{
98+
[]sql.Row{
11699
{int64(1), int64(1), "third"},
117100
{int64(2), int64(2), "second"},
118101
{int64(3), int64(3), "first"},
119102
},
120-
)
121-
122-
testQuery(t, e,
123-
"SELECT s FROM mytable INNER JOIN othertable "+
103+
},
104+
{
105+
"SELECT s FROM mytable INNER JOIN othertable " +
124106
"ON substring(s2, 1, 2) != '' AND i = i2",
125-
[][]interface{}{
107+
[]sql.Row{
126108
{"first row"},
127109
{"second row"},
128110
{"third row"},
129111
},
130-
)
131-
132-
testQuery(t, e,
112+
},
113+
{
133114
`SELECT COUNT(*) as cnt, fi FROM (
134115
SELECT tbl.s AS fi
135116
FROM mytable tbl
136117
) t
137118
GROUP BY fi`,
138-
[][]interface{}{
119+
[]sql.Row{
139120
{int32(1), "first row"},
140121
{int32(1), "second row"},
141122
{int32(1), "third row"},
142123
},
143-
)
144-
145-
testQuery(t, e,
124+
},
125+
{
146126
"SELECT CAST(-3 AS UNSIGNED) FROM mytable",
147-
[][]interface{}{
127+
[]sql.Row{
148128
{uint64(18446744073709551613)},
149129
{uint64(18446744073709551613)},
150130
{uint64(18446744073709551613)},
151131
},
152-
)
153-
154-
testQuery(t, e,
132+
},
133+
{
155134
"SELECT CONVERT(-3, UNSIGNED) FROM mytable",
156-
[][]interface{}{
135+
[]sql.Row{
157136
{uint64(18446744073709551613)},
158137
{uint64(18446744073709551613)},
159138
{uint64(18446744073709551613)},
160139
},
161-
)
162-
163-
testQuery(t, e,
140+
},
141+
{
164142
"SELECT '3' > 2 FROM tabletest",
165-
[][]interface{}{
143+
[]sql.Row{
166144
{true},
167145
{true},
168146
{true},
169147
},
170-
)
171-
172-
testQuery(t, e,
148+
},
149+
{
173150
"SELECT text > 2 FROM tabletest",
174-
[][]interface{}{
151+
[]sql.Row{
175152
{false},
176153
{false},
177154
{false},
178155
},
179-
)
180-
181-
testQuery(t, e,
156+
},
157+
{
182158
"SELECT * FROM tabletest WHERE text > 0",
183159
nil,
184-
)
185-
186-
testQuery(t, e,
160+
},
161+
{
187162
"SELECT * FROM tabletest WHERE text = 0",
188-
[][]interface{}{
163+
[]sql.Row{
189164
{"a", int32(1)},
190165
{"b", int32(2)},
191166
{"c", int32(3)},
192167
},
193-
)
194-
195-
testQuery(t, e,
168+
},
169+
{
196170
"SELECT * FROM tabletest WHERE text = 'a'",
197-
[][]interface{}{
171+
[]sql.Row{
198172
{"a", int32(1)},
199173
},
200-
)
201-
202-
testQuery(t, e,
174+
},
175+
{
203176
"SELECT s FROM mytable WHERE i IN (1, 2, 5)",
204-
[][]interface{}{
177+
[]sql.Row{
205178
{"first row"},
206179
{"second row"},
207180
},
208-
)
209-
210-
testQuery(t, e,
181+
},
182+
{
211183
"SELECT s FROM mytable WHERE i NOT IN (1, 2, 5)",
212-
[][]interface{}{
184+
[]sql.Row{
213185
{"third row"},
214186
},
215-
)
216-
217-
testQuery(t, e,
187+
},
188+
{
218189
"SELECT 1 + 2",
219-
[][]interface{}{
190+
[]sql.Row{
220191
{int64(3)},
221192
},
222-
)
193+
},
194+
{
195+
`SELECT i AS foo FROM mytable WHERE foo NOT IN (1, 2, 5)`,
196+
[]sql.Row{{int64(3)}},
197+
},
198+
}
199+
200+
func TestQueries(t *testing.T) {
201+
e := newEngine(t)
202+
203+
for _, tt := range queries {
204+
testQuery(t, e, tt.query, tt.expected)
205+
}
223206
}
224207

225208
func TestOrderByColumns(t *testing.T) {
@@ -246,12 +229,12 @@ func TestInsertInto(t *testing.T) {
246229
e := newEngine(t)
247230
testQuery(t, e,
248231
"INSERT INTO mytable (s, i) VALUES ('x', 999);",
249-
[][]interface{}{{int64(1)}},
232+
[]sql.Row{{int64(1)}},
250233
)
251234

252235
testQuery(t, e,
253236
"SELECT i FROM mytable WHERE s = 'x';",
254-
[][]interface{}{{int64(999)}},
237+
[]sql.Row{{int64(999)}},
255238
)
256239
}
257240

@@ -318,7 +301,7 @@ func TestDDL(t *testing.T) {
318301
testQuery(t, e,
319302
"CREATE TABLE t1(a INTEGER, b TEXT, c DATE,"+
320303
"d TIMESTAMP, e VARCHAR(20), f BLOB NOT NULL)",
321-
[][]interface{}(nil),
304+
[]sql.Row(nil),
322305
)
323306

324307
db, err := e.Catalog.Database("mydb")
@@ -339,15 +322,15 @@ func TestDDL(t *testing.T) {
339322
require.Equal(s, testTable.Schema())
340323
}
341324

342-
func testQuery(t *testing.T, e *sqle.Engine, q string, r [][]interface{}) {
325+
func testQuery(t *testing.T, e *sqle.Engine, q string, r []sql.Row) {
343326
t.Run(q, func(t *testing.T) {
344327
require := require.New(t)
345328
session := sql.NewEmptyContext()
346329

347330
_, rows, err := e.Query(session, q)
348331
require.NoError(err)
349332

350-
var rs [][]interface{}
333+
var rs []sql.Row
351334
for {
352335
row, err := rows.Next()
353336
if err == io.EOF {
@@ -463,8 +446,6 @@ func TestTracing(t *testing.T) {
463446
"plan.Sort",
464447
}
465448

466-
require.Len(spans, 77)
467-
468449
var spanOperations []string
469450
for _, s := range spans {
470451
name := s.(*jaeger.Span).OperationName()

0 commit comments

Comments
 (0)