-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy pathsql_select.py
More file actions
133 lines (95 loc) · 2.82 KB
/
sql_select.py
File metadata and controls
133 lines (95 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# A very limited parser for SQL SELECT statements,
# for demo purposes. Supports:
# 1. A simple list of columns (or number/string literals)
# 2. A simple table name
# 3. An optional where condition,
# which has the form of 'A op B' where A and B are columns, strings or number,
# and op is a comparison operator
#
# We demonstrate the use of `map` to create AST nodes with a single arg,
# and `seq` for AST nodes with more than one arg.
import enum
from dataclasses import dataclass
from typing import List, Optional, Union
from parsy import from_enum, regex, seq, string
# -- AST nodes:
class Operator(enum.Enum):
EQ = "="
LT = "<"
GT = ">"
LTE = "<="
GTE = ">="
@dataclass
class Number:
value: int
@dataclass
class String:
value: str
@dataclass
class Field:
name: str
@dataclass
class Table:
name: str
ColumnExpression = Union[Field, String, Number]
@dataclass
class Comparison:
left: ColumnExpression
operator: Operator
right: ColumnExpression
@dataclass
class Select:
columns: List[ColumnExpression]
table: Table
where: Optional[Comparison]
# -- Parsers:
number_literal = regex(r"-?[0-9]+").map(int).map(Number)
# We don't support ' in strings or escaping for simplicity
string_literal = regex(r"'[^']*'").map(lambda s: String(s[1:-1]))
identifier = regex("[a-zA-Z][a-zA-Z0-9_]*")
field = identifier.map(Field)
table = identifier.map(Table)
space = regex(r"\s+") # non-optional whitespace
padding = regex(r"\s*") # optional whitespace
column_expr = field | string_literal | number_literal
operator = from_enum(Operator)
comparison = seq(
left=column_expr << padding,
operator=operator,
right=padding >> column_expr,
).combine_dict(Comparison)
SELECT = string("SELECT")
FROM = string("FROM")
WHERE = string("WHERE")
# Here we demonstrate use of leading underscore to discard parts we don't want,
# which is more readable and convenient than `<<` and `>>` sometimes.
select = seq(
_select=SELECT + space,
columns=column_expr.sep_by(padding + string(",") + padding, min=1),
_from=space + FROM + space,
table=table,
where=(space >> WHERE >> space >> comparison).optional(),
_end=padding + string(";"),
).combine_dict(Select)
# Run these tests with pytest:
def test_select():
assert select.parse("SELECT thing, stuff, 123, 'hello' FROM my_table WHERE id = 1;") == Select(
columns=[
Field("thing"),
Field("stuff"),
Number(123),
String("hello"),
],
table=Table("my_table"),
where=Comparison(
left=Field("id"),
operator=Operator.EQ,
right=Number(1),
),
)
def test_optional_where():
assert select.parse("SELECT 1 FROM x;") == Select(
columns=[Number(1)],
table=Table("x"),
where=None,
)