Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Oct 7, 2023
1 parent 646845d commit 3b1c367
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 14 deletions.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,7 @@ test:

coverage:
python -m coverage run -m pytest
python -m coverage report --include=opteryx/** -m
python -m coverage report --include=opteryx/** -m

compile:
python setup.py build_ext --inplace
4 changes: 4 additions & 0 deletions opteryx/components/binder/binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ def create_variable_node(node: Node, context: Dict[str, Any]) -> Node:
# Update node.source to the found relation name
node.source = found_source_relation.name

# If the alias is not already one of the column's known names, record it as an alias
if node.alias not in column.all_names:
column.aliases.append(node.alias)

# Update node.schema_column with the found column
node.schema_column = column
return node, context
Expand Down
11 changes: 1 addition & 10 deletions opteryx/components/binder/binder_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ def visit_project(self, node: Node, context: BindingContext) -> Tuple[Node, Bind
columns.append(column)
else:
# Handle qualified wildcards
for name, schema in [(name, schema) for name, schema in context.schemas.items()]:
for name, schema in list(context.schemas.items()):
if (
name == column.value[0]
or name.startswith("$shared")
Expand Down Expand Up @@ -522,15 +522,6 @@ def visit_project(self, node: Node, context: BindingContext) -> Tuple[Node, Bind
schema.columns = schema_columns
for column in node.columns:
if column.schema_column.identity in [i.identity for i in schema_columns]:
# If .alias is set, update .value and set .alias to None
if column.alias:
column.source_column = column.alias
current_name = column.schema_column.name
column.schema_column.name = column.alias
column.schema_column.aliases.append(column.qualified_name)
context.schemas[relation].pop_column(current_name)
context.schemas[relation].columns.append(column.schema_column)
column.alias = None
columns.append(column)

# We always have a $derived schema, even if it's empty
Expand Down
12 changes: 12 additions & 0 deletions opteryx/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
├── AmbiguousDatasetError
├── AmbiguousIdentifierError
├── ColumnNotFoundError
├── ColumnReferencedBeforeEvaluation
├── DatasetNotFoundError
├── FunctionNotFoundError
├── IncorrectTypeError
Expand Down Expand Up @@ -150,6 +151,17 @@ def __init__(
super().__init__(message)


class ColumnReferencedBeforeEvaluation(SqlError):
    """
    Exception raised when a column is referenced at a point where it has not
    been evaluated yet.

    The name of the offending column is kept on the instance as `column`.
    """

    def __init__(self, column: str):
        # Keep the column name on the exception so handlers can read it
        # without parsing the message text.
        self.column = column
        super().__init__(
            f"Reference to '{column}' cannot be made here, "
            "it hasn't been evaluated yet due to the internal order of query evaluation."
        )


class DatasetNotFoundError(SqlError):
"""Exception raised when a dataset is not found."""

Expand Down
4 changes: 4 additions & 0 deletions opteryx/managers/expression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from orso.types import OrsoTypes
from pyarrow import Table

from opteryx.exceptions import ColumnReferencedBeforeEvaluation
from opteryx.exceptions import UnsupportedSyntaxError
from opteryx.functions.binary_operators import binary_operations
from opteryx.functions.unary_operations import UNARY_OPERATIONS
Expand Down Expand Up @@ -198,6 +199,9 @@ def _inner_evaluate(root: Node, table: Table, context: ExecutionContext):
root.value = format_expression(root)
root.node_type = NodeType.EVALUATED
if node_type == NodeType.EVALUATED:
column = root.schema_column
if not root.schema_column.identity in table.column_names:
raise ColumnReferencedBeforeEvaluation(column=root.schema_column.name)
return table[root.schema_column.identity].to_numpy()
if node_type == NodeType.COMPARISON_OPERATOR:
left = _inner_evaluate(root.left, table, context)
Expand Down
40 changes: 37 additions & 3 deletions tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
AmbiguousIdentifierError,
AmbiguousDatasetError,
ColumnNotFoundError,
ColumnReferencedBeforeEvaluation,
DatasetNotFoundError,
EmptyDatasetError,
IncompatibleTypesError,
Expand Down Expand Up @@ -179,6 +180,7 @@
("SELECT * FROM `$satellites` WHERE name = 'Calypso'", 1, 8, None),
("SELECT * FROM `$satellites` WHERE `name` = 'Calypso'", 1, 8, None),
("SELECT * FROM $satellites WITH (NO_CACHE)", 177, 8, None),
# 101

# Do we handle comments
("/* comment */ SELECT * FROM $satellites WHERE name = 'Calypso'", 1, 8, None),
Expand Down Expand Up @@ -248,10 +250,41 @@
("SELECT DISTINCT group FROM $astronauts", 21, 1, None),
("SELECT DISTINCT name, birth_date, missions, birth_place, group FROM $astronauts", 357, 5, None),

# alias tests
("SELECT name as Name FROM $satellites", 177, 1, None),
("SELECT name as Name, id as Identifier FROM $satellites", 177, 2, None),
("SELECT name as NAME FROM $satellites WHERE name = 'Calypso'", 1, 1, None),
("SELECT name as NAME FROM $satellites GROUP BY name", 177, 1, None),
("SELECT id as id FROM $satellites", 177, 1, None),
("SELECT planetId as planetId FROM $satellites", 177, 1, None),
("SELECT id as ID, planetId as PLANETID FROM $satellites", 177, 2, None),
("SELECT id as iD, name as nAME FROM $satellites WHERE planetId = 5", 67, 2, None),
("SELECT id as ID, planetId as planetId FROM $satellites WHERE name = 'Io'", 1, 2, None),
("SELECT name as NAME, id as ID, planetId as PLANETID FROM $satellites", 177, 3, None),
("SELECT id as ID FROM $satellites GROUP BY id", 177, 1, None),
("SELECT planetId as planetId FROM $satellites GROUP BY planetId", 7, 1, None),
("SELECT name as nAme, id as Id FROM $satellites WHERE planetId = 3", 1, 2, None),
("SELECT id as ID, name as Name FROM $satellites GROUP BY name, id", 177, 2, None),
("SELECT UPPER(name) as NAME FROM $satellites", 177, 1, None),
("SELECT name as n FROM $satellites WHERE n = 'Titan'", None, 1, ColumnNotFoundError),
("SELECT id as Identifier FROM $satellites ORDER BY Identifier", 177, 1, None),
("SELECT name as n FROM $satellites GROUP BY name HAVING COUNT(n) > 1", None, 1, ColumnReferencedBeforeEvaluation), # TEMP
("SELECT name as Name, name as NAME FROM $satellites", 177, 2, AmbiguousIdentifierError),
("SELECT COUNT(id) as countID, MIN(id) as minID FROM $satellites", 1, 2, None),
("SELECT s.id as satelliteID, p.id as planetID FROM $satellites s JOIN $planets p ON s.planetId = p.id", 177, 2, None),
("SELECT x.id FROM (SELECT id as ID FROM $satellites WHERE id < 10) x", 9, 1, None),
("SELECT name as n as m FROM $satellites", None, 1, SqlError),
("SELECT id*2 as doubleID FROM $satellites", 177, 1, None),
("SELECT id as Identifier FROM $satellites ORDER BY Identifier", 177, 1, None),
("SELECT name as n FROM $satellites GROUP BY name HAVING COUNT(n) > 1", None, 1, ColumnReferencedBeforeEvaluation),
("SELECT name as n FROM $satellites WHERE n = 'Calypso'", None, 1, ColumnNotFoundError),
("SELECT id * 2 as DoubleID FROM $satellites", 177, 1, None),
("SELECT LEFT(name, 3) as newName FROM $satellites", 177, 1, None),
("SELECT name as n, id as i, planetId as p FROM $satellites WHERE planetId = 3 ORDER BY n, i", 1, 3, None),
("SELECT name as n1, name as n2 FROM $satellites", 177, 2, AmbiguousIdentifierError),
("SELECT COUNT(id) as Total FROM $satellites", 1, 1, None),
("SELECT x.id FROM (SELECT id FROM $satellites) as x", 177, 1, None),
("SELECT id as Identifier, name FROM $satellites", 177, 2, None),

# Test infix calculations
("SELECT * FROM $satellites WHERE id = 5", 1, 8, None),
Expand All @@ -264,6 +297,7 @@
("SELECT * FROM $satellites WHERE id = 15 % 10 AND name = 'Europa'", 1, 8, None),
("SELECT * FROM $satellites WHERE id = 15 DIV 4", 1, 8, None),
("SELECT * FROM $satellites WHERE id = -5 + 10", 1, 8, None),

("SELECT * FROM $satellites WHERE id = ABS(-5)", 1, 8, None),
("SELECT * FROM $satellites WHERE id = 5 - 3 + 1", 1, 8, None),
("SELECT * FROM $satellites WHERE id = (3 * 1) + 2", 1, 8, None),
Expand Down Expand Up @@ -343,7 +377,8 @@
("SELECT * FROM $satellites LIMIT 50 OFFSET 170", 7, 8, None),
("SELECT * FROM $satellites ORDER BY name", 177, 8, None),
("SELECT * FROM $satellites ORDER BY RANDOM()", 177, 8, None),

]
A = [
("SELECT MAX(planetId) FROM $satellites", 1, 1, None),
("SELECT MIN(planetId) FROM $satellites", 1, 1, None),
("SELECT SUM(planetId) FROM $satellites", 1, 1, None),
Expand Down Expand Up @@ -755,8 +790,7 @@
("SELECT s.* FROM $planets AS s INNER JOIN $planets AS p USING (id, name)", 9, 20, None),
("SELECT p.* FROM $planets AS s INNER JOIN $planets AS p USING (id, name)", 9, 20, None),
("SELECT id, name FROM $planets AS s INNER JOIN $planets AS p USING (id, name)", 9, 2, None),
]
A = [

("SELECT DATE_TRUNC('month', birth_date) FROM $astronauts", 357, 1, None),
("SELECT DISTINCT * FROM (SELECT DATE_TRUNC('year', birth_date) AS BIRTH_YEAR FROM $astronauts)", 54, 1, None),
("SELECT DISTINCT * FROM (SELECT DATE_TRUNC('month', birth_date) AS BIRTH_YEAR_MONTH FROM $astronauts)", 247, 1, None),
Expand Down

0 comments on commit 3b1c367

Please sign in to comment.