Skip to content

Commit df63b73

Browse files
authored
feat(core): apply default nulls last policy for ordering (#1262)
1 parent f0f9523 commit df63b73

File tree

25 files changed

+898
-250
lines changed

25 files changed

+898
-250
lines changed

ibis-server/app/model/connector.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,15 @@ class QueryDryRunError(UnprocessableEntityError):
5353
pass
5454

5555

56+
class GenericUserError(UnprocessableEntityError):
57+
def __init__(self, message: str):
58+
super().__init__(message)
59+
self.message = message
60+
61+
def __str__(self) -> str:
62+
return self.message
63+
64+
5665
class Connector:
5766
@tracer.start_as_current_span("connector_init", kind=trace.SpanKind.INTERNAL)
5867
def __init__(self, data_source: DataSource, connection_info: ConnectionInfo):
@@ -188,6 +197,20 @@ class MSSqlConnector(SimpleConnector):
188197
def __init__(self, connection_info: ConnectionInfo):
189198
super().__init__(DataSource.mssql, connection_info)
190199

200+
@tracer.start_as_current_span("connector_query", kind=trace.SpanKind.CLIENT)
201+
def query(self, sql: str, limit: int | None = None) -> pa.Table:
202+
try:
203+
return super().query(sql, limit)
204+
except Exception as e:
205+
# To descirbe the query result, ibis will wrap the query with a subquery. MSSQL doesn't
206+
# allow order by without limit in a subquery, so we need to handle this error and provide a more user-friendly error message.
207+
# error code 1033: https://learn.microsoft.com/zh-tw/sql/relational-databases/errors-events/database-engine-events-and-errors-1000-to-1999?view=sql-server-ver15
208+
if "(1033)" in e.args[1]:
209+
raise GenericUserError(
210+
"The query with order-by requires a specific limit to be set in MSSQL."
211+
)
212+
raise
213+
191214
def dry_run(self, sql: str) -> None:
192215
try:
193216
super().dry_run(sql)

ibis-server/tests/routers/v2/connector/test_bigquery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,3 +492,20 @@ async def test_metadata_db_version(client):
492492
)
493493
assert response.status_code == 200
494494
assert response.text == '"Follow BigQuery release version"'
495+
496+
497+
async def test_order_by_nulls_last(client, manifest_str):
498+
response = await client.post(
499+
url=f"{base_url}/query",
500+
json={
501+
"connectionInfo": connection_info,
502+
"manifestStr": manifest_str,
503+
"sql": "SELECT letter FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num, letter) ORDER BY num",
504+
},
505+
)
506+
assert response.status_code == 200
507+
result = response.json()
508+
assert len(result["data"]) == 3
509+
assert result["data"][0][0] == "one"
510+
assert result["data"][1][0] == "two"
511+
assert result["data"][2][0] == "three"

ibis-server/tests/routers/v2/connector/test_mssql.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,17 @@
6464
],
6565
"primaryKey": "orderkey",
6666
},
67+
{
68+
"name": "null_test",
69+
"tableReference": {
70+
"schema": "dbo",
71+
"table": "null_test",
72+
},
73+
"columns": [
74+
{"name": "id", "type": "integer"},
75+
{"name": "letter", "type": "varchar"},
76+
],
77+
},
6778
],
6879
}
6980

@@ -103,6 +114,13 @@ def mssql(request) -> SqlServerContainer:
103114
@level2type = N'COLUMN', @level2name = 'o_comment';
104115
""")
105116
)
117+
conn.execute(text('CREATE TABLE "null_test" ("id" INT, "letter" TEXT)'))
118+
conn.execute(
119+
text(
120+
"INSERT INTO \"null_test\" (\"id\", \"letter\") VALUES (1, 'one'), (2, 'two'), (NULL, 'three')"
121+
)
122+
)
123+
106124
request.addfinalizer(mssql.stop)
107125
return mssql
108126

@@ -438,6 +456,58 @@ async def test_password_with_special_characters(client):
438456
assert "Microsoft SQL Server 2019" in response.text
439457

440458

459+
async def test_order_by_nulls_last(client, manifest_str, mssql: SqlServerContainer):
460+
connection_info = _to_connection_info(mssql)
461+
response = await client.post(
462+
url=f"{base_url}/query",
463+
json={
464+
"connectionInfo": connection_info,
465+
"manifestStr": manifest_str,
466+
"sql": 'SELECT letter FROM "null_test" ORDER BY id',
467+
},
468+
params={"limit": 3},
469+
)
470+
assert response.status_code == 200
471+
result = response.json()
472+
assert len(result["data"]) == 3
473+
assert result["data"][0][0] == "one"
474+
assert result["data"][1][0] == "two"
475+
assert result["data"][2][0] == "three"
476+
477+
connection_info = _to_connection_info(mssql)
478+
response = await client.post(
479+
url=f"{base_url}/query",
480+
json={
481+
"connectionInfo": connection_info,
482+
"manifestStr": manifest_str,
483+
"sql": 'SELECT letter FROM "null_test" ORDER BY id LIMIT 3',
484+
},
485+
)
486+
assert response.status_code == 200
487+
result = response.json()
488+
assert len(result["data"]) == 3
489+
assert result["data"][0][0] == "one"
490+
assert result["data"][1][0] == "two"
491+
assert result["data"][2][0] == "three"
492+
493+
494+
async def test_order_by_require_limit(client, manifest_str, mssql: SqlServerContainer):
495+
connection_info = _to_connection_info(mssql)
496+
response = await client.post(
497+
url=f"{base_url}/query",
498+
json={
499+
"connectionInfo": connection_info,
500+
"manifestStr": manifest_str,
501+
"sql": 'SELECT letter FROM "null_test" ORDER BY id NULLS LAST',
502+
},
503+
)
504+
assert response.status_code == 422
505+
assert (
506+
"The query with order-by requires a specific limit to be set in MSSQL."
507+
in response.text
508+
)
509+
510+
441511
def _to_connection_info(mssql: SqlServerContainer):
442512
return {
443513
"host": mssql.get_container_host_ip(),

ibis-server/tests/routers/v2/connector/test_mysql.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,24 @@ async def test_connection_valid_ssl_mode(client, mysql_ssl_off: MySqlContainer):
475475
assert response.text == '"8.0.40"'
476476

477477

478+
async def test_order_by_nulls_last(client, manifest_str, mysql: MySqlContainer):
479+
connection_info = _to_connection_info(mysql)
480+
response = await client.post(
481+
url=f"{base_url}/query",
482+
json={
483+
"connectionInfo": connection_info,
484+
"manifestStr": manifest_str,
485+
"sql": "SELECT letter FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num, letter) ORDER BY num",
486+
},
487+
)
488+
assert response.status_code == 200
489+
result = response.json()
490+
assert len(result["data"]) == 3
491+
assert result["data"][0][0] == "one"
492+
assert result["data"][1][0] == "two"
493+
assert result["data"][2][0] == "three"
494+
495+
478496
def _to_connection_info(mysql: MySqlContainer):
479497
return {
480498
"host": mysql.get_container_host_ip(),

ibis-server/tests/routers/v2/connector/test_postgres.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,24 @@ async def test_connection_timeout(client, manifest_str, postgres: PostgresContai
10461046
)
10471047

10481048

1049+
async def test_order_by_nulls_last(client, manifest_str, postgres: PostgresContainer):
1050+
connection_info = _to_connection_info(postgres)
1051+
response = await client.post(
1052+
url=f"{base_url}/query",
1053+
json={
1054+
"connectionInfo": connection_info,
1055+
"manifestStr": manifest_str,
1056+
"sql": "SELECT letter FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num, letter) ORDER BY num",
1057+
},
1058+
)
1059+
assert response.status_code == 200
1060+
result = response.json()
1061+
assert len(result["data"]) == 3
1062+
assert result["data"][0][0] == "one"
1063+
assert result["data"][1][0] == "two"
1064+
assert result["data"][2][0] == "three"
1065+
1066+
10491067
def _to_connection_info(pg: PostgresContainer):
10501068
return {
10511069
"host": pg.get_container_host_ip(),

ibis-server/tests/routers/v3/connector/bigquery/test_query.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import orjson
55
import pytest
66

7+
from app.dependencies import X_WREN_FALLBACK_DISABLE
78
from tests.routers.v3.connector.bigquery.conftest import base_url
89

910
manifest = {
@@ -56,6 +57,17 @@
5657
],
5758
"primaryKey": "o_orderkey",
5859
},
60+
{
61+
"name": "null_test",
62+
"tableReference": {
63+
"schema": "engine_ci",
64+
"table": "null_test",
65+
},
66+
"columns": [
67+
{"name": "id", "type": "integer"},
68+
{"name": "letter", "type": "varchar"},
69+
],
70+
},
5971
],
6072
}
6173

@@ -336,3 +348,41 @@ async def test_decimal_precision(client, manifest_str, connection_info):
336348
result = response.json()
337349
assert len(result["data"]) == 1
338350
assert result["data"][0][0] == "0.333333333"
351+
352+
353+
async def test_order_by_nulls_last(client, manifest_str, connection_info):
354+
response = await client.post(
355+
url=f"{base_url}/query",
356+
json={
357+
"connectionInfo": connection_info,
358+
"manifestStr": manifest_str,
359+
"sql": "SELECT letter FROM null_test ORDER BY id",
360+
},
361+
headers={
362+
X_WREN_FALLBACK_DISABLE: "true", # Disable fallback to DuckDB
363+
},
364+
)
365+
assert response.status_code == 200
366+
result = response.json()
367+
assert len(result["data"]) == 3
368+
assert result["data"][0][0] == "one"
369+
assert result["data"][1][0] == "two"
370+
assert result["data"][2][0] == "three"
371+
372+
response = await client.post(
373+
url=f"{base_url}/query",
374+
json={
375+
"connectionInfo": connection_info,
376+
"manifestStr": manifest_str,
377+
"sql": "SELECT letter FROM null_test ORDER BY id desc",
378+
},
379+
headers={
380+
X_WREN_FALLBACK_DISABLE: "true", # Disable fallback to DuckDB
381+
},
382+
)
383+
assert response.status_code == 200
384+
result = response.json()
385+
assert len(result["data"]) == 3
386+
assert result["data"][0][0] == "two"
387+
assert result["data"][1][0] == "one"
388+
assert result["data"][2][0] == "three"

ibis-server/tests/routers/v3/connector/oracle/conftest.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ def oracle(request) -> OracleDbContainer:
8888
"INSERT INTO test_number (id, id_p, id_p_s) VALUES (1, 1234567890, 12345678.12)"
8989
)
9090
)
91+
conn.execute(text('CREATE TABLE "null_test" ("id" INT, "letter" CLOB)'))
92+
conn.execute(
93+
text(
94+
"INSERT INTO \"null_test\" (\"id\", \"letter\") VALUES (1, 'one'), (2, 'two'), (NULL, 'three')"
95+
)
96+
)
9197
request.addfinalizer(oracle.stop)
9298
return oracle
9399

ibis-server/tests/routers/v3/connector/oracle/test_query.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,18 @@
6262
},
6363
],
6464
"primaryKey": "orderkey",
65-
}
65+
},
66+
{
67+
"name": "null_test",
68+
"tableReference": {
69+
"schema": "SYSTEM",
70+
"table": "null_test",
71+
},
72+
"columns": [
73+
{"name": "id", "type": "integer"},
74+
{"name": "letter", "type": "varchar"},
75+
],
76+
},
6677
],
6778
}
6879

@@ -209,3 +220,41 @@ async def test_query_number_scale(client, connection_info):
209220
"id_p": "int64",
210221
"id_p_s": "decimal128(38, 9)",
211222
}
223+
224+
225+
async def test_order_by_nulls_last(client, manifest_str, connection_info):
226+
response = await client.post(
227+
url=f"{base_url}/query",
228+
json={
229+
"connectionInfo": connection_info,
230+
"manifestStr": manifest_str,
231+
"sql": "SELECT letter FROM null_test ORDER BY id",
232+
},
233+
headers={
234+
X_WREN_FALLBACK_DISABLE: "true", # Disable fallback to DuckDB
235+
},
236+
)
237+
assert response.status_code == 200
238+
result = response.json()
239+
assert len(result["data"]) == 3
240+
assert result["data"][0][0] == "one"
241+
assert result["data"][1][0] == "two"
242+
assert result["data"][2][0] == "three"
243+
244+
response = await client.post(
245+
url=f"{base_url}/query",
246+
json={
247+
"connectionInfo": connection_info,
248+
"manifestStr": manifest_str,
249+
"sql": 'SELECT "letter" FROM "null_test" ORDER BY "id" desc',
250+
},
251+
headers={
252+
X_WREN_FALLBACK_DISABLE: "true", # Disable fallback to DuckDB
253+
},
254+
)
255+
assert response.status_code == 200
256+
result = response.json()
257+
assert len(result["data"]) == 3
258+
assert result["data"][0][0] == "two"
259+
assert result["data"][1][0] == "one"
260+
assert result["data"][2][0] == "three"

ibis-server/tests/routers/v3/connector/postgres/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ def postgres(request) -> PostgresContainer:
2929
pd.read_parquet(file_path("resource/tpch/data/customer.parquet")).to_sql(
3030
"customer", engine, index=False
3131
)
32+
with engine.begin() as conn:
33+
conn.execute(sqlalchemy.text("CREATE TABLE null_test (id INT, letter TEXT)"))
34+
conn.execute(
35+
sqlalchemy.text(
36+
"INSERT INTO null_test (id, letter) VALUES (1, 'one'), (2, 'two'), (NULL, 'three')"
37+
)
38+
)
39+
3240
request.addfinalizer(pg.stop)
3341
return pg
3442

0 commit comments

Comments
 (0)