Skip to content

Commit 18af04d

Browse files
Make fast 'exists' checks generic across database engines
1 parent f3821e4 commit 18af04d

File tree

6 files changed

+123
-16
lines changed

6 files changed

+123
-16
lines changed

cardinal_pythonlib/sqlalchemy/core_query.py

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,18 @@
3333
from sqlalchemy.exc import MultipleResultsFound
3434
from sqlalchemy.orm.session import Session
3535
from sqlalchemy.sql.expression import (
36+
case,
3637
column,
3738
exists,
3839
func,
39-
literal,
4040
select,
4141
table,
4242
text,
4343
)
4444
from sqlalchemy.sql.schema import Table
45-
from sqlalchemy.sql.selectable import Select
45+
from sqlalchemy.sql.selectable import Select, TableClause
4646

4747
from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
48-
from cardinal_pythonlib.sqlalchemy.dialect import SqlaDialectName
4948

5049
log = get_brace_style_log_with_null_handler(__name__)
5150

@@ -233,15 +232,17 @@ def count_star_and_max(
233232
# http://docs.sqlalchemy.org/en/latest/orm/query.html
234233

235234

236-
def exists_in_table(session: Session, table_: Table, *criteria: Any) -> bool:
235+
def exists_in_table(
236+
session: Session, table_: Union[Table, TableClause], *criteria: Any
237+
) -> bool:
237238
"""
238239
Implements an efficient way of detecting if a record or records exist;
239240
should be faster than ``COUNT(*)`` in some circumstances.
240241
241242
Args:
242243
session: SQLAlchemy :class:`Session`, :class:`Engine`, or
243244
:class:`Connection` object
244-
table_: SQLAlchemy :class:`Table` object
245+
table_: SQLAlchemy :class:`Table` object or table clause
245246
criteria: optional SQLAlchemy "where" criteria
246247
247248
Returns:
@@ -262,15 +263,70 @@ def exists_in_table(session: Session, table_: Table, *criteria: Any) -> bool:
262263
exists_clause = exists_clause.where(criterion)
263264
# ... EXISTS (SELECT * FROM tablename WHERE ...)
264265

265-
if session.get_bind().dialect.name == SqlaDialectName.MSSQL:
266-
query = select(literal(True)).where(exists_clause)
267-
# ... SELECT 1 WHERE EXISTS (SELECT * FROM tablename WHERE ...)
268-
else:
269-
query = select(exists_clause)
270-
# ... SELECT EXISTS (SELECT * FROM tablename WHERE ...)
271-
266+
# Methods as follows.
267+
# SQL validation: http://developer.mimer.com/validator/
268+
# Standard syntax: https://en.wikipedia.org/wiki/SQL_syntax
269+
# We can make it conditional on dialect via
270+
# session.get_bind().dialect.name
271+
# but it would be better not to need to.
272+
#
273+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
274+
# SELECT 1 FROM mytable WHERE EXISTS (SELECT * FROM mytable WHERE ...)
275+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
276+
# - Produces multiple results (a 1 for each row).
277+
#
278+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
279+
# SELECT 1 WHERE EXISTS (SELECT * FROM tablename WHERE ...)
280+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
281+
# - Produces either 1 or NULL (no rows).
282+
# - Implementation:
283+
#
284+
# query = select(literal(True)).where(exists_clause)
285+
# result = session.execute(query).scalar()
286+
# return bool(result) # None/0 become False; 1 becomes True
287+
#
288+
# - However, may be non-standard: no FROM clause.
289+
# - Works on SQL Server (empirically).
290+
#
291+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
292+
# SELECT EXISTS (SELECT * FROM tablename WHERE ...)
293+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
294+
# - Produces 0 or 1.
295+
# - Implementation:
296+
#
297+
# query = select(exists_clause)
298+
# result = session.execute(query).scalar()
299+
# return bool(result)
300+
#
301+
# - But it may not be standard.
302+
#
303+
# - Supported by MySQL:
304+
# - https://dev.mysql.com/doc/refman/8.4/en/exists-and-not-exists-subqueries.html # noqa: E501
305+
# - and an empirical test
306+
#
307+
# Suported by SQLite:
308+
# - https://www.sqlite.org/lang_expr.html#the_exists_operator
309+
# - and an empirical test
310+
#
311+
# Possibly not SQL Server.
312+
#
313+
# Possibly not Databricks.
314+
# - https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select.html # noqa: E501
315+
# - https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-where.html # noqa: E501
316+
#
317+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
318+
# SELECT CASE WHEN EXISTS(SELECT * FROM tablename WHERE...) THEN 0 ELSE 1 END # noqa: E501
319+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
320+
# - ANSI standard.
321+
# - https://stackoverflow.com/questions/17284688/how-to-efficiently-check-if-a-table-is-empty # noqa: E501
322+
# - Returns 0 or 1.
323+
# - May be possible to use "SELECT 1 FROM tablename" also, but unclear
324+
# what's faster, and likely EXISTS() should optimise.
325+
# - Implementation as below.
326+
327+
query = select(case((exists_clause, 1), else_=0))
272328
result = session.execute(query).scalar()
273-
return bool(result)
329+
return bool(result) # None/0 become False; 1 becomes True
274330

275331

276332
def exists_plain(session: Session, tablename: str, *criteria: Any) -> bool:

cardinal_pythonlib/sqlalchemy/dialect.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ class SqlaDialectName(object):
4444
Dialect names used by SQLAlchemy.
4545
"""
4646

47+
# SQLAlchemy itself:
48+
4749
FIREBIRD = "firebird"
4850
MYSQL = "mysql"
4951
MSSQL = "mssql"
@@ -53,6 +55,12 @@ class SqlaDialectName(object):
5355
SQLSERVER = MSSQL # synonym
5456
SYBASE = "sybase"
5557

58+
# Additional third-party dialects:
59+
60+
DATABRICKS = "databricks"
61+
# ... https://github.com/databricks/databricks-sqlalchemy
62+
# ... https://docs.databricks.com/en/sql/language-manual/index.html
63+
5664

5765
ALL_SQLA_DIALECTS = list(
5866
set(

cardinal_pythonlib/sqlalchemy/engine_func.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,16 @@ def is_sqlserver_2008_or_later(engine: "Engine") -> bool:
130130
return False
131131
version_tuple = get_sqlserver_product_version(engine)
132132
return version_tuple >= (SQLSERVER_MAJOR_VERSION_2008,)
133+
134+
135+
# =============================================================================
136+
# Helper functions for Databricks
137+
# =============================================================================
138+
139+
140+
def is_databricks(engine: "Engine") -> bool:
141+
"""
142+
Is the SQLAlchemy :class:`Engine` a Databricks database?
143+
"""
144+
dialect_name = get_dialect_name(engine)
145+
return dialect_name == SqlaDialectName.DATABRICKS

cardinal_pythonlib/sqlalchemy/tests/core_query_tests.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,22 @@
5454

5555

5656
class CoreQueryTests(TestCase):
57+
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
58+
self.echo = echo
59+
super().__init__(*args, **kwargs)
60+
5761
def setUp(self) -> None:
58-
self.engine = create_engine(SQLITE_MEMORY_URL, future=True)
62+
self.engine = create_engine(
63+
SQLITE_MEMORY_URL, echo=self.echo, future=True
64+
)
5965
self.tablename = "t"
6066
self.a = "a"
6167
self.b = "b"
6268
self.a_val1 = 1
6369
self.a_val2 = 2
6470
self.b_val1 = 101
6571
self.b_val2 = 102
72+
self.emptytablename = "emptytable"
6673
with self.engine.begin() as con:
6774
con.execute(
6875
text(
@@ -84,12 +91,16 @@ def setUp(self) -> None:
8491
f"VALUES ({self.a_val2}, {self.b_val2})"
8592
)
8693
)
94+
con.execute(
95+
text(f"CREATE TABLE {self.emptytablename} (x INTEGER)")
96+
)
8797
self.session = sessionmaker(
8898
bind=self.engine, future=True
8999
)() # type: Session
90100
self.metadata = MetaData()
91101
self.metadata.reflect(bind=self.engine)
92102
self.table = self.metadata.tables[self.tablename]
103+
self.emptytable = self.metadata.tables[self.emptytablename]
93104

94105
# noinspection DuplicatedCode
95106
def test_get_rows_fieldnames_from_raw_sql(self) -> None:
@@ -117,20 +128,39 @@ def test_count_star_and_max(self) -> None:
117128
self.assertEqual(maximum, self.b_val2)
118129

119130
def test_exists_in_table(self) -> None:
131+
# exists:
120132
exists1 = exists_in_table(self.session, self.table)
121133
self.assertTrue(exists1)
122134
exists2 = exists_in_table(
123135
self.session, self.table, column(self.a) == 1
124136
)
125137
self.assertTrue(exists2)
138+
# does not exist:
139+
exists3 = exists_in_table(
140+
self.session, self.table, column(self.a) == 99
141+
)
142+
self.assertFalse(exists3)
143+
exists4 = exists_in_table(self.session, self.emptytable)
144+
self.assertFalse(exists4)
126145

127146
def test_exists_plain(self) -> None:
147+
# exists:
128148
exists1 = exists_plain(self.session, self.tablename)
129149
self.assertTrue(exists1)
130150
exists2 = exists_plain(
131151
self.session, self.tablename, column(self.a) == 1
132152
)
133153
self.assertTrue(exists2)
154+
# does not exist:
155+
exists3 = exists_plain(
156+
self.session, self.tablename, column(self.a) == 99
157+
)
158+
self.assertFalse(exists3)
159+
exists4 = exists_plain(
160+
self.session,
161+
self.emptytablename,
162+
)
163+
self.assertFalse(exists4)
134164

135165
def test_fetch_all_first_values(self) -> None:
136166
select_stmt = select(text("*")).select_from(table(self.tablename))

cardinal_pythonlib/sqlalchemy/tests/orm_schema_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ class Pet(Base):
6565

6666

6767
class OrmQueryTests(TestCase):
68-
def __init__(self, *args, echo: bool = True, **kwargs) -> None:
68+
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
6969
self.echo = echo
7070
super().__init__(*args, **kwargs)
7171

cardinal_pythonlib/sqlalchemy/tests/schema_tests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def test_schema_functions(self) -> None:
190190

191191

192192
class IndexExistsTests(unittest.TestCase):
193-
def __init__(self, *args, echo: bool = True, **kwargs) -> None:
193+
def __init__(self, *args, echo: bool = False, **kwargs) -> None:
194194
self.echo = echo
195195
super().__init__(*args, **kwargs)
196196

0 commit comments

Comments
 (0)