Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 0f522dd

Browse files
author
Sergey Vasilyev
committed
Squash abstract table into already existent ITable
1 parent d58307a commit 0f522dd

File tree

5 files changed

+55
-106
lines changed

5 files changed

+55
-106
lines changed

data_diff/abcs/database_types.py

Lines changed: 1 addition & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import decimal
22
from abc import ABC, abstractmethod
3-
from typing import Sequence, Optional, Tuple, Union, Dict, List
3+
from typing import Tuple, Union
44
from datetime import datetime
55

66
from runtype import dataclass
7-
from typing_extensions import Self
87

9-
from data_diff.abcs.compiler import AbstractCompiler
108
from data_diff.utils import ArithAlphanumeric, ArithUUID, Unknown
119

1210

@@ -172,91 +170,3 @@ class UnknownColType(ColType):
172170
text: str
173171

174172
supported = False
175-
176-
177-
class AbstractTable(ABC):
178-
@abstractmethod
179-
def select(self, *exprs, distinct=False, **named_exprs) -> "AbstractTable":
180-
"""Choose new columns, based on the old ones. (aka Projection)
181-
182-
Parameters:
183-
exprs: List of expressions to constitute the columns of the new table.
184-
If not provided, returns all columns in source table (i.e. ``select *``)
185-
distinct: 'select' or 'select distinct'
186-
named_exprs: More expressions to constitute the columns of the new table, aliased to keyword name.
187-
188-
"""
189-
# XXX distinct=SKIP
190-
191-
@abstractmethod
192-
def where(self, *exprs) -> "AbstractTable":
193-
"""Filter the rows, based on the given predicates. (aka Selection)"""
194-
195-
@abstractmethod
196-
def order_by(self, *exprs) -> "AbstractTable":
197-
"""Order the rows lexicographically, according to the given expressions."""
198-
199-
@abstractmethod
200-
def limit(self, limit: int) -> "AbstractTable":
201-
"""Stop yielding rows after the given limit. i.e. take the first 'n=limit' rows"""
202-
203-
@abstractmethod
204-
def join(self, target) -> "AbstractTable":
205-
"""Join the current table with the target table, returning a new table containing both side-by-side.
206-
207-
When joining, it's recommended to use explicit tables names, instead of `this`, in order to avoid potential name collisions.
208-
209-
Example:
210-
::
211-
212-
person = table('person')
213-
city = table('city')
214-
215-
name_and_city = (
216-
person
217-
.join(city)
218-
.on(person['city_id'] == city['id'])
219-
.select(person['id'], city['name'])
220-
)
221-
"""
222-
223-
@abstractmethod
224-
def group_by(self, *keys):
225-
"""Behaves like in SQL, except for a small change in syntax:
226-
227-
A call to `.agg()` must follow every call to `.group_by()`.
228-
229-
Example:
230-
::
231-
232-
# SELECT a, sum(b) FROM tmp GROUP BY 1
233-
table('tmp').group_by(this.a).agg(this.b.sum())
234-
235-
# SELECT a, sum(b) FROM a GROUP BY 1 HAVING (b > 10)
236-
(table('tmp')
237-
.group_by(this.a)
238-
.agg(this.b.sum())
239-
.having(this.b > 10)
240-
)
241-
242-
"""
243-
244-
@abstractmethod
245-
def count(self) -> int:
246-
"""SELECT count() FROM self"""
247-
248-
@abstractmethod
249-
def union(self, other: "ITable"):
250-
"""SELECT * FROM self UNION other"""
251-
252-
@abstractmethod
253-
def union_all(self, other: "ITable"):
254-
"""SELECT * FROM self UNION ALL other"""
255-
256-
@abstractmethod
257-
def minus(self, other: "ITable"):
258-
"""SELECT * FROM self EXCEPT other"""
259-
260-
@abstractmethod
261-
def intersect(self, other: "ITable"):
262-
"""SELECT * FROM self INTERSECT other"""

data_diff/abcs/mixins.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def random_sample_ratio_approx(self, tbl: str, ratio: float) -> str:
146146
i.e. the actual amount of rows returned may vary by standard deviation.
147147
"""
148148

149-
# def random_sample_ratio(self, table: AbstractTable, ratio: float):
149+
# def random_sample_ratio(self, table: ITable, ratio: float):
150150
# """Take a random sample of the size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows
151151
# """
152152

data_diff/databases/base.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
CreateTable, Cte, \
2626
CurrentTimestamp, DropTable, Func, \
2727
GroupBy, \
28-
In, InsertToTable, IsDistinctFrom, \
28+
ITable, In, InsertToTable, IsDistinctFrom, \
2929
Join, \
3030
Param, \
3131
Random, \
@@ -34,7 +34,6 @@
3434
from data_diff.abcs.database_types import (
3535
Array,
3636
Struct,
37-
AbstractTable,
3837
ColType,
3938
Integer,
4039
Decimal,
@@ -207,11 +206,11 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
207206

208207

209208
class Mixin_RandomSample(AbstractMixin_RandomSample):
210-
def random_sample_n(self, tbl: AbstractTable, size: int) -> AbstractTable:
209+
def random_sample_n(self, tbl: ITable, size: int) -> ITable:
211210
# TODO use a more efficient algorithm, when the table count is known
212211
return tbl.order_by(Random()).limit(size)
213212

214-
def random_sample_ratio_approx(self, tbl: AbstractTable, ratio: float) -> AbstractTable:
213+
def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
215214
return tbl.where(Random() < ratio)
216215

217216

data_diff/databases/duckdb.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
Text,
1515
FractionalType,
1616
Boolean,
17-
AbstractTable,
1817
)
1918
from data_diff.abcs.mixins import (
2019
AbstractMixin_MD5,
@@ -30,7 +29,7 @@
3029
TIMESTAMP_PRECISION_POS,
3130
)
3231
from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, Mixin_Schema
33-
from data_diff.queries.ast_classes import Func, Compilable
32+
from data_diff.queries.ast_classes import Func, Compilable, ITable
3433
from data_diff.queries.api import code
3534

3635

@@ -62,10 +61,10 @@ def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
6261

6362

6463
class Mixin_RandomSample(AbstractMixin_RandomSample):
65-
def random_sample_n(self, tbl: AbstractTable, size: int) -> AbstractTable:
64+
def random_sample_n(self, tbl: ITable, size: int) -> ITable:
6665
return code("SELECT * FROM ({tbl}) USING SAMPLE {size};", tbl=tbl, size=size)
6766

68-
def random_sample_ratio_approx(self, tbl: AbstractTable, ratio: float) -> AbstractTable:
67+
def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
6968
return code("SELECT * FROM ({tbl}) USING SAMPLE {percent}%;", tbl=tbl, percent=int(100 * ratio))
7069

7170

data_diff/queries/ast_classes.py

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
from data_diff.utils import ArithString
99
from data_diff.abcs.compiler import Compilable
10-
from data_diff.abcs.database_types import AbstractTable
1110
from data_diff.schema import Schema
1211

1312
from data_diff.queries.base import SKIP, args_as_tuple, SqeletonError
@@ -81,12 +80,20 @@ def _drop_skips_dict(exprs_dict):
8180
return {k: v for k, v in exprs_dict.items() if v is not SKIP}
8281

8382

84-
class ITable(AbstractTable):
83+
class ITable:
8584
source_table: Any
8685
schema: Schema = None
8786

8887
def select(self, *exprs, distinct=SKIP, optimizer_hints=SKIP, **named_exprs) -> "ITable":
89-
"""Create a new table with the specified fields"""
88+
"""Choose new columns, based on the old ones. (aka Projection)
89+
90+
Parameters:
91+
exprs: List of expressions to constitute the columns of the new table.
92+
If not provided, returns all columns in source table (i.e. ``select *``)
93+
distinct: 'select' or 'select distinct'
94+
named_exprs: More expressions to constitute the columns of the new table, aliased to keyword name.
95+
96+
"""
9097
exprs = args_as_tuple(exprs)
9198
exprs = _drop_skips(exprs)
9299
named_exprs = _drop_skips_dict(named_exprs)
@@ -95,6 +102,7 @@ def select(self, *exprs, distinct=SKIP, optimizer_hints=SKIP, **named_exprs) ->
95102
return Select.make(self, columns=exprs, distinct=distinct, optimizer_hints=optimizer_hints)
96103

97104
def where(self, *exprs):
105+
"""Filter the rows, based on the given predicates. (aka Selection)"""
98106
exprs = args_as_tuple(exprs)
99107
exprs = _drop_skips(exprs)
100108
if not exprs:
@@ -104,6 +112,7 @@ def where(self, *exprs):
104112
return Select.make(self, where_exprs=exprs)
105113

106114
def order_by(self, *exprs):
115+
"""Order the rows lexicographically, according to the given expressions."""
107116
exprs = _drop_skips(exprs)
108117
if not exprs:
109118
return self
@@ -112,19 +121,50 @@ def order_by(self, *exprs):
112121
return Select.make(self, order_by_exprs=exprs)
113122

114123
def limit(self, limit: int):
124+
"""Stop yielding rows after the given limit. i.e. take the first 'n=limit' rows"""
115125
if limit is SKIP:
116126
return self
117127

118128
return Select.make(self, limit_expr=limit)
119129

120130
def join(self, target: "ITable"):
121-
"""Join this table with the target table."""
131+
"""Join the current table with the target table, returning a new table containing both side-by-side.
132+
133+
When joining, it's recommended to use explicit table names, instead of `this`, in order to avoid potential name collisions.
134+
135+
Example:
136+
::
137+
138+
person = table('person')
139+
city = table('city')
140+
141+
name_and_city = (
142+
person
143+
.join(city)
144+
.on(person['city_id'] == city['id'])
145+
.select(person['id'], city['name'])
146+
)
147+
"""
122148
return Join([self, target])
123149

124150
def group_by(self, *keys) -> "GroupBy":
125-
"""Group according to the given keys.
151+
"""Behaves like in SQL, except for a small change in syntax:
152+
153+
A call to `.agg()` must follow every call to `.group_by()`.
154+
155+
Example:
156+
::
157+
158+
# SELECT a, sum(b) FROM tmp GROUP BY 1
159+
table('tmp').group_by(this.a).agg(this.b.sum())
160+
161+
# SELECT a, sum(b) FROM tmp GROUP BY 1 HAVING (b > 10)
162+
(table('tmp')
163+
.group_by(this.a)
164+
.agg(this.b.sum())
165+
.having(this.b > 10)
166+
)
126167
127-
Must be followed by a call to :ref:``GroupBy.agg()``
128168
"""
129169
keys = _drop_skips(keys)
130170
resolve_names(self.source_table, keys)
@@ -145,6 +185,7 @@ def __getitem__(self, column):
145185
return self._get_column(column)
146186

147187
def count(self):
188+
"""SELECT count() FROM self"""
148189
return Select(self, [Count()])
149190

150191
def union(self, other: "ITable"):

0 commit comments

Comments
 (0)