Squash AbstractTable into the already existing ITable

Sergey Vasilyev · Sergey Vasilyev · commit 0f522ddd799f · 2023-09-25T17:43:21.000+02:00
diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py
@@ -1,12 +1,10 @@
 import decimal
 from abc import ABC, abstractmethod
-from typing import Sequence, Optional, Tuple, Union, Dict, List
+from typing import Tuple, Union
 from datetime import datetime
 
 from runtype import dataclass
-from typing_extensions import Self
 
-from data_diff.abcs.compiler import AbstractCompiler
 from data_diff.utils import ArithAlphanumeric, ArithUUID, Unknown
 
 
@@ -172,91 +170,3 @@ class UnknownColType(ColType):
     text: str
 
     supported = False
-
-
-class AbstractTable(ABC):
-    @abstractmethod
-    def select(self, *exprs, distinct=False, **named_exprs) -> "AbstractTable":
-        """Choose new columns, based on the old ones. (aka Projection)
-
-        Parameters:
-            exprs: List of expressions to constitute the columns of the new table.
-                    If not provided, returns all columns in source table (i.e. ``select *``)
-            distinct: 'select' or 'select distinct'
-            named_exprs: More expressions to constitute the columns of the new table, aliased to keyword name.
-
-        """
-        # XXX distinct=SKIP
-
-    @abstractmethod
-    def where(self, *exprs) -> "AbstractTable":
-        """Filter the rows, based on the given predicates. (aka Selection)"""
-
-    @abstractmethod
-    def order_by(self, *exprs) -> "AbstractTable":
-        """Order the rows lexicographically, according to the given expressions."""
-
-    @abstractmethod
-    def limit(self, limit: int) -> "AbstractTable":
-        """Stop yielding rows after the given limit. i.e. take the first 'n=limit' rows"""
-
-    @abstractmethod
-    def join(self, target) -> "AbstractTable":
-        """Join the current table with the target table, returning a new table containing both side-by-side.
-
-        When joining, it's recommended to use explicit tables names, instead of `this`, in order to avoid potential name collisions.
-
-        Example:
-            ::
-
-                person = table('person')
-                city = table('city')
-
-                name_and_city = (
-                    person
-                    .join(city)
-                    .on(person['city_id'] == city['id'])
-                    .select(person['id'], city['name'])
-                )
-        """
-
-    @abstractmethod
-    def group_by(self, *keys):
-        """Behaves like in SQL, except for a small change in syntax:
-
-        A call to `.agg()` must follow every call to `.group_by()`.
-
-        Example:
-            ::
-
-                # SELECT a, sum(b) FROM tmp GROUP BY 1
-                table('tmp').group_by(this.a).agg(this.b.sum())
-
-                # SELECT a, sum(b) FROM a GROUP BY 1 HAVING (b > 10)
-                (table('tmp')
-                    .group_by(this.a)
-                    .agg(this.b.sum())
-                    .having(this.b > 10)
-                )
-
-        """
-
-    @abstractmethod
-    def count(self) -> int:
-        """SELECT count() FROM self"""
-
-    @abstractmethod
-    def union(self, other: "ITable"):
-        """SELECT * FROM self UNION other"""
-
-    @abstractmethod
-    def union_all(self, other: "ITable"):
-        """SELECT * FROM self UNION ALL other"""
-
-    @abstractmethod
-    def minus(self, other: "ITable"):
-        """SELECT * FROM self EXCEPT other"""
-
-    @abstractmethod
-    def intersect(self, other: "ITable"):
-        """SELECT * FROM self INTERSECT other"""
diff --git a/data_diff/abcs/mixins.py b/data_diff/abcs/mixins.py
@@ -146,7 +146,7 @@ def random_sample_ratio_approx(self, tbl: str, ratio: float) -> str:
         i.e. the actual mount of rows returned may vary by standard deviation.
         """
 
-    # def random_sample_ratio(self, table: AbstractTable, ratio: float):
+    # def random_sample_ratio(self, table: ITable, ratio: float):
     #     """Take a random sample of the size determined by the ratio (0..1), where 0 means no rows, and 1 means all rows
     #     """
 
diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py
@@ -25,7 +25,7 @@
     CreateTable, Cte, \
     CurrentTimestamp, DropTable, Func, \
     GroupBy, \
-    In, InsertToTable, IsDistinctFrom, \
+    ITable, In, InsertToTable, IsDistinctFrom, \
     Join, \
     Param, \
     Random, \
@@ -34,7 +34,6 @@
 from data_diff.abcs.database_types import (
     Array,
     Struct,
-    AbstractTable,
     ColType,
     Integer,
     Decimal,
@@ -207,11 +206,11 @@ def list_tables(self, table_schema: str, like: Compilable = None) -> Compilable:
 
 
 class Mixin_RandomSample(AbstractMixin_RandomSample):
-    def random_sample_n(self, tbl: AbstractTable, size: int) -> AbstractTable:
+    def random_sample_n(self, tbl: ITable, size: int) -> ITable:
         # TODO use a more efficient algorithm, when the table count is known
         return tbl.order_by(Random()).limit(size)
 
-    def random_sample_ratio_approx(self, tbl: AbstractTable, ratio: float) -> AbstractTable:
+    def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
         return tbl.where(Random() < ratio)
 
 
diff --git a/data_diff/databases/duckdb.py b/data_diff/databases/duckdb.py
@@ -14,7 +14,6 @@
     Text,
     FractionalType,
     Boolean,
-    AbstractTable,
 )
 from data_diff.abcs.mixins import (
     AbstractMixin_MD5,
@@ -30,7 +29,7 @@
     TIMESTAMP_PRECISION_POS,
 )
 from data_diff.databases.base import MD5_HEXDIGITS, CHECKSUM_HEXDIGITS, Mixin_Schema
-from data_diff.queries.ast_classes import Func, Compilable
+from data_diff.queries.ast_classes import Func, Compilable, ITable
 from data_diff.queries.api import code
 
 
@@ -62,10 +61,10 @@ def normalize_boolean(self, value: str, _coltype: Boolean) -> str:
 
 
 class Mixin_RandomSample(AbstractMixin_RandomSample):
-    def random_sample_n(self, tbl: AbstractTable, size: int) -> AbstractTable:
+    def random_sample_n(self, tbl: ITable, size: int) -> ITable:
         return code("SELECT * FROM ({tbl}) USING SAMPLE {size};", tbl=tbl, size=size)
 
-    def random_sample_ratio_approx(self, tbl: AbstractTable, ratio: float) -> AbstractTable:
+    def random_sample_ratio_approx(self, tbl: ITable, ratio: float) -> ITable:
         return code("SELECT * FROM ({tbl}) USING SAMPLE {percent}%;", tbl=tbl, percent=int(100 * ratio))
 
 
diff --git a/data_diff/queries/ast_classes.py b/data_diff/queries/ast_classes.py
@@ -7,7 +7,6 @@
 
 from data_diff.utils import ArithString
 from data_diff.abcs.compiler import Compilable
-from data_diff.abcs.database_types import AbstractTable
 from data_diff.schema import Schema
 
 from data_diff.queries.base import SKIP, args_as_tuple, SqeletonError
@@ -81,12 +80,20 @@ def _drop_skips_dict(exprs_dict):
     return {k: v for k, v in exprs_dict.items() if v is not SKIP}
 
 
-class ITable(AbstractTable):
+class ITable:
     source_table: Any
     schema: Schema = None
 
     def select(self, *exprs, distinct=SKIP, optimizer_hints=SKIP, **named_exprs) -> "ITable":
-        """Create a new table with the specified fields"""
+        """Choose new columns, based on the old ones. (aka Projection)
+
+        Parameters:
+            exprs: List of expressions to constitute the columns of the new table.
+                    If not provided, returns all columns in source table (i.e. ``select *``)
+            distinct: 'select' or 'select distinct'
+            named_exprs: More expressions to constitute the columns of the new table, aliased to keyword name.
+
+        """
         exprs = args_as_tuple(exprs)
         exprs = _drop_skips(exprs)
         named_exprs = _drop_skips_dict(named_exprs)
@@ -95,6 +102,7 @@ def select(self, *exprs, distinct=SKIP, optimizer_hints=SKIP, **named_exprs) ->
         return Select.make(self, columns=exprs, distinct=distinct, optimizer_hints=optimizer_hints)
 
     def where(self, *exprs):
+        """Filter the rows, based on the given predicates. (aka Selection)"""
         exprs = args_as_tuple(exprs)
         exprs = _drop_skips(exprs)
         if not exprs:
@@ -104,6 +112,7 @@ def where(self, *exprs):
         return Select.make(self, where_exprs=exprs)
 
     def order_by(self, *exprs):
+        """Order the rows lexicographically, according to the given expressions."""
         exprs = _drop_skips(exprs)
         if not exprs:
             return self
@@ -112,19 +121,50 @@ def order_by(self, *exprs):
         return Select.make(self, order_by_exprs=exprs)
 
     def limit(self, limit: int):
+        """Stop yielding rows after the given limit. i.e. take the first 'n=limit' rows"""
         if limit is SKIP:
             return self
 
         return Select.make(self, limit_expr=limit)
 
     def join(self, target: "ITable"):
-        """Join this table with the target table."""
+        """Join the current table with the target table, returning a new table containing both side-by-side.
+
+        When joining, it's recommended to use explicit table names, instead of `this`, in order to avoid potential name collisions.
+
+        Example:
+            ::
+
+                person = table('person')
+                city = table('city')
+
+                name_and_city = (
+                    person
+                    .join(city)
+                    .on(person['city_id'] == city['id'])
+                    .select(person['id'], city['name'])
+                )
+        """
         return Join([self, target])
 
     def group_by(self, *keys) -> "GroupBy":
-        """Group according to the given keys.
+        """Behaves like in SQL, except for a small change in syntax:
+
+        A call to `.agg()` must follow every call to `.group_by()`.
+
+        Example:
+            ::
+
+                # SELECT a, sum(b) FROM tmp GROUP BY 1
+                table('tmp').group_by(this.a).agg(this.b.sum())
+
+                # SELECT a, sum(b) FROM tmp GROUP BY 1 HAVING (b > 10)
+                (table('tmp')
+                    .group_by(this.a)
+                    .agg(this.b.sum())
+                    .having(this.b > 10)
+                )
 
-        Must be followed by a call to :ref:``GroupBy.agg()``
         """
         keys = _drop_skips(keys)
         resolve_names(self.source_table, keys)
@@ -145,6 +185,7 @@ def __getitem__(self, column):
         return self._get_column(column)
 
     def count(self):
+        """SELECT count() FROM self"""
         return Select(self, [Count()])
 
     def union(self, other: "ITable"):