Skip to content

Commit

Permalink
feat: indexer option to skip constraints creation (#1691)
Browse files Browse the repository at this point in the history
- Open Data Editor needs a way to create [flexible database
tables](okfn/opendataeditor#552) that are allowed to contain
some constraint violations, which can then be fixed later. So here is a new
`Indexer.ignore_constraints` flag to achieve this goal.

---

@pierrecamilleri 
can you please take a look?
  • Loading branch information
roll authored Sep 28, 2024
1 parent 80f03c6 commit 7cadf17
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 22 deletions.
12 changes: 12 additions & 0 deletions frictionless/formats/sql/__spec__/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,15 @@ def test_sql_mapper_write_field():
column2 = mapper.write_field(field2, table_name="table")
assert isinstance(column1.type, sa.Integer)
assert isinstance(column2.type, sa.Text)


def test_sql_mapper_write_field_ignore_constraints():
    """Check that ``ignore_constraints=True`` keeps ``required`` off the column."""
    mapper = formats.sql.SqlMapper("sqlite")
    enforced_field, relaxed_field = Schema.describe("data/table.csv").fields

    # Both fields get the same constraint; only the keyword argument differs.
    for field in (enforced_field, relaxed_field):
        field.constraints = {"required": True}

    enforced_column = mapper.write_field(enforced_field, table_name="table")
    relaxed_column = mapper.write_field(
        relaxed_field,
        table_name="table",
        ignore_constraints=True,
    )

    assert enforced_column.nullable is False
    assert relaxed_column.nullable is True
6 changes: 5 additions & 1 deletion frictionless/formats/sql/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def write_schema(
table_name: str,
force: bool = False,
with_metadata: bool = False,
ignore_constraints: bool = False,
) -> None:
with self.engine.begin() as conn:
if force:
Expand All @@ -130,7 +131,10 @@ def write_schema(
self.metadata.drop_all(conn, tables=[existing_table])
self.metadata.remove(existing_table)
table = self.mapper.write_schema(
schema, table_name=table_name, with_metadata=with_metadata
schema,
table_name=table_name,
with_metadata=with_metadata,
ignore_constraints=ignore_constraints,
)
table = table.to_metadata(self.metadata)
self.metadata.create_all(conn, tables=[table])
Expand Down
62 changes: 42 additions & 20 deletions frictionless/formats/sql/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,19 @@ def read_type(self, column_type: str) -> str:
# Write

def write_schema( # type: ignore
self, schema: Schema, *, table_name: str, with_metadata: bool = False
self,
schema: Schema,
*,
table_name: str,
with_metadata: bool = False,
ignore_constraints: bool = False,
) -> Table:
"""Convert frictionless schema to sqlalchemy table"""
sa = platform.sqlalchemy
columns: List[Column] = [] # type: ignore
constraints: List[Constraint] = []

# Fields
# Metadata
if with_metadata:
columns.append( # type: ignore
sa.Column(
Expand All @@ -171,16 +176,19 @@ def write_schema( # type: ignore
)
)
columns.append(sa.Column(settings.ROW_VALID_IDENTIFIER, sa.Boolean)) # type: ignore

# Fields
for field in schema.fields:
column = self.write_field(field, table_name=table_name) # type: ignore
column = self.write_field( # type: ignore
field, table_name=table_name, ignore_constraints=ignore_constraints
)
columns.append(column) # type: ignore

# Primary key
if schema.primary_key:
Class = sa.UniqueConstraint if with_metadata else sa.PrimaryKeyConstraint
if not with_metadata:
constraint = Class(*schema.primary_key)
constraints.append(constraint)
constraint = Class(*schema.primary_key)
constraints.append(constraint)

# Foreign keys
for fk in schema.foreign_keys:
Expand All @@ -192,11 +200,18 @@ def write_schema( # type: ignore
constraint = sa.ForeignKeyConstraint(fields, foreign_fields)
constraints.append(constraint)

# Table
table = sa.Table(table_name, sa.MetaData(), *(columns + constraints))
# Prepare table
table_args = [table_name, sa.MetaData(), *columns] # type: ignore
if not ignore_constraints:
table_args += constraints # type: ignore

# Create table
table = sa.Table(*table_args)
return table

def write_field(self, field: Field, *, table_name: str) -> Column: # type: ignore
def write_field( # type: ignore
self, field: Field, *, table_name: str, ignore_constraints: bool = False
) -> Column: # type: ignore
"""Convert frictionless Field to sqlalchemy Column"""
sa = platform.sqlalchemy
quote = self.dialect.identifier_preparer.quote # type: ignore
Expand All @@ -206,8 +221,17 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
# General properties
quoted_name = quote(field.name)
column_type = self.write_type(field.type) # type: ignore

# Required constraint
nullable = not field.required

# Unique constraint
unique = field.constraints.get("unique", False)
if self.dialect.name == "mysql":
# MySQL requires keys to have an explicit maximum length
# https://stackoverflow.com/questions/1827063/mysql-error-key-specification-without-a-key-length
unique = unique and column_type is not sa.Text

# Length constraints
if field.type == "string":
min_length = field.constraints.get("minLength", None)
Expand All @@ -227,13 +251,6 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
if not isinstance(column_type, sa.CHAR) or self.dialect.name == "sqlite":
checks.append(Check("LENGTH(%s) >= %s" % (quoted_name, min_length)))

# Unique constraint
unique = field.constraints.get("unique", False)
if self.dialect.name == "mysql":
# MySQL requires keys to have an explicit maximum length
# https://stackoverflow.com/questions/1827063/mysql-error-key-specification-without-a-key-length
unique = unique and column_type is not sa.Text

# Others constraints
for const, value in field.constraints.items():
if const == "minimum":
Expand All @@ -252,15 +269,20 @@ def write_field(self, field: Field, *, table_name: str) -> Column: # type: igno
enum_name = "%s_%s_enum" % (table_name, field.name)
column_type = sa.Enum(*value, name=enum_name)

# Create column
column_args = [field.name, column_type] + checks # type: ignore
# Prepare column
# TODO: shall it use "autoincrement=False"
# https://github.com/Mause/duckdb_engine/issues/595#issuecomment-1495408566
column_kwargs = {"nullable": nullable, "unique": unique}
column_args = [field.name, column_type] # type: ignore
column_kwargs = {}
if field.description:
column_kwargs["comment"] = field.description
column = sa.Column(*column_args, **column_kwargs)
if not ignore_constraints:
column_args += checks # type: ignore
column_kwargs["nullable"] = nullable
column_kwargs["unique"] = unique

# Create column
column = sa.Column(*column_args, **column_kwargs)
return column

def write_type(self, field_type: str) -> Type[TypeEngine]: # type: ignore
Expand Down
4 changes: 3 additions & 1 deletion frictionless/indexer/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Indexer:
qsv_path: Optional[str] = None
use_fallback: bool = False
with_metadata: bool = False
ignore_constraints: bool = False
on_row: Optional[types.IOnRow] = None
on_progress: Optional[types.IOnProgress] = None
adapter: SqlAdapter = attrs.field(init=False)
Expand Down Expand Up @@ -72,6 +73,7 @@ def create_table(self):
table_name=self.table_name,
force=True,
with_metadata=self.with_metadata,
ignore_constraints=self.ignore_constraints,
)

def populate_table(self) -> Optional[Report]:
Expand Down Expand Up @@ -117,7 +119,7 @@ def populate_table_fast_sqlite(self):

def populate_table_fast_postgresql(self):
database_url = self.adapter.engine.url.render_as_string(hide_password=False)
with platform.psycopg.connect(database_url) as connection:
with platform.psycopg.connect(database_url) as connection: # type: ignore
with connection.cursor() as cursor:
query = 'COPY "%s" FROM STDIN CSV HEADER' % self.table_name
with cursor.copy(query) as copy: # type: ignore
Expand Down

0 comments on commit 7cadf17

Please sign in to comment.