Skip to content

Commit c677819

Browse files
Returning ids/rows after bulk insert (#107)
* Implement the can_return_rows_from_bulk_insert feature, returning ids or rows after bulk inserts.
* Since mssql-django supports Django 2.2, we also need the pre-Django 3.0 version of the feature flag can_return_rows_from_bulk_insert, namely can_return_ids_from_bulk_insert (cf. https://docs.djangoproject.com/en/4.0/releases/3.0/#database-backend-api).
* My alternative changes on SQLInsertCompiler.as_sql. Maybe a bit ambitious, as we completely forsake the SCOPE_IDENTITY strategy (dead code path - we keep the code here, but we could decide not to, really) in favor of the OUTPUT strategy.
* Don't try to use the OUTPUT clause when inserting without fields.
* Actually we don't really have to offer the feature for Django 2.2, so let's only set can_return_rows_from_bulk_insert to True and not can_return_ids_from_bulk_insert.
* Tentative fix: when there are returning fields but no fields (which means default values insertion - for n objects of course!), we must still fulfill our contract and return the appropriate rows. This means we won't use INSERT INTO (...) DEFAULT VALUES n times, but a single INSERT INTO (...) VALUES (DEFAULT, (...), DEFAULT), (...), (DEFAULT, (...), DEFAULT). Also: be more thorough regarding the infamous feature flag rename from Django 3.0.
* Using MERGE INTO to support bulk insertion of multiple rows into a table with only an IDENTITY column.
* Add a link to a reference web page.
* Attempt to make Django 2.2 tests pass.
* Get back to a lighter diff of the as_sql function vs. the original.
* Use a query to generate a sequence of numbers instead of using the master..spt_values table.
* Update mssql/operations.py
  Co-authored-by: marcperrinoptel <86617454+marcperrinoptel@users.noreply.github.com>
* Simplification & refactoring
  Co-authored-by: marcperrinoptel <marc.perrin@optelgroup.com>
  Co-authored-by: marcperrinoptel <86617454+marcperrinoptel@users.noreply.github.com>
1 parent db20051 commit c677819

File tree

3 files changed

+79
-6
lines changed

3 files changed

+79
-6
lines changed

mssql/compiler.py

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,16 @@ def get_returned_fields(self):
426426
return self.returning_fields
427427
return self.return_id
428428

429+
def can_return_columns_from_insert(self):
    """
    Return the connection's "can return columns from insert" feature flag.

    The flag is read under the attribute name that matches the running
    Django version: ``can_return_columns_from_insert`` from Django 3.0 on,
    ``can_return_id_from_insert`` before that.
    """
    features = self.connection.features
    if django.VERSION < (3, 0, 0):
        # Pre-3.0 name of the same feature flag.
        return features.can_return_id_from_insert
    return features.can_return_columns_from_insert
433+
434+
def can_return_rows_from_bulk_insert(self):
    """
    Return the connection's "can return rows from bulk insert" feature flag.

    The flag is looked up under ``can_return_rows_from_bulk_insert`` on
    Django 3.0 and later, and under ``can_return_ids_from_bulk_insert`` on
    earlier versions.
    """
    flag_name = (
        'can_return_rows_from_bulk_insert'
        if django.VERSION >= (3, 0, 0)
        else 'can_return_ids_from_bulk_insert'
    )
    return getattr(self.connection.features, flag_name)
438+
429439
def fix_auto(self, sql, opts, fields, qn):
430440
if opts.auto_field is not None:
431441
# db_column is None if not explicitly specified by model field
@@ -441,15 +451,39 @@ def fix_auto(self, sql, opts, fields, qn):
441451

442452
return sql
443453

454+
def bulk_insert_default_values_sql(self, table):
    """
    Build one statement that inserts an all-defaults row per queried object.

    The statement declares a ``SEED_ROWS`` CTE of ``SELECT 1`` rows, cross
    joins several copies of it to obtain a large enough row source, keeps
    the first ``len(self.query.objs)`` rows with ``TOP``, and feeds them to
    a ``MERGE`` whose ``ON 1 = 0`` condition is never true, so the
    ``WHEN NOT MATCHED THEN INSERT DEFAULT VALUES`` branch is the one used.

    ``table`` is interpolated into the SQL as-is, so the caller must pass
    an already-quoted table name.

    Returns the SQL text without a trailing semicolon, so that the caller
    can append further clauses.
    """
    seed_rows_number = 8
    rows_needed = len(self.query.objs)

    # 8^4 = 4096 > maximum allowed batch size for the backend = 1000, so
    # four cross-joined copies are normally plenty.  Should a larger batch
    # ever reach this point, widen the join instead of letting TOP silently
    # return (and therefore insert) fewer rows than requested.
    cross_join_power = 4
    while seed_rows_number ** cross_join_power < rows_needed:
        cross_join_power += 1

    def generate_seed_rows(n):
        # n single-row SELECTs glued together: the body of the SEED_ROWS CTE.
        return " UNION ALL ".join("SELECT 1 AS x" for _ in range(n))

    def cross_join(p):
        # p aliased references to SEED_ROWS, i.e. a p-way cross join.
        return ", ".join("SEED_ROWS AS _%s" % i for i in range(p))

    return """
    WITH SEED_ROWS AS (%s)
    MERGE INTO %s
    USING (
    SELECT TOP %s * FROM (SELECT 1 as x FROM %s) FAKE_ROWS
    ) FAKE_DATA
    ON 1 = 0
    WHEN NOT MATCHED THEN
    INSERT DEFAULT VALUES
    """ % (generate_seed_rows(seed_rows_number),
           table,
           rows_needed,
           cross_join(cross_join_power))
477+
444478
def as_sql(self):
445479
# We don't need quote_name_unless_alias() here, since these are all
446480
# going to be column names (so we can avoid the extra overhead).
447481
qn = self.connection.ops.quote_name
448482
opts = self.query.get_meta()
449483
result = ['INSERT INTO %s' % qn(opts.db_table)]
450-
fields = self.query.fields or [opts.pk]
451484

452485
if self.query.fields:
486+
fields = self.query.fields
453487
result.append('(%s)' % ', '.join(qn(f.column) for f in fields))
454488
values_format = 'VALUES (%s)'
455489
value_rows = [
@@ -470,11 +504,31 @@ def as_sql(self):
470504

471505
placeholder_rows, param_rows = self.assemble_as_sql(fields, value_rows)
472506

473-
if self.get_returned_fields() and self.connection.features.can_return_id_from_insert:
474-
result.insert(0, 'SET NOCOUNT ON')
475-
result.append((values_format + ';') % ', '.join(placeholder_rows[0]))
476-
params = [param_rows[0]]
477-
result.append('SELECT CAST(SCOPE_IDENTITY() AS bigint)')
507+
if self.get_returned_fields() and self.can_return_columns_from_insert():
508+
if self.can_return_rows_from_bulk_insert():
509+
if not(self.query.fields):
510+
# There isn't really a single statement to bulk multiple DEFAULT VALUES insertions,
511+
# so we have to use a workaround:
512+
# https://dba.stackexchange.com/questions/254771/insert-multiple-rows-into-a-table-with-only-an-identity-column
513+
result = [self.bulk_insert_default_values_sql(qn(opts.db_table))]
514+
r_sql, self.returning_params = self.connection.ops.return_insert_columns(self.get_returned_fields())
515+
if r_sql:
516+
result.append(r_sql)
517+
sql = " ".join(result) + ";"
518+
return [(sql, None)]
519+
# Regular bulk insert
520+
params = []
521+
r_sql, self.returning_params = self.connection.ops.return_insert_columns(self.get_returned_fields())
522+
if r_sql:
523+
result.append(r_sql)
524+
params += [self.returning_params]
525+
params += param_rows
526+
result.append(self.connection.ops.bulk_insert_sql(fields, placeholder_rows))
527+
else:
528+
result.insert(0, 'SET NOCOUNT ON')
529+
result.append((values_format + ';') % ', '.join(placeholder_rows[0]))
530+
params = [param_rows[0]]
531+
result.append('SELECT CAST(SCOPE_IDENTITY() AS bigint)')
478532
sql = [(" ".join(result), tuple(chain.from_iterable(params)))]
479533
else:
480534
if can_bulk:

mssql/features.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class DatabaseFeatures(BaseDatabaseFeatures):
1212
can_introspect_small_integer_field = True
1313
can_return_columns_from_insert = True
1414
can_return_id_from_insert = True
15+
can_return_rows_from_bulk_insert = True
1516
can_rollback_ddl = True
1617
can_use_chunked_reads = False
1718
for_update_after_from = True

mssql/operations.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,24 @@ def datetime_trunc_sql(self, lookup_type, field_name, tzname):
188188
sql = "CONVERT(datetime2, CONVERT(varchar, %s, 20))" % field_name
189189
return sql
190190

191+
def fetch_returned_insert_rows(self, cursor):
    """
    Return all rows available on ``cursor``, which is expected to have just
    executed an INSERT...OUTPUT INSERTED statement against a table.
    """
    returned_rows = cursor.fetchall()
    return returned_rows
197+
198+
def return_insert_columns(self, fields):
    """
    Build the OUTPUT clause that returns the given fields of inserted rows.

    Returns a ``(sql, params)`` pair.  When ``fields`` is empty or None the
    SQL is an empty string; otherwise it is ``OUTPUT`` followed by one
    ``INSERTED.<quoted column>`` entry per field.  ``params`` is always an
    empty tuple.
    """
    if fields:
        output_columns = []
        for field in fields:
            quoted_column = self.quote_name(field.column)
            output_columns.append('%s.%s' % ('INSERTED', quoted_column))
        return 'OUTPUT %s' % ', '.join(output_columns), ()
    return '', ()
208+
191209
def for_update_sql(self, nowait=False, skip_locked=False, of=()):
192210
if skip_locked:
193211
return 'WITH (ROWLOCK, UPDLOCK, READPAST)'

0 commit comments

Comments
 (0)