Skip to content

Commit b631330

Browse files
committed
update batch update script
1 parent a643563 commit b631330

File tree

1 file changed

+13
-33
lines changed

1 file changed

+13
-33
lines changed

batch_update_perf.py

Lines changed: 13 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from uuid import uuid4
99

1010
import tabulate
11-
from psycopg2.extensions import AsIs
1211
from psycopg2.extras import execute_batch
1312
import psycopg2
1413

@@ -33,6 +32,7 @@ def create_table(conn):
3332
write_date timestamp without time zone
3433
);
3534
CREATE INDEX {TABLE}_gin ON {TABLE} USING gin(name gin_trgm_ops);
35+
CREATE INDEX {TABLE}_some_int ON {TABLE}(some_int);
3636
""")
3737
conn.commit()
3838

@@ -82,42 +82,23 @@ def update_key_without_bypass(cur, id_vals):
8282
for rid, vals in id_vals.items():
8383
updates[tuple(vals)][tuple(vals.values())].append(rid)
8484

85+
def cast(column_name):
86+
if column_name in ('some_int', 'create_uid', 'write_uid'):
87+
return '::int'
88+
elif column_name in ('create_date', 'write_date'):
89+
return '::timestamp'
90+
else:
91+
return '::varchar'
92+
8593
for keys, by_values in updates.items():
8694
sub_table = f"{TABLE}_tmp"
8795
column_temp = ', '.join(f'"{column_name}"' for column_name in ('ids',) + keys)
88-
set_template = ', '.join(f'"{column_name}" = "{sub_table}"."{column_name}"' for column_name in keys)
96+
set_template = ', '.join(f'"{column_name}" = "{sub_table}"."{column_name}"{cast(column_name)}' for column_name in keys)
8997
values_template = ', '.join(['%s'] * len(by_values))
9098
query = f'UPDATE "{TABLE}" SET {set_template} FROM (VALUES {values_template}) AS {sub_table}({column_temp}) WHERE "{TABLE}"."id" = ANY("{sub_table}"."ids")'
9199
list_values = [tuple([ids] + list(values)) for values, ids in by_values.items()]
92100
cur.execute(query, list_values)
93101

94-
def update_key_with_bypass(cur, id_vals):
95-
updates = defaultdict(lambda: defaultdict(list))
96-
for rid, vals in id_vals.items():
97-
updates[tuple(vals)][tuple(vals.values())].append(rid)
98-
99-
def batch_update(keys, by_values):
100-
sub_table = f"{TABLE}_tmp"
101-
column_temp = ', '.join(f'"{column_name}"' for column_name in ('ids',) + keys)
102-
set_template = ', '.join(f'"{column_name}" = "{sub_table}"."{column_name}"' for column_name in keys)
103-
values_template = ', '.join(['%s'] * len(by_values))
104-
query = f'UPDATE "{TABLE}" SET {set_template} FROM (VALUES {values_template}) AS {sub_table}({column_temp}) WHERE "{TABLE}"."id" = ANY("{sub_table}"."ids")'
105-
list_params = [tuple([ids] + list(values)) for values, ids in by_values.items()]
106-
cur.execute(query, list_params)
107-
108-
def mono_update(keys, values, ids):
109-
set_template = ', '.join(f'"{column_name}" = %s' for column_name in keys)
110-
query = f'UPDATE "{TABLE}" SET {set_template} WHERE id IN %s'
111-
params = list(values) + [tuple(ids)]
112-
cur.execute(query, params)
113-
114-
for keys, by_values in updates.items():
115-
if len(by_values) == 1:
116-
values, ids = next(iter(by_values.items()))
117-
mono_update(keys, values, ids)
118-
else:
119-
batch_update(keys, by_values)
120-
121102
# ---------------- Different kinds of data ------
122103
# Worst case for the new implementation, best case for the current one
123104
def data_key_uniform_values_uniform(ids):
@@ -185,11 +166,11 @@ def data_key_change_3_values_change_4(ids):
185166

186167
# NB_ROW = 1_000_000
187168
NB_ROW = 200_000
188-
NB_BATCH_UPDATE = 1_000
169+
NB_BATCH_UPDATE = 1000
189170
SPACE_BETWEEN_ID = 3
190171

191-
NB_TEST_BY_METHOD = 100
192-
X_BESTS = 20
172+
NB_TEST_BY_METHOD = 50
173+
X_BESTS = 10
193174

194175
if __name__ == "__main__":
195176
print(f"Create table and row ({NB_ROW})")
@@ -216,7 +197,6 @@ def data_key_change_3_values_change_4(ids):
216197
update_key_values_current,
217198
update_key_values_execute_batch,
218199
update_key_without_bypass,
219-
update_key_with_bypass,
220200
]
221201
data_methods = [
222202
data_key_uniform_values_uniform,

0 commit comments

Comments
 (0)