Skip to content

Commit 11970bb

Browse files
authored
Implement tables row count ordering for MySQL. (dimitri#1120)
This should help optimise the duration of migrating databases with very big tables and lots of smaller ones. It might be a little too naive as far as the optimisation goes, while still being an improvement on the default alphabetical one. Fixes dimitri#1099.
1 parent 14fb15b commit 11970bb

File tree

8 files changed

+88
-8
lines changed

8 files changed

+88
-8
lines changed

src/load/copy-data.lisp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
(defmethod queue-raw-data ((copy copy) rawq concurrency)
1010
"Stream data as read by the map-queue method on the COPY argument into QUEUE,
1111
as given."
12+
(log-message :notice "COPY ~a ~@[with ~d rows estimated~] [~a/~a]"
13+
(format-table-name (target copy))
14+
(table-row-count-estimate (target copy))
15+
(lp:kernel-worker-index)
16+
(lp:kernel-worker-count))
1217
(log-message :debug "Reader started for ~a" (format-table-name (target copy)))
1318
(let* ((start-time (get-internal-real-time))
1419
(row-count 0)
@@ -93,7 +98,6 @@
9398
(trivial-backtrace:print-backtrace condition
9499
:output nil))
95100
(lp::invoke-transfer-error condition))))
96-
(log-message :notice "COPY ~a" table-name)
97101

98102
;; Check for Read Concurrency Support from our source
99103
(when (and multiple-readers (< 1 concurrency))

src/load/migrate-database.lisp

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,24 @@
255255
(setf (catalog-distribution-rules catalog)
256256
(citus-distribute-schema catalog distribute))))
257257

258+
(defun optimize-table-copy-ordering (catalog)
  "Return the tables of CATALOG, followed by its views, in the order in
   which they should be copied.

   When at least one table has a non-zero row count estimate, the tables are
   ordered by decreasing estimate and the resulting order is logged at the
   :notice level. When every estimate is zero (e.g. the source did not
   provide any), the catalog ordering is kept as-is. In both cases the views
   come after the tables.

   The lists found in CATALOG are copied first, so the destructive
   operations used here never touch the catalog itself."
  (let ((table-list (copy-list (table-list catalog)))
        (view-list  (copy-list (view-list catalog))))
    ;; when materialized views are not supported, view-list is empty here
    (cond
      ((notevery #'zerop (mapcar #'table-row-count-estimate table-list))
       ;; SORT is destructive on lists: once it returns, TABLE-LIST may point
       ;; into the middle of the sorted chain of conses. Only the value
       ;; returned by SORT can be relied upon from here on, both for the log
       ;; message and for the result.
       (let ((sorted-table-list
              (sort table-list #'> :key #'table-row-count-estimate)))
         (log-message :notice
                      "Processing tables in this order: ~{~a: ~d rows~^, ~}"
                      (loop :for table :in (append sorted-table-list view-list)
                         :collect (format-table-name table)
                         :collect (table-row-count-estimate table)))
         (nconc sorted-table-list view-list)))
      (t
       (nconc table-list view-list)))))
275+
258276

259277
;;;
260278
;;; Generic enough implementation of the copy-database method.
@@ -414,10 +432,7 @@
414432
(return-from copy-database)))
415433

416434
(loop
417-
:for table :in (append (table-list catalog)
418-
;; when materialized views are not supported,
419-
;; view-list is empty here
420-
(view-list catalog))
435+
:for table :in (optimize-table-copy-ordering catalog)
421436

422437
:do (let ((table-source (instanciate-table-copy-object copy table)))
423438
;; first COPY the data from source to PostgreSQL, using copy-kernel
@@ -472,8 +487,21 @@
472487

473488
(when (and create-indexes
474489
(zerop (gethash table writers-count)))
475-
(log-message :notice "DONE copying ~a"
476-
(format-table-name table))
490+
491+
(let* ((stats pgloader.monitor::*sections*)
492+
(section (get-state-section stats :data))
493+
(table-stats (pgstate-get-label section table))
494+
(pprint-secs
495+
(pgloader.state::format-interval seconds nil)))
496+
;; in CCL we have access to the *sections* dynamic
497+
;; binding from another thread, in SBCL we access
498+
;; an empty copy.
499+
(log-message :notice
500+
"DONE copying ~a in ~a~@[ for ~d rows~]"
501+
(format-table-name table)
502+
pprint-secs
503+
(when table-stats
504+
(pgtable-rows table-stats))))
477505
(alexandria:appendf
478506
pkeys
479507
(create-indexes-in-kernel (target-db copy)

src/package.lisp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
#:table-comment
9797
#:table-storage-parameter-list
9898
#:table-tablespace
99+
#:table-row-count-estimate
99100
#:table-field-list
100101
#:table-column-list
101102
#:table-index-list

src/sources/common/api.lisp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@
153153
(defgeneric fetch-foreign-keys (catalog db-copy &key including excluding)
154154
(:documentation "Get the list of foreign keys from the source database."))
155155

156+
;; Protocol entry point for fetching per-table row count estimates from a
;; source database. It takes the same INCLUDING and EXCLUDING keyword
;; arguments as the other fetch-* generic functions declared in this file.
;; The MySQL source provides a method that stores each estimate with
;; (setf table-row-count-estimate) on the tables it finds.
(defgeneric fetch-table-row-count (catalog db-copy &key including excluding)
157+
(:documentation "Retrieve and set the row count estimate for given tables."))
158+
156159
(defgeneric fetch-comments (catalog db-copy &key including excluding)
157160
(:documentation "Get the list of comments from the source database."))
158161

src/sources/mysql/mysql-schema.lisp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,27 @@
155155
:finally
156156
(return schema)))
157157

158+
;;;
159+
;;; MySQL table row count estimate
160+
;;;
161+
(defmethod fetch-table-row-count ((schema schema)
                                  (mysql copy-mysql)
                                  &key
                                    including
                                    excluding)
  "Retrieve and set the row count estimate for given MySQL tables.

   Runs the /mysql/list-table-rows.sql query for the database of the current
   *connection*, with the INCLUDING and EXCLUDING filters, and stores each
   returned count in the row-count-estimate slot of the matching table of
   SCHEMA. Rows naming a table that is not found in SCHEMA are ignored, and
   so are rows that come back without a count."
  (loop
     :for (table-name count)
     :in (mysql-query (sql "/mysql/list-table-rows.sql"
                           (db-name *connection*)
                           including    ; do we print the clause?
                           including
                           excluding    ; do we print the clause?
                           excluding))
     :do (let ((table (find-table schema table-name)))
           ;; The query computes data_length/avg_row_length, which MySQL
           ;; evaluates to NULL when avg_row_length is 0 (typically an empty
           ;; table). PARSE-INTEGER can't be given a missing value, so in
           ;; that case the slot keeps its default estimate.
           ;; NOTE(review): assumes mysql-query hands SQL NULL back as NIL --
           ;; confirm against the driver settings.
           (when (and table count)
             (setf (table-row-count-estimate table) (parse-integer count))))))
178+
158179

159180
;;;
160181
;;; Queries to get the MySQL comments.

src/sources/mysql/mysql.lisp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ Illegal ~a character starting at position ~a~@[: ~a~].~%"
165165
:including including
166166
:excluding excluding)
167167

168+
;; fetch tables row count estimate
169+
(fetch-table-row-count schema mysql
170+
:including including
171+
:excluding excluding)
172+
168173
;; fetch view (and their columns) metadata, covering comments too
169174
(let* ((view-names (unless (eq :all materialize-views)
170175
(mapcar #'matview-source-name materialize-views)))
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-- params: db-name
2+
-- including
3+
-- filter-list-to-where-clause including
4+
-- excluding
5+
-- filter-list-to-where-clause excluding
6+
SELECT table_name,
7+
cast(data_length/avg_row_length as integer)
8+
FROM information_schema.tables
9+
WHERE table_schema = '~a'
10+
and table_type = 'BASE TABLE'
11+
~:[~*~;and (~{table_name ~a~^ or ~})~]
12+
~:[~*~;and (~{table_name ~a~^ and ~})~];

src/utils/catalog.lisp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@
4949

5050
(defstruct table source-name name schema oid comment
5151
storage-parameter-list tablespace
52+
(row-count-estimate 0 :type fixnum)
5253
;; field is for SOURCE
54+
field-list
5355
;; column is for TARGET
56+
column-list
57+
index-list
58+
fkey-list
59+
trigger-list
5460
;; citus is an extra slot for citus support
55-
field-list column-list index-list fkey-list trigger-list citus-rule)
61+
citus-rule)
5662

5763
(defstruct matview source-name name schema definition)
5864

0 commit comments

Comments
 (0)