@@ -8,21 +8,22 @@ import (
88 "github.com/sourcegraph/sourcegraph/lib/errors"
99)
1010
11- // CommentOutInvalidLines will comment out lines in the customer's SQL database dump file
12- // which gcloud sql import errors out on
11+ // FilterInvalidLines copies the initial lines of the pg_dump-created .sql files,
12+ // from src to dst (the GCS bucket),
13+ // until it hits a line prefixed with a filterEndMarker,
14+ // while commenting out the linesToFilter which cause `gcloud sql import` to error out.
15+ // It then resets src to the position of the last contents written to dst.
1316//
14- // It performs a partial copy of a SQL database dump from
15- // src to dst while commenting out the problematic lines.
16- // When it determines there are no more EXTENSIONs-related statements,
17- // it will return, resetting src to the position of the last contents written to dst.
17+ // Filtering requires reading entire lines into memory,
18+ // which can be a very expensive operation, so when filtering is complete,
19+ // the more efficient io.Copy is used to perform the remainder of the copy in the calling function.
1820//
19- // This is needed for import to Google Cloud Storage, which does not like many statements which pg_dump may insert
20- // For more details, see https://cloud.google.com/sql/docs/postgres/import-export/import-export-dmp
21- //
22- // Filtering requires reading entire lines into memory - this can be a very expensive
23- // operation, so when filtering is complete, the more efficient io.Copy should be used
24- // to perform the remainder of the copy from src to dst.
25- func CommentOutInvalidLines (dst io.Writer , src io.ReadSeeker , progressFn func (int64 )) (int64 , error ) {
21+ // pg_dump writes these .sql files based on its own version,
22+ // not based on the Postgres version of either the source or destination database;
23+ // so self-hosted customers' diverse database environments
24+ // have inserted a variety of statements into the .sql files which cause the import to fail.
25+ // For details, see https://cloud.google.com/sql/docs/postgres/import-export/import-export-dmp
26+ func FilterInvalidLines (dst io.Writer , src io.ReadSeeker , progressFn func (int64 )) (int64 , error ) {
2627 var (
2728 reader = bufio .NewReader (src )
2829
@@ -60,8 +61,15 @@ func CommentOutInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(in
6061 "SET transaction_timeout" , // pg_dump v17, importing to Postgres 16
6162
6263 "\\ connect" ,
64+
65+ // Cloud instances' databases have been upgraded to Postgres v16.10,
66+ // which should include support for \restrict and \unrestrict,
67+ // but the entries are left here, commented out, in case we need to re-add them
6368 // "\\restrict",
69+ // To handle the \unrestrict command,
70+ // we'd have to add a search from the end of the file
6471 // "\\unrestrict",
72+ // Remove comments after databases are upgraded >= Postgres 17
6573 }
6674 )
6775
@@ -71,8 +79,8 @@ func CommentOutInvalidLines(dst io.Writer, src io.ReadSeeker, progressFn func(in
7179 line , err := reader .ReadBytes ('\n' )
7280 consumed += int64 (len (line ))
7381
74- // If this function has read through the whole file,
75- // then hand the last line
82+ // If this function has read through the whole file without hitting a filterEndMarker,
83+ // then handle the last line correctly
7684 if err == io .EOF {
7785 noMoreLinesToFilter = true
7886
0 commit comments