Skip to content

Commit

Permalink
Merge branch 'bit-field_literals'
Browse files Browse the repository at this point in the history
  • Loading branch information
dumblob committed Jan 23, 2018
2 parents f04524c + 87a8374 commit be2cd09
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 55 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Converts MySQL dump to SQLite3 compatible dump (including MySQL `KEY xxxxx` stat
## Development
The script is POSIX-compliant and depends only on *awk* (tested with gawk, but should work with original awk, and the lightning fast mawk).
The script is written in *awk* (tested with gawk, but should work with original awk, and the lightning fast mawk) and shall be fully POSIX compliant.
It's originally based on the newest fork (https://gist.github.com/bign8/9055981/05e65fd90c469c5eaa730823910c0c5f9de40ab4) of the original `mysql2sqlite.sh` (https://gist.github.com/esperlu/943776/be469f0a0ab8962350f3c5ebe8459218b915f817) with the following patches:
Expand All @@ -46,4 +46,4 @@ MIT
* many different contributors forked the gist and made wildly varying changes, because @esperlu stopped working on it and didn't respond
* @dumblob took over in Aug 2015 and applied the most important patches from all the forks as well as many of his own patches, tested on a Drupal DB
* @dumblob added the MIT license under the assumption that the original gist was released into the public domain, because, despite significant changes, it wasn't clean-room engineering.
* 2016-05-11 17:32 GMT+2 [@esperlu declared](https://github.com/dumblob/mysql2sqlite/issues/2 ) MIT as a fitting license (also retrospectively) and the [original gist](https://gist.github.com/esperlu/943776 ) as deprecated.
* 2016-05-11 17:32 UTC+2 [@esperlu declared](https://github.com/dumblob/mysql2sqlite/issues/2 ) MIT as a fitting license (also retrospectively) and the [original gist](https://gist.github.com/esperlu/943776 ) as deprecated.
189 changes: 136 additions & 53 deletions mysql2sqlite
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,76 @@

# Authors: @esperlu, @artemyk, @gkuenning, @dumblob

# FIXME detect empty input file and issue a warning

# Emit the message s on standard error by piping it through `cat >&2`.
function printerr( s ){
  print s | "cat >&2"
}

BEGIN {
if (ARGC != 2) {
printf "%s\n%s\n",
"USAGE: mysql2sqlite.sh dump_mysql.sql > dump_sqlite3.sql",
" file name - (dash) is not supported, because - means stdin" > "/dev/stderr"
err=1 # do not execute the END rule
exit 1
if( ARGC != 2 ){
printerr( \
"USAGE: mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
" file name - (dash) is not supported, because - means stdin")
no_END = 1
exit 1
}

# Find INT_MAX supported by both this AWK (usually an ISO C signed int)
# and SQLite, and remember floor(INT_MAX / 2) = 2^(bits-2) - 1 in INT_MAX_HALF.
# A width counts as supported when its INT_MAX survives a
# string -> number -> string round trip unchanged; the widest match wins
# because later assignments overwrite earlier ones.
# On non-8bit-based architectures, the additional bits are safely ignored.

# 8bit (lower precision should not exist)
s="127"
# "63" + 0 avoids potential parser misbehavior
if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
# 16bit
s="32767"
if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
# 32bit
s="2147483647"
if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
# 64bit (as INTEGER in SQLite3)
s="9223372036854775807"
# Must be 2^62 - 1 like the other widths: with 2^62 the guard
# `powtwo > INT_MAX_HALF` in bit_to_int() would not trip at powtwo == 2^62
# and powtwo would be doubled to 2^63, which exceeds INT_MAX.
if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387903" + 0 }
# # 128bit
# s="170141183460469231731687303715884105728"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
# # 256bit
# s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
# # 512bit
# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
# # 1024bit
# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
# # higher precision probably not needed

FS=",$"
print "PRAGMA synchronous = OFF;"
print "PRAGMA journal_mode = MEMORY;"
print "BEGIN TRANSACTION;"
}

# Convert a string of binary digits (most significant bit first, e.g. the
# digits of a MySQL b'0101' literal) to a number.
#
#   str_bit   string consisting of the characters 0 and 1
#   returns   the numeric value of the bits that fit; 0 for an empty string
#
# Reads the global INT_MAX_HALF to decide when the next power of two can no
# longer be computed. If a set bit does not fit, a warning carrying the
# current record number (NR) is sent through printerr() and the remaining
# most significant bits are dropped. Leading zero bits beyond the supported
# width do not change the value and are skipped silently.
#
# historically 3 spaces separate non-argument local variables
function bit_to_int( str_bit,   powtwo, i, res, bit, overflow ){
  powtwo = 1
  overflow = 0
  # explicit init so that an empty str_bit yields 0 instead of ""
  res = 0
  # 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
  for( i = length( str_bit ); i > 0; --i ){
    bit = substr( str_bit, i, 1 )
    # only a set bit can actually overflow the result
    if( bit == 1 && ( overflow || res > INT_MAX_HALF ) ){
      printerr( \
        NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
      break
    }
    # a zero bit above the representable range is harmless; keep scanning
    # in case a set bit follows
    if( overflow ){ continue }
    res = res + bit * powtwo
    # no warning here as it might be the last iteration
    if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
    powtwo = powtwo * 2
  }
  return res
}

# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
Expand All @@ -33,45 +89,54 @@ inTrigger != 0 { print; next }
inView = 1
next
}
# The end of CREATE VIEW
# end of CREATE VIEW
/^(\).*(ENGINE|engine).*\*\/;)/ {
inView = 0;
inView = 0
next
}
# The rest of view just get passed through
# content of CREATE VIEW
inView != 0 { next }

# Skip other comments
# skip comments
/^\/\*/ { next }

# Print all `INSERT` lines. The single quotes are protected by another single quote.
# print all INSERT lines
( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert)/ {
prev = "";
gsub( /\\\047/, "\047\047" ) # single quote
gsub( /\\\047\047,/, "\\\047," )
prev = ""
# single quotes are escaped by another single quote
gsub( /\\'/, "''" )
gsub( /\\'',/, "\\'," )
gsub( /\\n/, "\n" )
gsub( /\\r/, "\r" )
gsub( /\\"/, "\"" )
gsub( /\\\\/, "\\" )
gsub( /\\\032/, "\032" ) # substitute
gsub( /\\\032/, "\032" ) # substitute char
# sqlite3 is limited to 16 significant digits of precision
while ( match( $0, /0x[0-9a-fA-F]{17}/ ) ) {
while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
hexIssue = 1
sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
}
if( hexIssue ){
printerr( \
NR ": WARN Hex number trimmed (length longer than 16 chars)." )
hexIssue = 0
}
print
next
}

# CREATE DATABASE is not supported
/^(CREATE.*DATABASE|create.*database)/ { next }

# Print the `CREATE` line as is and capture the table name.
# print the CREATE line as is and capture the table name
/^(CREATE|create)/ {
if ( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
caseIssue = 1
printerr( \
NR ": WARN Potential case sensitivity issues with table/column naming\n" \
" (see INFO at the end)." )
}
if ( match( $0, /`[^`]+/ ) ) {
if( match( $0, /`[^`]+/ ) ){
tableName = substr( $0, RSTART+1, RLENGTH-1 )
}
aInc = 0
Expand All @@ -94,46 +159,63 @@ aInc == 1 && /PRIMARY KEY|primary key/ { next }

# Print all fields definition lines except the `KEY` lines.
/^ / && !/^( (KEY|key)|\);)/ {
if ( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/)) {
aInc = 1;
if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
aInc = 1
gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
}
gsub( /(UNIQUE KEY|unique key) `.*` /, "UNIQUE " )
gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
gsub( /DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP|default current_timestamp on update current_timestamp/, "" )
# FIXME
# CREATE TRIGGER [UpdateLastTime]
# AFTER UPDATE
# ON Package
# FOR EACH ROW
# BEGIN
# UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
# END
gsub( /ON UPDATE CURRENT_TIMESTAMP|on update current_timestamp/, "" )
gsub( /(COLLATE|collate) [^ ]+ /, "" )
gsub( /(ENUM|enum)[^)]+\)/, "text " )
gsub( /(SET|set)\([^)]+\)/, "text " )
gsub( /UNSIGNED|unsigned/, "" )
gsub( /` [^ ]*(INT|int)[^ ]*/, "` integer" )
gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
ere_bit_field = "[bB]'[10]+'"
if( match($0, ere_bit_field) ){
sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
}
# field comments are not supported
gsub( / (COMMENT|comment).+$/, "" )
# Get commas off end of line
gsub( /,.?$/, "")
if ( prev ){
if ( firstInTable ){
gsub( /,.?$/, "" )
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else print "," prev
else {
print "," prev
}
}
else {
# FIXME check if this is correct in all cases
if ( match( $1,
/(CONSTRAINT|constraint) \".*\" (FOREIGN KEY|foreign key)/ ) )
if( match( $1,
/(CONSTRAINT|constraint) \".*\" (FOREIGN KEY|foreign key)/ ) ){
print ","
}
}
prev = $1
}
/ ENGINE| engine/ {
if (prev) {
if (firstInTable) {
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else print "," prev
# else print prev
else {
print "," prev
}
}
prev=""
print ");"
Expand All @@ -143,42 +225,43 @@ aInc == 1 && /PRIMARY KEY|primary key/ { next }
# in a separate `CREATE INDEX` command. The index name is prefixed by the table name to
# avoid a sqlite error for duplicate index names.
/^( (KEY|key)|\);)/ {
if (prev) {
if (firstInTable) {
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else print "," prev
# else print prev
else {
print "," prev
}
}
prev = ""
if ($0 == ");"){
if( $0 == ");" ){
print
} else {
if ( match( $0, /`[^`]+/ ) ) {
}
else {
if( match( $0, /`[^`]+/ ) ){
indexName = substr( $0, RSTART+1, RLENGTH-1 )
}
if ( match( $0, /\([^()]+/ ) ) {
if( match( $0, /\([^()]+/ ) ){
indexKey = substr( $0, RSTART+1, RLENGTH-1 )
}
# idx_ prefix to avoid name clashes (they really happen!)
key[tableName]=key[tableName] "CREATE INDEX \"idx_" tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
}
}

END {
if (err) { exit 1};
# print all `KEY` creation lines.
for (table in key) printf key[table]
if( no_END ){ exit 1}
# print all KEY creation lines.
for( table in key ){ printf key[table] }

print "END TRANSACTION;"

if ( hexIssue ){
print "WARN Hexadecimal numbers longer than 16 characters has been trimmed." | "cat >&2"
}
if ( caseIssue ){
print "WARN Pure sqlite identifiers are case insensitive (even if quoted\n" \
" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
" identifiers. Thus expect errors like \"table T has no column named F\"." | "cat >&2"
if( caseIssue ){
printerr( \
"INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
" identifiers. Thus expect errors like \"table T has no column named F\".")
}
}
Loading

0 comments on commit be2cd09

Please sign in to comment.