diff --git a/doc/doc/advanced/deepdiveapp.md b/doc/doc/advanced/deepdiveapp.md index 8b0d38feb..6c9403c24 100644 --- a/doc/doc/advanced/deepdiveapp.md +++ b/doc/doc/advanced/deepdiveapp.md @@ -65,15 +65,21 @@ deepdive help run ### Initializing Database ```bash -deepdive initdb +deepdive initdb [TABLE] ``` This command initializes the underlying database configured for the application by creating necessary tables and loading the initial data into them. -It makes sure the following: +If `TABLE` is not given, it makes sure the following: 1. The configured database is created. -2. The tables defined in `schema.sql` are created. -3. The data that exist under `input/` are loaded into the tables with the help of `load.sh`. +2. The tables defined in `schema.sql` (for a DeepDive application) or `app.ddlog` (for a DDlog application) are created. +3. The data that exists under `input/` is loaded into the tables with the help of `init.sh`. + +If `TABLE` is given, it makes sure the following: + +1. The configured database is created. +2. The given table is created. +3. The data that exists under `input/` is loaded into `TABLE` with the help of `init_TABLE.sh`. ### Running Pipelines diff --git a/shell/deepdive-initdb b/shell/deepdive-initdb index 3be3e68d3..bc7b7c305 100755 --- a/shell/deepdive-initdb +++ b/shell/deepdive-initdb @@ -1,6 +1,10 @@ #!/usr/bin/env bash # deepdive-initdb -- Initializes the underlying database for the DeepDive application # > deepdive initdb +# Initializes the whole database. +# +# > deepdive initdb TABLE +# Initializes the given table. 
## set -eu @@ -14,30 +18,33 @@ cd "$DEEPDIVE_APP" # make sure database is created based on the database type db-init "$@" -# make sure the necessary tables are all created -if [[ -e app.ddlog ]]; then - if [[ $# -gt 0 ]]; then - tmp=$(mktemp -d "${TMPDIR:-/tmp}"/deepdive-initdb.XXXXXXX) - trap 'rm -rf "$tmp"' EXIT - schema_json="$tmp"/schema.json - ddlog export-schema app.ddlog > "$schema_json" - for t in "$@"; do - deepdive-sql "DROP TABLE IF EXISTS $t CASCADE" - ddlog_initdb $schema_json $t | deepdive-sql - done - else - # TODO export schema.sql from ddlog instead of running initdb pipeline - deepdive-run initdb +generate_schema_json() { + tmp=$(mktemp -d "${TMPDIR:-/tmp}"/deepdive-initdb.XXXXXXX) + trap 'rm -rf "$tmp"' EXIT + schema_json="$tmp"/schema.json + ddlog export-schema app.ddlog > "$schema_json" +} + +# if a list of table names is given, initialize the corresponding tables +if [[ $# -gt 0 ]]; then + [[ -e app.ddlog ]] || error "deepdive initdb TABLE is only available for ddlog applications" + generate_schema_json + for t in "$@"; do + schema_json_to_sql $schema_json $t | deepdive-sql + if [[ -x input/init_$t.sh ]]; then + input/init_$t.sh + fi + done +else # no arguments given, init database + if [[ -e app.ddlog ]]; then + generate_schema_json + schema_json_to_sql $schema_json | deepdive-sql + elif [[ -e schema.sql ]]; then + db-prompt /dev/null +if [[ $# -gt 0 ]]; then + createdb $DBNAME || true >/dev/null +else + { + dropdb $DBNAME || true + createdb $DBNAME + } >/dev/null +fi diff --git a/shell/driver.postgresql/schema_json_to_sql b/shell/driver.postgresql/schema_json_to_sql new file mode 100755 index 000000000..1ccae6625 --- /dev/null +++ b/shell/driver.postgresql/schema_json_to_sql @@ -0,0 +1,37 @@ +#! /usr/bin/env python +# Generate create table statement given a ddlog exported schema and a table name. 
+# Usage: ddlog_initdb SCHEMA.JSON TABLE_NAME + +import json, sys + +def generate_create_table_sql(schema, table): + columns_json = schema["relations"][table]["columns"] + # variable relation + if "variable_type" in schema["relations"][table]: + columns = range(len(columns_json) + 2) + columns[-2] = "id bigint" + label_type = "boolean" if schema["relations"][table]["variable_type"] == "boolean" else "int" + columns[-1] = "label " + label_type + else: + columns = range(len(columns_json)) + for k, v in columns_json.iteritems(): + columns[v["index"]] = "%s %s" %(k, v["type"]) + return "DROP TABLE IF EXISTS %s CASCADE; CREATE TABLE %s(%s);" %(table, table, ", ".join(columns)) + +def main(): + # load schema.json + with open(sys.argv[1]) as schema_file: + schema = json.load(schema_file) + # initialize all tables + if len(sys.argv) <= 2: + print ' '.join([generate_create_table_sql(schema, table) for table in schema["relations"].keys()]) + else: + table = sys.argv[2] + # the given table is not in the schema, do nothing + if table not in schema["relations"]: + print "" + else: + print generate_create_table_sql(schema, table) + +if __name__ == "__main__": + main() diff --git a/stage.sh b/stage.sh index e59d86c7c..95fbd1cf4 100755 --- a/stage.sh +++ b/stage.sh @@ -60,7 +60,6 @@ stage util/active.sh util/ stage util/calibration.py util/ stage util/calibration.plg util/ stage util/pgtsv_to_json util/ -stage util/ddlog_initdb util/ # DDlog compiler stage util/ddlog bin/ diff --git a/test/postgresql/deepdive_initdb.bats b/test/postgresql/deepdive_initdb.bats old mode 100644 new mode 100755 index 3e4b0d18c..63fed829e --- a/test/postgresql/deepdive_initdb.bats +++ b/test/postgresql/deepdive_initdb.bats @@ -7,6 +7,35 @@ setup() { cd "$BATS_TEST_DIRNAME"/spouse_example } +@test "$DBVARIANT schema_json_to_sql a single table" { + cd ddlog || skip + tmp=$(mktemp -d "${TMPDIR:-/tmp}"/deepdive-initdb.XXXXXXX) + schema_json="$tmp"/schema.json + ddlog export-schema app.ddlog > 
"$schema_json" + expected='DROP TABLE IF EXISTS articles CASCADE; CREATE TABLE articles(article_id text, text text);' + [[ $(schema_json_to_sql $schema_json articles) = "$expected" ]] +} + +@test "$DBVARIANT schema_json_to_sql without arguments" { + cd ddlog || skip + tmp=$(mktemp -d "${TMPDIR:-/tmp}"/deepdive-initdb.XXXXXXX) + schema_json="$tmp"/schema.json + ddlog export-schema app.ddlog > "$schema_json" + expected='DROP TABLE IF EXISTS articles CASCADE;' + expected+=' CREATE TABLE articles(article_id text, text text);' + expected+=' DROP TABLE IF EXISTS people_mentions CASCADE;' + expected+=' CREATE TABLE people_mentions(sentence_id text, start_position int, length int, text text, mention_id text);' + expected+=' DROP TABLE IF EXISTS has_spouse_features CASCADE;' + expected+=' CREATE TABLE has_spouse_features(relation_id text, feature text);' + expected+=' DROP TABLE IF EXISTS has_spouse CASCADE;' + expected+=' CREATE TABLE has_spouse(relation_id text, id bigint, label boolean);' + expected+=' DROP TABLE IF EXISTS has_spouse_candidates CASCADE;' + expected+=' CREATE TABLE has_spouse_candidates(person1_id text, person2_id text, sentence_id text, description text, relation_id text, is_true boolean);' + expected+=' DROP TABLE IF EXISTS sentences CASCADE;' + expected+=' CREATE TABLE sentences(document_id text, sentence text, words text[], lemma text[], pos_tags text[], dependencies text[], ner_tags text[], sentence_offset int, sentence_id text);' + [[ $(schema_json_to_sql $schema_json) = "$expected" ]] +} + @test "$DBVARIANT initdb from ddlog" { cd ddlog || skip deepdive initdb articles diff --git a/util/ddlog_initdb b/util/ddlog_initdb deleted file mode 100755 index 8a288adda..000000000 --- a/util/ddlog_initdb +++ /dev/null @@ -1,23 +0,0 @@ -#! /usr/bin/env python -# Generate create table statement given a ddlog exported schema and a table name. 
-# Usage: ddlog_initdb SCHEMA.JSON TABLE_NAME - -import json, sys - -def main(): - # load schema.json - with open(sys.argv[1]) as schema_file: - schema = json.load(schema_file) - table = sys.argv[2] - # the given table is not in the schema, do nothing - if table not in schema["relations"]: - print "" - else: - columns_json = schema["relations"][table]["columns"] - columns = range(len(columns_json)) - for k, v in columns_json.iteritems(): - columns[v["index"]] = "%s %s" %(k, v["type"]) - print "CREATE TABLE %s(%s)\n" %(table, ", ".join(columns)) - -if __name__ == "__main__": - main()