Ddlog update #376

Merged: 5 commits, Sep 24, 2015

Changes from 1 commit
Add support for command: deepdive initdb TABLE
feiranwang committed Sep 4, 2015
commit 70e8342c2bafcdf7f69f5f6d448acaa643945c42
17 changes: 14 additions & 3 deletions shell/deepdive-initdb
@@ -16,16 +16,27 @@ db-init "$@"

 # make sure the necessary tables are all created
 if [[ -e app.ddlog ]]; then
-    # TODO export schema.sql from ddlog instead of running initdb pipeline
-    deepdive-run initdb
+    if [[ $# -gt 0 ]]; then
Contributor:
Rather than having these argument count checks buried deep inside, I think it's much clearer to define initdb's behavior entirely differently when arguments are specified. Please see my comment on the PR for reorganizing.
+        tmp=$(mktemp -d "${TMPDIR:-/tmp}"/deepdive-initdb.XXXXXXX)
+        trap 'rm -rf "$tmp"' EXIT
+        schema_json="$tmp"/schema.json
+        ddlog export-schema app.ddlog > "$schema_json"
+        for t in "$@"; do
+            deepdive-sql "DROP TABLE IF EXISTS $t CASCADE"
+            ddlog_initdb $schema_json $t | deepdive-sql
+        done
+    else
+        # TODO export schema.sql from ddlog instead of running initdb pipeline
+        deepdive-run initdb
+    fi
 fi
 # run all DDL statements in schema.sql if available
 if [[ -e schema.sql ]]; then
     db-prompt <schema.sql
 fi

 # load the input data
-! [[ -x input/init.sh ]] || {
+! [[ -x input/init.sh && $# -eq 0 ]] || {
     # XXX set the legacy environment variables
     export APP_HOME=$DEEPDIVE_APP
     input/init.sh "$@"
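Taken together, the hunk gives `deepdive initdb` two modes: with table arguments it exports the schema and re-initializes just those tables; with no arguments it falls back to the existing `deepdive-run initdb` pipeline (and only then runs `input/init.sh`). A minimal Python 3 sketch of that dispatch (`initdb_plan` is a hypothetical name; the temp-dir and trap handling from the shell are omitted):

```python
def initdb_plan(tables):
    """Mirror the dispatch in shell/deepdive-initdb: per-table re-init
    when table arguments are given, full pipeline otherwise.
    Returns the commands the script would run, as strings."""
    if not tables:  # matches the else branch of: if [[ $# -gt 0 ]]
        return ["deepdive-run initdb"]
    # export the ddlog schema once, then drop and recreate each table
    plan = ["ddlog export-schema app.ddlog > schema.json"]
    for t in tables:
        plan.append('deepdive-sql "DROP TABLE IF EXISTS %s CASCADE"' % t)
        plan.append("ddlog_initdb schema.json %s | deepdive-sql" % t)
    return plan
```

The `$# -eq 0` change at the end of the hunk means a per-table re-init never reloads input data.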
1 change: 1 addition & 0 deletions stage.sh
@@ -60,6 +60,7 @@ stage util/active.sh util/
 stage util/calibration.py util/
 stage util/calibration.plg util/
 stage util/pgtsv_to_json util/
+stage util/ddlog_initdb util/

 # DDlog compiler
 stage util/ddlog bin/
15 changes: 15 additions & 0 deletions test/postgresql/deepdive_initdb.bats
@@ -0,0 +1,15 @@
+#!/usr/bin/env bats
+# Tests for initdb
Contributor:
Maybe it's more important to test the python script for generating CREATE TABLE statements?

+. "$BATS_TEST_DIRNAME"/env.sh >&2
+
+setup() {
+    cd "$BATS_TEST_DIRNAME"/spouse_example
+}
+
+@test "$DBVARIANT initdb from ddlog" {
+    cd ddlog || skip
+    deepdive initdb articles
+    [[ $(deepdive sql eval "SELECT * FROM articles" format=csv header=1) = 'article_id,text' ]]
+    deepdive sql "INSERT INTO articles VALUES ('foo', 'bar')"
+}
23 changes: 23 additions & 0 deletions util/ddlog_initdb
@@ -0,0 +1,23 @@
+#! /usr/bin/env python
+# Generate create table statement given a ddlog exported schema and a table name.
+# Usage: ddlog_initdb SCHEMA.JSON TABLE_NAME
Contributor:
Let's give a better name to this script. How about schema_json_to_sql?

I was originally thinking this SQL generator should go under each driver, e.g., to handle DISTRIBUTED BY in GP and so on. If you agree, I think it'll be a matter of just moving this to shell/driver.postgresql/ and keeping a symlink or clone under driver.mysql/.

+import json, sys
+
+def main():
+    # load schema.json
+    with open(sys.argv[1]) as schema_file:
+        schema = json.load(schema_file)
+    table = sys.argv[2]
+    # the given table is not in the schema, do nothing
+    if table not in schema["relations"]:
+        print ""
+    else:
+        columns_json = schema["relations"][table]["columns"]
+        columns = range(len(columns_json))
+        for k, v in columns_json.iteritems():
+            columns[v["index"]] = "%s %s" %(k, v["type"])
+        print "CREATE TABLE %s(%s)\n" %(table, ", ".join(columns))
+
+if __name__ == "__main__":
+    main()
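The script above is Python 2 (`print` statements, `dict.iteritems`, and index-assignment into a `range` list). The same logic in Python 3, run against a sample of the `schema.json` shape the script assumes; the sample itself is illustrative, modeled on the `articles` table exercised by the bats test:

```python
import json

# Assumed schema.json shape: "relations" -> table name -> "columns" ->
# column name -> {"index": position, "type": SQL type}. The exact
# `ddlog export-schema` output format is inferred from the script above.
SCHEMA_JSON = """
{
  "relations": {
    "articles": {
      "columns": {
        "text":       {"index": 1, "type": "text"},
        "article_id": {"index": 0, "type": "text"}
      }
    }
  }
}
"""

def create_table_sql(schema, table):
    # Unknown table: emit nothing, matching the script's empty print
    if table not in schema["relations"]:
        return ""
    columns_json = schema["relations"][table]["columns"]
    # Place each column at its declared "index" so that JSON key order
    # (which is unordered) cannot scramble the column order
    columns = [None] * len(columns_json)
    for name, info in columns_json.items():
        columns[info["index"]] = "%s %s" % (name, info["type"])
    return "CREATE TABLE %s(%s)" % (table, ", ".join(columns))

schema = json.loads(SCHEMA_JSON)
print(create_table_sql(schema, "articles"))
# → CREATE TABLE articles(article_id text, text text)
```

Note how `article_id` comes first even though `text` appears first in the JSON: the `index` field, not key order, determines column position, which is what makes the bats test's `article_id,text` header check deterministic.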