In-memory _schemas database tracking schemas of attached tables, closes
simonw committed Dec 18, 2020
1 parent 5e9895c commit ebc7aa2
Showing 8 changed files with 279 additions and 6 deletions.
39 changes: 37 additions & 2 deletions datasette/app.py
@@ -74,6 +74,7 @@
asgi_send_json,
asgi_send_redirect,
)
from .utils.schemas import init_schemas, populate_schema_tables
from .utils.sqlite import (
sqlite3,
using_pysqlite3,
@@ -222,6 +223,11 @@ def __init__(
elif memory:
self.files = (MEMORY,) + self.files
self.databases = collections.OrderedDict()
# memory_name is a random string so that each Datasette instance gets its own
# unique in-memory named database - otherwise unit tests can fail with weird
# errors when different instances accidentally share an in-memory database
self.add_database("_schemas", Database(self, memory_name=secrets.token_hex()))
self._schemas_created = False
for file in self.files:
path = file
is_memory = False
@@ -326,6 +332,33 @@ def __init__(
self._root_token = secrets.token_hex(32)
self.client = DatasetteClient(self)

async def refresh_schemas(self):
schema_db = self.databases["_schemas"]
if not self._schemas_created:
await init_schemas(schema_db)
self._schemas_created = True

current_schema_versions = {
row["database_name"]: row["schema_version"]
for row in await schema_db.execute(
"select database_name, schema_version from databases"
)
}
for database_name, db in self.databases.items():
schema_version = (await db.execute("PRAGMA schema_version")).first()[0]
# Compare schema versions to see if we should skip it
if schema_version == current_schema_versions.get(database_name):
continue
await schema_db.execute_write(
"""
INSERT OR REPLACE INTO databases (database_name, path, is_memory, schema_version)
VALUES (?, ?, ?, ?)
""",
[database_name, db.path, db.is_memory, schema_version],
block=True,
)
await populate_schema_tables(schema_db, db)

@property
def urls(self):
return Urls(self)
@@ -342,7 +375,8 @@ def unsign(self, signed, namespace="default"):

def get_database(self, name=None):
if name is None:
return next(iter(self.databases.values()))
# Return the first non-_schemas database
name = [key for key in self.databases.keys() if key != "_schemas"][0]
return self.databases[name]

def add_database(self, name, db):
@@ -590,7 +624,8 @@ def _connected_databases(self):
"is_memory": d.is_memory,
"hash": d.hash,
}
for d in sorted(self.databases.values(), key=lambda d: d.name)
for name, d in sorted(self.databases.items(), key=lambda p: p[1].name)
if name != "_schemas"
]

def _versions(self):
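The skip logic above relies on SQLite's schema_version counter. A minimal standalone sketch (not part of this commit, using only the standard library sqlite3 module) of why comparing that counter is a cheap change detector:

import sqlite3

conn = sqlite3.connect(":memory:")
before = conn.execute("PRAGMA schema_version").fetchone()[0]
# Any DDL statement bumps the counter, so an unchanged value means
# refresh_schemas() can safely skip re-populating that database.
conn.execute("CREATE TABLE example (id INTEGER PRIMARY KEY)")
after = conn.execute("PRAGMA schema_version").fetchone()[0]
assert after > before
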
3 changes: 3 additions & 0 deletions datasette/cli.py
@@ -134,6 +134,9 @@ async def inspect_(files, sqlite_extensions):
app = Datasette([], immutables=files, sqlite_extensions=sqlite_extensions)
data = {}
for name, database in app.databases.items():
if name == "_schemas":
# Don't include the in-memory _schemas database
continue
counts = await database.table_counts(limit=3600 * 1000)
data[name] = {
"hash": database.hash,
2 changes: 2 additions & 0 deletions datasette/default_permissions.py
@@ -13,6 +13,8 @@ async def inner():
if allow is not None:
return actor_matches_allow(actor, allow)
elif action == "view-database":
if resource == "_schemas" and (actor is None or actor.get("id") != "root"):
return False
database_allow = datasette.metadata("allow", database=resource)
if database_allow is None:
return None
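A hedged sketch of the effect of that check via Datasette's permission_allowed() API (the helper name can_view_schemas is hypothetical): only the root actor may "view-database" on _schemas.

async def can_view_schemas(datasette, actor):
    # Non-root (including anonymous) actors hit the early `return False` above
    return await datasette.permission_allowed(
        actor, "view-database", resource="_schemas", default=True
    )

# await can_view_schemas(ds, None)           -> False
# await can_view_schemas(ds, {"id": "root"}) -> True
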
7 changes: 4 additions & 3 deletions datasette/utils/__init__.py
@@ -1024,11 +1024,12 @@ def find_spatialite():

async def initial_path_for_datasette(datasette):
"Return suggested path for opening this Datasette, based on number of DBs and tables"
if len(datasette.databases) == 1:
db_name = next(iter(datasette.databases.keys()))
databases = dict([p for p in datasette.databases.items() if p[0] != "_schemas"])
if len(databases) == 1:
db_name = next(iter(databases.keys()))
path = datasette.urls.database(db_name)
# Does this DB only have one table?
db = next(iter(datasette.databases.values()))
db = next(iter(databases.values()))
tables = await db.table_names()
if len(tables) == 1:
path = datasette.urls.table(db_name, tables[0])
162 changes: 162 additions & 0 deletions datasette/utils/schemas.py
@@ -0,0 +1,162 @@
async def init_schemas(db):
await db.execute_write(
"""
CREATE TABLE databases (
"database_name" TEXT PRIMARY KEY,
"path" TEXT,
"is_memory" INTEGER,
"schema_version" INTEGER
)
""",
block=True,
)
await db.execute_write(
"""
CREATE TABLE tables (
"database_name" TEXT,
"table_name" TEXT,
"rootpage" INTEGER,
"sql" TEXT,
PRIMARY KEY (database_name, table_name)
)
""",
block=True,
)
await db.execute_write(
"""
CREATE TABLE columns (
"database_name" TEXT,
"table_name" TEXT,
"cid" INTEGER,
"name" TEXT,
"type" TEXT,
"notnull" INTEGER,
"default_value" TEXT, -- renamed from dflt_value
"is_pk" INTEGER, -- renamed from pk
"hidden" INTEGER,
PRIMARY KEY (database_name, table_name, name)
)
""",
block=True,
)
await db.execute_write(
"""
CREATE TABLE indexes (
"database_name" TEXT,
"table_name" TEXT,
"seq" INTEGER,
"name" TEXT,
"unique" INTEGER,
"origin" TEXT,
"partial" INTEGER,
PRIMARY KEY (database_name, table_name, name)
)
""",
block=True,
)
await db.execute_write(
"""
CREATE TABLE foreign_keys (
"database_name" TEXT,
"table_name" TEXT,
"id" INTEGER,
"seq" INTEGER,
"table" TEXT,
"from" TEXT,
"to" TEXT,
"on_update" TEXT,
"on_delete" TEXT,
"match" TEXT
)
""",
block=True,
)


async def populate_schema_tables(schema_db, db):
database_name = db.name
await schema_db.execute_write(
"delete from tables where database_name = ?", [database_name], block=True
)
tables = (await db.execute("select * from sqlite_master where type = 'table'")).rows
for table in tables:
table_name = table["name"]
await schema_db.execute_write(
"""
insert into tables (database_name, table_name, rootpage, sql)
values (?, ?, ?, ?)
""",
[database_name, table_name, table["rootpage"], table["sql"]],
block=True,
)
# And the columns
await schema_db.execute_write(
"delete from columns where database_name = ? and table_name = ?",
[database_name, table_name],
block=True,
)
columns = await db.table_column_details(table_name)
for column in columns:
params = {
**{"database_name": database_name, "table_name": table_name},
**column._asdict(),
}
await schema_db.execute_write(
"""
insert into columns (
database_name, table_name, cid, name, type, "notnull", default_value, is_pk, hidden
) VALUES (
:database_name, :table_name, :cid, :name, :type, :notnull, :default_value, :is_pk, :hidden
)
""",
params,
block=True,
)
# And the foreign_keys
await schema_db.execute_write(
"delete from foreign_keys where database_name = ? and table_name = ?",
[database_name, table_name],
block=True,
)
foreign_keys = (
await db.execute(f"PRAGMA foreign_key_list([{table_name}])")
).rows
for foreign_key in foreign_keys:
params = {
**{"database_name": database_name, "table_name": table_name},
**dict(foreign_key),
}
await schema_db.execute_write(
"""
insert into foreign_keys (
database_name, table_name, "id", seq, "table", "from", "to", on_update, on_delete, match
) VALUES (
:database_name, :table_name, :id, :seq, :table, :from, :to, :on_update, :on_delete, :match
)
""",
params,
block=True,
)
# And the indexes
await schema_db.execute_write(
"delete from indexes where database_name = ? and table_name = ?",
[database_name, table_name],
block=True,
)
indexes = (await db.execute(f"PRAGMA index_list([{table_name}])")).rows
for index in indexes:
params = {
**{"database_name": database_name, "table_name": table_name},
**dict(index),
}
await schema_db.execute_write(
"""
insert into indexes (
database_name, table_name, seq, name, "unique", origin, partial
) VALUES (
:database_name, :table_name, :seq, :name, :unique, :origin, :partial
)
""",
params,
block=True,
)
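
Once populate_schema_tables() has run, the _schemas database can be queried like any other attached database. A hedged usage sketch (assumes a Datasette instance bound to the name datasette; the helper list_all_columns is illustrative only):

async def list_all_columns(datasette):
    schema_db = datasette.get_database("_schemas")
    results = await schema_db.execute(
        "select database_name, table_name, name, type "
        "from columns order by database_name, table_name, cid"
    )
    # Rows are sqlite3.Row objects, so dict() yields one plain mapping per column
    return [dict(row) for row in results.rows]
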
2 changes: 2 additions & 0 deletions datasette/views/base.py
@@ -115,6 +115,8 @@ async def delete(self, request, *args, **kwargs):
return Response.text("Method not allowed", status=405)

async def dispatch_request(self, request, *args, **kwargs):
if self.ds:
await self.ds.refresh_schemas()
handler = getattr(self, request.method.lower(), None)
return await handler(request, *args, **kwargs)

2 changes: 1 addition & 1 deletion tests/test_plugins.py
@@ -293,7 +293,7 @@ def test_hook_extra_body_script(app_client, path, expected_extra_body_script):

def test_hook_asgi_wrapper(app_client):
response = app_client.get("/fixtures")
assert "fixtures" == response.headers["x-databases"]
assert "_schemas, fixtures" == response.headers["x-databases"]


def test_hook_extra_template_vars(restore_working_directory):
68 changes: 68 additions & 0 deletions tests/test_schemas.py
@@ -0,0 +1,68 @@
from .fixtures import app_client
import pytest


def test_schemas_only_available_to_root(app_client):
cookie = app_client.actor_cookie({"id": "root"})
assert app_client.get("/_schemas").status == 403
assert app_client.get("/_schemas", cookies={"ds_actor": cookie}).status == 200


def test_schemas_databases(app_client):
cookie = app_client.actor_cookie({"id": "root"})
databases = app_client.get(
"/_schemas/databases.json?_shape=array", cookies={"ds_actor": cookie}
).json
assert len(databases) == 2
assert databases[0]["database_name"] == "_schemas"
assert databases[1]["database_name"] == "fixtures"


def test_schemas_tables(app_client):
cookie = app_client.actor_cookie({"id": "root"})
tables = app_client.get(
"/_schemas/tables.json?_shape=array", cookies={"ds_actor": cookie}
).json
assert len(tables) > 5
table = tables[0]
assert set(table.keys()) == {"rootpage", "table_name", "database_name", "sql"}


def test_schemas_indexes(app_client):
cookie = app_client.actor_cookie({"id": "root"})
indexes = app_client.get(
"/_schemas/indexes.json?_shape=array", cookies={"ds_actor": cookie}
).json
assert len(indexes) > 5
index = indexes[0]
assert set(index.keys()) == {
"partial",
"name",
"table_name",
"unique",
"seq",
"database_name",
"origin",
}


def test_schemas_foreign_keys(app_client):
cookie = app_client.actor_cookie({"id": "root"})
foreign_keys = app_client.get(
"/_schemas/foreign_keys.json?_shape=array", cookies={"ds_actor": cookie}
).json
assert len(foreign_keys) > 5
foreign_key = foreign_keys[0]
assert set(foreign_key.keys()) == {
"table",
"seq",
"on_update",
"on_delete",
"to",
"rowid",
"id",
"match",
"database_name",
"table_name",
"from",
}
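
Beyond the assertions above, these JSON endpoints accept Datasette's regular querystring filters. A hedged sketch of one more test (the facetable table comes from the test fixtures and is only an example):

def test_schemas_columns_for_single_table(app_client):
    cookie = app_client.actor_cookie({"id": "root"})
    columns = app_client.get(
        "/_schemas/columns.json?_shape=array&table_name=facetable",
        cookies={"ds_actor": cookie},
    ).json
    # Every returned row should belong to the filtered table
    assert all(c["table_name"] == "facetable" for c in columns)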
