Skip to content

Commit

Permalink
Initial implementation of facets, plus tests and docs
Browse files Browse the repository at this point in the history
Refs #255
  • Loading branch information
simonw committed May 12, 2018
1 parent 70ff615 commit eaaa3ea
Show file tree
Hide file tree
Showing 7 changed files with 217 additions and 4 deletions.
41 changes: 38 additions & 3 deletions datasette/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,8 +733,8 @@ async def data(self, request, name, hash, table):
forward_querystring=False
)

units = self.table_metadata(name, table).get('units', {})

table_metadata = self.table_metadata(name, table)
units = table_metadata.get('units', {})
filters = Filters(sorted(other_args.items()), units, ureg)
where_clauses, params = filters.build_where_clauses()

Expand Down Expand Up @@ -799,12 +799,13 @@ async def data(self, request, name, hash, table):
raise DatasetteError('Cannot use _sort and _sort_desc at the same time')
order_by = '{} desc'.format(escape_sqlite(sort_desc))

count_sql = 'select count(*) from {table_name} {where}'.format(
from_sql = 'from {table_name} {where}'.format(
table_name=escape_sqlite(table),
where=(
'where {} '.format(' and '.join(where_clauses))
) if where_clauses else '',
)
count_sql = 'select count(*) {}'.format(from_sql)

_next = special_args.get('_next')
offset = ''
Expand Down Expand Up @@ -933,6 +934,39 @@ async def data(self, request, name, hash, table):
name, sql, params, truncate=True, **extra_args
)

# facets support
try:
facets = request.args['_facet']
except KeyError:
facets = table_metadata.get('facets', [])
facet_results = {}
for column in facets:
facet_sql = '''
select {col} as value, count(*) as count
{from_sql}
group by {col} order by count desc limit 20
'''.format(col=escape_sqlite(column), from_sql=from_sql)
try:
facet_rows = await self.execute(
name,
facet_sql,
params,
truncate=False,
custom_time_limit=200
)
facet_results[column] = [{
'value': row['value'],
'count': row['count'],
'toggle_url': urllib.parse.urljoin(
request.url, path_with_added_args(
request, {column: row['value']}
)
)
} for row in facet_rows]
except sqlite3.OperationalError:
# Hit time limit
pass

columns = [r[0] for r in description]
rows = list(rows)

Expand Down Expand Up @@ -1043,6 +1077,7 @@ async def extra_template():
'sql': sql,
'params': params,
},
'facet_results': facet_results,
'next': next_value and str(next_value) or None,
'next_url': next_url,
}, extra_template, (
Expand Down
9 changes: 9 additions & 0 deletions datasette/templates/table.html
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ <h3>{% if filtered_table_rows_count or filtered_table_rows_count == 0 %}{{ "{:,}

<p>This data as <a href="{{ url_json }}">.json</a></p>

{% for facet_name, facet_values in facet_results.items() %}
<p><strong>{{ facet_name }}</strong></p>
<ul>
{% for facet_value in facet_values %}
<li><a href="{{ facet_value.toggle_url }}">{{ facet_value.value }}</a> ({{ facet_value.count }})</li>
{% endfor %}
</ul>
{% endfor %}

{% include custom_rows_and_columns_templates %}

{% if next_url %}
Expand Down
70 changes: 70 additions & 0 deletions docs/facets.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
.. _facets:

Facets
======

This feature is currently under development, see `#255 <https://github.com/simonw/datasette/issues/255>`_

Datasette facets can be used to add a faceted browse interface to any Datasette table. With facets, tables are displayed along with a summary showing the most common values in specified columns. These values can be selected to further filter the table.

Facets can be specified in two ways: using queryset parameters, or in ``metadata.json`` configuration for the table.

Facets in querystrings
----------------------

To turn on faceting for specific columns on a Datasette table view, add one or more ``_facet=COLUMN`` parameters to the URL. For example, if you want to turn on facets for the ``city`` and ``state`` columns, construct a URL that looks like this::

/dbname/tablename?_facet=state&_facet=city

This works for both the HTML interface and the ``.json`` view. When enabled, facets will cause a ``facet_results`` block to be added to the JSON output, looking something like this::

"facet_results": {
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "http://...&state=CA"
},
{
"value": "MI",
"count": 4,
"toggle_url": "http://...&state=MI"
}
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "http://...=San+Francisco"
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "http://...&city=Detroit"
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "http://...=Los+Angeles"
}
]
}

Facets in metadata.json
-----------------------

You can turn facets on by default for specific tables by adding them to a ``"facets"`` key in a Datasette :ref:`metadata` file.

Here's an example that turns on faceting by default for the ``qLegalStatus`` column in the ``Street_Tree_List`` table in the ``sf-trees`` database::

{
"databases": {
"sf-trees": {
"tables": {
"Street_Tree_List": {
"facets": ["qLegalStatus"]
}
}
}
}
}
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Contents
getting_started
json_api
sql_queries
facets
full_text_search
metadata
custom_templates
Expand Down
2 changes: 2 additions & 0 deletions docs/metadata.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.. _metadata:

Metadata
========

Expand Down
23 changes: 23 additions & 0 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,29 @@ def extra_js_urls():
);
INSERT INTO [select] VALUES ('group', 'having', 'and');
CREATE TABLE facetable (
pk integer primary key,
state text,
city text,
neighborhood text
);
INSERT INTO facetable (state, city, neighborhood) VALUES
('CA', 'San Francisco', 'Mission'),
('CA', 'San Francisco', 'Dogpatch'),
('CA', 'San Francisco', 'SOMA'),
('CA', 'San Francisco', 'Tenderloin'),
('CA', 'San Francisco', 'Bernal Heights'),
('CA', 'San Francisco', 'Hayes Valley'),
('CA', 'Los Angeles', 'Hollywood'),
('CA', 'Los Angeles', 'Downtown'),
('CA', 'Los Angeles', 'Los Feliz'),
('CA', 'Los Angeles', 'Koreatown'),
('MI', 'Detroit', 'Downtown'),
('MI', 'Detroit', 'Greektown'),
('MI', 'Detroit', 'Corktown'),
('MI', 'Detroit', 'Mexicantown')
;
INSERT INTO simple_primary_key VALUES (1, 'hello');
INSERT INTO simple_primary_key VALUES (2, 'world');
INSERT INTO simple_primary_key VALUES (3, '');
Expand Down
75 changes: 74 additions & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_homepage(app_client):
assert response.json.keys() == {'test_tables': 0}.keys()
d = response.json['test_tables']
assert d['name'] == 'test_tables'
assert d['tables_count'] == 15
assert d['tables_count'] == 16


def test_database_page(app_client):
Expand Down Expand Up @@ -104,6 +104,15 @@ def test_database_page(app_client):
'label_column': None,
'fts_table': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'state', 'city', 'neighborhood'],
'name': 'facetable',
'count': 14,
'foreign_keys': {'incoming': [], 'outgoing': []},
'fts_table': None,
'hidden': False,
'label_column': None,
'primary_keys': ['pk'],
}, {
'columns': ['pk', 'foreign_key_with_label', 'foreign_key_with_no_label'],
'name': 'foreign_key_references',
Expand Down Expand Up @@ -878,3 +887,67 @@ def test_page_size_matching_max_returned_rows(app_client_returend_rows_matches_p
assert len(response.json['rows']) in (1, 50)
path = response.json['next_url']
assert 201 == len(fetched)


@pytest.mark.parametrize('path,expected_facet_results', [
(
"/test_tables/facetable.json?_facet=state&_facet=city",
{
"state": [
{
"value": "CA",
"count": 10,
"toggle_url": "_facet=state&_facet=city&state=CA",
},
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "San Francisco",
"count": 6,
"toggle_url": "_facet=state&_facet=city&city=San+Francisco",
},
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Detroit",
},
{
"value": "Los Angeles",
"count": 4,
"toggle_url": "_facet=state&_facet=city&city=Los+Angeles",
},
],
},
), (
"/test_tables/facetable.json?_facet=state&_facet=city&state=MI",
{
"state": [
{
"value": "MI",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI",
},
],
"city": [
{
"value": "Detroit",
"count": 4,
"toggle_url": "_facet=state&_facet=city&state=MI&city=Detroit",
},
],
},
)
])
def test_facets(app_client, path, expected_facet_results):
response = app_client.get(path, gather_request=False)
facet_results = response.json['facet_results']
# We only compare the querystring portion of the taggle_url
for facet_name, facet_values in facet_results.items():
for facet_value in facet_values:
facet_value['toggle_url'] = facet_value['toggle_url'].split('?')[1]
assert expected_facet_results == facet_results

0 comments on commit eaaa3ea

Please sign in to comment.