Skip to content

Commit

Permalink
AD-6: Administrative branch summary report, more columns and visible …
Browse files Browse the repository at this point in the history
…hierarchy
  • Loading branch information
bzar committed Nov 14, 2017
1 parent fdc7cca commit 0ce82e6
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 9 deletions.
2 changes: 1 addition & 1 deletion ansible/roles/ckan/vars/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ celery_user: "{{ www_user }}"

ckan_plugins_default: stats scheming_datasets fluent
# order matters, when templates call super()
ckan_plugins: harvest ckan_harvester hri_harvester dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface spatial_metadata spatial_query csw_harvester drupal7 datarequests report qa archiver ytp_organizations ytp_comments ytp_request hierarchy_display ytp_theme ytp_drupal ytp_tasks ytp_dataset ytp_user ytp_service datastore showcase datapusher recline_grid_view recline_graph_view recline_map_view text_view image_view pdf_view geo_view geojson_view
ckan_plugins: harvest ckan_harvester hri_harvester dcat dcat_rdf_harvester dcat_json_harvester dcat_json_interface spatial_metadata spatial_query csw_harvester drupal7 datarequests report qa archiver ytp_organizations ytp_comments ytp_request ytp_report hierarchy_display ytp_theme ytp_drupal ytp_tasks ytp_dataset ytp_user ytp_service datastore showcase datapusher recline_grid_view recline_graph_view recline_map_view text_view image_view pdf_view geo_view geojson_view


ckan_aws_plugins: cloudstorage
Expand Down
47 changes: 40 additions & 7 deletions modules/ckanext-ytp-main/ckanext/ytp/report/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import itertools
from datetime import timedelta, datetime

import logging

log = logging.getLogger(__name__)

def test_report():
return {
'table' : [
Expand Down Expand Up @@ -36,24 +40,44 @@ def administrative_branch_summary_report():
]

context = {}

# Optimization opportunity: Could fetch all orgs here and manually create the hierarchy
orgs = get_action('organization_list')(context, {'organizations': org_names, 'all_fields': True})
orgs_by_name = {org['name']: org for org in orgs}

org_trees = [get_action('group_tree_section')(context, {'id': org['id'], 'type': 'organization'})
for org in orgs]
org_ids_by_tree = {r['name']: [x['id'] for x in flatten(r, lambda x: x['children'])]
for r in org_trees}
datasets_by_tree = {k: list(package_generator('owner_org:(%s)' % ' OR '.join(v), 1000, context))
for k, v in org_ids_by_tree.iteritems()}

def children(dataset):
return dataset['children']

org_levels = {
org['name']: level
for t in org_trees
for org, level in hierarchy_levels(t, children)}

flat_orgs = (org for t in org_trees for org in flatten(t, children))
root_tree_ids_pairs = (
(r, [x['id'] for x in flatten(r, children)])
for r in flat_orgs)

# Optimization opportunity: Prefetch datasets for all related orgs in one go
root_datasets_pairs = (
(k, list(package_generator('owner_org:(%s)' % ' OR '.join(v), 1000, context)))
for k, v in root_tree_ids_pairs)

return {
'table' : [{
'organization': orgs_by_name[org_name],
'organization': org,
'level': org_levels[org['name']],
'dataset_count': len(datasets),
'dataset_count_1yr': glen(d for d in datasets if age(d) >= timedelta(1 * 365)),
'dataset_count_2yr': glen(d for d in datasets if age(d) >= timedelta(2 * 365)),
'dataset_count_3yr': glen(d for d in datasets if age(d) >= timedelta(3 * 365)),
'new_datasets_month': glen(d for d in datasets if age(d) <= timedelta(30)),
'new_datasets_year': glen(d for d in datasets if age(d) <= timedelta(365)),
'resource_formats': resource_formats(datasets)
}
for org_name, datasets in datasets_by_tree.iteritems()
for org, datasets in root_datasets_pairs
]
}

Expand Down Expand Up @@ -98,5 +122,14 @@ def flatten(x, children):
for cx in flatten(child, children):
yield cx

def hierarchy_levels(x, children, level=0):
    '''
    Provide hierarchy levels for nodes in a hierarchy.

    Walks the hierarchy rooted at ``x`` depth-first and yields a
    ``(node, level)`` tuple for every node: first ``x`` itself with
    ``level``, then each descendant with a level one greater than its
    parent's.

    :param x: root node of the (sub)hierarchy
    :param children: callable that takes a node and returns an iterable
        of its child nodes
    :param level: level to assign to ``x``; descendants count up from it
    :returns: generator of ``(node, level)`` tuples in pre-order
    '''
    yield (x, level)
    for child in children(x):
        # Re-yield explicitly instead of using ``yield from``: this module
        # relies on Python 2 constructs elsewhere (e.g. dict.iteritems()).
        for cx, cl in hierarchy_levels(child, children, level + 1):
            yield (cx, cl)

def resource_formats(datasets):
    '''
    Summarise the distinct resource formats used by the given datasets.

    :param datasets: iterable of dataset dicts, each with a ``'resources'``
        entry holding an iterable of resource dicts
    :returns: the unique, non-empty ``'format'`` values joined with
        ``', '``; an empty string when there are none
    '''
    # .get() so a resource without a 'format' key is skipped instead of
    # raising KeyError.
    formats = {r.get('format') for d in datasets for r in d['resources']}
    # Sets have no defined iteration order; sort so the report column is
    # identical from one run to the next.
    return ', '.join(sorted(f for f in formats if f))
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
<tr>
<th>Administrative branch</th>
<th>Dataset count</th>
<th>Dataset count (1 year ago)</th>
<th>Dataset count (2 years ago)</th>
<th>Dataset count (3 years ago)</th>
<th>New datasets last month</th>
<th>New datasets last year</th>
<th>Data formats</th>
Expand All @@ -11,8 +14,11 @@
<tbody>
{% for row in table %}
<tr>
<td>{{ row.organization.name }}</td>
<td style="padding-left: {{ row.level + 1 }}em">{{ row.organization.name }}</td>
<td>{{ row.dataset_count }}</td>
<td>{{ row.dataset_count_1yr }}</td>
<td>{{ row.dataset_count_2yr }}</td>
<td>{{ row.dataset_count_3yr }}</td>
<td>{{ row.new_datasets_month }}</td>
<td>{{ row.new_datasets_year }}</td>
<td>{{ row.resource_formats }}</td>
Expand Down

0 comments on commit 0ce82e6

Please sign in to comment.