diff --git a/dw-pipeline.json b/dw-pipeline.json index 0d18e53..2c9871f 100644 --- a/dw-pipeline.json +++ b/dw-pipeline.json @@ -3,7 +3,7 @@ "name": "nba-plots" }, "transform": { - "image": "rflprr/custom-pipeline:release-0.1.0", + "image": "rflprr/custom-pipeline:release-0.1.1", "cmd": [ "/usr/src/custom-pipeline/table_plots.py", "/pfs/nba-tables" diff --git a/table_plots.py b/table_plots.py index 223440b..c7c3dfd 100755 --- a/table_plots.py +++ b/table_plots.py @@ -7,27 +7,33 @@ import pandas as pd -@click.command -@click.argument('input_path', 'output_path') +@click.command() +@click.argument('input_path') +@click.option('-o', '--output_path', default='/pfs/out') def plots(input_path, output_path='/pfs/out'): dp = datapackage.DataPackage(descriptor=os.path.join(input_path, 'datapackage.json')) dp_out = datapackage.DataPackage() - dp_out['name'] = 'nba-plots' - dp_out['title'] = 'nba-plots' - dp_out['description'] = 'My NBA Plots' - dp_out['x-visibility'] = 'PRIVATE' - dp_out['licenses'] = [ + dp_out.descriptor['name'] = 'nba-plots' + dp_out.descriptor['title'] = 'nba-plots' + dp_out.descriptor['description'] = 'My NBA Plots' + dp_out.descriptor['x-visibility'] = 'PRIVATE' + dp_out.descriptor['licenses'] = [ {'name': 'Other'} ] - dp_out['resources'] = [] + dp_out.descriptor['resources'] = [] for r in dp.descriptor['resources']: - df = pd.read_csv(r['path']) - fig = df.plot().get_figure() - plot_name = '{}.png'.format(r['name']) - fig.savefig(os.path.join(output_path, plot_name)) - dp_out['resources'].append({'name': plot_name, 'path': plot_name}) + if r.get('mediatype', '') == 'text/csv': + df = pd.read_csv(os.path.join(dp.base_path, r['path'])) + try: + fig = df.plot().get_figure() + plot_name = os.path.basename('{}.png'.format(r['name'])) + fig.savefig(os.path.join(output_path, plot_name)) + dp_out.descriptor['resources'].append({'name': plot_name, 'path': + plot_name}) + except: + pass with open(os.path.join(output_path, 'datapackage.json'), 'w') as f: f.write(dp_out.to_json())