From 02d2967f77e3fa5a18aea63dc84aa9ab418dc165 Mon Sep 17 00:00:00 2001
From: Nicola Soranzo
Date: Sun, 22 Nov 2020 12:20:17 +0000
Subject: [PATCH] Fix file open mode for Python 3 and PEP-8 errors (#3323)

Also:
- prefer json `load()`/`dump()` to `loads()`/`dumps()`
- use `with` statement to open/close files

Close https://github.com/galaxyproject/tools-iuc/pull/2032
---
 .../data_manager/bowtie2_index_builder.py | 9 +-
 .../data_manager/bowtie_index_builder.py | 70 +++----
 .../data_manager/bracken_build_database.py | 9 +-
 .../data_manager/kma_build_index.py | 7 +-
 .../data_manager/kraken2_build_database.py | 10 +-
 .../data_manager/make_json.py | 7 +-
 .../data_manager/bwa_mem_index_builder.py | 8 +-
 .../data_manager/bwameth_index_builder.py | 38 ++--
 .../data_manager/data_manager_cat.py | 5 +-
 .../data_manager/data_manager.py | 8 +-
 .../data_manager/data_manager.py | 7 +-
 .../data_manager/data_manager.py | 4 +-
 .../data_manager_fetch_and_index_maf.py | 2 +-
 .../data_manager/data_manager.py | 7 +-
 .../data_manager/fetch_refseq.py | 6 +-
 .../data_manager_gatk_picard_index_builder.py | 189 +++++++++---------
 .../data_manager_gemini_download.py | 7 +-
 .../data_manager/hisat2_index_builder.py | 8 +-
 .../data_manager_humann2_download.py | 13 +-
 .../data_manager/kallisto_index_builder.py | 62 +++---
 .../data_manager/data_manager_manual.py | 4 +-
 .../data_manager/mash_sketch_builder.py | 6 +-
 .../data_manager_metaphlan2_download.py | 10 +-
 .../data_manager/data_manager.py | 6 +-
 .../fetch_mothur_reference_data.py | 91 +++++----
 .../data_manager_ncbi_taxonomy_sqlite.py | 12 +-
 .../data_manager/picard_index_builder.py | 8 +-
 ...manager_plant_tribes_scaffolds_download.py | 4 +-
 .../install_primer_scheme_bedfiles.py | 12 +-
 .../data_manager_qiime_download.py | 21 +-
 .../data_manager/data_manager_rsync.py | 13 +-
 .../data_manager_sam_fasta_index_builder.py | 6 +-
 .../data_manager_snpEff_databases.py | 13 +-
 .../data_manager_snpEff_download.py | 62 +++---
 .../data_manager_snpsift_dbnsfp.py | 23 +--
 .../data_manager/rna_star_index_builder.py | 3 +-
 .../data_manager/twobit_builder.py | 8 +-
 .../data_manager/vsnp_dnaprints_fetcher.py | 4 +-
 .../data_manager/vsnp_excel_fetcher.py | 4 +-
 .../data_manager/vsnp_genbank_fetcher.py | 4 +-
 40 files changed, 402 insertions(+), 388 deletions(-)

diff --git a/data_managers/data_manager_bowtie2_index_builder/data_manager/bowtie2_index_builder.py b/data_managers/data_manager_bowtie2_index_builder/data_manager/bowtie2_index_builder.py
index 5e9d6ea5d48..20e921bb7cc 100644
--- a/data_managers/data_manager_bowtie2_index_builder/data_manager/bowtie2_index_builder.py
+++ b/data_managers/data_manager_bowtie2_index_builder/data_manager/bowtie2_index_builder.py
@@ -2,11 +2,11 @@ # Dan Blankenberg from __future__ import print_function +import json import optparse import os import subprocess import sys -from json import dumps, loads DEFAULT_DATA_TABLE_NAMES = ["bowtie2_indexes"]
@@ -61,7 +61,8 @@ def main(): filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {}
@@ -77,8 +78,8 @@ build_bowtie2_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_names=options.data_table_name or DEFAULT_DATA_TABLE_NAMES) # save info to json file - with open(filename, 'w') as json_out: - json_out.write(dumps(data_manager_dict, sort_keys=True)) + with 
open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_bowtie_index_builder/data_manager/bowtie_index_builder.py b/data_managers/data_manager_bowtie_index_builder/data_manager/bowtie_index_builder.py index ef75aaef8bf..015564d719a 100644 --- a/data_managers/data_manager_bowtie_index_builder/data_manager/bowtie_index_builder.py +++ b/data_managers/data_manager_bowtie_index_builder/data_manager/bowtie_index_builder.py @@ -13,7 +13,7 @@ DEFAULT_DATA_TABLE_NAME = "bowtie_indexes" -def get_id_name( params, dbkey, fasta_description=None): +def get_id_name(params, dbkey, fasta_description=None): # TODO: ensure sequence_id is unique and does not already appear in location file sequence_id = params['param_dict']['sequence_id'] if not sequence_id: @@ -27,70 +27,72 @@ def get_id_name( params, dbkey, fasta_description=None): return sequence_id, sequence_name -def build_bowtie_index( data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME, color_space=False ): +def build_bowtie_index(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME, color_space=False): # TODO: allow multiple FASTA input files - fasta_base_name = os.path.split( fasta_filename )[-1] - sym_linked_fasta_filename = os.path.join( target_directory, fasta_base_name ) - os.symlink( fasta_filename, sym_linked_fasta_filename ) - args = [ 'bowtie-build' ] + fasta_base_name = os.path.split(fasta_filename)[-1] + sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name) + os.symlink(fasta_filename, sym_linked_fasta_filename) + args = ['bowtie-build'] if color_space: - args.append( '-C' ) - args.append( sym_linked_fasta_filename) - args.append( fasta_base_name ) - args.append( sym_linked_fasta_filename ) - tmp_stderr = tempfile.NamedTemporaryFile( prefix="tmp-data-manager-bowtie-index-builder-stderr" ) - proc = subprocess.Popen( args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() ) + args.append('-C') + args.append(sym_linked_fasta_filename) + args.append(fasta_base_name) + args.append(sym_linked_fasta_filename) + tmp_stderr = tempfile.NamedTemporaryFile(prefix="tmp-data-manager-bowtie-index-builder-stderr") + proc = subprocess.Popen(args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno()) return_code = proc.wait() if return_code: tmp_stderr.flush() tmp_stderr.seek(0) print("Error building index:", file=sys.stderr) while True: - chunk = tmp_stderr.read( CHUNK_SIZE ) + chunk = tmp_stderr.read(CHUNK_SIZE) if not chunk: break - sys.stderr.write( chunk ) - sys.exit( return_code ) + sys.stderr.write(chunk) + sys.exit(return_code) tmp_stderr.close() - data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) - _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) + data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name) + _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) -def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][ data_table_name ].append( 
data_table_entry ) +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) return data_manager_dict def main(): parser = optparse.OptionParser() - parser.add_option( '-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename' ) - parser.add_option( '-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey' ) - parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) - parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) - parser.add_option( '-c', '--color_space', dest='color_space', action='store_true', default=False, help='color_space' ) + parser.add_option('-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename') + parser.add_option('-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey') + parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description') + parser.add_option('-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name') + parser.add_option('-c', '--color_space', dest='color_space', action='store_true', default=False, help='color_space') (options, args) = parser.parse_args() filename = args[0] - params = json.loads( open( filename ).read() ) - target_directory = params[ 'output_data' ][0]['extra_files_path'] - os.mkdir( target_directory ) + with open(filename) as fh: + params = json.load(fh) + target_directory = params['output_data'][0]['extra_files_path'] + os.mkdir(target_directory) data_manager_dict = {} dbkey = options.fasta_dbkey - if dbkey in [ None, '', '?' ]: - raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) ) + if dbkey in [None, '', '?']: + raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' 
% (dbkey)) - sequence_id, sequence_name = get_id_name( params, dbkey=dbkey, fasta_description=options.fasta_description ) + sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description) # build the index - build_bowtie_index( data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME, color_space=options.color_space ) + build_bowtie_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME, color_space=options.color_space) # save info to json file - open( filename, 'wb' ).write( json.dumps( data_manager_dict ) ) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_build_bracken_database/data_manager/bracken_build_database.py b/data_managers/data_manager_build_bracken_database/data_manager/bracken_build_database.py index 26ee784680c..21449f9c12d 100644 --- a/data_managers/data_manager_build_bracken_database/data_manager/bracken_build_database.py +++ b/data_managers/data_manager_build_bracken_database/data_manager/bracken_build_database.py @@ -55,7 +55,8 @@ def main(): parser.add_argument('--database-name', dest='database_name', help='Database Name') args = parser.parse_args() - data_manager_input = json.loads(open(args.data_manager_json).read()) + with open(args.data_manager_json) as fh: + data_manager_input = json.load(fh) target_directory = data_manager_input['output_data'][0]['extra_files_path'] @@ -69,7 +70,7 @@ def main(): try: os.mkdir(target_directory) except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir( target_directory ): + if exc.errno == errno.EEXIST and os.path.isdir(target_directory): pass else: raise @@ -82,8 +83,8 @@ def main(): args.database_name, ) - with open(args.data_manager_json, 'w') as out: - out.write(json.dumps(data_manager_output, sort_keys=True)) + with open(args.data_manager_json, 'w') as fh: + json.dump(data_manager_output, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_build_kma_index/data_manager/kma_build_index.py b/data_managers/data_manager_build_kma_index/data_manager/kma_build_index.py index c16adbbca6a..4eda6f825a5 100644 --- a/data_managers/data_manager_build_kma_index/data_manager/kma_build_index.py +++ b/data_managers/data_manager_build_kma_index/data_manager/kma_build_index.py @@ -47,8 +47,8 @@ def kma_build_index(kma_index_args, index_name, target_directory, data_table_nam def main(args): - - data_manager_input = json.loads(open(args.data_manager_json).read()) + with open(args.data_manager_json) as fh: + data_manager_input = json.load(fh) target_directory = data_manager_input['output_data'][0]['extra_files_path'] @@ -78,7 +78,8 @@ def main(args): target_directory, ) - open(args.data_manager_json, 'w').write(json.dumps(data_manager_output)) + with open(args.data_manager_json, 'w') as fh: + json.dump(data_manager_output, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_build_kraken2_database/data_manager/kraken2_build_database.py b/data_managers/data_manager_build_kraken2_database/data_manager/kraken2_build_database.py index ab23615a5bd..3a6266d503f 100644 --- a/data_managers/data_manager_build_kraken2_database/data_manager/kraken2_build_database.py +++ 
b/data_managers/data_manager_build_kraken2_database/data_manager/kraken2_build_database.py @@ -292,14 +292,15 @@ def main(): parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files') args = parser.parse_args() - data_manager_input = json.loads(open(args.data_manager_json).read()) + with open(args.data_manager_json) as fh: + data_manager_input = json.load(fh) target_directory = data_manager_input['output_data'][0]['extra_files_path'] try: - os.mkdir( target_directory ) + os.mkdir(target_directory) except OSError as exc: - if exc.errno == errno.EEXIST and os.path.isdir( target_directory ): + if exc.errno == errno.EEXIST and os.path.isdir(target_directory): pass else: raise @@ -354,7 +355,8 @@ def main(): else: sys.exit("Invalid database type") - open(args.data_manager_json, 'w').write(json.dumps(data_manager_output, sort_keys=True)) + with open(args.data_manager_json, 'w') as fh: + json.dump(data_manager_output, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_build_kraken_database/data_manager/make_json.py b/data_managers/data_manager_build_kraken_database/data_manager/make_json.py index dd16d6833bc..0c37b39ba84 100644 --- a/data_managers/data_manager_build_kraken_database/data_manager/make_json.py +++ b/data_managers/data_manager_build_kraken_database/data_manager/make_json.py @@ -10,14 +10,15 @@ def main(args): data_manager_entry['name'] = args.database data_manager_entry['path'] = '.' data_manager_json = dict(data_tables=dict(kraken_databases=data_manager_entry)) - params = json.loads(open(args.output).read()) + with open(args.output) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) output_path = os.path.join(os.getcwd(), 'kraken-database') for filename in os.listdir(output_path): shutil.move(os.path.join(output_path, filename), target_directory) - with open(args.output, 'w') as out: - out.write(json.dumps(data_manager_json, sort_keys=True)) + with open(args.output, 'w') as fh: + json.dump(data_manager_json, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_bwa_mem_index_builder/data_manager/bwa_mem_index_builder.py b/data_managers/data_manager_bwa_mem_index_builder/data_manager/bwa_mem_index_builder.py index 3658537df72..c1a4b752429 100644 --- a/data_managers/data_manager_bwa_mem_index_builder/data_manager/bwa_mem_index_builder.py +++ b/data_managers/data_manager_bwa_mem_index_builder/data_manager/bwa_mem_index_builder.py @@ -2,11 +2,11 @@ # Dan Blankenberg from __future__ import print_function +import json import optparse import os import subprocess import sys -from json import dumps, loads CHUNK_SIZE = 2**20 TWO_GB = 2**30 * 2 @@ -68,7 +68,8 @@ def main(): filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {} @@ -93,7 +94,8 @@ def main(): ) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_bwameth_index_builder/data_manager/bwameth_index_builder.py b/data_managers/data_manager_bwameth_index_builder/data_manager/bwameth_index_builder.py index 538e68bbf50..042cee0239c 100644 --- 
a/data_managers/data_manager_bwameth_index_builder/data_manager/bwameth_index_builder.py +++ b/data_managers/data_manager_bwameth_index_builder/data_manager/bwameth_index_builder.py @@ -2,19 +2,19 @@ # Based heavily on the HISAT2 data manager wrapper import argparse +import json import os import subprocess import sys -from json import dumps, loads DEFAULT_DATA_TABLE_NAME = "bwameth_indexes" def build_bwameth_index(data_manager_dict, params, args): data_table_name = args.data_table_name - target_directory = params[ 'output_data' ][0]['extra_files_path'] - if not os.path.exists( target_directory ): - os.mkdir( target_directory ) + target_directory = params['output_data'][0]['extra_files_path'] + if not os.path.exists(target_directory): + os.mkdir(target_directory) fasta_base_name = os.path.basename(args.fasta_filename) sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name) os.symlink(os.path.abspath(args.fasta_filename), sym_linked_fasta_filename) @@ -23,41 +23,43 @@ def build_bwameth_index(data_manager_dict, params, args): return_code = proc.wait() if return_code: print("Error building index.", file=sys.stderr) - sys.exit( return_code ) + sys.exit(return_code) data_table_entry = dict(value=args.dbkey, dbkey=args.dbkey, name=args.name, path=sym_linked_fasta_filename) _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) -def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) return data_manager_dict def main(): # Parse Command Line parser = argparse.ArgumentParser() - parser.add_argument( '--output', default=None ) - parser.add_argument( '--fasta_filename', default=None ) - parser.add_argument( '--dbkey', default=None ) - parser.add_argument( '--name', default=None ) - parser.add_argument( '--description', default=None ) - parser.add_argument( '--data_table_name', default=DEFAULT_DATA_TABLE_NAME ) + parser.add_argument('--output', default=None) + parser.add_argument('--fasta_filename', default=None) + parser.add_argument('--dbkey', default=None) + parser.add_argument('--name', default=None) + parser.add_argument('--description', default=None) + parser.add_argument('--data_table_name', default=DEFAULT_DATA_TABLE_NAME) args = parser.parse_args() filename = args.output - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) data_manager_dict = {} - if args.dbkey in [ None, '', '?' ]: + if args.dbkey in [None, '', '?']: raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' 
% (args.dbkey)) # build the index build_bwameth_index(data_manager_dict, params, args) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_cat/data_manager/data_manager_cat.py b/data_managers/data_manager_cat/data_manager/data_manager_cat.py index 8f595b0203f..1688de6c221 100755 --- a/data_managers/data_manager_cat/data_manager/data_manager_cat.py +++ b/data_managers/data_manager_cat/data_manager/data_manager_cat.py @@ -89,7 +89,7 @@ def main(): url_download(args.db_url, args.install_path) else: cat_prepare(args.install_path) - for root, dirs, files in os.walk(args.install_path): + for root, dirs, _ in os.walk(args.install_path): for dname in dirs: if dname.endswith('CAT_database'): cat_db = dname @@ -109,7 +109,8 @@ def main(): taxonomy_folder=os.path.join(cat_dir, tax_db)) dm_dict['data_tables'][data_table].append(data_table_entry) # save info to json file - open(args.config_file, 'w').write(json.dumps(dm_dict)) + with open(args.config_file, 'w') as fh: + json.dump(dm_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_dada2/data_manager/data_manager.py b/data_managers/data_manager_dada2/data_manager/data_manager.py index e99521b04e7..1a2c3cb84ae 100644 --- a/data_managers/data_manager_dada2/data_manager/data_manager.py +++ b/data_managers/data_manager_dada2/data_manager/data_manager.py @@ -100,8 +100,8 @@ def url_download(url, fname, workdir): def remote_dataset(dataset, outjson): - with open(outjson) as jf: - params = json.loads(jf.read()) + with open(outjson) as fh: + params = json.load(fh) workdir = params['output_data'][0]['extra_files_path'] os.mkdir(workdir) @@ -123,8 +123,8 @@ def remote_dataset(dataset, outjson): data_manager_entry['path'] = dataset + ".species" data_manager_json["data_tables"]["dada2_species"] = data_manager_entry - with open(outjson, 'w') as jf: - jf.write(json.dumps(data_manager_json, sort_keys=True)) + with open(outjson, 'w') as fh: + json.dump(data_manager_json, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_fetch_busco/data_manager/data_manager.py b/data_managers/data_manager_fetch_busco/data_manager/data_manager.py index 932952efcca..263029f1d2e 100644 --- a/data_managers/data_manager_fetch_busco/data_manager/data_manager.py +++ b/data_managers/data_manager_fetch_busco/data_manager/data_manager.py @@ -50,14 +50,15 @@ def main(args): data_manager_entry['name'] = args.name data_manager_entry['path'] = '.' 
data_manager_json = dict(data_tables=dict(busco=data_manager_entry)) - params = json.loads(open(args.output).read()) + with open(args.output) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) output_path = os.path.abspath(os.path.join(os.getcwd(), 'busco')) for filename in os.listdir(workdir): shutil.move(os.path.join(output_path, filename), target_directory) - with open(args.output, 'w') as out: - out.write(json.dumps(data_manager_json, sort_keys=True)) + with open(args.output, 'w') as fh: + json.dump(data_manager_json, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_fetch_gene_annotation/data_manager/data_manager.py b/data_managers/data_manager_fetch_gene_annotation/data_manager/data_manager.py index 86088c48007..0e12b078309 100644 --- a/data_managers/data_manager_fetch_gene_annotation/data_manager/data_manager.py +++ b/data_managers/data_manager_fetch_gene_annotation/data_manager/data_manager.py @@ -135,8 +135,8 @@ def main(): } } - with open(os.path.join(args.output), 'w+') as f: - f.write(json.dumps(data_manager_entry)) + with open(os.path.join(args.output), 'w+') as fh: + json.dump(data_manager_entry, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_fetch_index_maf/data_manager/data_manager_fetch_and_index_maf.py b/data_managers/data_manager_fetch_index_maf/data_manager/data_manager_fetch_and_index_maf.py index 95c0c25ebea..b02cb9013fd 100644 --- a/data_managers/data_manager_fetch_index_maf/data_manager/data_manager_fetch_and_index_maf.py +++ b/data_managers/data_manager_fetch_index_maf/data_manager/data_manager_fetch_and_index_maf.py @@ -202,7 +202,7 @@ def main(): # Fetch and index the MAFs index_maf_files(maf_files, maf_path, options, params, target_directory) with open(options.output, 'w') as fh: - fh.write(json.dumps(data_manager_entry)) + json.dump(data_manager_entry, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_fetch_ncbi_taxonomy/data_manager/data_manager.py b/data_managers/data_manager_fetch_ncbi_taxonomy/data_manager/data_manager.py index d66bc20c67a..8ec785813a9 100644 --- a/data_managers/data_manager_fetch_ncbi_taxonomy/data_manager/data_manager.py +++ b/data_managers/data_manager_fetch_ncbi_taxonomy/data_manager/data_manager.py @@ -45,14 +45,15 @@ def main(args): data_manager_entry['name'] = args.name data_manager_entry['path'] = '.' 
data_manager_json = dict(data_tables=dict(ncbi_taxonomy=data_manager_entry)) - params = json.loads(open(args.output).read()) + with open(args.output) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) output_path = os.path.abspath(os.path.join(os.getcwd(), 'taxonomy')) for filename in os.listdir(workdir): shutil.move(os.path.join(output_path, filename), target_directory) - with open(args.output, 'w') as out: - out.write(json.dumps(data_manager_json, sort_keys=True)) + with open(args.output, 'w') as fh: + json.dump(data_manager_json, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_fetch_refseq/data_manager/fetch_refseq.py b/data_managers/data_manager_fetch_refseq/data_manager/fetch_refseq.py index 81be5253bda..09a6929170c 100755 --- a/data_managers/data_manager_fetch_refseq/data_manager/fetch_refseq.py +++ b/data_managers/data_manager_fetch_refseq/data_manager/fetch_refseq.py @@ -141,7 +141,8 @@ def get_refseq_division(division_name, mol_types, output_directory, debug=False, division_names = args.division_names.split(',') mol_types = args.mol_types.split(',') if args.galaxy_datamanager_filename is not None: - dm_opts = json.loads(open(args.galaxy_datamanager_filename).read()) + with open(args.galaxy_datamanager_filename) as fh: + dm_opts = json.load(fh) output_directory = dm_opts['output_data'][0]['extra_files_path'] # take the extra_files_path of the first output parameter data_manager_dict = {} else: @@ -162,4 +163,5 @@ def get_refseq_division(division_name, mol_types, output_directory, debug=False, _add_data_table_entry(data_manager_dict=data_manager_dict, data_table_entry=dict(value=unique_key, dbkey=dbkey, name=desc, path=path), data_table_name='all_fasta') - open(args.galaxy_datamanager_filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(args.galaxy_datamanager_filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) diff --git a/data_managers/data_manager_gatk_picard_index_builder/data_manager/data_manager_gatk_picard_index_builder.py b/data_managers/data_manager_gatk_picard_index_builder/data_manager/data_manager_gatk_picard_index_builder.py index 43f51c1afda..b868a9af47b 100644 --- a/data_managers/data_manager_gatk_picard_index_builder/data_manager/data_manager_gatk_picard_index_builder.py +++ b/data_managers/data_manager_gatk_picard_index_builder/data_manager/data_manager_gatk_picard_index_builder.py @@ -14,7 +14,7 @@ DEFAULT_DATA_TABLE_NAME = "fasta_indexes" -def get_id_name( params, dbkey, fasta_description=None): +def get_id_name(params, dbkey, fasta_description=None): # TODO: ensure sequence_id is unique and does not already appear in location file sequence_id = params['param_dict']['sequence_id'] if not sequence_id: @@ -28,98 +28,93 @@ def get_id_name( params, dbkey, fasta_description=None): return sequence_id, sequence_name -def build_picard_index( data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME ): - fasta_base_name = os.path.split( fasta_filename )[-1] - gatk_sorted_fasta_filename = os.path.join( target_directory, fasta_base_name ) - shutil.copy( fasta_filename, gatk_sorted_fasta_filename ) - _sort_fasta_gatk( gatk_sorted_fasta_filename ) +def build_picard_index(data_manager_dict, fasta_filename, target_directory, dbkey, sequence_id, sequence_name, jar, data_table_name=DEFAULT_DATA_TABLE_NAME): + fasta_base_name = 
os.path.split(fasta_filename)[-1] + gatk_sorted_fasta_filename = os.path.join(target_directory, fasta_base_name) + shutil.copy(fasta_filename, gatk_sorted_fasta_filename) + _sort_fasta_gatk(gatk_sorted_fasta_filename) sam_index_filename = '%s.fai' % gatk_sorted_fasta_filename - if not os.path.exists( sam_index_filename ): - sam_command = [ 'samtools', 'faidx', gatk_sorted_fasta_filename ] - _run_command( sam_command, target_directory ) - args = [ 'java', '-jar', jar, 'R=%s' % gatk_sorted_fasta_filename, 'O=%s.dict' % sequence_id ] - _run_command( args, target_directory ) - data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name ) - _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) - - -def _run_command( command, target_directory ): - tmp_stderr = tempfile.NamedTemporaryFile( prefix="tmp-data-manager-gatk_picard_index_builder-stderr" ) - proc = subprocess.Popen( args=command, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() ) + if not os.path.exists(sam_index_filename): + sam_command = ['samtools', 'faidx', gatk_sorted_fasta_filename] + _run_command(sam_command, target_directory) + args = ['java', '-jar', jar, 'R=%s' % gatk_sorted_fasta_filename, 'O=%s.dict' % sequence_id] + _run_command(args, target_directory) + data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name) + _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) + + +def _run_command(command, target_directory): + tmp_stderr = tempfile.NamedTemporaryFile(prefix="tmp-data-manager-gatk_picard_index_builder-stderr") + proc = subprocess.Popen(args=command, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno()) return_code = proc.wait() if return_code: tmp_stderr.flush() - tmp_stderr.seek( 0 ) - sys.stderr.write( "Error building index:\n" ) + tmp_stderr.seek(0) + sys.stderr.write("Error building index:\n") while True: - chunk = tmp_stderr.read( CHUNK_SIZE ) + chunk = tmp_stderr.read(CHUNK_SIZE) if not chunk: break - sys.stderr.write( chunk ) - sys.exit( return_code ) + sys.stderr.write(chunk) + sys.exit(return_code) tmp_stderr.close() -def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) return data_manager_dict -def _move_and_index_fasta_for_sorting( fasta_filename ): +def _move_and_index_fasta_for_sorting(fasta_filename): unsorted_filename = tempfile.NamedTemporaryFile().name - shutil.move( fasta_filename, unsorted_filename ) + shutil.move(fasta_filename, unsorted_filename) fasta_offsets = {} - unsorted_fh = open( unsorted_filename ) - while True: - offset = unsorted_fh.tell() - line = unsorted_fh.readline() - if not line: - break - if line.startswith( ">" ): - line = line.split( None, 1 )[0][1:] - fasta_offsets[ line ] = offset - unsorted_fh.close() - current_order = [x[1] for x in sorted( ( x[1], x[0] ) for x in fasta_offsets.items() )] 
- return ( unsorted_filename, fasta_offsets, current_order ) - - -def _write_sorted_fasta( sorted_names, fasta_offsets, sorted_fasta_filename, unsorted_fasta_filename ): - unsorted_fh = open( unsorted_fasta_filename ) - sorted_fh = open( sorted_fasta_filename, 'wb+' ) - - for name in sorted_names: - offset = fasta_offsets[ name ] - unsorted_fh.seek( offset ) - sorted_fh.write( unsorted_fh.readline() ) + with open(unsorted_filename) as unsorted_fh: while True: + offset = unsorted_fh.tell() line = unsorted_fh.readline() - if not line or line.startswith( ">" ): + if not line: break - sorted_fh.write( line ) - unsorted_fh.close() - sorted_fh.close() - - -def _int_to_roman( integer ): - if not isinstance( integer, int ): - raise TypeError("expected integer, got %s" % type( integer )) + if line.startswith(">"): + line = line.split(None, 1)[0][1:] + fasta_offsets[line] = offset + current_order = [x[1] for x in sorted((x[1], x[0]) for x in fasta_offsets.items())] + return (unsorted_filename, fasta_offsets, current_order) + + +def _write_sorted_fasta(sorted_names, fasta_offsets, sorted_fasta_filename, unsorted_fasta_filename): + with open(unsorted_fasta_filename, 'rb') as unsorted_fh, open(sorted_fasta_filename, 'wb+') as sorted_fh: + for name in sorted_names: + offset = fasta_offsets[name] + unsorted_fh.seek(offset) + sorted_fh.write(unsorted_fh.readline()) + while True: + line = unsorted_fh.readline() + if not line or line.startswith(b">"): + break + sorted_fh.write(line) + + +def _int_to_roman(integer): + if not isinstance(integer, int): + raise TypeError("expected integer, got %s" % type(integer)) if not 0 < integer < 4000: - raise ValueError("Argument must be between 1 and 3999, got %s" % str( integer )) - ints = ( 1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1 ) - nums = ( 'M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I' ) + raise ValueError("Argument must be between 1 and 3999, got %s" % str(integer)) + ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1) + nums = ('M', 'CM', 'D', 'CD', 'C', 'XC', 'L', 'XL', 'X', 'IX', 'V', 'IV', 'I') result = "" - for i in range( len( ints ) ): - count = int( integer / ints[ i ] ) - result += nums[ i ] * count - integer -= ints[ i ] * count + for i in range(len(ints)): + count = int(integer / ints[i]) + result += nums[i] * count + integer -= ints[i] * count return result -def _sort_fasta_gatk( fasta_filename ): - ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename ) - sorted_names = list(map( str, range( 1, 100 ) )) + list(map( _int_to_roman, range( 1, 100 ) )) + [ 'X', 'Y', 'M' ] +def _sort_fasta_gatk(fasta_filename): + (unsorted_filename, fasta_offsets, current_order) = _move_and_index_fasta_for_sorting(fasta_filename) + sorted_names = list(map(str, range(1, 100))) + list(map(_int_to_roman, range(1, 100))) + ['X', 'Y', 'M'] # detect if we have chrN, or just N has_chr = False for chrom in sorted_names: @@ -130,58 +125,60 @@ def _sort_fasta_gatk( fasta_filename ): if has_chr: sorted_names = ["chr%s" % x for x in sorted_names] else: - sorted_names.insert( 0, "MT" ) - sorted_names.extend( [ "%s_random" % x for x in sorted_names ] ) + sorted_names.insert(0, "MT") + sorted_names.extend(["%s_random" % x for x in sorted_names]) existing_sorted_names = [] for name in sorted_names: # Append each chromosome only once. 
if name in current_order and name not in existing_sorted_names: - existing_sorted_names.append( name ) + existing_sorted_names.append(name) for name in current_order: # TODO: confirm that non-canonical names do not need to be sorted specially if name not in existing_sorted_names: - existing_sorted_names.append( name ) + existing_sorted_names.append(name) if existing_sorted_names == current_order: - shutil.move( unsorted_filename, fasta_filename ) + shutil.move(unsorted_filename, fasta_filename) else: - _write_sorted_fasta( existing_sorted_names, fasta_offsets, fasta_filename, unsorted_filename ) + _write_sorted_fasta(existing_sorted_names, fasta_offsets, fasta_filename, unsorted_filename) def main(): parser = optparse.OptionParser() - parser.add_option( '-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename' ) - parser.add_option( '-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey' ) - parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' ) - parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' ) - parser.add_option( '-j', '--jar', dest='jar', action='store', type="string", default=None, help='GATK .jar file' ) + parser.add_option('-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename') + parser.add_option('-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey') + parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description') + parser.add_option('-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name') + parser.add_option('-j', '--jar', dest='jar', action='store', type="string", default=None, help='GATK .jar file') (options, args) = parser.parse_args() filename = args[0] - params = json.loads( open( filename ).read() ) - target_directory = params[ 'output_data' ][0]['extra_files_path'] - os.mkdir( target_directory ) + with open(filename) as fh: + params = json.load(fh) + target_directory = params['output_data'][0]['extra_files_path'] + os.mkdir(target_directory) data_manager_dict = {} - if options.fasta_dbkey in [ None, '', '?' ]: - raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( options.fasta_dbkey ) ) + if options.fasta_dbkey in [None, '', '?']: + raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' 
% (options.fasta_dbkey)) - sequence_id, sequence_name = get_id_name( params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description ) + sequence_id, sequence_name = get_id_name(params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description) # build the index - build_picard_index( data_manager_dict, - options.fasta_filename, - target_directory, - options.fasta_dbkey, - sequence_id, - sequence_name, - options.jar, - data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME ) + build_picard_index(data_manager_dict, + options.fasta_filename, + target_directory, + options.fasta_dbkey, + sequence_id, + sequence_name, + options.jar, + data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME) # save info to json file - open( filename, 'wb' ).write( json.dumps( data_manager_dict ) ) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_gemini_database_downloader/data_manager/data_manager_gemini_download.py b/data_managers/data_manager_gemini_database_downloader/data_manager/data_manager_gemini_download.py index b0821bb3e75..05b5884822f 100644 --- a/data_managers/data_manager_gemini_database_downloader/data_manager/data_manager_gemini_download.py +++ b/data_managers/data_manager_gemini_database_downloader/data_manager/data_manager_gemini_download.py @@ -16,7 +16,8 @@ def write_gemini_config(config, config_file): def main(): today = datetime.date.today() - params = json.loads(open(sys.argv[1]).read()) + with open(sys.argv[1]) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) @@ -83,8 +84,8 @@ def main(): } # ... and save it to the json results file - with open(sys.argv[1], 'w') as out: - out.write(json.dumps(data_manager_dict)) + with open(sys.argv[1], 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_hisat2_index_builder/data_manager/hisat2_index_builder.py b/data_managers/data_manager_hisat2_index_builder/data_manager/hisat2_index_builder.py index 60499479f3d..dc02d4aa024 100644 --- a/data_managers/data_manager_hisat2_index_builder/data_manager/hisat2_index_builder.py +++ b/data_managers/data_manager_hisat2_index_builder/data_manager/hisat2_index_builder.py @@ -3,11 +3,11 @@ from __future__ import print_function import argparse +import json import os import shlex import subprocess import sys -from json import dumps, loads DEFAULT_DATA_TABLE_NAME = "hisat2_indexes" @@ -66,7 +66,8 @@ def main(): filename = options.output - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) data_manager_dict = {} if options.fasta_dbkey in [None, '', '?']: @@ -78,7 +79,8 @@ def main(): build_hisat_index(data_manager_dict, options, params, sequence_id, sequence_name) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_humann2_database_downloader/data_manager/data_manager_humann2_download.py b/data_managers/data_manager_humann2_database_downloader/data_manager/data_manager_humann2_download.py index 7ab7e10c20c..c1561a5fda6 100644 --- a/data_managers/data_manager_humann2_database_downloader/data_manager/data_manager_humann2_download.py +++ 
b/data_managers/data_manager_humann2_database_downloader/data_manager/data_manager_humann2_download.py @@ -38,7 +38,8 @@ def read_input_json(jsonfile): to create it if necessary. """ - params = json.loads(open(jsonfile).read()) + with open(jsonfile) as fh: + params = json.load(fh) return (params['param_dict'], params['output_data'][0]['extra_files_path']) @@ -50,7 +51,7 @@ def read_input_json(jsonfile): # >>> add_data_table(d,'my_data') # >>> add_data_table_entry(dict(dbkey='hg19',value='human')) # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse')) -# >>> print str(json.dumps(d)) +# >>> print(json.dumps(d)) def create_data_tables_dict(): """Return a dictionary for storing data table information @@ -109,10 +110,10 @@ def download_humann2_db(data_tables, table_name, database, build, target_dir): target_dir: directory to put copy or link to the data file """ - value = "%s-%s-%s" % (database, build, datetime.date.today().isoformat()) + value = "{}-{}-{}".format(database, build, datetime.date.today().isoformat()) db_target_dir = os.path.join(target_dir, database) build_target_dir = os.path.join(db_target_dir, build) - cmd = "humann2_databases --download %s %s %s --update-config no" % ( + cmd = "humann2_databases --download {} {} {} --update-config no".format( database, build, db_target_dir) @@ -171,6 +172,6 @@ def download_humann2_db(data_tables, table_name, database, build, target_dir): # Write output JSON print("Outputting JSON") - print(str(json.dumps(data_tables))) - open(jsonfile, 'wb').write(json.dumps(data_tables)) + with open(jsonfile, 'w') as fh: + json.dump(data_tables, fh, sort_keys=True) print("Done.") diff --git a/data_managers/data_manager_kallisto_index_builder/data_manager/kallisto_index_builder.py b/data_managers/data_manager_kallisto_index_builder/data_manager/kallisto_index_builder.py index 50fd5482ea3..ff06e0c511f 100644 --- a/data_managers/data_manager_kallisto_index_builder/data_manager/kallisto_index_builder.py +++ b/data_managers/data_manager_kallisto_index_builder/data_manager/kallisto_index_builder.py @@ -3,15 +3,15 @@ from __future__ import print_function import argparse +import json import os import subprocess import sys -from json import dumps, loads DEFAULT_DATA_TABLE_NAME = "kallisto_indexes" -def get_id_name( params, dbkey, fasta_description=None): +def get_id_name(params, dbkey, fasta_description=None): # TODO: ensure sequence_id is unique and does not already appear in location file sequence_id = params['param_dict']['sequence_id'] if not sequence_id: @@ -25,57 +25,59 @@ def get_id_name( params, dbkey, fasta_description=None): return sequence_id, sequence_name -def build_kallisto_index( data_manager_dict, options, params, sequence_id, sequence_name ): +def build_kallisto_index(data_manager_dict, options, params, sequence_id, sequence_name): data_table_name = options.data_table_name or DEFAULT_DATA_TABLE_NAME - target_directory = params[ 'output_data' ][0]['extra_files_path'] - if not os.path.exists( target_directory ): - os.mkdir( target_directory ) - fasta_base_name = os.path.split( options.fasta_filename )[-1] - sym_linked_fasta_filename = os.path.join( target_directory, fasta_base_name ) - os.symlink( options.fasta_filename, sym_linked_fasta_filename ) - args = [ 'kallisto', 'index' ] - args.extend( [ sym_linked_fasta_filename, '-i', sequence_id ] ) - proc = subprocess.Popen( args=args, shell=False, cwd=target_directory ) + target_directory = params['output_data'][0]['extra_files_path'] + if not os.path.exists(target_directory): + 
os.mkdir(target_directory) + fasta_base_name = os.path.split(options.fasta_filename)[-1] + sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name) + os.symlink(options.fasta_filename, sym_linked_fasta_filename) + args = ['kallisto', 'index'] + args.extend([sym_linked_fasta_filename, '-i', sequence_id]) + proc = subprocess.Popen(args=args, shell=False, cwd=target_directory) return_code = proc.wait() if return_code: print("Error building index.", file=sys.stderr) - sys.exit( return_code ) - data_table_entry = dict( value=sequence_id, dbkey=options.fasta_dbkey, name=sequence_name, path=sequence_id ) - _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ) + sys.exit(return_code) + data_table_entry = dict(value=sequence_id, dbkey=options.fasta_dbkey, name=sequence_name, path=sequence_id) + _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) -def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ): - data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) - data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] ) - data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) return data_manager_dict def main(): # Parse Command Line parser = argparse.ArgumentParser() - parser.add_argument( '--output', dest='output', action='store', type=str, default=None ) - parser.add_argument( '--fasta_filename', dest='fasta_filename', action='store', type=str, default=None ) - parser.add_argument( '--fasta_dbkey', dest='fasta_dbkey', action='store', type=str, default=None ) - parser.add_argument( '--fasta_description', dest='fasta_description', action='store', type=str, default=None ) - parser.add_argument( '--data_table_name', dest='data_table_name', action='store', type=str, default='kallisto_indexes' ) + parser.add_argument('--output', dest='output', action='store', type=str, default=None) + parser.add_argument('--fasta_filename', dest='fasta_filename', action='store', type=str, default=None) + parser.add_argument('--fasta_dbkey', dest='fasta_dbkey', action='store', type=str, default=None) + parser.add_argument('--fasta_description', dest='fasta_description', action='store', type=str, default=None) + parser.add_argument('--data_table_name', dest='data_table_name', action='store', type=str, default='kallisto_indexes') options = parser.parse_args() filename = options.output - params = loads( open( filename ).read() ) + with open(filename) as fh: + params = json.load(fh) data_manager_dict = {} - if options.fasta_dbkey in [ None, '', '?' ]: - raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( options.fasta_dbkey ) ) + if options.fasta_dbkey in [None, '', '?']: + raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' 
% (options.fasta_dbkey)) - sequence_id, sequence_name = get_id_name( params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description ) + sequence_id, sequence_name = get_id_name(params, dbkey=options.fasta_dbkey, fasta_description=options.fasta_description) # build the index - build_kallisto_index( data_manager_dict, options, params, sequence_id, sequence_name ) + build_kallisto_index(data_manager_dict, options, params, sequence_id, sequence_name) # save info to json file - open( filename, 'w' ).write( dumps( data_manager_dict ) ) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_manual/data_manager/data_manager_manual.py b/data_managers/data_manager_manual/data_manager/data_manager_manual.py index af131c6b6c5..7c56340c0fe 100644 --- a/data_managers/data_manager_manual/data_manager/data_manager_manual.py +++ b/data_managers/data_manager_manual/data_manager/data_manager_manual.py @@ -162,14 +162,14 @@ def main(): filename = args[0] with open(filename) as fh: - params = json.loads(fh.read()) + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] data_table_entries = get_data_table_entries(params['param_dict'], options.galaxy_data_manager_data_path) # save info to json file with open(filename, 'w') as fh: - fh.write(json.dumps({"data_tables": data_table_entries}, sort_keys=True)) + json.dump({"data_tables": data_table_entries}, fh, sort_keys=True) get_file_content(params['param_dict'], target_directory) diff --git a/data_managers/data_manager_mash_sketch_builder/data_manager/mash_sketch_builder.py b/data_managers/data_manager_mash_sketch_builder/data_manager/mash_sketch_builder.py index 3df93442d38..81fd1c3a35e 100644 --- a/data_managers/data_manager_mash_sketch_builder/data_manager/mash_sketch_builder.py +++ b/data_managers/data_manager_mash_sketch_builder/data_manager/mash_sketch_builder.py @@ -59,7 +59,8 @@ def main(): parser.add_argument('--sketch-name', dest='sketch_name', help='Name for sketch') args = parser.parse_args() - data_manager_input = json.loads(open(args.data_manager_json).read()) + with open(args.data_manager_json) as fh: + data_manager_input = json.load(fh) target_directory = data_manager_input['output_data'][0]['extra_files_path'] @@ -88,7 +89,8 @@ def main(): target_directory, ) - open(args.data_manager_json, 'w').write(json.dumps(data_manager_output, sort_keys=True)) + with open(args.data_manager_json, 'w') as fh: + json.dump(data_manager_output, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_metaphlan2_database_downloader/data_manager/data_manager_metaphlan2_download.py b/data_managers/data_manager_metaphlan2_database_downloader/data_manager/data_manager_metaphlan2_download.py index 8510fe13a6a..f1d642ce387 100644 --- a/data_managers/data_manager_metaphlan2_database_downloader/data_manager/data_manager_metaphlan2_download.py +++ b/data_managers/data_manager_metaphlan2_database_downloader/data_manager/data_manager_metaphlan2_download.py @@ -24,7 +24,8 @@ def read_input_json(jsonfile): to create it if necessary. 
""" - params = json.loads(open(jsonfile).read()) + with open(jsonfile) as fh: + params = json.load(fh) return (params['param_dict'], params['output_data'][0]['extra_files_path']) @@ -36,7 +37,7 @@ def read_input_json(jsonfile): # >>> add_data_table(d,'my_data') # >>> add_data_table_entry(dict(dbkey='hg19',value='human')) # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse')) -# >>> print str(json.dumps(d)) +# >>> print(json.dumps(d)) def create_data_tables_dict(): """Return a dictionary for storing data table information @@ -144,7 +145,6 @@ def download_metaphlan2_db(data_tables, build, table_name, target_dir): # Write output JSON print("Outputting JSON") - print(str(json.dumps(data_tables))) - with open(jsonfile, 'wb') as out: - out.write(json.dumps(data_tables)) + with open(jsonfile, 'w') as fh: + json.dump(data_tables, fh, sort_keys=True) print("Done.") diff --git a/data_managers/data_manager_mitos/data_manager/data_manager.py b/data_managers/data_manager_mitos/data_manager/data_manager.py index da2a36260ec..31923b173d7 100644 --- a/data_managers/data_manager_mitos/data_manager/data_manager.py +++ b/data_managers/data_manager_mitos/data_manager/data_manager.py @@ -71,14 +71,14 @@ def main(tpe, db, outjson): data_manager_entry['path'] = path data_manager_json = dict(data_tables=dict(mitos=data_manager_entry)) - with open(outjson) as f: - params = json.loads(f.read()) + with open(outjson) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) # output_path = os.path.abspath(os.path.join(os.getcwd(), 'mitos')) shutil.move(os.path.join(workdir, path), target_directory) with open(outjson, 'w') as fh: - fh.write(json.dumps(data_manager_json, sort_keys=True)) + json.dump(data_manager_json, fh, sort_keys=True) if __name__ == '__main__': diff --git a/data_managers/data_manager_mothur_toolsuite/data_manager/fetch_mothur_reference_data.py b/data_managers/data_manager_mothur_toolsuite/data_manager/fetch_mothur_reference_data.py index 16b49d50daf..314855a03d7 100755 --- a/data_managers/data_manager_mothur_toolsuite/data_manager/fetch_mothur_reference_data.py +++ b/data_managers/data_manager_mothur_toolsuite/data_manager/fetch_mothur_reference_data.py @@ -160,7 +160,8 @@ def read_input_json(jsonfile): to create it if necessary. 
""" - params = json.loads(open(jsonfile).read()) + with open(jsonfile) as fh: + params = json.load(fh) return (params['param_dict'], params['output_data'][0]['extra_files_path']) @@ -172,7 +173,7 @@ def read_input_json(jsonfile): # >>> add_data_table(d,'my_data') # >>> add_data_table_entry(dict(dbkey='hg19',value='human')) # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse')) -# >>> print str(json.dumps(d)) +# >>> print(json.dumps(d)) def create_data_tables_dict(): """Return a dictionary for storing data table information @@ -235,7 +236,8 @@ def download_file(url, target=None, wd=None): if wd: target = os.path.join(wd, target) print("Saving to %s" % target) - open(target, 'wb').write(urllib2.urlopen(url).read()) + with open(target, 'wb') as fh: + fh.write(urllib2.urlopen(url).read()) return target @@ -258,31 +260,32 @@ def unpack_zip_archive(filen, wd=None): print("%s: not ZIP formatted file") return [filen] file_list = [] - z = zipfile.ZipFile(filen) - for name in z.namelist(): - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - if wd: - target = os.path.join(wd, name) - else: - target = name - if name.endswith('/'): - # Make directory - print("Creating dir %s" % target) - try: - os.makedirs(target) - except OSError: - pass - else: - # Extract file - print("Extracting %s" % name) - try: - os.makedirs(os.path.dirname(target)) - except OSError: - pass - open(target, 'wb').write(z.read(name)) - file_list.append(target) + with zipfile.ZipFile(filen) as z: + for name in z.namelist(): + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print("Ignoring %s" % name) + continue + if wd: + target = os.path.join(wd, name) + else: + target = name + if name.endswith('/'): + # Make directory + print("Creating dir %s" % target) + try: + os.makedirs(target) + except OSError: + pass + else: + # Extract file + print("Extracting %s" % name) + try: + os.makedirs(os.path.dirname(target)) + except OSError: + pass + with open(target, 'wb') as fh: + fh.write(z.read(name)) + file_list.append(target) print("Removing %s" % filen) os.remove(filen) return file_list @@ -308,20 +311,20 @@ def unpack_tar_archive(filen, wd=None): if not tarfile.is_tarfile(filen): print("%s: not TAR file") return [filen] - t = tarfile.open(filen) - for name in t.getnames(): - # Check for unwanted files - if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): - print("Ignoring %s" % name) - continue - # Extract file - print("Extracting %s" % name) - t.extract(name, wd) - if wd: - target = os.path.join(wd, name) - else: - target = name - file_list.append(target) + with tarfile.open(filen) as t: + for name in t.getnames(): + # Check for unwanted files + if reduce(lambda x, y: x or name.startswith(y), IGNORE_PATHS, False): + print("Ignoring %s" % name) + continue + # Extract file + print("Extracting %s" % name) + t.extract(name, wd) + if wd: + target = os.path.join(wd, name) + else: + target = name + file_list.append(target) print("Removing %s" % filen) os.remove(filen) return file_list @@ -556,6 +559,6 @@ def import_from_server(data_tables, target_dir, paths, description, link_to_data import_from_server(data_tables, target_dir, paths, description, link_to_data=options.link_to_data) # Write output JSON print("Outputting JSON") - print(json.dumps(data_tables)) - open(jsonfile, 'w').write(json.dumps(data_tables, sort_keys=True)) + with open(jsonfile, 'w') as fh: + json.dump(data_tables, fh, sort_keys=True) print("Done.") diff --git 
a/data_managers/data_manager_ncbi_taxonomy_sqlite/data_manager/data_manager_ncbi_taxonomy_sqlite.py b/data_managers/data_manager_ncbi_taxonomy_sqlite/data_manager/data_manager_ncbi_taxonomy_sqlite.py index 1a4a9278f7a..517fb65f7b8 100644 --- a/data_managers/data_manager_ncbi_taxonomy_sqlite/data_manager/data_manager_ncbi_taxonomy_sqlite.py +++ b/data_managers/data_manager_ncbi_taxonomy_sqlite/data_manager/data_manager_ncbi_taxonomy_sqlite.py @@ -55,7 +55,8 @@ def build_sqlite(taxonomy_dir, output_directory, name=None, description=None): ) args = parser.parse_args() - config = json.load(open(args.galaxy_datamanager_filename)) + with open(args.galaxy_datamanager_filename) as fh: + config = json.load(fh) output_directory = config.get("output_data", [{}])[0].get("extra_files_path", None) if output_directory is None: output_directory = args.output_directory @@ -64,10 +65,7 @@ def build_sqlite(taxonomy_dir, output_directory, name=None, description=None): os.makedirs(output_directory) data_manager_dict = {} - data_manager_dict["data_tables"] = json.load( - open(args.galaxy_datamanager_filename) - ).get("data_tables", {}) - data_manager_dict["data_tables"] = data_manager_dict.get("data_tables", {}) + data_manager_dict["data_tables"] = config.get("data_tables", {}) data_manager_dict["data_tables"][DATA_TABLE_NAME] = data_manager_dict[ "data_tables" ].get(DATA_TABLE_NAME, []) @@ -75,5 +73,5 @@ def build_sqlite(taxonomy_dir, output_directory, name=None, description=None): data = build_sqlite(args.taxonomy_dir, output_directory, args.name, args.description) data_manager_dict["data_tables"][DATA_TABLE_NAME].extend(data) - print(json.dumps(data_manager_dict)) - json.dump(data_manager_dict, open(args.galaxy_datamanager_filename, "w")) + with open(args.galaxy_datamanager_filename, "w") as fh: + json.dump(data_manager_dict, fh, sort_keys=True) diff --git a/data_managers/data_manager_picard_index_builder/data_manager/picard_index_builder.py b/data_managers/data_manager_picard_index_builder/data_manager/picard_index_builder.py index 13e3d3157cb..ad522c4e942 100644 --- a/data_managers/data_manager_picard_index_builder/data_manager/picard_index_builder.py +++ b/data_managers/data_manager_picard_index_builder/data_manager/picard_index_builder.py @@ -2,11 +2,11 @@ # Dan Blankenberg from __future__ import print_function +import json import optparse import os import subprocess import sys -from json import dumps, loads DEFAULT_DATA_TABLE_NAME = "picard_indexes" @@ -58,7 +58,8 @@ def main(): filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {} @@ -75,7 +76,8 @@ def main(): sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_plant_tribes_scaffolds_downloader/data_manager/data_manager_plant_tribes_scaffolds_download.py b/data_managers/data_manager_plant_tribes_scaffolds_downloader/data_manager/data_manager_plant_tribes_scaffolds_download.py index 7b4e7c9c44b..f15140aad5f 100644 --- a/data_managers/data_manager_plant_tribes_scaffolds_downloader/data_manager/data_manager_plant_tribes_scaffolds_download.py +++ 
b/data_managers/data_manager_plant_tribes_scaffolds_downloader/data_manager/data_manager_plant_tribes_scaffolds_download.py @@ -115,7 +115,7 @@ def download(target_directory, web_url, config_web_url, description, data_table_ args = parser.parse_args() with open(args.out_file) as fh: - params = json.loads(fh.read()) + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] make_directory(target_directory) @@ -128,4 +128,4 @@ def download(target_directory, web_url, config_web_url, description, data_table_ data_manager_dict = download(target_directory, args.web_url, args.config_web_url, description) # Write the JSON output dataset. with open(args.out_file, 'w') as fh: - fh.write(json.dumps(data_manager_dict, sort_keys=True)) + json.dump(data_manager_dict, fh, sort_keys=True) diff --git a/data_managers/data_manager_primer_scheme_bedfiles/data_manager/install_primer_scheme_bedfiles.py b/data_managers/data_manager_primer_scheme_bedfiles/data_manager/install_primer_scheme_bedfiles.py index 857b16fd0d1..2ad8b9848f2 100644 --- a/data_managers/data_manager_primer_scheme_bedfiles/data_manager/install_primer_scheme_bedfiles.py +++ b/data_managers/data_manager_primer_scheme_bedfiles/data_manager/install_primer_scheme_bedfiles.py @@ -135,7 +135,8 @@ def __call__(self, parser, namespace, values, option_string=None): ) exit(1) - config = json.load(open(args.galaxy_datamanager_filename)) + with open(args.galaxy_datamanager_filename) as fh: + config = json.load(fh) output_directory = config.get("output_data", [{}])[0].get("extra_files_path", None) if output_directory is None: output_directory = args.output_directory @@ -144,10 +145,7 @@ def __call__(self, parser, namespace, values, option_string=None): os.makedirs(output_directory) data_manager_dict = {} - data_manager_dict["data_tables"] = json.load( - open(args.galaxy_datamanager_filename) - ).get("data_tables", {}) - data_manager_dict["data_tables"] = data_manager_dict.get("data_tables", {}) + data_manager_dict["data_tables"] = config.get("data_tables", {}) data_manager_dict["data_tables"][DATA_TABLE_NAME] = data_manager_dict[ "data_tables" ].get(DATA_TABLE_NAME, []) @@ -163,5 +161,5 @@ def __call__(self, parser, namespace, values, option_string=None): ) data_manager_dict["data_tables"][DATA_TABLE_NAME].extend(data) - print(data_manager_dict) - json.dump(data_manager_dict, open(args.galaxy_datamanager_filename, "w")) + with open(args.galaxy_datamanager_filename, "w") as fh: + json.dump(data_manager_dict, fh, sort_keys=True) diff --git a/data_managers/data_manager_qiime_database_downloader/data_manager/data_manager_qiime_download.py b/data_managers/data_manager_qiime_database_downloader/data_manager/data_manager_qiime_download.py index e4dfda99eac..a8ab3b5e33d 100644 --- a/data_managers/data_manager_qiime_database_downloader/data_manager/data_manager_qiime_download.py +++ b/data_managers/data_manager_qiime_database_downloader/data_manager/data_manager_qiime_download.py @@ -69,7 +69,8 @@ def read_input_json(jsonfile): to create it if necessary. 
""" - params = json.loads(open(jsonfile).read()) + with open(jsonfile) as fh: + params = json.load(fh) return (params['param_dict'], params['output_data'][0]['extra_files_path']) @@ -81,7 +82,7 @@ def read_input_json(jsonfile): # >>> add_data_table(d,'my_data') # >>> add_data_table_entry(dict(dbkey='hg19',value='human')) # >>> add_data_table_entry(dict(dbkey='mm9',value='mouse')) -# >>> print str(json.dumps(d)) +# >>> print(json.dumps(d)) def create_data_tables_dict(): """Return a dictionary for storing data table information @@ -126,7 +127,8 @@ def get_ftp_file(ftp, filename): """ """ try: - ftp.retrbinary("RETR " + filename, open(filename, 'wb').write) + with open(filename, 'wb') as fh: + ftp.retrbinary("RETR " + filename, fh.write) except Exception: print("Error") @@ -180,14 +182,12 @@ def extract_archive(filepath, ext, db): """ archive_content_path = "tmp" if ext == "tar.gz" or ext == "tgz": - tar = tarfile.open(filepath) - tar.extractall(path=archive_content_path) - tar.close() + with tarfile.open(filepath) as tar: + tar.extractall(path=archive_content_path) archive_content_path = find_archive_content_path(archive_content_path) elif ext == "zip": - zip_ref = zipfile.ZipFile(filepath, 'r') - zip_ref.extractall(archive_content_path) - zip_ref.close() + with zipfile.ZipFile(filepath, 'r') as zip_ref: + zip_ref.extractall(archive_content_path) archive_content_path = find_archive_content_path(archive_content_path) return archive_content_path @@ -372,7 +372,6 @@ def download_db(data_tables, db, version, target_dir): # Write output JSON print("Outputting JSON") - print(str(json.dumps(data_tables))) with open(jsonfile, 'w') as out: - json.dump(data_tables, out) + json.dump(data_tables, out, sort_keys=True) print("Done.") diff --git a/data_managers/data_manager_rsync_g2/data_manager/data_manager_rsync.py b/data_managers/data_manager_rsync_g2/data_manager/data_manager_rsync.py index 5f24d6c5d3b..f29845115ec 100644 --- a/data_managers/data_manager_rsync_g2/data_manager/data_manager_rsync.py +++ b/data_managers/data_manager_rsync_g2/data_manager/data_manager_rsync.py @@ -4,6 +4,7 @@ import base64 import datetime +import json import logging import optparse import os @@ -76,7 +77,7 @@ def exec_before_job(app, inp_data, out_data, param_dict, tool=None, **kwd): data_manager = app.data_managers.get_manager(tool.data_manager_id, None) data_table_entries = get_data_table_entries(param_dict) data_tables = load_data_tables_from_url(data_table_class=app.tool_data_tables.__class__).get('data_tables') - for data_table_name, entries in data_table_entries.items(): + for data_table_name in data_table_entries.keys(): # get data table managed by this data Manager has_data_table = app.tool_data_tables.get_tables().get(str(data_table_name)) if has_data_table: @@ -128,7 +129,7 @@ def rsync_urljoin(base, url): return "%s/%s" % (base, url) -def rsync_list_dir(server, dir=None, skip_names=[]): +def rsync_list_dir(server, dir=None): # drwxr-xr-x 50 2014/05/16 20:58:11 . 
if dir: dir = rsync_urljoin(server, dir) @@ -158,8 +159,6 @@ def rsync_list_dir(server, dir=None, skip_names=[]): line = line.strip() time, line = line.split(None, 1) name = line.strip() - if name in skip_names: - continue size = line.strip() rval[name] = dict(name=name, permissions=perms, bytes=size, date=date, time=time) rsync_response.close() @@ -394,7 +393,8 @@ def main(): filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {} @@ -405,7 +405,8 @@ def main(): data_manager_dict = fulfill_data_table_entries(data_table_entries, data_manager_dict, target_directory) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_sam_fasta_index_builder/data_manager/data_manager_sam_fasta_index_builder.py b/data_managers/data_manager_sam_fasta_index_builder/data_manager/data_manager_sam_fasta_index_builder.py index dfa0d90a75d..3520e0e4f8b 100644 --- a/data_managers/data_manager_sam_fasta_index_builder/data_manager/data_manager_sam_fasta_index_builder.py +++ b/data_managers/data_manager_sam_fasta_index_builder/data_manager/data_manager_sam_fasta_index_builder.py @@ -71,7 +71,8 @@ def main(): filename = args[0] - params = json.loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {} @@ -93,7 +94,8 @@ def main(): ) # save info to json file - open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.py b/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.py index e4ee9f10e82..882b2f35624 100755 --- a/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.py +++ b/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_databases.py @@ -10,17 +10,16 @@ def fetch_databases(data_manager_dict, target_directory): if not os.path.exists(target_directory): os.makedirs(target_directory) databases_path = os.path.join(target_directory, 'databases.out') - databases_output = open(databases_path, 'w') args = ['snpEff', 'databases'] - return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) + with open(databases_path, 'w') as databases_output: + return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) if return_code: sys.exit(return_code) - databases_output.close() data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) data_manager_dict['data_tables']['snpeffv_databases'] = data_manager_dict['data_tables'].get('snpeffv_databases', []) data_table_entries = [] with open(databases_path, 'r') as fh: - for i, line in enumerate(fh): + for line in fh: fields = line.split('\t') if len(fields) >= 2: genome_version = fields[0].strip() @@ -41,7 +40,8 @@ def main(): filename = args[0] - params = json.loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) 
data_manager_dict = {} @@ -50,7 +50,8 @@ def main(): data_manager_dict = fetch_databases(data_manager_dict, target_directory) # save info to json file - open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_download.py b/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_download.py index 3f9d09a5b89..8b611a2b86a 100755 --- a/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_download.py +++ b/data_managers/data_manager_snpeff/data_manager/data_manager_snpEff_download.py @@ -7,36 +7,28 @@ import sys -def stop_err(msg): - sys.stderr.write(msg) - sys.exit(1) - - def fetch_databases(genome_list=None): snpDBs = dict() databases_path = 'databases.out' - databases_output = open(databases_path, 'w') args = ['snpEff', 'databases'] - return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) + with open(databases_path, 'w') as databases_output: + return_code = subprocess.call(args=args, shell=False, stdout=databases_output.fileno()) if return_code: sys.exit(return_code) - databases_output.close() try: - fh = open(databases_path, 'r') - for i, line in enumerate(fh): - fields = line.split('\t') - if len(fields) >= 2: - genome_version = fields[0].strip() - if genome_list and genome_version not in genome_list: - continue - if genome_version.startswith("Genome") or genome_version.startswith("-"): - continue - description = fields[1].strip() - snpDBs[genome_version] = description + with open(databases_path, 'r') as fh: + for line in fh: + fields = line.split('\t') + if len(fields) >= 2: + genome_version = fields[0].strip() + if genome_list and genome_version not in genome_list: + continue + if genome_version.startswith("Genome") or genome_version.startswith("-"): + continue + description = fields[1].strip() + snpDBs[genome_version] = description except Exception as e: - stop_err('Error parsing %s %s\n' % (databases_path, str(e))) - else: - fh.close() + sys.exit('Error parsing %s %s\n' % (databases_path, str(e))) return snpDBs @@ -55,19 +47,17 @@ def getOrganismNames(genomes, organisms): def getSnpeffVersion(): snpeff_version = 'SnpEff ?.?' 
stderr_path = 'snpeff.err' -    stderr_fh = open(stderr_path, 'w')      args = ['snpEff', '-h'] -    return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno()) +    with open(stderr_path, 'w') as stderr_fh: +        return_code = subprocess.call(args=args, shell=False, stderr=stderr_fh.fileno())      if return_code != 255:          sys.exit(return_code) -    stderr_fh.close() -    fh = open(stderr_path, 'r') -    for line in fh: -        m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) -        if m: -            snpeff_version = m.groups()[0] + m.groups()[1] -            break -    fh.close() +    with open(stderr_path) as fh: +        for line in fh: +            m = re.match(r'^[Ss]npEff version (SnpEff)\s*(\d+\.\d+).*$', line) +            if m: +                snpeff_version = m.groups()[0] + m.groups()[1] +                break      return snpeff_version   @@ -97,7 +87,7 @@ def download_database(data_manager_dict, target_directory, genome_version, organ      snpeff_version = getSnpeffVersion()      key = snpeff_version + '_' + genome_version      if os.path.isdir(genome_path): -        for root, dirs, files in os.walk(genome_path): +        for _, _, files in os.walk(genome_path):              for fname in files:                  if fname.startswith('snpEffectPredictor'):                      # if snpEffectPredictor.bin download succeeded @@ -128,7 +118,8 @@ def main():      filename = args[0]   -    params = json.loads(open(filename).read()) +    with open(filename) as fh: +        params = json.load(fh)      target_directory = params['output_data'][0]['extra_files_path']      os.mkdir(target_directory)      data_manager_dict = {} @@ -138,7 +129,8 @@ def main():          download_database(data_manager_dict, target_directory, genome_version, organism)       # save info to json file -    open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) +    with open(filename, 'w') as fh: +        json.dump(data_manager_dict, fh, sort_keys=True)   if __name__ == "__main__": diff --git a/data_managers/data_manager_snpsift_dbnsfp/data_manager/data_manager_snpsift_dbnsfp.py b/data_managers/data_manager_snpsift_dbnsfp/data_manager/data_manager_snpsift_dbnsfp.py index 68f236e3d60..e73dd699f68 100755 --- a/data_managers/data_manager_snpsift_dbnsfp/data_manager/data_manager_snpsift_dbnsfp.py +++ b/data_managers/data_manager_snpsift_dbnsfp/data_manager/data_manager_snpsift_dbnsfp.py @@ -47,11 +47,6 @@  dbNSFP_name_pat = r'dbNSFP(v|_light)?(\d*).*?'   -def stop_err(msg): -    sys.stderr.write(msg) -    sys.exit(1) - -  def get_nsfp_genome_version(name):      genome_version = 'hg19'      dbNSFP_name_pat = r'(dbscSNV|dbNSFP(v|_light)?)(\d*).*?' 
@@ -67,18 +62,14 @@ def get_nsfp_genome_version(name): def get_annotations(gzip_path): annotations = None - fh = None try: - fh = gzip.open(gzip_path, 'r') - buf = fh.read(10000) + with gzip.open(gzip_path, 'r') as fh: + buf = fh.read(10000) lines = buf.splitlines() headers = lines[0].split('\t') annotations = ','.join([x.strip() for x in headers[4:]]) except Exception as e: - stop_err('Error Reading annotations %s : %s' % (gzip_path, e)) - finally: - if fh: - fh.close() + sys.exit('Error Reading annotations %s : %s' % (gzip_path, e)) return annotations @@ -157,7 +148,8 @@ def main(): (options, args) = parser.parse_args() filename = args[0] - params = json.loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] if not os.path.exists(target_directory): os.mkdir(target_directory) @@ -183,7 +175,7 @@ def main(): bzip_path = os.path.join(target_directory, bgzip_name) db_name = re.sub('(.txt)?.gz$', '', bgzip_name) else: - stop_err('Either --softgenetics or --dbnsfp_tabular required') + sys.exit('Either --softgenetics or --dbnsfp_tabular required') if dbnsfp_tsv: bgzip_name = '%s.txt.gz' % db_name bzip_path = os.path.join(target_directory, bgzip_name) @@ -196,7 +188,8 @@ def main(): data_manager_dict['data_tables'][data_table].append(data_table_entry) # save info to json file - open(filename, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_star_index_builder/data_manager/rna_star_index_builder.py b/data_managers/data_manager_star_index_builder/data_manager/rna_star_index_builder.py index f73f247bed6..9658cf1f3f0 100644 --- a/data_managers/data_manager_star_index_builder/data_manager/rna_star_index_builder.py +++ b/data_managers/data_manager_star_index_builder/data_manager/rna_star_index_builder.py @@ -41,7 +41,8 @@ def main(): ] } } - open(args.config_file, 'w').write(json.dumps(data_manager_dict, sort_keys=True)) + with open(args.config_file, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_twobit_builder/data_manager/twobit_builder.py b/data_managers/data_manager_twobit_builder/data_manager/twobit_builder.py index cc91b678a01..bb1a8f9c173 100644 --- a/data_managers/data_manager_twobit_builder/data_manager/twobit_builder.py +++ b/data_managers/data_manager_twobit_builder/data_manager/twobit_builder.py @@ -2,12 +2,12 @@ # Dan Blankenberg from __future__ import print_function +import json import optparse import os import subprocess import sys import tempfile -from json import dumps, loads CHUNK_SIZE = 2**20 # 1mb @@ -75,7 +75,8 @@ def main(): filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) @@ -92,7 +93,8 @@ def main(): build_twobit(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name) # save info to json file - open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True)) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__": diff --git a/data_managers/data_manager_vsnp_dnaprints/data_manager/vsnp_dnaprints_fetcher.py b/data_managers/data_manager_vsnp_dnaprints/data_manager/vsnp_dnaprints_fetcher.py 
index 9ed85549979..f5e04ab0603 100644 --- a/data_managers/data_manager_vsnp_dnaprints/data_manager/vsnp_dnaprints_fetcher.py +++ b/data_managers/data_manager_vsnp_dnaprints/data_manager/vsnp_dnaprints_fetcher.py @@ -35,7 +35,7 @@ def url_download(url, workdir): def download(dbkey, name, url, out_file): with open(out_file) as fh: - params = json.loads(fh.read()) + params = json.load(fh) workdir = params['output_data'][0]['extra_files_path'] os.makedirs(workdir) @@ -51,7 +51,7 @@ def download(dbkey, name, url, out_file): data_manager_json["data_tables"]["vsnp_dnaprints"] = data_manager_entry with open(out_file, 'w') as fh: - fh.write(json.dumps(data_manager_json, sort_keys=True)) + json.dump(data_manager_json, fh, sort_keys=True) parser = argparse.ArgumentParser() diff --git a/data_managers/data_manager_vsnp_excel/data_manager/vsnp_excel_fetcher.py b/data_managers/data_manager_vsnp_excel/data_manager/vsnp_excel_fetcher.py index 992a65b4fcb..f80e059c17b 100644 --- a/data_managers/data_manager_vsnp_excel/data_manager/vsnp_excel_fetcher.py +++ b/data_managers/data_manager_vsnp_excel/data_manager/vsnp_excel_fetcher.py @@ -35,7 +35,7 @@ def url_download(url, workdir): def download(dbkey, name, url, out_file): with open(out_file) as fh: - params = json.loads(fh.read()) + params = json.load(fh) workdir = params['output_data'][0]['extra_files_path'] os.makedirs(workdir) @@ -51,7 +51,7 @@ def download(dbkey, name, url, out_file): data_manager_json["data_tables"]["vsnp_excel"] = data_manager_entry with open(out_file, 'w') as fh: - fh.write(json.dumps(data_manager_json, sort_keys=True)) + json.dump(data_manager_json, fh, sort_keys=True) parser = argparse.ArgumentParser() diff --git a/data_managers/data_manager_vsnp_genbank/data_manager/vsnp_genbank_fetcher.py b/data_managers/data_manager_vsnp_genbank/data_manager/vsnp_genbank_fetcher.py index 3caf531d202..886f105299e 100644 --- a/data_managers/data_manager_vsnp_genbank/data_manager/vsnp_genbank_fetcher.py +++ b/data_managers/data_manager_vsnp_genbank/data_manager/vsnp_genbank_fetcher.py @@ -35,7 +35,7 @@ def url_download(url, workdir): def download(dbkey, name, url, out_file): with open(out_file) as fh: - params = json.loads(fh.read()) + params = json.load(fh) workdir = params['output_data'][0]['extra_files_path'] os.makedirs(workdir) @@ -51,7 +51,7 @@ def download(dbkey, name, url, out_file): data_manager_json["data_tables"]["vsnp_genbank"] = data_manager_entry with open(out_file, 'w') as fh: - fh.write(json.dumps(data_manager_json, sort_keys=True)) + json.dump(data_manager_json, fh, sort_keys=True) parser = argparse.ArgumentParser()
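
The pattern applied in every file above is the same: read the data manager's input JSON with json.load() inside a with block, collect entries under data_tables, and write the result back with json.dump(). A minimal standalone sketch of that flow, assuming a hypothetical table name and entry (illustrative only, not part of the patch):

import json
import os


def update_data_manager_json(json_path, entry, table_name='example_indexes'):
    # Read the parameters Galaxy passes to the data manager tool.
    with open(json_path) as fh:
        params = json.load(fh)

    # Place any downloaded or built files under extra_files_path.
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.exists(target_directory):
        os.mkdir(target_directory)

    # Register the new entry under the named data table.
    data_manager_dict = {'data_tables': {table_name: [entry]}}

    # Overwrite the same JSON file with the collected data table entries.
    with open(json_path, 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)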