Fix file open mode for Python 3 and PEP-8 errors (#3323)
Also:
- prefer json `load()`/`dump()` to `loads()`/`dumps()`
- use `with` statement to open/close files

Close #2032
nsoranzo authored Nov 22, 2020
1 parent 4fa1ca5 commit 02d2967
Showing 40 changed files with 402 additions and 388 deletions.
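The same two fixes repeat across all 40 files. A minimal sketch of the pattern follows (illustrative only; `filename` and `params` stand in for each script's own variables):

    import json

    filename = 'params.json'  # hypothetical path standing in for Galaxy's JSON parameter file
    params = {'output_data': [{'extra_files_path': '/tmp/extra'}]}  # shape the scripts below expect

    # Before: Python 2-era idioms -- the handle is never closed and the whole
    # file is read into memory just to parse it:
    #     params = json.loads(open(filename).read())
    #     open(filename, 'w').write(json.dumps(params, sort_keys=True))

    # After: the with statement closes the handle even on error, and
    # json.load()/json.dump() work directly on the open file object.
    with open(filename, 'w') as fh:
        json.dump(params, fh, sort_keys=True)
    with open(filename) as fh:
        params = json.load(fh)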
@@ -2,11 +2,11 @@
 # Dan Blankenberg
 from __future__ import print_function
 
+import json
 import optparse
 import os
 import subprocess
 import sys
-from json import dumps, loads
 
 DEFAULT_DATA_TABLE_NAMES = ["bowtie2_indexes"]
 
@@ -61,7 +61,8 @@ def main():
 
     filename = args[0]
 
-    params = loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
@@ -77,8 +78,8 @@ def main():
     build_bowtie2_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_names=options.data_table_name or DEFAULT_DATA_TABLE_NAMES)
 
     # save info to json file
-    with open(filename, 'w') as json_out:
-        json_out.write(dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

@@ -13,7 +13,7 @@
 DEFAULT_DATA_TABLE_NAME = "bowtie_indexes"
 
 
-def get_id_name( params, dbkey, fasta_description=None):
+def get_id_name(params, dbkey, fasta_description=None):
     # TODO: ensure sequence_id is unique and does not already appear in location file
     sequence_id = params['param_dict']['sequence_id']
     if not sequence_id:
@@ -27,70 +27,72 @@ def get_id_name( params, dbkey, fasta_description=None):
     return sequence_id, sequence_name
 
 
-def build_bowtie_index( data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME, color_space=False ):
+def build_bowtie_index(data_manager_dict, fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME, color_space=False):
     # TODO: allow multiple FASTA input files
-    fasta_base_name = os.path.split( fasta_filename )[-1]
-    sym_linked_fasta_filename = os.path.join( target_directory, fasta_base_name )
-    os.symlink( fasta_filename, sym_linked_fasta_filename )
-    args = [ 'bowtie-build' ]
+    fasta_base_name = os.path.split(fasta_filename)[-1]
+    sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
+    os.symlink(fasta_filename, sym_linked_fasta_filename)
+    args = ['bowtie-build']
     if color_space:
-        args.append( '-C' )
-    args.append( sym_linked_fasta_filename)
-    args.append( fasta_base_name )
-    args.append( sym_linked_fasta_filename )
-    tmp_stderr = tempfile.NamedTemporaryFile( prefix="tmp-data-manager-bowtie-index-builder-stderr" )
-    proc = subprocess.Popen( args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() )
+        args.append('-C')
+    args.append(sym_linked_fasta_filename)
+    args.append(fasta_base_name)
+    args.append(sym_linked_fasta_filename)
+    tmp_stderr = tempfile.NamedTemporaryFile(prefix="tmp-data-manager-bowtie-index-builder-stderr")
+    proc = subprocess.Popen(args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno())
     return_code = proc.wait()
     if return_code:
         tmp_stderr.flush()
         tmp_stderr.seek(0)
         print("Error building index:", file=sys.stderr)
         while True:
-            chunk = tmp_stderr.read( CHUNK_SIZE )
+            chunk = tmp_stderr.read(CHUNK_SIZE)
             if not chunk:
                 break
-            sys.stderr.write( chunk )
-        sys.exit( return_code )
+            sys.stderr.write(chunk)
+        sys.exit(return_code)
     tmp_stderr.close()
-    data_table_entry = dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name )
-    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )
+    data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_name)
+    _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
 
 
-def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
-    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-    data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )
-    data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
+    data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
     return data_manager_dict
 
 
 def main():
     parser = optparse.OptionParser()
-    parser.add_option( '-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename' )
-    parser.add_option( '-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey' )
-    parser.add_option( '-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description' )
-    parser.add_option( '-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name' )
-    parser.add_option( '-c', '--color_space', dest='color_space', action='store_true', default=False, help='color_space' )
+    parser.add_option('-f', '--fasta_filename', dest='fasta_filename', action='store', type="string", default=None, help='fasta_filename')
+    parser.add_option('-d', '--fasta_dbkey', dest='fasta_dbkey', action='store', type="string", default=None, help='fasta_dbkey')
+    parser.add_option('-t', '--fasta_description', dest='fasta_description', action='store', type="string", default=None, help='fasta_description')
+    parser.add_option('-n', '--data_table_name', dest='data_table_name', action='store', type="string", default=None, help='data_table_name')
+    parser.add_option('-c', '--color_space', dest='color_space', action='store_true', default=False, help='color_space')
     (options, args) = parser.parse_args()
 
     filename = args[0]
 
-    params = json.loads( open( filename ).read() )
-    target_directory = params[ 'output_data' ][0]['extra_files_path']
-    os.mkdir( target_directory )
+    with open(filename) as fh:
+        params = json.load(fh)
+    target_directory = params['output_data'][0]['extra_files_path']
+    os.mkdir(target_directory)
     data_manager_dict = {}
 
     dbkey = options.fasta_dbkey
 
-    if dbkey in [ None, '', '?' ]:
-        raise Exception( '"%s" is not a valid dbkey. You must specify a valid dbkey.' % ( dbkey ) )
+    if dbkey in [None, '', '?']:
+        raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey))
 
-    sequence_id, sequence_name = get_id_name( params, dbkey=dbkey, fasta_description=options.fasta_description )
+    sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description)
 
     # build the index
-    build_bowtie_index( data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME, color_space=options.color_space )
+    build_bowtie_index(data_manager_dict, options.fasta_filename, params, target_directory, dbkey, sequence_id, sequence_name, data_table_name=options.data_table_name or DEFAULT_DATA_TABLE_NAME, color_space=options.color_space)
 
     # save info to json file
-    open( filename, 'wb' ).write( json.dumps( data_manager_dict ) )
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

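For context, the `_add_data_table_entry()` helper shown above builds the nested dict these scripts serialize back to Galaxy's JSON file. After one successful index build it would look roughly like this (all values hypothetical):

    # Hypothetical data_manager_dict after one build_bowtie_index() call;
    # json.dump(data_manager_dict, fh, sort_keys=True) writes this back to Galaxy.
    data_manager_dict = {
        'data_tables': {
            'bowtie_indexes': [
                {'value': 'hg38', 'dbkey': 'hg38', 'name': 'Human (hg38)', 'path': 'hg38.fa'},
            ],
        },
    }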
@@ -55,7 +55,8 @@ def main():
     parser.add_argument('--database-name', dest='database_name', help='Database Name')
     args = parser.parse_args()
 
-    data_manager_input = json.loads(open(args.data_manager_json).read())
+    with open(args.data_manager_json) as fh:
+        data_manager_input = json.load(fh)
 
     target_directory = data_manager_input['output_data'][0]['extra_files_path']
 
@@ -69,7 +70,7 @@ def main():
     try:
         os.mkdir(target_directory)
     except OSError as exc:
-        if exc.errno == errno.EEXIST and os.path.isdir( target_directory ):
+        if exc.errno == errno.EEXIST and os.path.isdir(target_directory):
             pass
         else:
             raise
@@ -82,8 +83,8 @@ def main():
         args.database_name,
     )
 
-    with open(args.data_manager_json, 'w') as out:
-        out.write(json.dumps(data_manager_output, sort_keys=True))
+    with open(args.data_manager_json, 'w') as fh:
+        json.dump(data_manager_output, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

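The try/except around `os.mkdir()` above is the portable "create unless it already exists" idiom. On Python >= 3.2 the same effect is a one-liner (a sketch only, not part of this commit; `target_directory` is hypothetical):

    import os

    target_directory = '/tmp/extra_files'  # hypothetical
    # Equivalent to the try/except-errno.EEXIST dance: create the directory,
    # silently succeed if it is already there.
    os.makedirs(target_directory, exist_ok=True)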
@@ -47,8 +47,8 @@ def kma_build_index(kma_index_args, index_name, target_directory, data_table_nam
 
 
 def main(args):
-
-    data_manager_input = json.loads(open(args.data_manager_json).read())
+    with open(args.data_manager_json) as fh:
+        data_manager_input = json.load(fh)
 
     target_directory = data_manager_input['output_data'][0]['extra_files_path']
 
@@ -78,7 +78,8 @@ def main(args):
         target_directory,
     )
 
-    open(args.data_manager_json, 'w').write(json.dumps(data_manager_output))
+    with open(args.data_manager_json, 'w') as fh:
+        json.dump(data_manager_output, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

@@ -292,14 +292,15 @@ def main():
     parser.add_argument('--clean', dest='clean', action='store_true', help='Clean up extra files')
     args = parser.parse_args()
 
-    data_manager_input = json.loads(open(args.data_manager_json).read())
+    with open(args.data_manager_json) as fh:
+        data_manager_input = json.load(fh)
 
     target_directory = data_manager_input['output_data'][0]['extra_files_path']
 
     try:
-        os.mkdir( target_directory )
+        os.mkdir(target_directory)
     except OSError as exc:
-        if exc.errno == errno.EEXIST and os.path.isdir( target_directory ):
+        if exc.errno == errno.EEXIST and os.path.isdir(target_directory):
             pass
         else:
             raise
@@ -354,7 +355,8 @@ def main():
     else:
         sys.exit("Invalid database type")
 
-    open(args.data_manager_json, 'w').write(json.dumps(data_manager_output, sort_keys=True))
+    with open(args.data_manager_json, 'w') as fh:
+        json.dump(data_manager_output, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

@@ -10,14 +10,15 @@ def main(args):
     data_manager_entry['name'] = args.database
     data_manager_entry['path'] = '.'
     data_manager_json = dict(data_tables=dict(kraken_databases=data_manager_entry))
-    params = json.loads(open(args.output).read())
+    with open(args.output) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     output_path = os.path.join(os.getcwd(), 'kraken-database')
     for filename in os.listdir(output_path):
         shutil.move(os.path.join(output_path, filename), target_directory)
-    with open(args.output, 'w') as out:
-        out.write(json.dumps(data_manager_json, sort_keys=True))
+    with open(args.output, 'w') as fh:
+        json.dump(data_manager_json, fh, sort_keys=True)
 
 
 if __name__ == '__main__':

@@ -2,11 +2,11 @@
 # Dan Blankenberg
 from __future__ import print_function
 
+import json
 import optparse
 import os
 import subprocess
 import sys
-from json import dumps, loads
 
 CHUNK_SIZE = 2**20
 TWO_GB = 2**30 * 2
@@ -68,7 +68,8 @@ def main():
 
     filename = args[0]
 
-    params = loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     target_directory = params['output_data'][0]['extra_files_path']
     os.mkdir(target_directory)
     data_manager_dict = {}
@@ -93,7 +94,8 @@ def main():
     )
 
     # save info to json file
-    open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

@@ -2,19 +2,19 @@
 # Based heavily on the HISAT2 data manager wrapper
 
 import argparse
+import json
 import os
 import subprocess
 import sys
-from json import dumps, loads
 
 DEFAULT_DATA_TABLE_NAME = "bwameth_indexes"
 
 
 def build_bwameth_index(data_manager_dict, params, args):
     data_table_name = args.data_table_name
-    target_directory = params[ 'output_data' ][0]['extra_files_path']
-    if not os.path.exists( target_directory ):
-        os.mkdir( target_directory )
+    target_directory = params['output_data'][0]['extra_files_path']
+    if not os.path.exists(target_directory):
+        os.mkdir(target_directory)
     fasta_base_name = os.path.basename(args.fasta_filename)
     sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
     os.symlink(os.path.abspath(args.fasta_filename), sym_linked_fasta_filename)
@@ -23,41 +23,43 @@ def build_bwameth_index(data_manager_dict, params, args):
     return_code = proc.wait()
     if return_code:
         print("Error building index.", file=sys.stderr)
-        sys.exit( return_code )
+        sys.exit(return_code)
     data_table_entry = dict(value=args.dbkey, dbkey=args.dbkey, name=args.name, path=sym_linked_fasta_filename)
     _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
 
 
-def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
-    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-    data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )
-    data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
+    data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
     return data_manager_dict
 
 
 def main():
     # Parse Command Line
     parser = argparse.ArgumentParser()
-    parser.add_argument( '--output', default=None )
-    parser.add_argument( '--fasta_filename', default=None )
-    parser.add_argument( '--dbkey', default=None )
-    parser.add_argument( '--name', default=None )
-    parser.add_argument( '--description', default=None )
-    parser.add_argument( '--data_table_name', default=DEFAULT_DATA_TABLE_NAME )
+    parser.add_argument('--output', default=None)
+    parser.add_argument('--fasta_filename', default=None)
+    parser.add_argument('--dbkey', default=None)
+    parser.add_argument('--name', default=None)
+    parser.add_argument('--description', default=None)
+    parser.add_argument('--data_table_name', default=DEFAULT_DATA_TABLE_NAME)
     args = parser.parse_args()
 
     filename = args.output
-    params = loads(open(filename).read())
+    with open(filename) as fh:
+        params = json.load(fh)
     data_manager_dict = {}
 
-    if args.dbkey in [ None, '', '?' ]:
+    if args.dbkey in [None, '', '?']:
         raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (args.dbkey))
 
     # build the index
    build_bwameth_index(data_manager_dict, params, args)

     # save info to json file
-    open(filename, 'w').write(dumps(data_manager_dict, sort_keys=True))
+    with open(filename, 'w') as fh:
+        json.dump(data_manager_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":

@@ -89,7 +89,7 @@ def main():
         url_download(args.db_url, args.install_path)
     else:
         cat_prepare(args.install_path)
-    for root, dirs, files in os.walk(args.install_path):
+    for root, dirs, _ in os.walk(args.install_path):
         for dname in dirs:
             if dname.endswith('CAT_database'):
                 cat_db = dname
@@ -109,7 +109,8 @@ def main():
                             taxonomy_folder=os.path.join(cat_dir, tax_db))
     dm_dict['data_tables'][data_table].append(data_table_entry)
     # save info to json file
-    open(args.config_file, 'w').write(json.dumps(dm_dict))
+    with open(args.config_file, 'w') as fh:
+        json.dump(dm_dict, fh, sort_keys=True)
 
 
 if __name__ == "__main__":
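The `'wb'` -> `'w'` change in the bowtie script above is the file open mode fix named in the commit title: under Python 3, `json.dumps()` returns `str`, which cannot be written to a file opened in binary mode. A quick repro (hypothetical snippet, not from the diff):

    import json

    data = {'key': 'value'}
    try:
        with open('out.json', 'wb') as fh:  # binary mode, the old Python 2 habit
            fh.write(json.dumps(data))      # json.dumps() returns str on Python 3...
    except TypeError as exc:
        print(exc)                          # ...so this raises: a bytes-like object is required, not 'str'

    with open('out.json', 'w') as fh:       # text mode is what the json module expects
        json.dump(data, fh, sort_keys=True)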