Skip to content

Commit b80c4bd

Browse files
authored
Merge pull request #31 from bmichanderson/parallel_patch
Change parallel processing for assemble_reads.py
2 parents 0141550 + 9ca8ec2 commit b80c4bd

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

secapr/assemble_reads.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -61,8 +61,14 @@ def add_arguments(parser):
6161
default=1,
6262
help='For parallel processing you can set the number of cores you want to run the assembly on.'
6363
)
64+
parser.add_argument(
65+
'--instances',
66+
type=int,
67+
default=1,
68+
help='How many parallel assemblies to run at a time. This will multiply the cores and max_memory arguments (for each SPAdes run). For example, max_memory=4, cores=2 and instances=4 will use 8 threads and 16 GB.'
69+
)
6470

65-
def assembly_spades(sorted_fastq_files,n_library_numbers,output_folder,id_sample,kmer,cores,max_memory,args):
71+
def assembly_spades(sorted_fastq_files,n_library_numbers,output_folder,id_sample,kmer,args):
6672
print("De-novo assembly with spades of sample %s:" %id_sample)
6773
command = [
6874
"spades.py",
@@ -160,7 +166,7 @@ def process_subfolder(pool_args):
160166
print(('#' * 50))
161167
print(("Processing sample %s" % sample_id))
162168
start = time.time()
163-
assembly_spades(sorted_fastq_files, n_library_numbers, sample_output_folder, sample_id, kmer, 1, max_memory, args)
169+
assembly_spades(sorted_fastq_files, n_library_numbers, sample_output_folder, sample_id, kmer, args)
164170
contig_file = os.path.join(sample_output_folder, 'contigs.fasta')
165171
new_contig_file = '%s/../../%s.fa' % (sample_output_folder, sample_id)
166172
mv_contig = "cp %s %s" % (contig_file, new_contig_file)
@@ -177,11 +183,11 @@ def main(args):
177183
out_dir = os.path.join(out_folder,'stats')
178184
if not os.path.exists(out_dir):
179185
os.makedirs(out_dir)
180-
cores = args.cores
186+
instances = args.instances
181187
subfolder_list = [subfolder for subfolder, __, __ in os.walk(input_folder) if os.path.basename(subfolder) != os.path.basename(input_folder)]
182-
if cores > 1:
183-
print(("Running in parallel on %d cores" %cores))
184-
pool = multiprocessing.Pool(cores)
188+
if instances > 1:
189+
print(("Running assemblies in parallel as %d instances" %instances))
190+
pool = multiprocessing.Pool(instances)
185191
pool_args = [[subfolder,args] for subfolder in subfolder_list]
186192
contig_count_df_list = list(pool.map(partial(process_subfolder), pool_args))
187193
pool.close()

0 commit comments

Comments
 (0)