Skip to content

Commit

Permalink
Merge pull request #75 from CenterForMedicalGeneticsGhent/feature/cram_support
Browse files Browse the repository at this point in the history

sam/cram input support
  • Loading branch information
matthdsm authored Mar 24, 2021
2 parents 3436705 + b878a0b commit b2d38a8
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 16 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ conda install -f -c conda-forge -c bioconda wisecondorx
### Running WisecondorX

There are three main stages (conversion, reference creation and prediction) when using WisecondorX:
- Convert .bam to .npz files (for both reference and test samples)
- Convert aligned reads to .npz files (for both reference and test samples)
- Create a reference (using reference .npz files)
- **Important notes**
- Automated gender prediction, required to consistently analyze sex chromosomes, is based on a Gaussian mixture
Expand All @@ -56,11 +56,11 @@ There are three main stages (converting, reference creating and predicting) when
observe additional improvement concerning normalization.
- Predict copy number alterations (using the reference file and test .npz cases of interest)

### Stage (1) Convert .bam to .npz
### Stage (1) Convert aligned reads (sam/bam/cram) to .npz

```bash

WisecondorX convert input.bam output.npz [--optional arguments]
WisecondorX convert input.sam/bam/cram output.npz [--optional arguments]
```

<br>Optional argument <br><br> | Function
Expand Down
32 changes: 21 additions & 11 deletions wisecondorX/convert_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,31 @@

import numpy as np
import pysam
import sys

'''
Converts bam file to numpy array by transforming
Converts aligned reads file to numpy array by transforming
individual reads to counts per bin.
'''


def convert_bam(args):
def convert_reads(args):
bins_per_chr = dict()
for chr in range(1, 25):
bins_per_chr[str(chr)] = None

logging.info('Importing data ...')

bam_file = pysam.AlignmentFile(args.infile, 'rb')
if args.infile.endswith(".sam"):
reads_file = pysam.AlignmentFile(args.infile, 'r')
elif args.infile.endswith(".bam"):
reads_file = pysam.AlignmentFile(args.infile, 'rb')
elif args.infile.endswith(".cram"):
reads_file = pysam.AlignmentFile(args.infile, 'rc')
else:
logging.error(
"Unsupported input file type. Make sure your input filename has a correct extension (sam/bam/cram)")
sys.exit(1)

reads_seen = 0
reads_kept = 0
Expand All @@ -28,9 +38,9 @@ def convert_bam(args):
larp = -1
larp2 = -1

logging.info('Converting bam ... This might take a while ...')
logging.info('Converting aligned reads ... This might take a while ...')

for index, chr in enumerate(bam_file.references):
for index, chr in enumerate(reads_file.references):

chr_name = chr
if chr_name[:3].lower() == 'chr':
Expand All @@ -39,9 +49,9 @@ def convert_bam(args):
continue

logging.info('Working at {}; processing {} bins'
.format(chr, int(bam_file.lengths[index] / float(args.binsize) + 1)))
counts = np.zeros(int(bam_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
bam_chr = bam_file.fetch(chr)
.format(chr, int(reads_file.lengths[index] / float(args.binsize) + 1)))
counts = np.zeros(int(reads_file.lengths[index] / float(args.binsize) + 1), dtype=np.int32)
bam_chr = reads_file.fetch(chr)

if chr_name == 'X':
chr_name = '23'
Expand Down Expand Up @@ -81,9 +91,9 @@ def convert_bam(args):
bins_per_chr[chr_name] = counts
reads_kept += sum(counts)

qual_info = {'mapped': bam_file.mapped,
'unmapped': bam_file.unmapped,
'no_coordinate': bam_file.nocoordinate,
qual_info = {'mapped': reads_file.mapped,
'unmapped': reads_file.unmapped,
'no_coordinate': reads_file.nocoordinate,
'filter_rmdup': reads_rmdup,
'filter_mapq': reads_mapq,
'pre_retro': reads_seen,
Expand Down
4 changes: 2 additions & 2 deletions wisecondorX/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from wisecondorX.convert_tools import convert_bam
from wisecondorX.convert_tools import convert_reads
from wisecondorX.newref_control import tool_newref_prep, tool_newref_main, tool_newref_merge
from wisecondorX.newref_tools import train_gender_model, get_mask
from wisecondorX.overall_tools import gender_correct, scale_sample
Expand All @@ -20,7 +20,7 @@
def tool_convert(args):
logging.info('Starting conversion')

sample, qual_info = convert_bam(args)
sample, qual_info = convert_reads(args)
np.savez_compressed(args.outfile,
binsize=args.binsize,
sample=sample,
Expand Down

0 comments on commit b2d38a8

Please sign in to comment.