Python bindings for the BWA-MEM aligner.
pip install bwamem

from bwamem import BwaIndexer
indexer = BwaIndexer()
index_path = indexer.build_index('reference.fa')

from bwamem import BwaAligner
aligner = BwaAligner('path/to/index')
alignments = aligner.align('ACGATCGCGATCGA')
for aln in alignments:
    print(f'{aln.ctg}:{aln.r_st} strand={aln.strand} mapq={aln.mapq}')

read1 = 'ACGATCGCGATCGA'
read2 = 'TTCGATCGATCGAT'
paired_alignments = aligner.align(read1, read2)
for pe_aln in paired_alignments:
    print(f'Insert size: {pe_aln.insert_size}, Proper pair: {pe_aln.is_proper_pair}')

# Get full sequence
seq = aligner.seq('chr1')
# Get subsequence
subseq = aligner.seq('chr1', start=100, end=200)

from bwamem import BwaIndexer
# Progress messages are captured by default (no console spam)
indexer = BwaIndexer(capture_progress=True)
index_path = indexer.build_index('genome.fasta')
# Check progress info
progress = indexer.get_progress()
print(f"Status: {progress['status']}")
print(f"Iterations: {progress['iterations']}")
print(f"Characters processed: {progress['characters_processed']}")
# Get progress percentage (if available)
if indexer.progress_percent:
    print(f"Progress: {indexer.progress_percent:.1f}%")
# Access all captured messages
for msg in progress['messages']:
    print(msg)

from bwamem import BwaIndexer
# Verbosity levels:
# 0 = silent (no output)
# 1 = quiet (only warnings/errors) - default
# 2 = normal (standard BWA messages)
# 3+ = debug (verbose output)
# Silent mode
indexer = BwaIndexer(verbose=0)
indexer.build_index('genome.fasta')
# Normal mode with progress messages shown in console
indexer = BwaIndexer(verbose=2, capture_progress=False)
indexer.build_index('genome.fasta')
# Debug mode with captured progress
indexer = BwaIndexer(verbose=3, capture_progress=True)
indexer.build_index('genome.fasta')
# Custom algorithm and block size
indexer = BwaIndexer(algorithm='bwtsw', block_size=50000000)
indexer.build_index('genome.fasta')

from bwamem import fastx_read, read_paired_fastx
# Single-end (supports both FASTA and FASTQ)
for read in fastx_read('sequences.fasta.gz'):
    print(f'{read.name}: {read.sequence}')
# Paired-end
for read1, read2 in read_paired_fastx('R1.fastq', 'R2.fastq'):
    print(f'{read1.name}, {read2.name}')

# Specify alignment parameters
aligner = BwaAligner('path/to/index', options='-x ont2d -A 1 -B 0')
# Set custom insert size for paired-end reads
aligner = BwaAligner('path/to/index', insert_model=(500, 50))
paired_alignments = aligner.align(read1, read2)

Each Alignment object contains the following attributes:
| Attribute | Description |
|---|---|
| `ctg` | Contig/reference name |
| `r_st` | Reference start position (0-based) |
| `r_en` | Reference end position (property) |
| `strand` | Strand: +1 for forward, -1 for reverse |
| `q_st`, `q_en` | Query start/end positions |
| `mapq` | Mapping quality score |
| `cigar` | CIGAR as list of [length, op] pairs |
| `cigar_str` | CIGAR string (property) |
| `NM` | Edit distance |
| `score` | Alignment score |
| `is_primary` | Primary alignment flag |
Calculated properties (computed on demand): r_en, cigar_str, blen, mlen
CIGAR operations: 0=M (match), 1=I (insertion), 2=D (deletion), 3=N (skip), 4=S (soft-clip), 5=H (hard-clip)
PairedAlignment contains: read1, read2 (Alignment objects), is_proper_pair (bool), insert_size (int or None)
- Python bindings: Mozilla Public License 2.0
- BWA: GNU General Public License v3.0