7
7
# License is MIT: https://github.com/BioJulia/Bio.jl/blob/master/LICENSE.md
8
8
9
9
"""
10
- Alignment of two sequences.
10
+ Defines how to align a given sequence onto a reference sequence.
11
+ The alignment is represented as a sequence of elementary operations (match, insertion, deletion etc)
12
+ anchored to specific positions of the input and reference sequence.
11
13
"""
12
14
struct Alignment
13
15
anchors:: Vector{AlignmentAnchor}
@@ -58,9 +60,10 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
58
60
# path starts prior to the first aligned position pair
59
61
seqpos -= 1
60
62
refpos -= 1
63
+ alnpos = 0
61
64
62
65
n = 0
63
- anchors = AlignmentAnchor[AlignmentAnchor (seqpos, refpos, OP_START)]
66
+ anchors = AlignmentAnchor[AlignmentAnchor (seqpos, refpos, alnpos, OP_START)]
64
67
for c in cigar
65
68
if isdigit (c)
66
69
n = n * 10 + convert (Int, c - ' 0' )
@@ -79,8 +82,9 @@ function Alignment(cigar::AbstractString, seqpos::Int=1, refpos::Int=1)
79
82
else
80
83
error (" The $(op) CIGAR operation is not yet supported." )
81
84
end
85
+ alnpos += n
82
86
83
- push! (anchors, AlignmentAnchor (seqpos, refpos, op))
87
+ push! (anchors, AlignmentAnchor (seqpos, refpos, alnpos, op))
84
88
n = 0
85
89
end
86
90
end
@@ -100,41 +104,74 @@ function Base.show(io::IO, aln::Alignment)
100
104
print (io, " CIGAR string: " , cigar (aln))
101
105
end
102
106
103
- """
104
- seq2ref(aln::Alignment, i::Integer)::Tuple{Int,Operation}
105
-
106
- Map a position `i` from sequence to reference.
107
- """
108
- function seq2ref (aln:: Alignment , i:: Integer ):: Tuple{Int,Operation}
109
- idx = findanchor (aln, i, Val{true })
107
+ # generic function for mapping between sequence, reference and alignment positions
108
+ # getsrc specifies anchor source position getter
109
+ # getdest specifies anchor destination position getter
110
+ function pos2pos (aln:: Alignment , i:: Integer ,
111
+ srcpos:: Function , destpos:: Function ):: Tuple{Int,Operation}
112
+ idx = findanchor (aln, i, srcpos)
110
113
if idx == 0
111
- throw (ArgumentError (" invalid sequence position: $i " ))
114
+ if srcpos === seqpos
115
+ throw (ArgumentError (" invalid sequence position: $i " ))
116
+ elseif srcpos === refpos
117
+ throw (ArgumentError (" invalid reference position: $i " ))
118
+ elseif srcpos === alnpos
119
+ throw (ArgumentError (" invalid alignment position: $i " ))
120
+ else
121
+ throw (ArgumentError (" Unknown position getter: $srcpos " ))
122
+ end
112
123
end
113
124
anchor = aln. anchors[idx]
114
- refpos = anchor. refpos
115
- if ismatchop (anchor. op)
116
- refpos += i - anchor. seqpos
125
+ pos = destpos (anchor)
126
+ if ismatchop (anchor. op) ||
127
+ ((srcpos === alnpos) && ((destpos === seqpos) && isinsertop (anchor. op) || (destpos === refpos) && isdeleteop (anchor. op))) ||
128
+ ((destpos === alnpos) && ((srcpos === seqpos) && isinsertop (anchor. op) || (srcpos === refpos) && isdeleteop (anchor. op)))
129
+ pos += i - srcpos (anchor)
117
130
end
118
- return refpos , anchor. op
131
+ return pos , anchor. op
119
132
end
120
133
121
134
"""
122
- ref2seq(aln::Alignment, i::Integer)::Tuple{Int,Operation}
135
+ seq2ref(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
136
+
137
+ Map a position `i` from sequence to reference.
138
+ """
139
+ seq2ref (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, seqpos, refpos)
140
+
141
+ """
142
+ ref2seq(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
123
143
124
144
Map a position `i` from reference to sequence.
125
145
"""
126
- function ref2seq (aln:: Alignment , i:: Integer ):: Tuple{Int,Operation}
127
- idx = findanchor (aln, i, Val{false })
128
- if idx == 0
129
- throw (ArgumentError (" invalid reference position: $i " ))
130
- end
131
- anchor = aln. anchors[idx]
132
- seqpos = anchor. seqpos
133
- if ismatchop (anchor. op)
134
- seqpos += i - anchor. refpos
135
- end
136
- return seqpos, anchor. op
137
- end
146
+ ref2seq (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, refpos, seqpos)
147
+
148
+ """
149
+ seq2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
150
+
151
+ Map a position `i` from the input sequence to the alignment sequence.
152
+ """
153
+ seq2aln (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, seqpos, alnpos)
154
+
155
+ """
156
+ ref2aln(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
157
+
158
+ Map a position `i` from the reference sequence to the alignment sequence.
159
+ """
160
+ ref2aln (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, refpos, alnpos)
161
+
162
+ """
163
+ aln2seq(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
164
+
165
+ Map a position `i` from the alignment sequence to the input sequence.
166
+ """
167
+ aln2seq (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, alnpos, seqpos)
168
+
169
+ """
170
+ aln2ref(aln::Union{Alignment, AlignedSequence, PairwiseAlignment}, i::Integer)::Tuple{Int,Operation}
171
+
172
+ Map a position `i` from the alignment sequence to the reference sequence.
173
+ """
174
+ aln2ref (aln:: Alignment , i:: Integer ) = pos2pos (aln, i, alnpos, refpos)
138
175
139
176
"""
140
177
cigar(aln::Alignment)
@@ -172,65 +209,68 @@ function check_alignment_anchors(anchors)
172
209
end
173
210
174
211
for i in 2 : lastindex (anchors)
175
- if anchors[i]. refpos < anchors[i- 1 ]. refpos ||
176
- anchors[i] . seqpos < anchors[i - 1 ] . seqpos
212
+ @inbounds acur, aprev = anchors[i], anchors[i- 1 ]
213
+ if acur . refpos < aprev . refpos || acur . seqpos < aprev . seqpos || acur . alnpos < aprev . alnpos
177
214
error (" Alignment anchors must be sorted." )
178
215
end
179
216
180
- op = anchors[i] . op
181
- if convert (UInt8, op) > convert (UInt8, OP_MAX_VALID )
217
+ op = acur . op
218
+ if ! isvalid (op )
182
219
error (" Anchor at index $(i) has an invalid operation." )
183
220
end
184
221
185
222
# reference skip/delete operations
186
223
if isdeleteop (op)
187
- if anchors[i]. seqpos != anchors[i- 1 ]. seqpos
188
- error (" Invalid anchor positions for reference deletion." )
224
+ if acur. seqpos != aprev. seqpos
225
+ error (" Invalid anchor sequence positions for reference deletion." )
226
+ end
227
+ if acur. alnpos - aprev. alnpos != acur. refpos - aprev. refpos
228
+ error (" Invalid anchor reference positions for reference deletion." )
189
229
end
190
230
# reference insertion operations
191
231
elseif isinsertop (op)
192
- if anchors[i]. refpos != anchors[i- 1 ]. refpos
193
- error (" Invalid anchor positions for reference insertion." )
232
+ if acur. refpos != aprev. refpos
233
+ error (" Invalid anchor reference positions for reference insertion." )
234
+ end
235
+ if acur. alnpos - aprev. alnpos != acur. seqpos - aprev. seqpos
236
+ error (" Invalid anchor sequence positions for reference deletion." )
194
237
end
195
238
# match operations
196
239
elseif ismatchop (op)
197
- if anchors[i] . refpos - anchors[i - 1 ] . refpos !=
198
- anchors[i] . seqpos - anchors[i - 1 ] . seqpos
199
- error (" Invalid anchor positions for match operation." )
240
+ if (acur . refpos - aprev . refpos != acur . seqpos - aprev . seqpos) ||
241
+ (acur . alnpos - aprev . alnpos != acur . seqpos - aprev . seqpos)
242
+ error (" Invalid anchor positions for match operation." )
200
243
end
201
244
end
202
245
end
203
246
end
204
247
205
- # find the index of the first anchor that satisfies `i ≤ pos`
206
- @generated function findanchor (aln:: Alignment , i:: Integer , :: Type{Val{isseq}} ) where isseq
207
- pos = isseq ? :seqpos : :refpos
208
- quote
209
- anchors = aln. anchors
210
- lo = 1
211
- hi = lastindex (anchors)
212
- if ! (anchors[lo]. $ pos < i ≤ anchors[hi]. $ pos)
213
- return 0
214
- end
215
- # binary search
216
- @inbounds while hi - lo > 2
217
- m = (lo + hi) >> 1
218
- if anchors[m]. $ pos < i
219
- lo = m
220
- else # i ≤ anchors[m].$pos
221
- hi = m
222
- end
223
- # invariant (activate this for debugging)
224
- # @assert anchors[lo].$pos < i ≤ anchors[hi].$pos
248
+ # find the index of the first anchor that satisfies `i ≤ pos(anchor)`
249
+ function findanchor (aln:: Alignment , i:: Integer , pos:: Function )
250
+ anchors = aln. anchors
251
+ lo = 1
252
+ hi = lastindex (anchors)
253
+ @inbounds if ! (pos (anchors[lo]) < i ≤ pos (anchors[hi]))
254
+ return 0
255
+ end
256
+ # binary search
257
+ @inbounds while hi - lo > 2
258
+ m = (lo + hi) >> 1
259
+ if pos (anchors[m]) < i
260
+ lo = m
261
+ else # i ≤ pos(anchors[m])
262
+ hi = m
225
263
end
226
- # linear search
227
- @inbounds for j in lo+ 1 : hi
228
- if i ≤ aln. anchors[j]. $ pos
229
- return j
230
- end
264
+ # invariant (activate this for debugging)
265
+ # @assert pos(anchors[lo]) < i ≤ pos(anchors[hi])
266
+ end
267
+ # linear search
268
+ @inbounds for j in lo+ 1 : hi
269
+ if i ≤ pos (aln. anchors[j])
270
+ return j
231
271
end
232
- # do not reach here
233
- @assert false
234
- return 0
235
272
end
273
+ # do not reach here
274
+ @assert false
275
+ return 0
236
276
end
0 commit comments