Skip to content

Commit c023a3c

Browse files
timholyCiaranOMara
authored andcommitted
Add more SubstitutionMatrix operations (#60)
SubstitutionMatrix is not a subtype of AbstractArray. This may or may not be intentional, but currently, the number of supported operations is fairly minimal. This provides a few more conveniences for users who might want to manipulate them beyond mere lookup tables.
1 parent 25602e1 commit c023a3c

File tree

3 files changed

+45
-2
lines changed

3 files changed

+45
-2
lines changed

docs/src/pairalign.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CurrentModule = BioAlignments
55
DocTestSetup = quote
66
using BioSequences
77
using BioAlignments
8+
using BioAlignments.BioSymbols
89
end
910
```
1011

@@ -237,6 +238,15 @@ Make sure to create a copy of the original matrix when you create a matrix from
237238
a predefined matrix. In the above case, `BLOSUM62` is shared in the whole
238239
program and modification on it will affect any result that uses `BLOSUM62`.
239240

241+
Other supported operations include `similar`, `isassigned`, and `BioSymbols.alphabet`:
242+
```jldoctest
243+
julia> reshape(alphabet(BLOSUM62), 1, :)
244+
1×27 Matrix{AminoAcid}:
245+
AA_A AA_R AA_N AA_D AA_C AA_Q AA_E … AA_B AA_J AA_Z AA_X AA_Term
246+
```
247+
248+
As shown, there is no entry corresponding to `AA_Gap`.
249+
240250
`DichotomousSubstitutionMatrix` is a specialized matrix for matching or
241251
mismatching substitution. This is a preferable choice when performance is more
242252
important than flexibility because looking up score is faster than

src/submat.jl

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ The required method:
1313
1414
* `Base.getindex(submat, x, y)`: substitution score/cost from `x` to `y`
1515
"""
16-
abstract type AbstractSubstitutionMatrix{S<:Real} end
16+
abstract type AbstractSubstitutionMatrix{S<:Real} end # is it deliberate that this is not <:AbstractMatrix{S}?
1717

1818
"""
1919
Substitution matrix.
@@ -82,18 +82,37 @@ Base.convert(::Type{Matrix}, submat::SubstitutionMatrix) = copy(submat.data)
8282
return submat.data[i,j]
8383
end
8484

85+
Base.getindex(submat::SubstitutionMatrix, mask::AbstractMatrix{Bool}) = submat.data[mask]
86+
Base.getindex(submat::SubstitutionMatrix, mask1::AbstractVector{Bool}, mask2::AbstractVector{Bool}) = submat.data[mask1, mask2]
87+
8588
function Base.setindex!(submat::SubstitutionMatrix{T}, val, x, y) where T
8689
i = index(convert(T, x))
8790
j = index(convert(T, y))
8891
submat.data[i,j] = val
8992
submat.defined[i,j] = true
90-
return submat
93+
return val
94+
end
95+
96+
Base.eltype(::SubstitutionMatrix{T,S}) where {T,S} = S
97+
98+
BioSymbols.alphabet(::SubstitutionMatrix{T}) where {T} = alphabet_without_gap(T)
99+
100+
Base.isassigned(submat::SubstitutionMatrix) = submat.defined
101+
function Base.isassigned(submat::SubstitutionMatrix{T}, x, y) where T
102+
i = index(convert(T, x))
103+
j = index(convert(T, y))
104+
return submat.defined[i,j]
91105
end
92106

93107
function Base.copy(submat::SubstitutionMatrix{T,S}) where {T,S}
94108
return SubstitutionMatrix{T,S}(copy(submat.data), copy(submat.defined))
95109
end
96110

111+
function Base.similar(submat::SubstitutionMatrix{T}, ::Type{S}=eltype(submat)) where {T,S<:Real}
112+
n = length(BioSymbols.alphabet(T)) - 1
113+
return SubstitutionMatrix{T,S}(Matrix{S}(undef, n, n), falses(n, n))
114+
end
115+
97116
Base.minimum(submat::SubstitutionMatrix) = minimum(submat.data)
98117
Base.maximum(submat::SubstitutionMatrix) = maximum(submat.data)
99118

test/runtests.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,20 @@ end
376376

377377
@test convert(Matrix, BioAlignments.load_submat(AminoAcid, "BLOSUM62")) == convert(Matrix, BLOSUM62)
378378

379+
# Various operations
380+
@test eltype(BLOSUM62) === Int
381+
@test eltype(similar(BLOSUM62, Float64)) === Float64
382+
@test !any(isassigned(similar(BLOSUM62)))
383+
@test isassigned(BLOSUM62, AA_R, AA_K)
384+
@test !isassigned(BLOSUM62, AA_R, AA_O)
385+
@test size(BLOSUM62[isassigned(BLOSUM62)]) == (sum(BLOSUM62.defined),)
386+
keep1, keep2 = vec(all(isassigned(BLOSUM62); dims=2)), vec(all(isassigned(BLOSUM62); dims=1))
387+
@test size(BLOSUM62[keep1, keep2]) == (sum(keep1), sum(keep2))
388+
@test BioSymbols.alphabet(BLOSUM62) == BioAlignments.alphabet_without_gap(AminoAcid)
389+
myblossum = similar(BLOSUM62, Float64)
390+
myblossum[AA_K,AA_R] = myblossum[AA_R,AA_K] = 1.11
391+
@test myblossum[AA_K,AA_R] == myblossum[AA_R,AA_K] == 1.11
392+
379393
submat = SubstitutionMatrix(DNA, rand(Float64, 15, 15))
380394
@test isa(submat, SubstitutionMatrix{DNA,Float64})
381395

0 commit comments

Comments
 (0)