Skip to content

Commit

Permalink
Stop pirating BioSymbols (#259)
Browse files Browse the repository at this point in the history
BioSequences pirated BioSymbols by overloading `gap`, `isambiguous`, `isgap` and
`iscertain`. Remove these overloads.
  • Loading branch information
jakobnissen committed Nov 12, 2022
1 parent e4c8997 commit db8a692
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 39 deletions.
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "BioSequences"
uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59"
authors = ["Sabrina Jaye Ward <[email protected]>", "Jakob Nissen <[email protected]>"]
version = "3.1.0"
version = "3.1.1"

[deps]
BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9"
Expand All @@ -10,11 +10,11 @@ SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c"
Twiddle = "7200193e-83a8-5a55-b20d-5d36d44a0795"

[compat]
BioSymbols = "5.1.0"
BioSymbols = "5.1.2"
SnoopPrecompile = "1"
StableRNGs = "0.1, 1.0"
Twiddle = "1.1.1"
julia = "1.5"
SnoopPrecompile = "1"

[extras]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
Expand Down
2 changes: 0 additions & 2 deletions src/BioSequences.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,6 @@ import Twiddle: enumerate_nibbles,
repeatpattern
using Random

BioSymbols.gap(::Type{Char}) = '-'

include("alphabet.jl")

# Load the bit-twiddling internals that optimised BioSequences methods depend on.
Expand Down
16 changes: 10 additions & 6 deletions src/biosequence/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,13 @@ Base.count(pred, seq::BioSequence) = count_naive(pred, seq)
Base.count(pred, seqa::BioSequence, seqb::BioSequence) = count_naive(pred, seqa, seqb)

# These functions are BioSequences-specific because they take two arguments
BioSymbols.isambiguous(x::T, y::T) where {T<:NucleicAcid} = isambiguous(x) | isambiguous(y)
BioSymbols.isgap(x::T, y::T) where {T<:NucleicAcid} = isgap(x) | isgap(y)
BioSymbols.iscertain(x::T, y::T) where {T<:NucleicAcid} = iscertain(x) & iscertain(y)
# Pairwise predicates over two nucleotides of the same type `T`.
# Each one combines the single-argument predicate applied to `x` and to `y`.
# They carry their own names instead of being two-argument methods of the
# single-argument predicates they call.
# `|` and `&` do not short-circuit, so both symbols are always inspected.

# Holds when `isambiguous` is true for `x`, for `y`, or for both.
isambiguous_or(x::T, y::T) where {T<:NucleicAcid} = isambiguous(x) | isambiguous(y)
# Holds when `isgap` is true for `x`, for `y`, or for both.
isgap_or(x::T, y::T) where {T<:NucleicAcid} = isgap(x) | isgap(y)
# Holds only when `iscertain` is true for both `x` and `y`.
iscertain_and(x::T, y::T) where {T<:NucleicAcid} = iscertain(x) & iscertain(y)

# Former definitions, disabled because they added methods to functions owned by
# BioSymbols (type piracy):
#BioSymbols.isambiguous(x::T, y::T) where {T<:NucleicAcid} = isambiguous(x) | isambiguous(y)
#BioSymbols.isgap(x::T, y::T) where {T<:NucleicAcid} = isgap(x) | isgap(y)
#BioSymbols.iscertain(x::T, y::T) where {T<:NucleicAcid} = iscertain(x) & iscertain(y)

Base.count(::typeof(isambiguous), seqa::S, seqb::S) where {S<:BioSequence{<:NucleicAcidAlphabet{2}}} = 0
Base.count(::typeof(isgap), seqa::S, seqb::S) where {S<:BioSequence{<:NucleicAcidAlphabet{2}}} = 0
Expand All @@ -56,13 +60,13 @@ Calculate GC content of `seq`.
gc_content(seq::NucleotideSeq) = isempty(seq) ? 0.0 : count(isGC, seq) / length(seq)

n_ambiguous(seq) = count(isambiguous, seq)
n_ambiguous(seqa::BioSequence, seqb::BioSequence) = count(isambiguous, seqa, seqb)
n_ambiguous(seqa::BioSequence, seqb::BioSequence) = count(isambiguous_or, seqa, seqb)

n_certain(seq) = count(iscertain, seq)
n_certain(seqa::BioSequence, seqb::BioSequence) = count(iscertain, seqa, seqb)
n_certain(seqa::BioSequence, seqb::BioSequence) = count(iscertain_and, seqa, seqb)

n_gaps(seq::BioSequence) = count(isgap, seq)
n_gaps(seqa::BioSequence, seqb::BioSequence) = count(isgap, seqa, seqb)
n_gaps(seqa::BioSequence, seqb::BioSequence) = count(isgap_or, seqa, seqb)

mismatches(seqa::BioSequence, seqb::BioSequence) = count(!=, seqa, seqb)
matches(seqa::BioSequence, seqb::BioSequence) = count(==, seqa, seqb)
12 changes: 6 additions & 6 deletions src/longsequences/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ Base.count(::typeof(isambiguous), seq::SeqOrView{<:NucleicAcidAlphabet{4}}) = co
# A pair of 2-bit encoded sequences will never have ambiguous bases.
Base.count(::typeof(isambiguous), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{2}} = 0
Base.count(::typeof(isambiguous), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_ambiguous_bitpar(seqa, seqb)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isambiguous, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isambiguous, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isambiguous_or, promote(seqa, seqb)...)
Base.count(::typeof(isambiguous), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isambiguous_or, promote(seqa, seqb)...)

# Counting certain sites
let
Expand All @@ -120,8 +120,8 @@ let
) |> eval
end
Base.count(::typeof(iscertain), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_certain_bitpar(seqa, seqb)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(iscertain, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(iscertain, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(iscertain_and, promote(seqa, seqb)...)
Base.count(::typeof(iscertain), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(iscertain_and, promote(seqa, seqb)...)

# Counting gap sites
let
Expand Down Expand Up @@ -163,5 +163,5 @@ let
end
Base.count(::typeof(isgap), seqa::SeqOrView{A}, seqb::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_gap_bitpar(seqa, seqb)
Base.count(::typeof(isgap), seqa::SeqOrView{A}) where {A<:NucleicAcidAlphabet{4}} = count_gap_bitpar(seqa)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isgap, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isgap, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{4}}, seqb::SeqOrView{<:NucleicAcidAlphabet{2}}) = count(isgap_or, promote(seqa, seqb)...)
Base.count(::typeof(isgap), seqa::SeqOrView{<:NucleicAcidAlphabet{2}}, seqb::SeqOrView{<:NucleicAcidAlphabet{4}}) = count(isgap_or, promote(seqa, seqb)...)
44 changes: 23 additions & 21 deletions test/counting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
alias::Function,
seqa::BioSequence,
seqb::BioSequence,
singlearg::Bool
singlearg::Bool,
multi_alias::Function
)
# Test that order does not matter.
@test count(pred, seqa, seqb) == count(pred, seqb, seqa)
@test BioSequences.count_naive(pred, seqa, seqb) == BioSequences.count_naive(pred, seqb, seqa)
@test BioSequences.count_naive(multi_alias, seqa, seqb) == BioSequences.count_naive(multi_alias, seqb, seqa)
@test alias(seqa, seqb) == alias(seqb, seqa)
# Test that result is the same as counting naively.
@test count(pred, seqa, seqb) == BioSequences.count_naive(pred, seqa, seqb)
@test count(pred, seqb, seqa) == BioSequences.count_naive(pred, seqb, seqa)
@test count(pred, seqa, seqb) == BioSequences.count_naive(multi_alias, seqa, seqb)
@test count(pred, seqb, seqa) == BioSequences.count_naive(multi_alias, seqb, seqa)
# Test that the alias function works.
@test count(pred, seqa, seqb) == alias(seqa, seqb)
@test count(pred, seqb, seqa) == alias(seqb, seqa)
Expand All @@ -57,7 +58,8 @@
alphx::Type{<:Alphabet},
alphy::Type{<:Alphabet},
subset::Bool,
singlearg::Bool
singlearg::Bool,
multi_alias::Function
)
for _ in 1:10
seqA = random_seq(alphx, rand(10:100))
Expand All @@ -72,7 +74,7 @@
sa = subA
sb = subB
end
testcounter(pred, alias, sa, sb, singlearg)
testcounter(pred, alias, sa, sb, singlearg, multi_alias)
end
end

Expand All @@ -81,23 +83,23 @@
# Can't promote views
for sub in (true, false)
for n in (4, 2)
counter_random_tests(!=, mismatches, a{n}, a{n}, sub, false)
counter_random_tests(!=, mismatches, a{n}, a{n}, sub, false, !=)
end
end
counter_random_tests(!=, mismatches, a{4}, a{2}, false, false)
counter_random_tests(!=, mismatches, a{2}, a{4}, false, false)
counter_random_tests(!=, mismatches, a{4}, a{2}, false, false, !=)
counter_random_tests(!=, mismatches, a{2}, a{4}, false, false, !=)
end
end

@testset "Matches" begin
for a in (DNAAlphabet, RNAAlphabet)
for sub in (true, false)
for n in (4, 2)
counter_random_tests(==, matches, a{n}, a{n}, sub, false)
counter_random_tests(==, matches, a{n}, a{n}, sub, false, ==)
end
end
counter_random_tests(==, matches, a{4}, a{2}, false, false)
counter_random_tests(==, matches, a{2}, a{4}, false, false)
counter_random_tests(==, matches, a{4}, a{2}, false, false, ==)
counter_random_tests(==, matches, a{2}, a{4}, false, false, ==)
end
end

Expand All @@ -106,35 +108,35 @@
# Can't promote views
for n in (4, 2)
for sub in (true, false)
counter_random_tests(isambiguous, n_ambiguous, a{n}, a{n}, sub, true)
counter_random_tests(isambiguous, n_ambiguous, a{n}, a{n}, sub, false, BioSequences.isambiguous_or)
end
end
counter_random_tests(isambiguous, n_ambiguous, a{4}, a{2}, false, true)
counter_random_tests(isambiguous, n_ambiguous, a{2}, a{4}, false, true)
counter_random_tests(isambiguous, n_ambiguous, a{4}, a{2}, false, true, BioSequences.isambiguous_or)
counter_random_tests(isambiguous, n_ambiguous, a{2}, a{4}, false, true, BioSequences.isambiguous_or)
end
end

@testset "Certain" begin
for a in (DNAAlphabet, RNAAlphabet)
for n in (4, 2)
for sub in (true, false)
counter_random_tests(iscertain, n_certain, a{n}, a{n}, sub, true)
counter_random_tests(iscertain, n_certain, a{n}, a{n}, sub, true, BioSequences.iscertain_and)
end
end
counter_random_tests(iscertain, n_certain, a{4}, a{2}, false, true)
counter_random_tests(iscertain, n_certain, a{2}, a{4}, false, true)
counter_random_tests(iscertain, n_certain, a{4}, a{2}, false, true, BioSequences.iscertain_and)
counter_random_tests(iscertain, n_certain, a{2}, a{4}, false, true, BioSequences.iscertain_and)
end
end

@testset "Gap" begin
for a in (DNAAlphabet, RNAAlphabet)
for n in (4, 2)
for sub in (true, false)
counter_random_tests(isgap, n_gaps, a{n}, a{n}, sub, true)
counter_random_tests(isgap, n_gaps, a{n}, a{n}, sub, true, BioSequences.isgap_or)
end
end
counter_random_tests(isgap, n_gaps, a{4}, a{2}, false, true)
counter_random_tests(isgap, n_gaps, a{2}, a{4}, false, true)
counter_random_tests(isgap, n_gaps, a{4}, a{2}, false, true, BioSequences.isgap_or)
counter_random_tests(isgap, n_gaps, a{2}, a{4}, false, true, BioSequences.isgap_or)
end
end

Expand Down
1 change: 0 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ end
include("longsequences/randseq.jl")
include("longsequences/shuffle.jl")
end

include("translation.jl")
include("counting.jl")

Expand Down

2 comments on commit db8a692

@jakobnissen
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/72101

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v3.1.1 -m "<description of version>" db8a69293e45910e60b0aae4e7f468621cb5b21d
git push origin v3.1.1

Please sign in to comment.