Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Genetic #42

Open
wants to merge 14 commits into
base: dev
Choose a base branch
from
108 changes: 54 additions & 54 deletions src/Genetics.jl
Original file line number Diff line number Diff line change
@@ -1,68 +1,68 @@
# using Diversity
# using Diversity.API
using Diversity
using Diversity.API

# using PopGen
# using StringDistances
# using LinearAlgebra
using PopGen
using StringDistances
using LinearAlgebra

# using FASTX
using FASTX

# abstract type AbstractGeneticTypes{PopData} <:
# Diversity.API.AbstractTypes
# end
"""
    AbstractGeneticTypes{Data}

Abstract supertype, under `Diversity.API.AbstractTypes`, for genetic type lists.

It carries a single type parameter `Data`; the concrete `GeneticType` in this file
uses that parameter as the type of the data it stores.
"""
abstract type AbstractGeneticTypes{Data} <: Diversity.API.AbstractTypes end

# struct GeneticType{PopData} <: AbstractGeneticTypes{PopData}
# dat::PopData
# ntypes::Int64
# Zmatrix::Matrix{Float64}
# end
struct GeneticType{PopData} <: AbstractGeneticTypes{PopData}
dat::PopData
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For instance, I would make this an arbitrary `Data` type parameter, which is bound when you construct the object (note that the name `Data` must not already be defined as a type!):

Suggested change
struct GeneticType{PopData} <: AbstractGeneticTypes{PopData}
dat::PopData
struct GeneticType{Data} <: AbstractGeneticTypes{Data}
dat::Data

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alrighty, got both constructors working. I'm wondering, though, whether `AbstractGeneticTypes` is necessary, since the two genetic types could just as easily be direct subtypes of `Diversity.API.AbstractTypes` — unless you want genetic types to be treated differently?

ntypes::Int64
Zmatrix::Matrix{Float64}
end

# function _hammingDistance(geno1, geno2)
# ismissing(geno1) || ismissing(geno2) && return missing
# if length(geno1) > 2
# @warn "hamming_distance may not work correctly for ploidy > 2"
# end
# #TODO Fix ploidy > 2 - e.g. (1, 1, 1, 2) ≠ (1, 2, 2, 2)
"""
    _hammingDistance(geno1, geno2)

Return the number of alleles by which two genotypes differ, or `missing` if either
genotype is `missing`.

The distance is the larger of the number of alleles of `geno1` that do not occur in
`geno2` and the number of alleles of `geno2` that do not occur in `geno1`.

A warning is logged when `geno1` holds more than two alleles, since the result may be
wrong for ploidy > 2 (see the TODO below).
"""
function _hammingDistance(geno1, geno2)
    # The parentheses matter: `&&` binds tighter than `||`, so without them a
    # missing `geno1` would skip the early return and fall through to `length`.
    (ismissing(geno1) || ismissing(geno2)) && return missing
    if length(geno1) > 2
        @warn "hamming_distance may not work correctly for ploidy > 2"
    end
    # TODO Fix ploidy > 2 - e.g. (1, 1, 1, 2) ≠ (1, 2, 2, 2)

    return max(sum(geno1 .∉ Ref(geno2)), sum(geno2 .∉ Ref(geno1)))
end

# function GeneticType(dat::PopData)
# # Initialise objects
# matrix_obj = PopGen.loci_matrix(dat)
# ntypes = size(matrix_obj, 1)
# output = zeros(Float64, ntypes, ntypes)
# indices = PopGen.pairwise_pairs(1:ntypes)
"""
    GeneticType(dat::PopData)

Construct a `GeneticType` from `dat`, with one type per row of
`PopGen.loci_matrix(dat)`.

For every pair of rows yielded by `PopGen.pairwise_pairs`, the distance is the sum of
the per-locus `_hammingDistance` values. Loci for which that distance is `missing`
contribute zero. Distances are divided by the largest pairwise distance and the
similarity matrix is `1 - distance`. If every pairwise distance is zero (or there are
no rows), the similarity matrix is all ones instead of the `NaN`s a 0/0 division
would produce.
"""
function GeneticType(dat::PopData)
    # Initialise objects
    matrix_obj = PopGen.loci_matrix(dat)
    ntypes = size(matrix_obj, 1)
    output = zeros(Float64, ntypes, ntypes)

    # Calculate distance matrix.
    # NOTE(review): presumably `pairwise_pairs` yields pairs with a < b, so only the
    # upper triangle is filled and `Symmetric` mirrors it — confirm against PopGen.
    for (a, b) in PopGen.pairwise_pairs(1:ntypes)
        row_a = @view matrix_obj[a, :]
        row_b = @view matrix_obj[b, :]
        # `_hammingDistance` may return `missing`; count such loci as 0 so the sum
        # stays numeric and can be stored in the Float64 matrix.
        output[a, b] = mapreduce(d -> coalesce(d, 0), +,
                                 _hammingDistance.(row_a, row_b); init = 0)
    end
    dist = Symmetric(output)

    # Normalise by the largest distance, guarding against an empty or all-zero matrix.
    maxdist = isempty(dist) ? 0.0 : maximum(dist)

    # Calculate similarity matrix
    Zmatrix = maxdist > 0 ? 1 .- dist ./ maxdist : ones(Float64, ntypes, ntypes)

    return GeneticType{PopData}(dat, ntypes, Zmatrix)
end

# function GeneticType(dat::Vector) # Vector{BioSequences.AminoAcidSequence}
# # Initialise objects
# ntypes = length(dat)
# output = zeros(Int64, ntypes, ntypes)
# indices = PopGen.pairwise_pairs(1:ntypes)
"""
    GeneticType(dat::Vector)

Construct a `GeneticType` from a vector of sequences (intended for
`Vector{BioSequences.AminoAcidSequence}`), with one type per element of `dat`.

For every pair of indices yielded by `PopGen.pairwise_pairs`, the distance is
`evaluate(Hamming(), dat[a], dat[b])`. Distances are divided by the largest pairwise
distance and the similarity matrix is `1 - distance`. If every pairwise distance is
zero (or `dat` is empty), the similarity matrix is all ones instead of the `NaN`s a
0/0 division would produce.

The returned object is a `GeneticType{typeof(dat)}`, so the stored `dat` field has
the type of the vector that was passed in.
"""
function GeneticType(dat::Vector) # Vector{BioSequences.AminoAcidSequence}
    # Initialise objects
    ntypes = length(dat)
    output = zeros(Int64, ntypes, ntypes)

    # Calculate distance matrix.
    # NOTE(review): presumably `pairwise_pairs` yields pairs with a < b, so only the
    # upper triangle is filled and `Symmetric` mirrors it — confirm against PopGen.
    for (a, b) in PopGen.pairwise_pairs(1:ntypes)
        output[a, b] = evaluate(Hamming(), dat[a], dat[b])
    end
    dist = Symmetric(output)

    # Normalise by the largest distance, guarding against an empty or all-zero matrix.
    maxdist = isempty(dist) ? 0 : maximum(dist)

    # Calculate similarity matrix
    Zmatrix = maxdist > 0 ? 1 .- dist ./ maxdist : ones(Float64, ntypes, ntypes)

    # The type parameter must match the type of `dat`: the struct field is typed by
    # that parameter, so a single-sequence parameter cannot hold a vector.
    return GeneticType{typeof(dat)}(dat, ntypes, Zmatrix)
end