Skip to content

Commit

Permalink
MM-51: remove kronecker occurences, test, rewrite davies part mk1 (#52)…
Browse files Browse the repository at this point in the history
… [rinstall]

* MM-51: remove kronecker occurences, test, rewrite davies part mk1

* MM-51: davies only for variance components

* MM-51: autoformatting

* MM-51: up version

* MM-51: auto formatting command
  • Loading branch information
jdstamp authored Apr 3, 2022
1 parent 0027a0a commit f5b9524
Show file tree
Hide file tree
Showing 27 changed files with 1,284 additions and 1,298 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Type: Package
Package: mvMAPIT
Title: Implements the Multivariate MArginal ePIstasis Test (mvMAPIT) from
Crawford et al. 2017
Version: 1.0.1
Version: 1.1.0
Author: lcrawlab
Maintainer: Lorin Crawford <[email protected]>
Description: Epistasis, commonly defined as the interaction between
Expand Down
283 changes: 152 additions & 131 deletions R/MAPIT.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
#' @param test is a parameter defining what hypothesis test should be implemented. Takes on values 'normal', 'davies', and 'hybrid'. The 'hybrid' test runs first the 'normal' test and then the 'davies' test on the significant variants.
#' @param cores is a parameter detailing the number of cores to parallelize over. It is important to note that this value only matters when the user has implemented OPENMP on their operating system. If OPENMP is not installed, then please leave cores = 1 and use the standard version of this code and software.
#' @param variantIndex is a vector containing indices of variants to be included in the computation.
#' @param phenotypeCovariance is a string parameter defining how to model the covariance between phenotypes of effects. Possible values: 'identity', 'covariance', 'homogeneous', 'combinatorial'.
#' @param logLevel is a string parameter defining the log level for the logging package.
#' @param logFile is a string parameter defining the name of the log file for the logging output.
#'
Expand All @@ -46,154 +45,176 @@
#' @export
#' @import CompQuadForm
#' @import Rcpp
MvMAPIT <- function(X,
Y,
Z = NULL,
C = NULL,
threshold = 0.05,
accuracy = 1e-8,
test = c('normal', 'davies', 'hybrid'),
cores = 1,
variantIndex = NULL,
phenotypeCovariance = c('identity', 'covariance', 'homogeneous', 'combinatorial'),
logLevel = 'WARN',
logFile = NULL) {
MvMAPIT <- function(
X, Y, Z = NULL, C = NULL, threshold = 0.05, accuracy = 1e-08, test = c("normal", "davies", "hybrid"),
cores = 1, variantIndex = NULL, logLevel = "WARN", logFile = NULL
) {

test <- match.arg(test)
phenotypeCovariance <- match.arg(phenotypeCovariance)
if (cores > 1) {
if (cores > detectCores()) {
warning("The number of cores you're setting is larger than detected cores!")
cores <- detectCores()
test <- match.arg(test)
if (cores > 1) {
if (cores > detectCores()) {
warning("The number of cores you're setting is larger than detected cores!")
cores <- detectCores()
}
}
}

logging::logReset()
logging::basicConfig(level = logLevel)
log <- logging::getLogger('MvMAPIT')
if(!is.null(logFile)) {
filePath <- file.path(getwd(),logFile)
log$debug('Logging to file: %s', filePath)
log$addHandler(logging::writeToFile, file=filePath)
}
logging::logReset()
logging::basicConfig(level = logLevel)
log <- logging::getLogger("MvMAPIT")
if (!is.null(logFile)) {
filePath <- file.path(getwd(), logFile)
log$debug("Logging to file: %s", filePath)
log$addHandler(logging::writeToFile, file = filePath)
}

if (is.vector(Y)) {
Y <- t(Y)
}
if (is.vector(Y)) {
Y <- t(Y)
}

log$debug('Running in %s test mode.', test)
log$debug('Phenotype covariance: %s', phenotypeCovariance)
log$debug('Genotype matrix: %d x %d', nrow(X), ncol(X))
log$debug('Phenotype matrix: %d x %d', nrow(Y), ncol(Y))
log$debug('Genotype matrix determinant: %f', det((X) %*% t(X)))
zero_var <- which(apply(X, 1, var) == 0)
log$debug('Number of zero variance variants: %d', length(zero_var))
X <- remove_zero_variance(X) # operates on rows
log$debug('Genotype matrix after removing zero variance variants: %d x %d', nrow(X), ncol(X))
log$debug("Running in %s test mode.", test)
log$debug(
"Genotype matrix: %d x %d", nrow(X),
ncol(X)
)
log$debug(
"Phenotype matrix: %d x %d", nrow(Y),
ncol(Y)
)
log$debug("Genotype matrix determinant: %f", det((X) %*% t(X)))
zero_var <- which(
apply(X, 1, var) ==
0
)
log$debug("Number of zero variance variants: %d", length(zero_var))
X <- remove_zero_variance(X) # operates on rows
log$debug(
"Genotype matrix after removing zero variance variants: %d x %d", nrow(X),
ncol(X)
)

log$debug('Scale X matrix appropriately.')
Xsd <- apply(X, 1, sd)
Xmean <- apply(X, 1, mean)
X <- (X - Xmean) / Xsd
log$debug("Scale X matrix appropriately.")
Xsd <- apply(X, 1, sd)
Xmean <- apply(X, 1, mean)
X <- (X - Xmean)/Xsd

if (test == 'hybrid') {
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL, phenotypeCovariance) # Normal Z-Test
pvals <- vc.mod$pvalues
#row.names(pvals) <- rownames(X)
pves <- vc.mod$PVE
#row.names(pves) <- rownames(X)
timings <- vc.mod$timings
ind <- which(pvals <= threshold) # Find the indices of the p-values that are below the threshold
if (phenotypeCovariance == 'combinatorial') {
any_significance <- apply(pvals, 1, function(r) any(r <= threshold))
ind_temp <- ind
ind <- which(any_significance == TRUE)
}
log$info('%d p-values are significant with alpha = %f', length(ind), threshold)
variance_components_ind <- get_variance_components_ind(Y)
if (test == "hybrid") {
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL) # Normal Z-Test
pvals <- vc.mod$pvalues
# row.names(pvals) <- rownames(X)
pves <- vc.mod$PVE
# row.names(pves) <- rownames(X)
timings <- vc.mod$timings
ind_matrix <- which(pvals[, variance_components_ind] <= threshold) # Find the variance component indices of the p-values that are below the threshold
log$info(
"%d p-values of variance components are significant with alpha = %f",
length(ind_matrix),
threshold
)
any_significance <- apply(pvals[, variance_components_ind], 1, function(r) any(r <= threshold))
ind <- which(any_significance == TRUE)
log$info(
"%d positions are significant with alpha = %f", length(ind),
threshold
)

log$info('Running davies method on selected SNPs.')
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance)
davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
if (phenotypeCovariance == 'combinatorial') {
pvals[ind_temp] <- davies.pvals[ind_temp]
log$info("Running davies method on selected SNPs.")
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL)
davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals[, variance_components_ind][ind_matrix] <- davies.pvals[, variance_components_ind][ind_matrix]
} else if (test == "normal") {
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL)
pvals <- vc.mod$pvalues
pves <- vc.mod$PVE
timings <- vc.mod$timings
} else {
pvals[ind] <- davies.pvals[ind]
ind <- ifelse(variantIndex, variantIndex, 1:nrow(X))
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL)
pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pvals <- set_covariance_components(variance_components_ind, pvals)
pves <- vc.mod$PVE
timings <- vc.mod$timings
}
timings_mean <- apply(
as.matrix(
timings[rowSums(timings) !=
0, ]
),
2, mean
)
log$info("Calculated mean time of execution. Return list.")
row.names(pvals) <- rownames(X)
row.names(pves) <- rownames(X)
column_names <- mapit_struct_names(Y)
colnames(pvals) <- column_names
colnames(pves) <- column_names
if (!is.null(variantIndex)) {
log$debug("Set pve to NA if not in varianIndex.")
pves[!(c(1:nrow(pves)) %in%
variantIndex)] <- NA
}
if (ncol(pvals) >
1) {
fisherp <- apply(pvals, 1, sumlog)
pvals <- cbind(pvals, metap = fisherp)
}
} else if (test == "normal") {
vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL, phenotypeCovariance)
pvals <- vc.mod$pvalues
pves <- vc.mod$PVE
timings <- vc.mod$timings
} else {
ind <- ifelse(variantIndex, variantIndex, 1:nrow(X))
vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance)
pvals <- mvmapit_pvalues(vc.mod, X, accuracy)
pves <- vc.mod$PVE
timings <- vc.mod$timings
}
timings_mean <- apply(as.matrix(timings[rowSums(timings) != 0, ]), 2, mean)
log$info('Calculated mean time of execution. Return list.')
row.names(pvals) <- rownames(X)
row.names(pves) <- rownames(X)
if (length(rownames(Y)) > 0) {
column_names <- mapit_struct_names(Y, phenotypeCovariance)
} else if (nrow(Y) > 1 && (phenotypeCovariance == 'combinatorial')) {
row.names(Y) <- sprintf("P%s", 1:nrow(Y))
column_names <- mapit_struct_names(Y, phenotypeCovariance)
} else {
column_names <- NULL
}
colnames(pvals) <- column_names
colnames(pves) <- column_names
if (!is.null(variantIndex)) {
log$debug('Set pve to NA if not in varianIndex.')
pves[!(c(1:nrow(pves)) %in% variantIndex)] <- NA
}
if (nrow(Y) > 1 && (phenotypeCovariance == 'combinatorial')) {
fisherp <- apply(pvals, 1, sumlog)
pvals <- cbind(pvals, metap = fisherp)
pves <- set_covariance_pves(Y, pves)
}
return(list("pvalues" = pvals, "pves" = pves, "timings" = timings_mean))
pves <- set_covariance_components(variance_components_ind, pves)
return(list(pvalues = pvals, pves = pves, timings = timings_mean))
}

remove_zero_variance <- function(X) {
return(X[which(apply(X, 1, var) != 0),])
return(
X[which(
apply(X, 1, var) !=
0
),
]
)
}

# This naming sequence has to match the creation of the q-matrix in the C++ routine of mvMAPIT
mapit_struct_names <- function (Y, phenotypeCovariance) {
if (length(phenotypeCovariance) > 0 && !(phenotypeCovariance == 'combinatorial')) {
return(c('kronecker'))
}
phenotype_names <- rownames(Y)
phenotype_combinations <- c()
for (i in seq_len(nrow(Y))) {
for (j in seq_len(nrow(Y))) {
if (j <= i) {
phenotype_combinations <- c(phenotype_combinations,
paste0(phenotype_names[i], "*", phenotype_names[j]))
}
# This naming sequence has to match the creation of the q-matrix in the C++
# routine of mvMAPIT
mapit_struct_names <- function(Y) {
phenotype_names <- rownames(Y)
if (length(phenotype_names) ==
0) {
phenotype_names <- sprintf("P%s", 1:nrow(Y))
}
}
return(phenotype_combinations)
if (length(phenotype_names) ==
1) {
return(phenotype_names)
}
phenotype_combinations <- c()
for (i in seq_len(nrow(Y))) {
for (j in seq_len(nrow(Y))) {
if (j <= i) {
phenotype_combinations <- c(phenotype_combinations, paste0(phenotype_names[i], "*", phenotype_names[j]))
}
}
}
return(phenotype_combinations)
}

set_covariance_pves <- function(Y, pves) {
counter <- 0
for (i in seq_len(nrow(Y))) {
for (j in seq_len(nrow(Y))) {
if (j <= i) {
counter <- counter + 1
if (j < i) pves[, counter] <- NA
}
}
}
return(pves)
set_covariance_components <- function(variance_components_ind, X) {
X[, !variance_components_ind] <- NA
return(X)
}

sumlog <- function(pvalues) {
df <- 2 * length(pvalues)
fisherp <- pchisq(-2 * sum(log(pvalues)), df, lower.tail = FALSE)
return(fisherp)
get_variance_components_ind <- function(Y) {
ind <- c()
counter <- 0
for (i in seq_len(nrow(Y))) {
for (j in seq_len(nrow(Y))) {
if (j <= i) {
counter <- counter + 1
if (j == i) {
ind[counter] <- TRUE
} else {
ind[counter] <- FALSE
}
}
}
}
return(ind)
}

14 changes: 10 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
# Generated by using Rcpp::compileAttributes() -> do not edit by hand Generator
# token: 10BE3573-1514-4C36-9D1C-5A225CD40393

MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, GeneticSimilarityMatrix = NULL, phenotypeCovariance = "identity") {
.Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix, phenotypeCovariance)
MAPITCpp <- function(
X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L,
GeneticSimilarityMatrix = NULL
) {
.Call(
"_mvMAPIT_MAPITCpp", PACKAGE = "mvMAPIT", X, Y, Z, C, variantIndices, testMethod,
cores, GeneticSimilarityMatrix
)
}

6 changes: 3 additions & 3 deletions R/catch-routine-registration.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This dummy function definition is included with the package to ensure that
# 'tools::package_native_routine_registration_skeleton()' generates the required
# registration info for the 'run_testthat_tests' symbol.
# 'tools::package_native_routine_registration_skeleton()' generates the
# required registration info for the 'run_testthat_tests' symbol.
(function() {
.Call("run_testthat_tests", PACKAGE = "mvMAPIT")
.Call("run_testthat_tests", PACKAGE = "mvMAPIT")
})
6 changes: 3 additions & 3 deletions R/mapit_analysis_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#'
#' @format A list with p-values and interaction SNPs:
#' \describe{
#' \item{davies_pvalues}{The p-values returned by MAPIT using the "davies" method.}
#' \item{hybrid_pvalues}{The p-values returned by MAPIT using the "hybrid" method.}
#' \item{normal_pvalues}{The p-values returned by MAPIT using the "normal" method.}
#' \item{davies_pvalues}{The p-values returned by MAPIT using the 'davies' method.}
#' \item{hybrid_pvalues}{The p-values returned by MAPIT using the 'hybrid' method.}
#' \item{normal_pvalues}{The p-values returned by MAPIT using the 'normal' method.}
#' \item{exhaustive_search}{A list containing the p-values of an exhaustive search and fitting the interaction of significant SNPs.}
#' \item{epistatic_snps}{The list of the epistatic SNPs.}
#' \item{additive_snps}{The list of the additive SNPs.}
Expand Down
Loading

0 comments on commit f5b9524

Please sign in to comment.