diff --git a/DESCRIPTION b/DESCRIPTION index f56ba31b..b2d916cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Type: Package Package: mvMAPIT Title: Implements the Multivariate MArginal ePIstasis Test (mvMAPIT) from Crawford et al. 2017 -Version: 1.0.1 +Version: 1.1.0 Author: lcrawlab Maintainer: Lorin Crawford Description: Epistasis, commonly defined as the interaction between diff --git a/R/MAPIT.R b/R/MAPIT.R index 4904ac42..775a0c7c 100644 --- a/R/MAPIT.R +++ b/R/MAPIT.R @@ -37,7 +37,6 @@ #' @param test is a parameter defining what hypothesis test should be implemented. Takes on values 'normal', 'davies', and 'hybrid'. The 'hybrid' test runs first the 'normal' test and then the 'davies' test on the significant variants. #' @param cores is a parameter detailing the number of cores to parallelize over. It is important to note that this value only matters when the user has implemented OPENMP on their operating system. If OPENMP is not installed, then please leave cores = 1 and use the standard version of this code and software. #' @param variantIndex is a vector containing indices of variants to be included in the computation. -#' @param phenotypeCovariance is a string parameter defining how to model the covariance between phenotypes of effects. Possible values: 'identity', 'covariance', 'homogeneous', 'combinatorial'. #' @param logLevel is a string parameter defining the log level for the logging package. #' @param logFile is a string parameter defining the name of the log file for the logging output. #' @@ -46,154 +45,176 @@ #' @export #' @import CompQuadForm #' @import Rcpp -MvMAPIT <- function(X, - Y, - Z = NULL, - C = NULL, - threshold = 0.05, - accuracy = 1e-8, - test = c('normal', 'davies', 'hybrid'), - cores = 1, - variantIndex = NULL, - phenotypeCovariance = c('identity', 'covariance', 'homogeneous', 'combinatorial'), - logLevel = 'WARN', - logFile = NULL) { +MvMAPIT <- function( + X, Y, Z = NULL, C = NULL, threshold = 0.05, accuracy = 1e-08, test = c("normal", "davies", "hybrid"), + cores = 1, variantIndex = NULL, logLevel = "WARN", logFile = NULL +) { - test <- match.arg(test) - phenotypeCovariance <- match.arg(phenotypeCovariance) - if (cores > 1) { - if (cores > detectCores()) { - warning("The number of cores you're setting is larger than detected cores!") - cores <- detectCores() + test <- match.arg(test) + if (cores > 1) { + if (cores > detectCores()) { + warning("The number of cores you're setting is larger than detected cores!") + cores <- detectCores() + } } - } - logging::logReset() - logging::basicConfig(level = logLevel) - log <- logging::getLogger('MvMAPIT') - if(!is.null(logFile)) { - filePath <- file.path(getwd(),logFile) - log$debug('Logging to file: %s', filePath) - log$addHandler(logging::writeToFile, file=filePath) - } + logging::logReset() + logging::basicConfig(level = logLevel) + log <- logging::getLogger("MvMAPIT") + if (!is.null(logFile)) { + filePath <- file.path(getwd(), logFile) + log$debug("Logging to file: %s", filePath) + log$addHandler(logging::writeToFile, file = filePath) + } - if (is.vector(Y)) { - Y <- t(Y) - } + if (is.vector(Y)) { + Y <- t(Y) + } - log$debug('Running in %s test mode.', test) - log$debug('Phenotype covariance: %s', phenotypeCovariance) - log$debug('Genotype matrix: %d x %d', nrow(X), ncol(X)) - log$debug('Phenotype matrix: %d x %d', nrow(Y), ncol(Y)) - log$debug('Genotype matrix determinant: %f', det((X) %*% t(X))) - zero_var <- which(apply(X, 1, var) == 0) - log$debug('Number of zero variance variants: %d', length(zero_var)) - X <- remove_zero_variance(X) # operates on rows - log$debug('Genotype matrix after removing zero variance variants: %d x %d', nrow(X), ncol(X)) + log$debug("Running in %s test mode.", test) + log$debug( + "Genotype matrix: %d x %d", nrow(X), + ncol(X) + ) + log$debug( + "Phenotype matrix: %d x %d", nrow(Y), + ncol(Y) + ) + log$debug("Genotype matrix determinant: %f", det((X) %*% t(X))) + zero_var <- which( + apply(X, 1, var) == + 0 + ) + log$debug("Number of zero variance variants: %d", length(zero_var)) + X <- remove_zero_variance(X) # operates on rows + log$debug( + "Genotype matrix after removing zero variance variants: %d x %d", nrow(X), + ncol(X) + ) - log$debug('Scale X matrix appropriately.') - Xsd <- apply(X, 1, sd) - Xmean <- apply(X, 1, mean) - X <- (X - Xmean) / Xsd + log$debug("Scale X matrix appropriately.") + Xsd <- apply(X, 1, sd) + Xmean <- apply(X, 1, mean) + X <- (X - Xmean)/Xsd - if (test == 'hybrid') { - vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL, phenotypeCovariance) # Normal Z-Test - pvals <- vc.mod$pvalues - #row.names(pvals) <- rownames(X) - pves <- vc.mod$PVE - #row.names(pves) <- rownames(X) - timings <- vc.mod$timings - ind <- which(pvals <= threshold) # Find the indices of the p-values that are below the threshold - if (phenotypeCovariance == 'combinatorial') { - any_significance <- apply(pvals, 1, function(r) any(r <= threshold)) - ind_temp <- ind - ind <- which(any_significance == TRUE) - } - log$info('%d p-values are significant with alpha = %f', length(ind), threshold) + variance_components_ind <- get_variance_components_ind(Y) + if (test == "hybrid") { + vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL) # Normal Z-Test + pvals <- vc.mod$pvalues + # row.names(pvals) <- rownames(X) + pves <- vc.mod$PVE + # row.names(pves) <- rownames(X) + timings <- vc.mod$timings + ind_matrix <- which(pvals[, variance_components_ind] <= threshold) # Find the variance component indices of the p-values that are below the threshold + log$info( + "%d p-values of variance components are significant with alpha = %f", + length(ind_matrix), + threshold + ) + any_significance <- apply(pvals[, variance_components_ind], 1, function(r) any(r <= threshold)) + ind <- which(any_significance == TRUE) + log$info( + "%d positions are significant with alpha = %f", length(ind), + threshold + ) - log$info('Running davies method on selected SNPs.') - vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance) - davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy) - if (phenotypeCovariance == 'combinatorial') { - pvals[ind_temp] <- davies.pvals[ind_temp] + log$info("Running davies method on selected SNPs.") + vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL) + davies.pvals <- mvmapit_pvalues(vc.mod, X, accuracy) + pvals[, variance_components_ind][ind_matrix] <- davies.pvals[, variance_components_ind][ind_matrix] + } else if (test == "normal") { + vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL) + pvals <- vc.mod$pvalues + pves <- vc.mod$PVE + timings <- vc.mod$timings } else { - pvals[ind] <- davies.pvals[ind] + ind <- ifelse(variantIndex, variantIndex, 1:nrow(X)) + vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL) + pvals <- mvmapit_pvalues(vc.mod, X, accuracy) + pvals <- set_covariance_components(variance_components_ind, pvals) + pves <- vc.mod$PVE + timings <- vc.mod$timings + } + timings_mean <- apply( + as.matrix( + timings[rowSums(timings) != + 0, ] + ), + 2, mean + ) + log$info("Calculated mean time of execution. Return list.") + row.names(pvals) <- rownames(X) + row.names(pves) <- rownames(X) + column_names <- mapit_struct_names(Y) + colnames(pvals) <- column_names + colnames(pves) <- column_names + if (!is.null(variantIndex)) { + log$debug("Set pve to NA if not in varianIndex.") + pves[!(c(1:nrow(pves)) %in% + variantIndex)] <- NA + } + if (ncol(pvals) > + 1) { + fisherp <- apply(pvals, 1, sumlog) + pvals <- cbind(pvals, metap = fisherp) } - } else if (test == "normal") { - vc.mod <- MAPITCpp(X, Y, Z, C, variantIndex, "normal", cores = cores, NULL, phenotypeCovariance) - pvals <- vc.mod$pvalues - pves <- vc.mod$PVE - timings <- vc.mod$timings - } else { - ind <- ifelse(variantIndex, variantIndex, 1:nrow(X)) - vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance) - pvals <- mvmapit_pvalues(vc.mod, X, accuracy) - pves <- vc.mod$PVE - timings <- vc.mod$timings - } - timings_mean <- apply(as.matrix(timings[rowSums(timings) != 0, ]), 2, mean) - log$info('Calculated mean time of execution. Return list.') - row.names(pvals) <- rownames(X) - row.names(pves) <- rownames(X) - if (length(rownames(Y)) > 0) { - column_names <- mapit_struct_names(Y, phenotypeCovariance) - } else if (nrow(Y) > 1 && (phenotypeCovariance == 'combinatorial')) { - row.names(Y) <- sprintf("P%s", 1:nrow(Y)) - column_names <- mapit_struct_names(Y, phenotypeCovariance) - } else { - column_names <- NULL - } - colnames(pvals) <- column_names - colnames(pves) <- column_names - if (!is.null(variantIndex)) { - log$debug('Set pve to NA if not in varianIndex.') - pves[!(c(1:nrow(pves)) %in% variantIndex)] <- NA - } - if (nrow(Y) > 1 && (phenotypeCovariance == 'combinatorial')) { - fisherp <- apply(pvals, 1, sumlog) - pvals <- cbind(pvals, metap = fisherp) - pves <- set_covariance_pves(Y, pves) - } - return(list("pvalues" = pvals, "pves" = pves, "timings" = timings_mean)) + pves <- set_covariance_components(variance_components_ind, pves) + return(list(pvalues = pvals, pves = pves, timings = timings_mean)) } remove_zero_variance <- function(X) { - return(X[which(apply(X, 1, var) != 0),]) + return( + X[which( + apply(X, 1, var) != + 0 + ), + ] + ) } -# This naming sequence has to match the creation of the q-matrix in the C++ routine of mvMAPIT -mapit_struct_names <- function (Y, phenotypeCovariance) { - if (length(phenotypeCovariance) > 0 && !(phenotypeCovariance == 'combinatorial')) { - return(c('kronecker')) - } - phenotype_names <- rownames(Y) - phenotype_combinations <- c() - for (i in seq_len(nrow(Y))) { - for (j in seq_len(nrow(Y))) { - if (j <= i) { - phenotype_combinations <- c(phenotype_combinations, - paste0(phenotype_names[i], "*", phenotype_names[j])) - } +# This naming sequence has to match the creation of the q-matrix in the C++ +# routine of mvMAPIT +mapit_struct_names <- function(Y) { + phenotype_names <- rownames(Y) + if (length(phenotype_names) == + 0) { + phenotype_names <- sprintf("P%s", 1:nrow(Y)) } - } - return(phenotype_combinations) + if (length(phenotype_names) == + 1) { + return(phenotype_names) + } + phenotype_combinations <- c() + for (i in seq_len(nrow(Y))) { + for (j in seq_len(nrow(Y))) { + if (j <= i) { + phenotype_combinations <- c(phenotype_combinations, paste0(phenotype_names[i], "*", phenotype_names[j])) + } + } + } + return(phenotype_combinations) } -set_covariance_pves <- function(Y, pves) { - counter <- 0 - for (i in seq_len(nrow(Y))) { - for (j in seq_len(nrow(Y))) { - if (j <= i) { - counter <- counter + 1 - if (j < i) pves[, counter] <- NA - } - } - } - return(pves) +set_covariance_components <- function(variance_components_ind, X) { + X[, !variance_components_ind] <- NA + return(X) } -sumlog <- function(pvalues) { - df <- 2 * length(pvalues) - fisherp <- pchisq(-2 * sum(log(pvalues)), df, lower.tail = FALSE) - return(fisherp) +get_variance_components_ind <- function(Y) { + ind <- c() + counter <- 0 + for (i in seq_len(nrow(Y))) { + for (j in seq_len(nrow(Y))) { + if (j <= i) { + counter <- counter + 1 + if (j == i) { + ind[counter] <- TRUE + } else { + ind[counter] <- FALSE + } + } + } + } + return(ind) } + diff --git a/R/RcppExports.R b/R/RcppExports.R index 04434493..14a6a49e 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,7 +1,13 @@ -# Generated by using Rcpp::compileAttributes() -> do not edit by hand -# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +# Generated by using Rcpp::compileAttributes() -> do not edit by hand Generator +# token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -MAPITCpp <- function(X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, GeneticSimilarityMatrix = NULL, phenotypeCovariance = "identity") { - .Call('_mvMAPIT_MAPITCpp', PACKAGE = 'mvMAPIT', X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix, phenotypeCovariance) +MAPITCpp <- function( + X, Y, Z = NULL, C = NULL, variantIndices = NULL, testMethod = "normal", cores = 1L, + GeneticSimilarityMatrix = NULL +) { + .Call( + "_mvMAPIT_MAPITCpp", PACKAGE = "mvMAPIT", X, Y, Z, C, variantIndices, testMethod, + cores, GeneticSimilarityMatrix + ) } diff --git a/R/catch-routine-registration.R b/R/catch-routine-registration.R index 88142b38..d22c1b1e 100644 --- a/R/catch-routine-registration.R +++ b/R/catch-routine-registration.R @@ -1,6 +1,6 @@ # This dummy function definition is included with the package to ensure that -# 'tools::package_native_routine_registration_skeleton()' generates the required -# registration info for the 'run_testthat_tests' symbol. +# 'tools::package_native_routine_registration_skeleton()' generates the +# required registration info for the 'run_testthat_tests' symbol. (function() { - .Call("run_testthat_tests", PACKAGE = "mvMAPIT") + .Call("run_testthat_tests", PACKAGE = "mvMAPIT") }) diff --git a/R/mapit_analysis_data.R b/R/mapit_analysis_data.R index 24dc1996..6dc19d45 100644 --- a/R/mapit_analysis_data.R +++ b/R/mapit_analysis_data.R @@ -4,9 +4,9 @@ #' #' @format A list with p-values and interaction SNPs: #' \describe{ -#' \item{davies_pvalues}{The p-values returned by MAPIT using the "davies" method.} -#' \item{hybrid_pvalues}{The p-values returned by MAPIT using the "hybrid" method.} -#' \item{normal_pvalues}{The p-values returned by MAPIT using the "normal" method.} +#' \item{davies_pvalues}{The p-values returned by MAPIT using the 'davies' method.} +#' \item{hybrid_pvalues}{The p-values returned by MAPIT using the 'hybrid' method.} +#' \item{normal_pvalues}{The p-values returned by MAPIT using the 'normal' method.} #' \item{exhaustive_search}{A list containing the p-values of an exhaustive search and fitting the interaction of significant SNPs.} #' \item{epistatic_snps}{The list of the epistatic SNPs.} #' \item{additive_snps}{The list of the additive SNPs.} diff --git a/R/mapit_pvalues.R b/R/mapit_pvalues.R index c00bec9e..4f9f7ff6 100644 --- a/R/mapit_pvalues.R +++ b/R/mapit_pvalues.R @@ -1,62 +1,84 @@ # Runs the Davies portion of the hypothesis testing mvmapit_pvalues <- function(cpp_structure, X, accuracy) { - num_combinations <- dim(cpp_structure$Eigenvalues)[2] - p <- nrow(X) - hybrid.pvals <- matrix(1, p, num_combinations) - for (c in seq_len(num_combinations)) { - hybrid.pvals[, c] <- hybrid_pvalues(cpp_structure$Est[, c], - cpp_structure$Eigenvalues[, c,], - accuracy) - } - names(hybrid.pvals) <- rownames(X) - return(hybrid.pvals) + num_combinations <- dim(cpp_structure$Eigenvalues)[2] + p <- nrow(X) + hybrid.pvals <- matrix(1, p, num_combinations) + for (c in seq_len(num_combinations)) { + hybrid.pvals[, c] <- hybrid_pvalues(cpp_structure$Est[, c], cpp_structure$Eigenvalues[, c, ], accuracy) + } + names(hybrid.pvals) <- rownames(X) + return(hybrid.pvals) } hybrid_pvalues <- function(point_estimates, eigenvalues, acc) { - ### Apply Davies Exact Method ### - hybrid.pvals <- c() - for (i in seq_len(length(point_estimates))) { - lambda <- sort(eigenvalues[, i], decreasing = T) - if (all(lambda == 0)) { - hybrid.pvals[i] <- NA - next + ### Apply Davies Exact Method ### + hybrid.pvals <- c() + for (i in seq_len(length(point_estimates))) { + lambda <- sort(eigenvalues[, i], decreasing = T) + if (all(lambda == 0)) { + hybrid.pvals[i] <- NA + next + } + Davies_Method <- suppressWarnings(davies(point_estimates[i], lambda = lambda, acc = acc, lim = 1e+06)) + if (Davies_Method$ifault == 0) { + hybrid.pvals[i] <- 2 * min(Davies_Method$Qq, 1 - Davies_Method$Qq) + } else { + warning(paste("Davies function exited with error code: ", Davies_Method$ifault)) + warning("Using saddlepoint approximation.") + saddle <- saddlepoint_approximation(point_estimates[i], lambda = lambda) + hybrid.pvals[i] <- 2 * min(saddle, 1 - saddle) + } } - Davies_Method <- suppressWarnings(davies(point_estimates[i], lambda = lambda, acc = acc, lim = 1e6)) - if (Davies_Method$ifault == 0) { - hybrid.pvals[i] <- 2 * min(Davies_Method$Qq, 1 - Davies_Method$Qq) + return(hybrid.pvals) +} + +# saddlepoint approx: adapted from https://github.com/cran/survey v4.0 and +# https://github.com/baolinwu/mkatr v0.1.0 +saddlepoint_approximation = function(x, lambda) { + d = max(lambda) + lambda = lambda/d + x = x/d + k0 = function(zeta) -sum(log(1 - 2 * zeta * lambda))/2 + kprime0 = function(zeta) sapply(zeta, function(zz) sum(lambda/(1 - 2 * zz * lambda))) + kpprime0 = function(zeta) 2 * + sum(lambda^2/(1 - 2 * zeta * lambda)^2) + n = length(lambda) + if (any(lambda < 0)) { + lmin = max(1/(2 * lambda[lambda < 0])) * + 0.99999 + } else if (x > sum(lambda)) { + lmin = -0.01 } else { - warning(paste("Davies function exited with error code: ", Davies_Method$ifault)) - warning("Using saddlepoint approximation.") - saddle <- saddlepoint_approximation(point_estimates[i], lambda = lambda) - hybrid.pvals[i] <- 2 * min(saddle, 1 - saddle) + lmin = -length(lambda)/(2 * + x) + } + lmax = min(1/(2 * lambda[lambda > 0])) * + 0.99999 + hatzeta = uniroot( + function(zeta) kprime0(zeta) - + x, lower = lmin, upper = lmax, tol = 1e-08 + )$root + w = sign(hatzeta) * + sqrt(2 * (hatzeta * x - k0(hatzeta))) + v = hatzeta * sqrt(kpprime0(hatzeta)) + if (abs(hatzeta) < + 1e-04) { + return(NA) + } else { + return( + pnorm( + w + log(v/w)/w, + lower.tail = FALSE + ) + ) } - } - return(hybrid.pvals) } -# saddlepoint approx: adapted from https://github.com/cran/survey v4.0 and https://github.com/baolinwu/mkatr v0.1.0 -saddlepoint_approximation = function(x, lambda) { - d = max(lambda) - lambda = lambda / d - x = x / d - k0 = function(zeta) - sum(log(1 - 2 * zeta * lambda)) / 2 - kprime0 = function(zeta) sapply(zeta, function(zz) sum(lambda / (1 - 2 * zz * lambda))) - kpprime0 = function(zeta) 2*sum(lambda^2 / (1 - 2 * zeta * lambda)^2) - n = length(lambda) - if (any(lambda < 0)) { - lmin = max(1 / (2 * lambda[lambda < 0])) * 0.99999 - } else if (x > sum(lambda)) { - lmin = -0.01 - } else { - lmin = -length(lambda) / (2 * x) - } - lmax = min(1 / (2 * lambda[lambda > 0])) * 0.99999 - hatzeta = uniroot(function(zeta) kprime0(zeta) - x, lower = lmin, upper = lmax, tol = 1e-08)$root - w = sign(hatzeta) * sqrt(2 * (hatzeta * x - k0(hatzeta))) - v = hatzeta * sqrt(kpprime0(hatzeta)) - if(abs(hatzeta) < 1e-4){ - return(NA) - } else { - return( pnorm(w + log(v / w) / w, lower.tail=FALSE) ) - } +sumlog <- function(pvalues) { + df <- 2 * length(pvalues) + fisherp <- pchisq( + -2 * sum(log(pvalues)), + df, lower.tail = FALSE + ) + return(fisherp) } diff --git a/R/simulate_data.R b/R/simulate_data.R index c43fbe3d..5d361921 100644 --- a/R/simulate_data.R +++ b/R/simulate_data.R @@ -34,218 +34,231 @@ #' @import foreach #' @import mvtnorm #' @import parallel -simulate_phenotypes <- function(genotype_matrix, - n_causal = 1000, - n_trait_specific = 10, - n_pleiotropic = 10, - H2 = 0.6, - d = 2, - rho = 0.8, - marginal_correlation = 0.3, - epistatic_correlation = 0.3, - group_ratio_trait = 1, - group_ratio_pleiotropic = 1, - maf_threshold = 0.01, - seed = 67132, - logLevel = 'INFO', - logFile = NULL) { - set.seed(seed) - coll <- makeAssertCollection() - assertInt(n_causal, lower = 0, add = coll) - assertInt(n_trait_specific, lower = 0, add = coll) - assertInt(n_pleiotropic, lower = 0, add = coll) - assertDouble(group_ratio_trait, lower = 1, add = coll) - assertDouble(group_ratio_pleiotropic, lower = 1, add = coll) - assertDouble(H2, lower = 0, upper = 1, add = coll) - assertDouble(rho, lower = 0, upper = 1, add = coll) - assertDouble(marginal_correlation, lower = -1, upper = 1, add = coll) - assertDouble(epistatic_correlation, lower = -1, upper = 1, add = coll) - assertDouble(maf_threshold, lower = 0, upper = 1, add = coll) - assertInt(d, lower = 1, add = coll) - assertInt(seed, lower = 1, add = coll) - assertMatrix(genotype_matrix, all.missing = FALSE, add = coll) - reportAssertions(coll) - - logging::logReset() - logging::basicConfig(level = logLevel) - log <- logging::getLogger('simulate_phenotypes') - if(!is.null(logFile)) { - filePath <- file.path(getwd(),logFile) - log$debug('Logging to file: %s', filePath) - log$addHandler(logging::writeToFile, file=filePath) - } - - snp.ids <- 1:ncol(genotype_matrix) - maf <- colMeans(genotype_matrix) / 2 - X <- scale(genotype_matrix) - maf_compliant <- (maf > maf_threshold) & (maf < 1 - maf_threshold) - # scale produces NaN when the columns have zero variance - snp.ids.filtered <- snp.ids[complete.cases(t(X)) & maf_compliant] - - n_samples <- nrow(X) # number of genotype samples - n_snp <- length(snp.ids.filtered) # number of SNPs passing quality control - log$debug('Scaled genotype matrix: %d x %d', n_samples, n_snp) - log$debug('Disregard %d variants due to zero variance or small minor allele frequency.', ncol(genotype_matrix) - length(snp.ids.filtered)) - log$debug('Minor allele frequency threshold %f.', maf_threshold) - - # divide groups into ratios - n_group1_trait = ceiling(n_trait_specific / (1 + group_ratio_trait)) - n_group1_pleiotropic = ceiling(n_pleiotropic / (1 + group_ratio_pleiotropic)) - - coll <- makeAssertCollection() - assertInt(n_causal, lower = 0, upper = n_snp, add = coll) - assertInt(n_pleiotropic + n_trait_specific, lower = 0, upper = n_causal, add = coll) - assertInt(n_group1_trait, lower = 0, upper = n_trait_specific, add = coll) - assertInt(n_group1_pleiotropic, lower = 0, upper = n_pleiotropic, add = coll) - reportAssertions(coll) - - # factor vectors for splitting the groups - f_trait <- get_factors(n_group1_trait, n_trait_specific) - f_pleiotropic <- get_factors(n_group1_pleiotropic, n_pleiotropic) - - - log$debug('Number of causal SNPs: %d', n_causal) - log$debug('Number of trait specific SNPs: %d', n_trait_specific) - log$debug('Number of pleiotropic SNPs: %d', n_pleiotropic) - log$debug('NA in raw genotype matrix: %d', sum(is.na(genotype_matrix))) - log$debug('NA in scaled genotype matrix: %d', sum(is.na(X))) - - Y <- c() - causal_snps <- list() - - pleiotropic_set <- sample(snp.ids.filtered, n_pleiotropic, replace = F) # declare peleiotropic SNPs before since they have to be present in every phenotype - pleio_split <- split(pleiotropic_set, f = f_pleiotropic) - X_pleio_group1 <- X[, pleio_split$group1] - X_pleio_group2 <- X[, pleio_split$group2] - X_epi_pleio <- foreach(i=seq_len(n_group1_pleiotropic), .combine=cbind) %do% { - # this step fails if there are too little pleiotropic SNPs; i.e. <= 3? - X_pleio_group1[, i] * X_pleio_group2 - } - if (!is.matrix(X_epi_pleio)) { - X_epi_pleio <- matrix(NA, nrow = nrow(X), ncol = 0) - } - log$debug('Dimensions of pleiotropic interaction matrix: %s x %s', nrow(X_epi_pleio), ncol(X_epi_pleio)) - - log$debug('Draw effects from multivariate normal with desired correlation.') - - C_marginal <- matrix(marginal_correlation, ncol = d, nrow = d) - diag(C_marginal) <- 1 - - C_epistatic <- matrix(epistatic_correlation, ncol = d, nrow = d) - diag(C_epistatic) <- 1 - - C_error <- matrix(0, ncol = d, nrow = d) - diag(C_error) <- 1 - - log$debug('Desired marginal correlation: %f', marginal_correlation) - beta <- mvtnorm::rmvnorm(n_causal, sigma = C_marginal) - log$debug('Correlation of simulated marginal effects: %s', cor(beta)) - - n_epistatic_effects <- n_group1_trait * (n_trait_specific - n_group1_trait) + ncol(X_epi_pleio) - log$debug('Number of epistatic effects: %s', n_epistatic_effects) - if (n_epistatic_effects > 0) { - log$debug('Desired epistatic correlation: %f', epistatic_correlation) - alpha <- mvtnorm::rmvnorm(n_epistatic_effects, - sigma = C_epistatic) - log$debug('Correlation of simulated epistatic effects: %s', cor(alpha)) - } else { - log$debug('Return empty effect matrix.') - alpha <- matrix(0, ncol = d, nrow = 0) - } - log$debug('Desired error correlation: %f', 0) - error <- mvtnorm::rmvnorm(n_samples, sigma = C_error) - log$debug('Correlation of simulated error: %s', cor(error)) - snp.ids.trait <- setdiff(snp.ids.filtered, pleiotropic_set) - for (j in 1:d) { - ## select causal SNPs - log$debug('Simulating phenotype %d', j) - causal_snps_j <- sample(snp.ids.trait, n_causal - n_pleiotropic, replace = F) - trait_specific_snps <- sample(causal_snps_j, n_trait_specific, replace = F) - trait_grouped <- split(trait_specific_snps, f_trait) - trait_specific_j_1 <- trait_grouped$group1 - trait_specific_j_2 <- trait_grouped$group2 - log$debug('Length causal set: %d', length(causal_snps_j)) - log$debug('Length pleiotropic set: %d', length(pleiotropic_set)) - log$debug('Length trait specific set 1: %d', length(trait_specific_j_1)) - log$debug('Length trait specific set 2: %d', length(trait_specific_j_2)) - - log$debug('Head of causal SNPs: %s', head(c(causal_snps_j, pleiotropic_set))) - log$debug('Head of trait specific SNPs group 1: %s', head(trait_specific_j_1)) - log$debug('Head of trait specific SNPs group 2: %s', head(trait_specific_j_2)) - - # create trait_specific interaction matrix - X_causal_j <- X[, c(causal_snps_j, pleiotropic_set)] # all SNPs have additive effects - X_trait_specific_j_1 <- X[, trait_specific_j_1] - X_trait_specific_j_2 <- X[, trait_specific_j_2] - - start_interactions <- proc.time() - log$debug('Computing interactions. This may take a while.') - X_epi <- foreach(i=seq_len(length(trait_specific_j_1)), .combine=cbind) %do% { - X_trait_specific_j_1[, i] * X_trait_specific_j_2 +simulate_phenotypes <- function( + genotype_matrix, n_causal = 1000, n_trait_specific = 10, n_pleiotropic = 10, + H2 = 0.6, d = 2, rho = 0.8, marginal_correlation = 0.3, epistatic_correlation = 0.3, + group_ratio_trait = 1, group_ratio_pleiotropic = 1, maf_threshold = 0.01, seed = 67132, + logLevel = "INFO", logFile = NULL +) { + set.seed(seed) + coll <- makeAssertCollection() + assertInt(n_causal, lower = 0, add = coll) + assertInt(n_trait_specific, lower = 0, add = coll) + assertInt(n_pleiotropic, lower = 0, add = coll) + assertDouble(group_ratio_trait, lower = 1, add = coll) + assertDouble(group_ratio_pleiotropic, lower = 1, add = coll) + assertDouble(H2, lower = 0, upper = 1, add = coll) + assertDouble(rho, lower = 0, upper = 1, add = coll) + assertDouble(marginal_correlation, lower = -1, upper = 1, add = coll) + assertDouble(epistatic_correlation, lower = -1, upper = 1, add = coll) + assertDouble(maf_threshold, lower = 0, upper = 1, add = coll) + assertInt(d, lower = 1, add = coll) + assertInt(seed, lower = 1, add = coll) + assertMatrix(genotype_matrix, all.missing = FALSE, add = coll) + reportAssertions(coll) + + logging::logReset() + logging::basicConfig(level = logLevel) + log <- logging::getLogger("simulate_phenotypes") + if (!is.null(logFile)) { + filePath <- file.path(getwd(), logFile) + log$debug("Logging to file: %s", filePath) + log$addHandler(logging::writeToFile, file = filePath) + } + + snp.ids <- 1:ncol(genotype_matrix) + maf <- colMeans(genotype_matrix)/2 + X <- scale(genotype_matrix) + maf_compliant <- (maf > maf_threshold) & (maf < 1 - maf_threshold) + # scale produces NaN when the columns have zero variance + snp.ids.filtered <- snp.ids[complete.cases(t(X)) & + maf_compliant] + + n_samples <- nrow(X) # number of genotype samples + n_snp <- length(snp.ids.filtered) # number of SNPs passing quality control + log$debug("Scaled genotype matrix: %d x %d", n_samples, n_snp) + log$debug( + "Disregard %d variants due to zero variance or small minor allele frequency.", + ncol(genotype_matrix) - + length(snp.ids.filtered) + ) + log$debug("Minor allele frequency threshold %f.", maf_threshold) + + # divide groups into ratios + n_group1_trait = ceiling(n_trait_specific/(1 + group_ratio_trait)) + n_group1_pleiotropic = ceiling(n_pleiotropic/(1 + group_ratio_pleiotropic)) + + coll <- makeAssertCollection() + assertInt(n_causal, lower = 0, upper = n_snp, add = coll) + assertInt(n_pleiotropic + n_trait_specific, lower = 0, upper = n_causal, add = coll) + assertInt(n_group1_trait, lower = 0, upper = n_trait_specific, add = coll) + assertInt(n_group1_pleiotropic, lower = 0, upper = n_pleiotropic, add = coll) + reportAssertions(coll) + + # factor vectors for splitting the groups + f_trait <- get_factors(n_group1_trait, n_trait_specific) + f_pleiotropic <- get_factors(n_group1_pleiotropic, n_pleiotropic) + + + log$debug("Number of causal SNPs: %d", n_causal) + log$debug("Number of trait specific SNPs: %d", n_trait_specific) + log$debug("Number of pleiotropic SNPs: %d", n_pleiotropic) + log$debug("NA in raw genotype matrix: %d", sum(is.na(genotype_matrix))) + log$debug("NA in scaled genotype matrix: %d", sum(is.na(X))) + + Y <- c() + causal_snps <- list() + + pleiotropic_set <- sample(snp.ids.filtered, n_pleiotropic, replace = F) # declare peleiotropic SNPs before since they have to be present in every phenotype + pleio_split <- split(pleiotropic_set, f = f_pleiotropic) + X_pleio_group1 <- X[, pleio_split$group1] + X_pleio_group2 <- X[, pleio_split$group2] + X_epi_pleio <- foreach( + i = seq_len(n_group1_pleiotropic), + .combine = cbind + ) %do% + { + # this step fails if there are too little pleiotropic SNPs; i.e. <= + # 3? + X_pleio_group1[, i] * X_pleio_group2 + } + if (!is.matrix(X_epi_pleio)) { + X_epi_pleio <- matrix( + NA, nrow = nrow(X), + ncol = 0 + ) } - X_epi <- cbind(X_epi_pleio, X_epi) + log$debug( + "Dimensions of pleiotropic interaction matrix: %s x %s", nrow(X_epi_pleio), + ncol(X_epi_pleio) + ) + + log$debug("Draw effects from multivariate normal with desired correlation.") - time_interactions <- proc.time() - start_interactions - log$debug('Interactions X_epi computed in %f', time_interactions[3]) - log$debug('Dimension of interaction matrix X_epi: %d x %d', nrow(X_epi), ncol(X_epi)) + C_marginal <- matrix(marginal_correlation, ncol = d, nrow = d) + diag(C_marginal) <- 1 - # marginal effects - X_marginal <- X_causal_j - beta_j <- beta[, j] - y_marginal <- X_marginal %*% beta_j - y_marginal <- y_marginal * sqrt(H2 * rho / c(var(y_marginal))) - log$debug('Variance scaled y_marginal: %f', var(y_marginal)) + C_epistatic <- matrix(epistatic_correlation, ncol = d, nrow = d) + diag(C_epistatic) <- 1 - # pairwise epistatic effects - alpha_j <- alpha[, j] + C_error <- matrix(0, ncol = d, nrow = d) + diag(C_error) <- 1 + + log$debug("Desired marginal correlation: %f", marginal_correlation) + beta <- mvtnorm::rmvnorm(n_causal, sigma = C_marginal) + log$debug("Correlation of simulated marginal effects: %s", cor(beta)) + + n_epistatic_effects <- n_group1_trait * (n_trait_specific - n_group1_trait) + + ncol(X_epi_pleio) + log$debug("Number of epistatic effects: %s", n_epistatic_effects) if (n_epistatic_effects > 0) { - y_epi <- X_epi %*% alpha_j - y_epi <- y_epi * sqrt(H2 * (1 - rho) / c(var(y_epi))) - log$debug('Variance scaled y_epi: %f', var(y_epi)) + log$debug("Desired epistatic correlation: %f", epistatic_correlation) + alpha <- mvtnorm::rmvnorm(n_epistatic_effects, sigma = C_epistatic) + log$debug("Correlation of simulated epistatic effects: %s", cor(alpha)) } else { - y_epi <- 0 * y_marginal - log$debug('y_epi vector of zeros size y_marginal: %s', y_epi) + log$debug("Return empty effect matrix.") + alpha <- matrix(0, ncol = d, nrow = 0) } + log$debug("Desired error correlation: %f", 0) + error <- mvtnorm::rmvnorm(n_samples, sigma = C_error) + log$debug("Correlation of simulated error: %s", cor(error)) + snp.ids.trait <- setdiff(snp.ids.filtered, pleiotropic_set) + for (j in 1:d) { + ## select causal SNPs + log$debug("Simulating phenotype %d", j) + causal_snps_j <- sample(snp.ids.trait, n_causal - n_pleiotropic, replace = F) + trait_specific_snps <- sample(causal_snps_j, n_trait_specific, replace = F) + trait_grouped <- split(trait_specific_snps, f_trait) + trait_specific_j_1 <- trait_grouped$group1 + trait_specific_j_2 <- trait_grouped$group2 + log$debug("Length causal set: %d", length(causal_snps_j)) + log$debug("Length pleiotropic set: %d", length(pleiotropic_set)) + log$debug("Length trait specific set 1: %d", length(trait_specific_j_1)) + log$debug("Length trait specific set 2: %d", length(trait_specific_j_2)) - # unexplained phenotypic variation - y_err <- error[, j] - y_err <- y_err * sqrt((1 - H2) / c(var(y_err))) - - y <- y_marginal + y_epi + y_err - Y <- cbind(Y, y) - causal_snps[[paste0('phenotype_', j)]] <- list( - 'causal_snps' = c(causal_snps_j, pleiotropic_set), - 'pleiotropic_groups' = pleio_split, - 'trait_specific_groups' = trait_grouped, - 'alpha' = alpha_j, - 'beta' = beta_j - ) - } + log$debug("Head of causal SNPs: %s", head(c(causal_snps_j, pleiotropic_set))) + log$debug("Head of trait specific SNPs group 1: %s", head(trait_specific_j_1)) + log$debug("Head of trait specific SNPs group 2: %s", head(trait_specific_j_2)) + + # create trait_specific interaction matrix + X_causal_j <- X[, c(causal_snps_j, pleiotropic_set)] # all SNPs have additive effects + X_trait_specific_j_1 <- X[, trait_specific_j_1] + X_trait_specific_j_2 <- X[, trait_specific_j_2] + start_interactions <- proc.time() + log$debug("Computing interactions. This may take a while.") + X_epi <- foreach( + i = seq_len(length(trait_specific_j_1)), + .combine = cbind + ) %do% + { + X_trait_specific_j_1[, i] * X_trait_specific_j_2 + } + X_epi <- cbind(X_epi_pleio, X_epi) - colnames(genotype_matrix) <- seq_len(ncol(genotype_matrix)) %>% sprintf(fmt = "snp_%05d") # column names names for SNPs - colnames(Y) <- seq_len(ncol(Y)) %>% sprintf(fmt = "p_%02d") # column names names for phenotypes + time_interactions <- proc.time() - start_interactions + log$debug("Interactions X_epi computed in %f", time_interactions[3]) + log$debug( + "Dimension of interaction matrix X_epi: %d x %d", nrow(X_epi), + ncol(X_epi) + ) - log$debug('Phenotype data: %s', head(Y)) - log$debug('Phenotype correlation: %s', cor(Y)) - log$debug('Correlation of simulated epistatic effects: %s', cor(alpha)) + # marginal effects + X_marginal <- X_causal_j + beta_j <- beta[, j] + y_marginal <- X_marginal %*% beta_j + y_marginal <- y_marginal * sqrt(H2 * rho/c(var(y_marginal))) + log$debug("Variance scaled y_marginal: %f", var(y_marginal)) - # return data - simulated_pleiotropic_epistasis_data <- list( - number_samples = n_samples, - pve = H2, - rho = rho, - phenotype = Y, - genotype = genotype_matrix, - snps = causal_snps, - snps.filtered = snp.ids.filtered, - seed = seed - ) + # pairwise epistatic effects + alpha_j <- alpha[, j] + if (n_epistatic_effects > 0) { + y_epi <- X_epi %*% alpha_j + y_epi <- y_epi * sqrt(H2 * (1 - rho)/c(var(y_epi))) + log$debug("Variance scaled y_epi: %f", var(y_epi)) + } else { + y_epi <- 0 * y_marginal + log$debug("y_epi vector of zeros size y_marginal: %s", y_epi) + } + + # unexplained phenotypic variation + y_err <- error[, j] + y_err <- y_err * sqrt((1 - H2)/c(var(y_err))) + + y <- y_marginal + y_epi + y_err + Y <- cbind(Y, y) + causal_snps[[paste0("phenotype_", j)]] <- list( + causal_snps = c(causal_snps_j, pleiotropic_set), + pleiotropic_groups = pleio_split, trait_specific_groups = trait_grouped, + alpha = alpha_j, beta = beta_j + ) + } - return(simulated_pleiotropic_epistasis_data) + + colnames(genotype_matrix) <- seq_len(ncol(genotype_matrix)) %>% + sprintf(fmt = "snp_%05d") # column names names for SNPs + colnames(Y) <- seq_len(ncol(Y)) %>% + sprintf(fmt = "p_%02d") # column names names for phenotypes + + log$debug("Phenotype data: %s", head(Y)) + log$debug("Phenotype correlation: %s", cor(Y)) + log$debug("Correlation of simulated epistatic effects: %s", cor(alpha)) + + # return data + simulated_pleiotropic_epistasis_data <- list( + number_samples = n_samples, pve = H2, rho = rho, phenotype = Y, genotype = genotype_matrix, + snps = causal_snps, snps.filtered = snp.ids.filtered, seed = seed + ) + + return(simulated_pleiotropic_epistasis_data) } get_factors <- function(n1, n) { - return(c(rep("group1", n1), rep("group2", n - n1))) + return( + c( + rep("group1", n1), + rep("group2", n - n1) + ) + ) } diff --git a/configure b/configure index 03bb167f..5b530f04 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for mvMAPIT 1.0.1. +# Generated by GNU Autoconf 2.71 for mvMAPIT 1.1.0. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='mvMAPIT' PACKAGE_TARNAME='mvmapit' -PACKAGE_VERSION='1.0.1' -PACKAGE_STRING='mvMAPIT 1.0.1' +PACKAGE_VERSION='1.1.0' +PACKAGE_STRING='mvMAPIT 1.1.0' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1270,7 +1270,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures mvMAPIT 1.0.1 to adapt to many kinds of systems. +\`configure' configures mvMAPIT 1.1.0 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1332,7 +1332,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of mvMAPIT 1.0.1:";; + short | recursive ) echo "Configuration of mvMAPIT 1.1.0:";; esac cat <<\_ACEOF @@ -1415,7 +1415,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -mvMAPIT configure 1.0.1 +mvMAPIT configure 1.1.0 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1648,7 +1648,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by mvMAPIT $as_me 1.0.1, which was +It was created by mvMAPIT $as_me 1.1.0, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -4718,7 +4718,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by mvMAPIT $as_me 1.0.1, which was +This file was extended by mvMAPIT $as_me 1.1.0, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -4773,7 +4773,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -mvMAPIT config.status 1.0.1 +mvMAPIT config.status 1.1.0 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 7d40a402..b82e7275 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([mvMAPIT],[1.0.1]) +AC_INIT([mvMAPIT],[1.1.0]) # Find the compiler and compiler flags used by R. : ${R_HOME=`R RHOME`} diff --git a/man/MvMAPIT.Rd b/man/MvMAPIT.Rd index 708587d8..ef0b1eb5 100644 --- a/man/MvMAPIT.Rd +++ b/man/MvMAPIT.Rd @@ -14,7 +14,6 @@ MvMAPIT( test = c("normal", "davies", "hybrid"), cores = 1, variantIndex = NULL, - phenotypeCovariance = c("identity", "covariance", "homogeneous", "combinatorial"), logLevel = "WARN", logFile = NULL ) @@ -38,8 +37,6 @@ MvMAPIT( \item{variantIndex}{is a vector containing indices of variants to be included in the computation.} -\item{phenotypeCovariance}{is a string parameter defining how to model the covariance between phenotypes of effects. Possible values: 'identity', 'covariance', 'homogeneous', 'combinatorial'.} - \item{logLevel}{is a string parameter defining the log level for the logging package.} \item{logFile}{is a string parameter defining the name of the log file for the logging output.} diff --git a/oscar-tasks.py b/oscar-tasks.py index d3520ebd..b3b560eb 100644 --- a/oscar-tasks.py +++ b/oscar-tasks.py @@ -46,5 +46,15 @@ def change_version(version): _cmd("autoconf") +def autoformat(): + directories = ["R", "tests/testthat"] + + logger.info(f"Autoformat directories: {directories}") + for d in directories: + cmd = ["Rscript", "-e", f"formatR::tidy_dir('{d}', args.newline = TRUE)"] + logger.info(f"Run cmd: {cmd}") + run(cmd) + + if __name__ == "__main__": fire.Fire() diff --git a/src/MAPIT.cpp b/src/MAPIT.cpp index fda3c4e8..b0b251be 100644 --- a/src/MAPIT.cpp +++ b/src/MAPIT.cpp @@ -4,7 +4,6 @@ #include "gsm/gsm.h" #include "logging/log.h" #include "mapit/davies.h" -#include "mapit/kronecker.h" #include "mapit/normal.h" #include "mapit/projection.h" #include "mapit/pve.h" @@ -47,8 +46,7 @@ Rcpp::List MAPITCpp( Rcpp::Nullable C = R_NilValue, Rcpp::Nullable variantIndices = R_NilValue, std::string testMethod = "normal", int cores = 1, - Rcpp::Nullable GeneticSimilarityMatrix = R_NilValue, - std::string phenotypeCovariance = "identity") { + Rcpp::Nullable GeneticSimilarityMatrix = R_NilValue) { int i; const int n = X.n_cols; const int p = X.n_rows; @@ -56,8 +54,7 @@ Rcpp::List MAPITCpp( int num_combinations = 1; int z = 0; - const bool combinatorial = - (phenotypeCovariance.compare("combinatorial") == 0 && d > 1); + const bool combinatorial = (d > 1); if (combinatorial) { num_combinations = num_combinations_with_replacement(d, 2); } @@ -75,7 +72,6 @@ Rcpp::List MAPITCpp( logger->info("Number of SNPs: {}", p); logger->info("Number of phenotypes: {}", d); logger->info("Test method: {}", testMethod); - logger->info("Phenotype covariance model: {}", phenotypeCovariance); #ifdef _OPENMP logger->info("Execute c++ routine on {} cores.", cores); @@ -117,21 +113,6 @@ Rcpp::List MAPITCpp( ind = Rcpp::as(variantIndices.get()); } - // between phenotype variance - arma::mat V_error(d, d); - // effect of error uncorrelated - V_error.eye(); - arma::mat V_phenotype(d, d); - // string.compare() returns '0' if equal - if (phenotypeCovariance.compare("covariance") == 0) { - V_phenotype = cov(Y.t()); - } else if (phenotypeCovariance.compare("homogeneous") == 0) { - V_phenotype.ones(); - } else { - // 'identity' as default - V_phenotype.eye(); - } - #ifdef _OPENMP omp_set_num_threads(cores); #endif @@ -205,7 +186,6 @@ Rcpp::List MAPITCpp( } else { arma::vec yc = vectorise(Yc); phenotypes = matrix_to_vector_of_rows(yc.as_row()); - matrices = kronecker_products(matrices, V_phenotype, V_error); } end = steady_clock::now(); execution_t(i, 2) = duration_cast(end - start).count(); diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 25d2e511..a8df1621 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,8 +12,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // MAPITCpp -Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable Z, Rcpp::Nullable C, Rcpp::Nullable variantIndices, std::string testMethod, int cores, Rcpp::Nullable GeneticSimilarityMatrix, std::string phenotypeCovariance); -RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP GeneticSimilarityMatrixSEXP, SEXP phenotypeCovarianceSEXP) { +Rcpp::List MAPITCpp(const arma::mat X, const arma::mat Y, Rcpp::Nullable Z, Rcpp::Nullable C, Rcpp::Nullable variantIndices, std::string testMethod, int cores, Rcpp::Nullable GeneticSimilarityMatrix); +RcppExport SEXP _mvMAPIT_MAPITCpp(SEXP XSEXP, SEXP YSEXP, SEXP ZSEXP, SEXP CSEXP, SEXP variantIndicesSEXP, SEXP testMethodSEXP, SEXP coresSEXP, SEXP GeneticSimilarityMatrixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -25,8 +25,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< std::string >::type testMethod(testMethodSEXP); Rcpp::traits::input_parameter< int >::type cores(coresSEXP); Rcpp::traits::input_parameter< Rcpp::Nullable >::type GeneticSimilarityMatrix(GeneticSimilarityMatrixSEXP); - Rcpp::traits::input_parameter< std::string >::type phenotypeCovariance(phenotypeCovarianceSEXP); - rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix, phenotypeCovariance)); + rcpp_result_gen = Rcpp::wrap(MAPITCpp(X, Y, Z, C, variantIndices, testMethod, cores, GeneticSimilarityMatrix)); return rcpp_result_gen; END_RCPP } @@ -34,7 +33,7 @@ END_RCPP RcppExport SEXP run_testthat_tests(); static const R_CallMethodDef CallEntries[] = { - {"_mvMAPIT_MAPITCpp", (DL_FUNC) &_mvMAPIT_MAPITCpp, 9}, + {"_mvMAPIT_MAPITCpp", (DL_FUNC) &_mvMAPIT_MAPITCpp, 8}, {"run_testthat_tests", (DL_FUNC) &run_testthat_tests, 0}, {NULL, NULL, 0} }; diff --git a/src/mapit/kronecker.cpp b/src/mapit/kronecker.cpp deleted file mode 100644 index 58606ae1..00000000 --- a/src/mapit/kronecker.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2021 Lorin Crawford. -#include "mapit/kronecker.h" - -std::vector kronecker_products(std::vector matrices, - const arma::mat V_phenotype, - const arma::mat V_error) { - int num_variance_components = matrices.size(); - for (int i = 0; i < num_variance_components - 1; i++) { - matrices[i] = kron(V_phenotype, matrices[i]); - } - matrices[num_variance_components - 1] = - kron(V_error, matrices[num_variance_components - 1]); - return matrices; -} diff --git a/src/mapit/kronecker.h b/src/mapit/kronecker.h deleted file mode 100644 index 2e6609e9..00000000 --- a/src/mapit/kronecker.h +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright 2021 Lorin Crawford. -#pragma once -#include -#include - -std::vector kronecker_products(std::vector matrices, - const arma::mat V_phenotype, - const arma::mat V_error); diff --git a/tests/testthat/compare-to-original-mapit.R b/tests/testthat/compare-to-original-mapit.R index 49912065..424036da 100644 --- a/tests/testthat/compare-to-original-mapit.R +++ b/tests/testthat/compare-to-original-mapit.R @@ -1,25 +1,27 @@ -library('mvMAPIT') +library("mvMAPIT") # given original <- readRDS("original_MAPIT.rds") X <- original$genotype Y <- original$phenotype normal.pvalues <- original$normal$pvalues davies.pvalues <- original$davies$pvalues -tolerance <- 0.0001 +tolerance <- 1e-04 # when -Xmean=apply(X, 2, mean); Xsd=apply(X, 2, sd); X=t((t(X)-Xmean)/Xsd) -mapit.normal <- MvMAPIT(t(X), - (Y), - test = 'normal', - cores = 4, - phenotypeCovariance = 'combinatorial', - logLevel = "INFO") -mapit.davies <- MvMAPIT(t(X), - t(Y), - test = 'davies', - cores = 4, - phenotypeCovariance = 'combinatorial', - logLevel = "INFO") +Xmean = apply(X, 2, mean) +Xsd = apply(X, 2, sd) +X = t( + (t(X) - + Xmean)/Xsd +) +mapit.normal <- MvMAPIT( + t(X), + (Y), test = "normal", cores = 4, logLevel = "INFO" +) +mapit.davies <- MvMAPIT( + t(X), + t(Y), + test = "davies", cores = 4, logLevel = "INFO" +) # then normal <- as.vector(mapit.normal$pvalues) names(normal.pvalues) <- NULL @@ -35,17 +37,18 @@ normal.na.counter <- c() normal.diff.index <- c() # comparison necessary because all.equal returns string if FALSE -if(normal.all.equal == TRUE) { +if (normal.all.equal == TRUE) { message("Normal method p-values are equal for MAPIT and mvMAPIT.") } else { warning("Normal method p-values differ between MAPIT and mvMAPIT.") NORMAL <- cbind(normal.pvalues, normal) - for (i in seq_len(nrow(NORMAL))) { - if(any(is.na(NORMAL[i, ]))) { + for (i in seq_len(nrow(NORMAL))) { + if (any(is.na(NORMAL[i, ]))) { normal.na.counter <- rbind(normal.na.counter, NORMAL[i, ]) normal.diff.index <- c(normal.diff.index, i) - } else if(abs(NORMAL[i, 1] - NORMAL[i, 2]) > tolerance) { - print(NORMAL[i,]) + } else if (abs(NORMAL[i, 1] - NORMAL[i, 2]) > + tolerance) { + print(NORMAL[i, ]) normal.diff.counter <- normal.diff.counter + 1 normal.diff.index <- c(normal.diff.index, i) } @@ -56,17 +59,18 @@ davies.diff.counter <- 0 davies.na.counter <- c() davies.diff.index <- c() -if(davies.all.equal == TRUE) { +if (davies.all.equal == TRUE) { message("Davies method p-values are equal for MAPIT and mvMAPIT.") } else { warning("Davies method p-values differ between MAPIT and mvMAPIT.") DAVIES <- cbind(davies.pvalues, davies) for (i in seq_len(nrow(DAVIES))) { - if(any(is.na(DAVIES[i, ]))) { + if (any(is.na(DAVIES[i, ]))) { davies.na.counter <- rbind(davies.na.counter, DAVIES[i, ]) davies.diff.index <- c(davies.diff.index, i) - } else if (abs(DAVIES[i, 1] - DAVIES[i, 2]) > tolerance) { - print(DAVIES[i,]) + } else if (abs(DAVIES[i, 1] - DAVIES[i, 2]) > + tolerance) { + print(DAVIES[i, ]) davies.diff.counter <- davies.diff.counter + 1 davies.diff.index <- c(davies.diff.index, i) } diff --git a/tests/testthat/test-cpp.R b/tests/testthat/test-cpp.R index 8f3ff1ed..9b2b6f56 100644 --- a/tests/testthat/test-cpp.R +++ b/tests/testthat/test-cpp.R @@ -1,4 +1,6 @@ context("C++") -test_that("Catch unit tests pass", { - expect_cpp_tests_pass("mvMAPIT") -}) +test_that( + "Catch unit tests pass", { + expect_cpp_tests_pass("mvMAPIT") + } +) diff --git a/tests/testthat/test-mvmapit-argument-parsing.R b/tests/testthat/test-mvmapit-argument-parsing.R index 03110272..af51c5e5 100644 --- a/tests/testthat/test-mvmapit-argument-parsing.R +++ b/tests/testthat/test-mvmapit-argument-parsing.R @@ -1,38 +1,51 @@ -test_that("MvMapit can take a vector as phenotype input. hybrid = FALSE, test = normal", { - # given - p <- 10 - n <- 4 - pvalues <- matrix(rep(0.48001, 10), ncol = 1) - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- c(runif(n)) - # when - mapit <- MvMAPIT(t(X), - Y, - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'identity', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-3) -}) +test_that( + "MvMapit can take a vector as phenotype input. test = normal", { + # given + p <- 10 + n <- 4 + pvalues <- matrix( + rep(0.48001, 10), + ncol = 1 + ) + colnames(pvalues) <- c("P1") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- c(runif(n)) + # when + mapit <- MvMAPIT( + t(X), + Y, accuracy = 1e-05, cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 0.001) + } +) -test_that("MvMapit can take a vector as phenotype input. hybrid = FALSE, test = davies", { - # given - p <- 10 - n <- 4 - pvalues <- matrix(rep(0.209, 10), ncol = 1) - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- c(runif(n)) - # when - mapit <- MvMAPIT(t(X), - Y, - test = 'davies', - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'identity', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-3) -}) +test_that( + "MvMapit can take a vector as phenotype input. test = davies", { + # given + p <- 10 + n <- 4 + pvalues <- matrix( + rep(0.209, 10), + ncol = 1 + ) + colnames(pvalues) <- c("P1") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- c(runif(n)) + # when + mapit <- MvMAPIT( + t(X), + Y, test = "davies", accuracy = 1e-05, cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 0.001) + } +) diff --git a/tests/testthat/test-mvmapit-kronecker-executes-without-error.R b/tests/testthat/test-mvmapit-kronecker-executes-without-error.R deleted file mode 100644 index bbb80995..00000000 --- a/tests/testthat/test-mvmapit-kronecker-executes-without-error.R +++ /dev/null @@ -1,211 +0,0 @@ -test_that("MvMapit executes without error when test = hybrid.", { - # given - p <- 10 - n <- 5 - d <- 1 - pvalues <- matrix(c( -0.7315409, -0.7961257, -0.7607653, -0.4380301, -0.5265725, -0.8722502, -0.5097280, -0.6620606, -0.4780134, -0.8188633 -), ncol = 1) - set.seed(5) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'hybrid', - cores = 1, - accuracy = 1e-2, - phenotypeCovariance = 'covariance', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7) -}) - -test_that("MvMapit executes without error when test = normal.", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c( -0.5823842, -0.4968245, -0.9075542, -0.8032449, -0.6508556, -0.6345080, -0.2701859, -0.7694585, -0.8733472, -0.7751935 -), ncol = 1) - set.seed(6) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'covariance', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7) -}) - -test_that("MvMapit executes without error when test = davies.", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c( -0.5649081, -0.5902518, -0.9844545, -0.5968390, -0.3404326, -0.6872187, -0.3451383, -0.4676168, -0.9013817, -0.6097202 -), ncol = 1) - set.seed(6) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'davies', - accuracy = 1e-5, - cores = 1, - variantIndex = c(1:p), - phenotypeCovariance = 'covariance', - logLevel = "ERROR") - # then - expect_equal(length(mapit$pvalues), p) - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7) -}) - -test_that("MvMapit times computations when test = hybrid.", { - # given - p <- 10 - n <- 5 - d <- 1 - set.seed(5) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - cores = 1, - test = 'hybrid', - accuracy = 1e-2, - phenotypeCovariance = 'covariance', - logLevel = "ERROR") - # then - expect_equal(length(mapit$timings), 6) -}) - -test_that("MvMapit executes without error when C is not NULL, test = hybrid.", { - # given - p <- 10 - n <- 5 - d <- 1 - pvalues <- matrix(c( -0.6627869, -0.5757486, -0.4161929, -0.3638452, -0.9156424, -0.6960296, -0.7774770, -0.5700789, -0.7186994, -0.6525567 -), ncol = 1, byrow = TRUE) - set.seed(23) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - C <- matrix(runif(n * n), ncol = n) - # when - mapit <- MvMAPIT(t(X), - t(Y), - C = C, - cores = 1, - test = 'hybrid', - accuracy = 1e-6, - phenotypeCovariance = 'covariance', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7) -}) - - -test_that("test = normal. phenotypeCovariance = identity", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c( -0.4852139, -0.5064176, -0.8818390, -0.4643340, -0.5941437, -0.7743915, -0.2540507, -0.8820358, -0.3711353, -0.2102760 - ), ncol = 1) - set.seed(6) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - cores = 1, - phenotypeCovariance = 'identity', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7) -}) - -test_that("test = normal. phenotypeCovariance = homogeneous", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c( -0.8384163, -0.8715925, -0.7138586, -0.7740888, -0.5336523, -0.4166844, -0.8883106, -0.9463360, -0.4320256, -0.4812932), ncol = 1, byrow = TRUE) - set.seed(6) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'homogeneous', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-5) -}) diff --git a/tests/testthat/test-mvmapit-pariwise-executes-without-error.R b/tests/testthat/test-mvmapit-pariwise-executes-without-error.R index 3d9b8d81..9defce0c 100644 --- a/tests/testthat/test-mvmapit-pariwise-executes-without-error.R +++ b/tests/testthat/test-mvmapit-pariwise-executes-without-error.R @@ -1,193 +1,261 @@ -test_that("test = 'normal'. phenotypeCovariance = 'combinatorial'", { - # given - p <- 2 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.4990573, 0.4478648, 0.9574136, 0.4662016, 0.4782672, 0.1381317, 0.612, -0.5015375, 0.4619467, 0.1347061, 0.4507170, 0.6405290, 0.2410251, 0.425), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "DEBUG") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "test = 'normal'. ", { + # given + p <- 2 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.4990573, 0.4478648, 0.9574136, 0.4662016, 0.4782672, 0.1381317, + 0.612, 0.5015375, 0.4619467, 0.1347061, 0.450717, 0.640529, 0.2410251, + 0.425 + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "normal", cores = 1, logLevel = "DEBUG" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("test = davies. phenotypeCovariance = 'combinatorial'", { - # given - p <- 2 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.01624319, 0, 0.6531582, 1.380850e-04, 0.000000e+00, 0.419213, 0.0, -0.02694842, 0, 0.3977345, 5.755901e-05, 1.216344e-08, 0.490887, 0.0), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'davies', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "test = davies. ", { + # given + p <- 2 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.01624319, NA, 0.6531582, NA, NA, 0.419213, NA, 0.02694842, NA, + 0.3977345, NA, NA, 0.490887, NA + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "davies", cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("phenotypeCovariance = 'combinatorial'", { - # given - p <- 2 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.4990573, 0.4478648, 0.9574136, 0.4662016, 0.4782672, 0.1381317, 0.612, -0.5015375, 0.4619467, 0.1347061, 0.4507170, 0.6405290, 0.2410251, 0.425), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'hybrid', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "test = hybrid", { + # given + p <- 2 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.4990573, 0.4478648, 0.9574136, 0.4662016, 0.4782672, 0.1381317, + 0.612, 0.5015375, 0.4619467, 0.1347061, 0.450717, 0.640529, 0.2410251, + 0.425 + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "hybrid", cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("C is not NULL. phenotypeCovariance = 'combinatorial'", { - # given - p <- 4 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.6876487, 0.2148062, 0.5640931, 0.1657485, 0.2837563, 0.5020969, 0.409, -0.8920097, 0.9107812, 0.9787608, 0.6248188, 0.2751130, 0.4994958, 0.945, -0.5868067, 0.5128342, 0.3823134, 0.8747280, 0.2352273, 0.6889640, 0.767, -0.3184337, 0.5047131, 0.6774045, 0.3071930, 0.8257162, 0.5527816, 0.756 - ), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(29) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - C <- matrix(runif(n * n), ncol = n) - # when - mapit <- MvMAPIT(t(X), - t(Y), - C = C, - test = 'hybrid', - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "C is not NULL. ", { + # given + p <- 4 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.6876487, 0.2148062, 0.5640931, 0.1657485, 0.2837563, 0.5020969, + 0.409, 0.8920097, 0.9107812, 0.9787608, 0.6248188, 0.275113, 0.4994958, + 0.945, 0.5868067, 0.5128342, 0.3823134, 0.874728, 0.2352273, 0.688964, + 0.767, 0.3184337, 0.5047131, 0.6774045, 0.307193, 0.8257162, 0.5527816, + 0.756 + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(29) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + C <- matrix( + runif(n * n), + ncol = n + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + C = C, test = "hybrid", accuracy = 1e-05, cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("test = 'normal', C is not NULL. phenotypeCovariance = 'combinatorial'", { - # given - p <- 4 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.6876487, 0.2148062, 0.5640931, 0.1657485, 0.2837563, 0.5020969, 0.409, -0.8920097, 0.9107812, 0.9787608, 0.6248188, 0.2751130, 0.4994958, 0.945, -0.5868067, 0.5128342, 0.3823134, 0.8747280, 0.2352273, 0.6889640, 0.767, -0.3184337, 0.5047131, 0.6774045, 0.3071930, 0.8257162, 0.5527816, 0.756 - ), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(29) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - C <- matrix(runif(n * n), ncol = n) - # when - mapit <- MvMAPIT(t(X), - t(Y), - C = C, - test = 'normal', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "test = 'normal', C is not NULL. ", { + # given + p <- 4 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.6876487, 0.2148062, 0.5640931, 0.1657485, 0.2837563, 0.5020969, + 0.409, 0.8920097, 0.9107812, 0.9787608, 0.6248188, 0.275113, 0.4994958, + 0.945, 0.5868067, 0.5128342, 0.3823134, 0.874728, 0.2352273, 0.688964, + 0.767, 0.3184337, 0.5047131, 0.6774045, 0.307193, 0.8257162, 0.5527816, + 0.756 + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(29) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + C <- matrix( + runif(n * n), + ncol = n + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + C = C, test = "normal", cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("C is not NULL, test = 'davies'. phenotypeCovariance = 'combinatorial'", { - # given - p <- 4 - n <- 10 - d <- 3 - pvalues <- matrix(c( -0.13395441, 0, 0, 0.8361945, 0.001629745, 0.009014612, 0, -0.00000000, 0, 0, 0.5526955, 0.000000000, 0.000000000, 0, -0.01039137, 0, 0, 0.7475025, 0.000000000, 0.000000000, 0, -0.00000000, 0, 0, 0.7418526, 0.000000000, 0.000000000, 0 -), - nrow = p, ncol = 7, byrow = TRUE) - colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - C <- matrix(runif(n * n), ncol = n) - # when - mapit <- MvMAPIT(t(X), - t(Y), - C = C, - test = 'davies', - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "C is not NULL, test = 'davies'. ", { + # given + p <- 4 + n <- 10 + d <- 3 + pvalues <- matrix( + c( + 0.13395441, NA, 0, NA, NA, 0.009014612, NA, 0, NA, 0, NA, NA, 0, + NA, 0.01039137, NA, 0, NA, NA, 0, NA, 0, NA, 0, NA, NA, 0, NA + ), + nrow = p, ncol = 7, byrow = TRUE + ) + colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3", "metap") + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + C <- matrix( + runif(n * n), + ncol = n + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + C = C, test = "davies", accuracy = 1e-05, cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) -test_that("test = 'davies'. phenotypeCovariance = 'combinatorial', d = 1", { - # given - p <- 10 - n <- 4 - d <- 1 - pvalues <- matrix(c(0.0000000, - 0.7422639, - 0.7635732, - 0.0000000, - 0.1059719, - 0.5984818, - 0.7906796, - 0.0000000, - 0.0000000, - 0.4871070), - nrow = p, ncol = 1, byrow = TRUE) - set.seed(20) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - C <- matrix(runif(n * n), ncol = n) - # when - mapit <- MvMAPIT(t(X), - t(Y), - C = C, - test = 'davies', - accuracy = 1e-5, - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4) -}) +test_that( + "test = 'davies'. , d = 1", { + # given + p <- 10 + n <- 4 + d <- 1 + pvalues <- matrix( + c( + 0, 0.7422639, 0.7635732, 0, 0.1059719, 0.5984818, 0.7906796, 0, 0, + 0.487107 + ), + nrow = p, ncol = 1, byrow = TRUE + ) + set.seed(20) + colnames(pvalues) <- c("P1") + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + C <- matrix( + runif(n * n), + ncol = n + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + C = C, test = "davies", accuracy = 1e-05, cores = 1, logLevel = "ERROR" + ) + # then + expect_equal(mapit$pvalues, pvalues, tolerance = 1e-04) + } +) diff --git a/tests/testthat/test-mvmapit-return-is-labeled-informatively.R b/tests/testthat/test-mvmapit-return-is-labeled-informatively.R index 81c3d840..fd2b75d5 100644 --- a/tests/testthat/test-mvmapit-return-is-labeled-informatively.R +++ b/tests/testthat/test-mvmapit-return-is-labeled-informatively.R @@ -1,55 +1,45 @@ -test_that("pairwise test with d = 3. test = 'normal'.", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c(0.4705, 0.5025, 0.4007, 0.3927, 0.6591, 0.6629, 0.4171, 0.0285, 0.3101, 0.1738, 0.0308, 0.0503), - nrow = p, ncol = 6) - set.seed(853) - variants <- sprintf("SNP%s", 1:p) - phenotypes <- sprintf("Q%s", 1:d) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - colnames(X) <- variants - colnames(Y) <- phenotypes - resulting_colnames <- c("Q1*Q1", "Q2*Q1", "Q2*Q2", "Q3*Q1", "Q3*Q2", "Q3*Q3", "metap") - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "ERROR") - # then - # print(mapit$pvalues) - expect_equal(rownames(mapit$pvalues), variants) - expect_equal(colnames(mapit$pvalues), resulting_colnames) -}) +test_that( + "pairwise test with d = 3. test = 'normal'.", { + # given + p <- 10 + n <- 5 + d <- 3 + pvalues <- matrix( + c( + 0.4705, 0.5025, 0.4007, 0.3927, 0.6591, 0.6629, 0.4171, 0.0285, 0.3101, + 0.1738, 0.0308, 0.0503 + ), + nrow = p, ncol = 6 + ) + set.seed(853) + variants <- sprintf("SNP%s", 1:p) + phenotypes <- sprintf("Q%s", 1:d) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + colnames(X) <- variants + colnames(Y) <- phenotypes + resulting_colnames <- c("Q1*Q1", "Q2*Q1", "Q2*Q2", "Q3*Q1", "Q3*Q2", "Q3*Q3", "metap") + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "normal", cores = 1, logLevel = "ERROR" + ) + # then print(mapit$pvalues) + expect_equal( + rownames(mapit$pvalues), + variants + ) + expect_equal( + colnames(mapit$pvalues), + resulting_colnames + ) + } +) -test_that("pairwise test with d = 3. test = 'normal'.", { - # given - p <- 10 - n <- 5 - d <- 3 - pvalues <- matrix(c(0.4705, 0.5025, 0.4007, 0.3927, 0.6591, 0.6629, 0.4171, 0.0285, 0.3101, 0.1738, 0.0308, 0.0503), - nrow = p, ncol = 6) - set.seed(853) - variants <- sprintf("SNP%s", 1:p) - phenotypes <- sprintf("P%s", 1:d) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - colnames(X) <- variants - colnames(Y) <- phenotypes - resulting_colnames <- c("kronecker") - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'identity', - logLevel = "ERROR") - # then - # print(mapit$pvalues) - expect_equal(rownames(mapit$pvalues), variants) - expect_equal(colnames(mapit$pvalues), resulting_colnames) -}) diff --git a/tests/testthat/test-mvmapit-with-identical-phenotypes.R b/tests/testthat/test-mvmapit-with-identical-phenotypes.R index 9a416b3c..342c639b 100644 --- a/tests/testthat/test-mvmapit-with-identical-phenotypes.R +++ b/tests/testthat/test-mvmapit-with-identical-phenotypes.R @@ -1,20 +1,24 @@ -test_that("test that all pvalues equal if phenotypes equal", { - # given - p <- 20 - n <- 10 - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - y <- runif(n) - Y <- as.matrix(cbind(y, y)) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "DEBUG") - # then - expect_equal(mapit$pvalues[, 1], mapit$pvalues[, 2], tolerance = 1e-8) - expect_equal(mapit$pvalues[, 1], mapit$pvalues[, 3], tolerance = 1e-8) -}) +test_that( + "test that all pvalues equal if phenotypes equal", { + # given + p <- 20 + n <- 10 + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + y <- runif(n) + Y <- as.matrix(cbind(y, y)) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "normal", cores = 1, logLevel = "DEBUG" + ) + # then + expect_equal(mapit$pvalues[, 1], mapit$pvalues[, 2], tolerance = 1e-08) + expect_equal(mapit$pvalues[, 1], mapit$pvalues[, 3], tolerance = 1e-08) + } +) diff --git a/tests/testthat/test-pvalue-names-from-phenotype-names.R b/tests/testthat/test-pvalue-names-from-phenotype-names.R index 50521ea8..6a5e4675 100644 --- a/tests/testthat/test-pvalue-names-from-phenotype-names.R +++ b/tests/testthat/test-pvalue-names-from-phenotype-names.R @@ -1,31 +1,40 @@ -test_that("combinatorial test with d = 3. combinatorial.", { - # given - phenotypeCovariance <- 'combinatorial' - n <- 5 - d <- 3 - set.seed(853) - phenotypes <- sprintf("P%s", 1:d) - Y <- matrix(runif(d * n), ncol = d) - colnames(Y) <- phenotypes - correct_colnames <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3") - # when - result <- mapit_struct_names(t(Y), phenotypeCovariance) - # then - expect_equal(result, correct_colnames) -}) +test_that( + "combinatorial test with d = 3. combinatorial.", { + # given + n <- 5 + d <- 3 + set.seed(853) + phenotypes <- sprintf("p%s", 1:d) + y <- matrix( + runif(d * n), + ncol = d + ) + colnames(y) <- phenotypes + correct_colnames <- c("p1*p1", "p2*p1", "p2*p2", "p3*p1", "p3*p2", "p3*p3") + # when + result <- mapit_struct_names(t(y)) + # then + expect_equal(result, correct_colnames) + } +) + +test_that( + "combinatorial test with d = 1. combinatorial.", { + # given + n <- 5 + d <- 1 + set.seed(853) + phenotypes <- sprintf("p%s", 1:d) + y <- matrix( + runif(d * n), + ncol = d + ) + colnames(y) <- phenotypes + correct_colnames <- c("p1") + # when + result <- mapit_struct_names(t(y)) + # then + expect_equal(result, correct_colnames) + } +) -test_that("combinatorial test with d = 3. Kronecker.", { - # given - phenotypeCovariance <- 'identity' - n <- 5 - d <- 3 - set.seed(853) - phenotypes <- sprintf("P%s", 1:d) - Y <- matrix(runif(d * n), ncol = d) - colnames(Y) <- phenotypes - correct_colnames <- c("kronecker") - # when - result <- mapit_struct_names(t(Y), phenotypeCovariance) - # then - expect_equal(result, correct_colnames) -}) diff --git a/tests/testthat/test-saddlepoint-approximation.R b/tests/testthat/test-saddlepoint-approximation.R index 2e2cd80d..077984c3 100644 --- a/tests/testthat/test-saddlepoint-approximation.R +++ b/tests/testthat/test-saddlepoint-approximation.R @@ -1,14 +1,16 @@ -test_that("saddlepoint approximation and davies similar results", { - # given - x <- c(1, 5, 11, 15, 20) - lambda <- rep(1, 10) - # when - davies_results <- c() - saddle_results <- c() - for (i in x) { - saddle_results <- c(saddle_results, saddlepoint_approximation(i, lambda)) - davies_results <- c(davies_results, davies(i, lambda = lambda)$Qq) - } - # then - expect_equal(saddle_results, davies_results, tolerance = 1e-3) -}) +test_that( + "saddlepoint approximation and davies similar results", { + # given + x <- c(1, 5, 11, 15, 20) + lambda <- rep(1, 10) + # when + davies_results <- c() + saddle_results <- c() + for (i in x) { + saddle_results <- c(saddle_results, saddlepoint_approximation(i, lambda)) + davies_results <- c(davies_results, davies(i, lambda = lambda)$Qq) + } + # then + expect_equal(saddle_results, davies_results, tolerance = 0.001) + } +) diff --git a/tests/testthat/test-simulate_data.R b/tests/testthat/test-simulate_data.R index 15160c07..4379837d 100644 --- a/tests/testthat/test-simulate_data.R +++ b/tests/testthat/test-simulate_data.R @@ -1,212 +1,270 @@ -test_that("Simulate multiple phenotypes returns apropriate phenotype object", { - # given - p <- 20 - f <- 10 - g <- 4 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = g, - d = d, - maf_threshold = 0.0, - logLevel = 'ERROR') - - # then - expect_equal(nrow(data$phenotype), n) - expect_equal(ncol(data$phenotype), d) - expect_equal(sum(is.na(data$phenotype)), 0) # no NA values in phenotype - expect_equal(length(data), 8) -}) - -test_that("Simulate multiple phenotypes returns causal SNPs", { - # given - p <- 20 - f <- 10 - g <- 4 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - total_causal <- (f) # single trait SNPs plus pleiotropic SNPs - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = g, - d = d, - maf_threshold = 0.0, - logLevel = 'ERROR') - - # then - expect_equal(length(data$snps$phenotype_1$causal_snps), total_causal) -}) - -test_that("Simulate multiple phenotypes remove SNPs with low maf", { - # given - p <- 20 - f <- 10 - g <- 4 - n <- 5 - d <- 3 - maf <- 0.05 + 0.45 * runif(p) - set.seed(12345) - X <- matrix((runif(p * n) >= maf) + (runif(p * n) >= maf), ncol = p) - X[, 1] <- 0 - X[, 13] <- 0 - total_causal <- (f * p) # single trait SNPs plus pleiotropic SNPs - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = g, - d = d, - maf_threshold = 0.05, - logLevel = 'ERROR') - data$snps.filtered - - # then - expect_true(!(1 %in% data$snps.filtered) & !(13 %in% data$snps.filtered)) -}) - -test_that("Simulation of groups with given ratio works as expected.", { - # given - p <- 50 - f <- 30 - g <- 8 - h <- 10 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - total_causal <- (f) # single trait SNPs plus pleiotropic SNPs - correct_1 <- 2 - correct_2 <- 6 - correct_3 <- 2 - correct_4 <- 8 - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = h, - d = d, - group_ratio_trait = 3, - group_ratio_pleiotropic = 4, - maf_threshold = 0.0, - logLevel = 'ERROR') - - # then - expect_equal(length(data$snps$phenotype_1$trait_specific_groups$group1), correct_1) - expect_equal(length(data$snps$phenotype_1$trait_specific_groups$group2), correct_2) - expect_equal(length(data$snps$phenotype_1$pleiotropic_groups$group1), correct_3) - expect_equal(length(data$snps$phenotype_1$pleiotropic_groups$group2), correct_4) -}) - -test_that("simulate_phenotypes can handle zero size n_trait_specific groups.", { - # given - p <- 50 - f <- 30 - g <- 0 - h <- 10 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - total_causal <- (f) # single trait SNPs plus pleiotropic SNPs - correct_1 <- 0 - correct_2 <- 0 - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = h, - d = d, - group_ratio_trait = 3, - group_ratio_pleiotropic = 4, - maf_threshold = 0.0, - logLevel = 'ERROR') - - # then - expect_equal(length(data$snps$phenotype_1$trait_specific_groups$group1), correct_1) - expect_equal(length(data$snps$phenotype_1$trait_specific_groups$group2), correct_1) -}) - -test_that("simulate_phenotypes can handle zero size n_pleiotropic groups.", { - # given - p <- 50 - f <- 30 - g <- 10 - h <- 0 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - total_causal <- (f) # single trait SNPs plus pleiotropic SNPs - correct_1 <- 0 - correct_2 <- 0 - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = h, - d = d, - group_ratio_trait = 3, - group_ratio_pleiotropic = 4, - maf_threshold = 0.0, - logLevel = 'DEBUG') - - # then - expect_equal(length(data$snps$phenotype_1$pleiotropic_groups$group1), correct_1) - expect_equal(length(data$snps$phenotype_1$pleiotropic_groups$group2), correct_2) -}) - -test_that("simulate_phenotypes can handle zero size n_pleiotropic AND n_trait_specific groups.", { - # given - p <- 50 - f <- 30 - g <- 0 - h <- 0 - n <- 5 - d <- 3 - X <- matrix(runif(p * n), ncol = p) - total_causal <- (f) # single trait SNPs plus pleiotropic SNPs - correct_1 <- 0 - correct_2 <- 0 - - # when - data <- simulate_phenotypes(X, - n_causal = f, - n_trait_specific = g, - n_pleiotropic = h, - d = d, - group_ratio_trait = 3, - group_ratio_pleiotropic = 4, - maf_threshold = 0.0, - logLevel = 'DEBUG') - - # then - expect_equal(length(data$snps$phenotype_1$alpha), correct_1) - expect_equal(length(data$snps$phenotype_2$alpha), correct_2) -}) - -test_that("test run", { -ind <- 1e2 -nsnp <- 100 -H2 <- 0.6 -rho <- 0.5 -maf <- 0.05 + 0.45*runif(nsnp) -X <- (runif(ind*nsnp) < maf) + (runif(ind*nsnp) < maf) -X <- matrix(as.double(X),ind,nsnp,byrow = TRUE) -s <- 95345 # sample.int(10000, 1) -sim <- simulate_phenotypes(X, - n_causal = 30, - n_pleiotropic = 6, - n_trait_specific = 4, - epistatic_correlation = 0.8, - H2 = H2, rho = rho, logLevel = 'ERROR', seed = s) -}) +test_that( + "Simulate multiple phenotypes returns apropriate phenotype object", { + # given + p <- 20 + f <- 10 + g <- 4 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = g, d = d, maf_threshold = 0, + logLevel = "ERROR" + ) + + # then + expect_equal( + nrow(data$phenotype), + n + ) + expect_equal( + ncol(data$phenotype), + d + ) + expect_equal( + sum(is.na(data$phenotype)), + 0 + ) # no NA values in phenotype + expect_equal( + length(data), + 8 + ) + } +) + +test_that( + "Simulate multiple phenotypes returns causal SNPs", { + # given + p <- 20 + f <- 10 + g <- 4 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + total_causal <- (f) # single trait SNPs plus pleiotropic SNPs + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = g, d = d, maf_threshold = 0, + logLevel = "ERROR" + ) + + # then + expect_equal( + length(data$snps$phenotype_1$causal_snps), + total_causal + ) + } +) + +test_that( + "Simulate multiple phenotypes remove SNPs with low maf", { + # given + p <- 20 + f <- 10 + g <- 4 + n <- 5 + d <- 3 + maf <- 0.05 + 0.45 * runif(p) + set.seed(12345) + X <- matrix( + (runif(p * n) >= + maf) + (runif(p * n) >= + maf), ncol = p + ) + X[, 1] <- 0 + X[, 13] <- 0 + total_causal <- (f * p) # single trait SNPs plus pleiotropic SNPs + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = g, d = d, maf_threshold = 0.05, + logLevel = "ERROR" + ) + data$snps.filtered + + # then + expect_true(!(1 %in% data$snps.filtered) & !(13 %in% data$snps.filtered)) + } +) + +test_that( + "Simulation of groups with given ratio works as expected.", { + # given + p <- 50 + f <- 30 + g <- 8 + h <- 10 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + total_causal <- (f) # single trait SNPs plus pleiotropic SNPs + correct_1 <- 2 + correct_2 <- 6 + correct_3 <- 2 + correct_4 <- 8 + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = h, d = d, group_ratio_trait = 3, + group_ratio_pleiotropic = 4, maf_threshold = 0, logLevel = "ERROR" + ) + + # then + expect_equal( + length(data$snps$phenotype_1$trait_specific_groups$group1), + correct_1 + ) + expect_equal( + length(data$snps$phenotype_1$trait_specific_groups$group2), + correct_2 + ) + expect_equal( + length(data$snps$phenotype_1$pleiotropic_groups$group1), + correct_3 + ) + expect_equal( + length(data$snps$phenotype_1$pleiotropic_groups$group2), + correct_4 + ) + } +) + +test_that( + "simulate_phenotypes can handle zero size n_trait_specific groups.", { + # given + p <- 50 + f <- 30 + g <- 0 + h <- 10 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + total_causal <- (f) # single trait SNPs plus pleiotropic SNPs + correct_1 <- 0 + correct_2 <- 0 + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = h, d = d, group_ratio_trait = 3, + group_ratio_pleiotropic = 4, maf_threshold = 0, logLevel = "ERROR" + ) + + # then + expect_equal( + length(data$snps$phenotype_1$trait_specific_groups$group1), + correct_1 + ) + expect_equal( + length(data$snps$phenotype_1$trait_specific_groups$group2), + correct_1 + ) + } +) + +test_that( + "simulate_phenotypes can handle zero size n_pleiotropic groups.", { + # given + p <- 50 + f <- 30 + g <- 10 + h <- 0 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + total_causal <- (f) # single trait SNPs plus pleiotropic SNPs + correct_1 <- 0 + correct_2 <- 0 + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = h, d = d, group_ratio_trait = 3, + group_ratio_pleiotropic = 4, maf_threshold = 0, logLevel = "ERROR" + ) + + # then + expect_equal( + length(data$snps$phenotype_1$pleiotropic_groups$group1), + correct_1 + ) + expect_equal( + length(data$snps$phenotype_1$pleiotropic_groups$group2), + correct_2 + ) + } +) + +test_that( + "simulate_phenotypes can handle zero size n_pleiotropic AND n_trait_specific groups.", + { + # given + p <- 50 + f <- 30 + g <- 0 + h <- 0 + n <- 5 + d <- 3 + X <- matrix( + runif(p * n), + ncol = p + ) + total_causal <- (f) # single trait SNPs plus pleiotropic SNPs + correct_1 <- 0 + correct_2 <- 0 + + # when + data <- simulate_phenotypes( + X, n_causal = f, n_trait_specific = g, n_pleiotropic = h, d = d, group_ratio_trait = 3, + group_ratio_pleiotropic = 4, maf_threshold = 0, logLevel = "ERROR" + ) + + # then + expect_equal( + length(data$snps$phenotype_1$alpha), + correct_1 + ) + expect_equal( + length(data$snps$phenotype_2$alpha), + correct_2 + ) + } +) + +test_that( + "test run", { + ind <- 100 + nsnp <- 100 + H2 <- 0.6 + rho <- 0.5 + maf <- 0.05 + 0.45 * runif(nsnp) + X <- (runif(ind * nsnp) < + maf) + (runif(ind * nsnp) < + maf) + X <- matrix( + as.double(X), + ind, nsnp, byrow = TRUE + ) + s <- 95345 # sample.int(10000, 1) + sim <- simulate_phenotypes( + X, n_causal = 30, n_pleiotropic = 6, n_trait_specific = 4, epistatic_correlation = 0.8, + H2 = H2, rho = rho, logLevel = "ERROR", seed = s + ) + } +) diff --git a/tests/testthat/test-variants-not-in-variantIndex-produce-NA.R b/tests/testthat/test-variants-not-in-variantIndex-produce-NA.R index 7ff1401b..6154aa32 100644 --- a/tests/testthat/test-variants-not-in-variantIndex-produce-NA.R +++ b/tests/testthat/test-variants-not-in-variantIndex-produce-NA.R @@ -1,44 +1,57 @@ -test_that("pvalues and pve shows NA for variants not in variantIndex", { - # given - p <- 3 - n <- 10 - d <- 3 - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - variantIndex <- c(1,3) - otherIndex <- 2 - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - variantIndex = variantIndex, - phenotypeCovariance = 'combinatorial', - logLevel = "DEBUG") - # then - expect_true(all(is.na(mapit$pves[otherIndex, ]))) - expect_true(all(is.na(mapit$pvalues[otherIndex, ]))) -}) +test_that( + "pvalues and pve shows NA for variants not in variantIndex", { + # given + p <- 3 + n <- 10 + d <- 3 + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + variantIndex <- c(1, 3) + otherIndex <- 2 + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "normal", cores = 1, variantIndex = variantIndex, logLevel = "DEBUG" + ) + # then + expect_true(all(is.na(mapit$pves[otherIndex, ]))) + expect_true(all(is.na(mapit$pvalues[otherIndex, ]))) + } +) -test_that("pve shows NA for covariance interactions", { - # given - p <- 3 - n <- 10 - d <- 3 - set.seed(853) - X <- matrix(runif(p * n), ncol = p) - Y <- matrix(runif(d * n), ncol = d) - # when - mapit <- MvMAPIT(t(X), - t(Y), - test = 'normal', - cores = 1, - phenotypeCovariance = 'combinatorial', - logLevel = "DEBUG") - # then - expect_true(all(is.na(mapit$pves[, 2]))) - expect_true(all(is.na(mapit$pves[, 4]))) - expect_true(all(is.na(mapit$pves[, 5]))) -}) +test_that( + "pve shows NA for covariance interactions", { + # given + p <- 3 + n <- 10 + d <- 3 + set.seed(853) + X <- matrix( + runif(p * n), + ncol = p + ) + Y <- matrix( + runif(d * n), + ncol = d + ) + # when + mapit <- MvMAPIT( + t(X), + t(Y), + test = "normal", cores = 1, logLevel = "DEBUG" + ) + # then + expect_true(all(is.na(mapit$pves[, 2]))) + expect_true(all(is.na(mapit$pves[, 4]))) + expect_true(all(is.na(mapit$pves[, 5]))) + } +) diff --git a/tests/testthat/test-zero-variance-function-on-rows.R b/tests/testthat/test-zero-variance-function-on-rows.R index 82a8ba14..6416eb5d 100644 --- a/tests/testthat/test-zero-variance-function-on-rows.R +++ b/tests/testthat/test-zero-variance-function-on-rows.R @@ -1,13 +1,21 @@ -test_that("Zero variance does it's thing.", { - # given - p <- 10 - z <- 3 - o <- 2 * p - z - zeros <- c(rep(0, z)) - ones <- c(rep(1, o)) - X <- matrix(c(zeros, ones), nrow = p) - # when - result <- remove_zero_variance(X) - # then - expect_equal(nrow(result), z) -}) +test_that( + "Zero variance does it's thing.", { + # given + p <- 10 + z <- 3 + o <- 2 * p - z + zeros <- c(rep(0, z)) + ones <- c(rep(1, o)) + X <- matrix( + c(zeros, ones), + nrow = p + ) + # when + result <- remove_zero_variance(X) + # then + expect_equal( + nrow(result), + z + ) + } +)