From 2013515e53422be91436fac074408830aeefa396 Mon Sep 17 00:00:00 2001
From: jdstamp <julian.d.stamp@gmail.com>
Date: Wed, 14 Jul 2021 12:19:37 -0400
Subject: [PATCH] Mm 31/reproduce grant figure (#24)

* MM-31: minimum accuracy

* MM-31: accuracy as input parameter

* MM-31: accuracry available as parameter
---
 DESCRIPTION                                   |  2 +-
 R/MAPIT.R                                     | 11 +--
 configure.ac                                  |  2 +-
 man/MvMAPIT.Rd                                |  3 +
 src/MAPIT.h                                   |  3 +-
 ...mvmapit-kronecker-executes-without-error.R | 24 +++---
 ...-mvmapit-pariwise-executes-without-error.R | 73 +++++++++++++------
 7 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a6b41f6f..0b3b86c9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mvMAPIT
 Type: Package
 Title: Implements the Multivariate MArginal ePIstasis Test (mvMAPIT) from Crawford et al. 2017
-Version: 0.1.0
+Version: 0.1.1
 Author: lcrawlab
 Maintainer: Lorin Crawford <lorin_crawford@brown.edu>
 Description: Epistasis, commonly defined as the interaction between multiple genes, is an important genetic component underlying phenotypic variation. Many statistical methods have been developed to model and identify epistatic interactions between genetic variants. However, because of the large combinatorial search space of interactions, most epistasis mapping methods face enormous computational challenges and often suffer from low statistical power. In Crawford et al. (2017), we present a novel, alternative strategy for mapping epistasis: the MArginal ePIstasis Test (MAPIT). Our method examines one variant at a time, and estimates and tests its "marginal epistatic effects" --- the combined pairwise interaction effects between a given variant and all other variants. By avoiding explicitly searching for interactions, our method avoids the large combinatorial search space and improves power. Our method is novel and relies on a recently developed variance component estimation method for efficient and robust parameter inference and p-value computation.
diff --git a/R/MAPIT.R b/R/MAPIT.R
index 7d2a7b6b..cc2b68b1 100644
--- a/R/MAPIT.R
+++ b/R/MAPIT.R
@@ -34,6 +34,7 @@
 #' @param C is an n x n covariance matrix detailing environmental effects and population structure effects.
 #' @param hybrid is a parameter detailing if the function should run the hybrid hypothesis testing procedure between the normal Z test and the Davies method. Default is TRUE.
 #' @param threshold is a parameter detailing the value at which to recalibrate the Z test p values. If nothing is defined by the user, the default value will be 0.05 as recommended by the Crawford et al. (2017).
+#' @param accuracy is a parameter setting the davies function numerical approximation accuracy. This parameter is not needed for the normal test. Smaller p-values than the accuracy will be zero.
 #' @param test is a parameter defining what hypothesis test should be implemented. Takes on values 'normal' or 'davies'. This parameter only matters when hybrid = FALSE. If test is not defined when hybrid = FALSE, the function will automatically use test = 'normal'.
 #' @param cores is a parameter detailing the number of cores to parallelize over. It is important to note that this value only matters when the user has implemented OPENMP on their operating system. If OPENMP is not installed, then please leave cores = 1 and use the standard version of this code and software.
 #' @param variantIndex is a vector containing indices of variants to be included in the computation.
@@ -51,7 +52,8 @@ MvMAPIT <- function(X,
                     Z = NULL,
                     C = NULL, 
                     hybrid = TRUE, 
-                    threshold = 0.05, 
+                    threshold = 0.05,
+                    accuracy = 1e-8,
                     test = "normal", 
                     cores = 1, 
                     variantIndex = NULL, 
@@ -101,7 +103,7 @@ MvMAPIT <- function(X,
 
     log$info('Running davies method on selected SNPs.')
     vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance)
-    davies.pvals <- davies_exact_wrapper(vc.mod, X, Y, threshold)
+    davies.pvals <- davies_exact_wrapper(vc.mod, X, accuracy)
     if (is.na(phenotypeCovariance) || phenotypeCovariance == '') {
       pvals[ind_temp] <- davies.pvals[ind_temp]
     } else {
@@ -115,7 +117,7 @@ MvMAPIT <- function(X,
   } else {
     ind <- ifelse(variantIndex, variantIndex, 1:nrow(X))
     vc.mod <- MAPITCpp(X, Y, Z, C, ind, "davies", cores = cores, NULL, phenotypeCovariance)
-    pvals <- davies_exact_wrapper(vc.mod, X, Y, threshold)
+    pvals <- davies_exact_wrapper(vc.mod, X, accuracy)
     pves <- vc.mod$PVE
     timings <- vc.mod$timings
   }
@@ -137,10 +139,9 @@ MvMAPIT <- function(X,
 }
 
 # Runs the Davies portion of the hypothesis testing
-davies_exact_wrapper <- function(cpp_structure, X, Y, alpha) {
+davies_exact_wrapper <- function(cpp_structure, X, accuracy) {
   num_combinations <- dim(cpp_structure$Eigenvalues)[2]
   p <- nrow(X)
-  accuracy <- alpha / (p * num_combinations)
   davies.pvals <- matrix(1, p, num_combinations)
   for (combi in seq_len(num_combinations)) {
     davies.pvals[, combi] <- davies_exact(cpp_structure$Est[, combi],
diff --git a/configure.ac b/configure.ac
index 7d1c7940..d0d1a79e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([mvMAPIT],[0.1.0])
+AC_INIT([mvMAPIT],[0.1.1])
 
 # Find the compiler and compiler flags used by R.
 : ${R_HOME=`R RHOME`}
diff --git a/man/MvMAPIT.Rd b/man/MvMAPIT.Rd
index 88b539f7..5eca6170 100644
--- a/man/MvMAPIT.Rd
+++ b/man/MvMAPIT.Rd
@@ -11,6 +11,7 @@ MvMAPIT(
   C = NULL,
   hybrid = TRUE,
   threshold = 0.05,
+  accuracy = 1e-08,
   test = "normal",
   cores = 1,
   variantIndex = NULL,
@@ -32,6 +33,8 @@ MvMAPIT(
 
 \item{threshold}{is a parameter detailing the value at which to recalibrate the Z test p values. If nothing is defined by the user, the default value will be 0.05 as recommended by the Crawford et al. (2017).}
 
+\item{accuracy}{is a parameter setting the davies function numerical approximation accuracy. This parameter is not needed for the normal test. Smaller p-values than the accuracy will be zero.}
+
 \item{test}{is a parameter defining what hypothesis test should be implemented. Takes on values 'normal' or 'davies'. This parameter only matters when hybrid = FALSE. If test is not defined when hybrid = FALSE, the function will automatically use test = 'normal'.}
 
 \item{cores}{is a parameter detailing the number of cores to parallelize over. It is important to note that this value only matters when the user has implemented OPENMP on their operating system. If OPENMP is not installed, then please leave cores = 1 and use the standard version of this code and software.}
diff --git a/src/MAPIT.h b/src/MAPIT.h
index f3cad8fd..09f191d0 100644
--- a/src/MAPIT.h
+++ b/src/MAPIT.h
@@ -6,7 +6,8 @@
 #include <exception>
 #include <typeinfo>
 
-// #define WITH_LOGGER 1  // uncomment for logging during development
+
+//  #define WITH_LOGGER 1  // uncomment for logging during development
 #define ARMA_64BIT_WORD 1
 #include <RcppArmadillo.h>
 
diff --git a/tests/testthat/test-mvmapit-kronecker-executes-without-error.R b/tests/testthat/test-mvmapit-kronecker-executes-without-error.R
index 0056cf97..791b94a1 100644
--- a/tests/testthat/test-mvmapit-kronecker-executes-without-error.R
+++ b/tests/testthat/test-mvmapit-kronecker-executes-without-error.R
@@ -62,16 +62,16 @@ test_that("MvMapit executes without error when hybrid = FALSE and test = davies.
   n <- 5
   d <- 3
   pvalues <- matrix(
-             c(0.4856689,
-               0.5036839,
-               0.6226294,
-               0.2570358,
-               0.9159115,
-               0.8880792,
-               0.6397031,
-               0.9303882,
-               0.5976025,
-               0.9711116), ncol = 1)
+             c(0.4855840,
+               0.5042984,
+               0.6235571,
+               0.2560571,
+               0.9150943,
+               0.8862566,
+               0.6406932,
+               0.9294612,
+               0.5974704,
+               0.9704182), ncol = 1)
   set.seed(6)
   X <- matrix(runif(p * n), ncol = p)
   Y <- matrix(runif(d * n), ncol = d)
@@ -80,6 +80,7 @@ test_that("MvMapit executes without error when hybrid = FALSE and test = davies.
                    t(Y),
                    hybrid = FALSE,
                    test = 'davies',
+                   accuracy = 1e-5,
                    cores = 1,
                    variantIndex = c(1:p),
                    phenotypeCovariance = 'covariance',
@@ -194,9 +195,10 @@ test_that("hybrid = FALSE. phenotypeCovariance = homogeneous", {
   mapit <- MvMAPIT(t(X),
                    t(Y),
                    hybrid = FALSE,
+                   accuracy = 1e-5,
                    cores = 1,
                    phenotypeCovariance = 'homogeneous',
                    logLevel = "ERROR")
   # then
-  expect_equal(mapit$pvalues, pvalues, tolerance = 1e-7)
+  expect_equal(mapit$pvalues, pvalues, tolerance = 1e-5)
 })
diff --git a/tests/testthat/test-mvmapit-pariwise-executes-without-error.R b/tests/testthat/test-mvmapit-pariwise-executes-without-error.R
index d0cfa15c..1424cea5 100644
--- a/tests/testthat/test-mvmapit-pariwise-executes-without-error.R
+++ b/tests/testthat/test-mvmapit-pariwise-executes-without-error.R
@@ -23,21 +23,22 @@ test_that("hybrid = FALSE. phenotypeCovariance = ''", {
 test_that("hybrid = FALSE, test = davies. phenotypeCovariance = ''", {
   # given
   p <- 2
+  p <- 2
   n <- 10
   d <- 3
   pvalues <- matrix(
-                    c(0.01597664,
-                      0.01640104,
-                      0.0,
+                    c(0.01558980,
+                      0.01606852,
                       0.0,
-                      0.3799469,
-                      0.3618748,
-                      0.0003248277,
-                      0.0966434584,
                       0.0,
-                      0.0,
-                      0.3355311,
-                      0.2968991),
+                      0.3795860,
+                      0.3622172,
+                      0.0002072584,
+                      0.0962535911,
+                      5.302820e-07,
+                      2.003761e-05,
+                      0.3353280,
+                      0.2971691),
                     nrow = p, ncol = 6)
   colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3")
   set.seed(853)
@@ -68,10 +69,10 @@ test_that("hybrid = TRUE. phenotypeCovariance = ''", {
                       0.6591476,
                       0.6629176,
                       0.417081,
-                      0.09664346,
+                      0.09625359,
                       0.3101424,
                       0.1737842,
-                      0.33553110,
+                      0.33532798,
                       0.05034315),
                     nrow = p, ncol = 6)
   colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3")
@@ -94,11 +95,11 @@ test_that("hybrid = TRUE, C is not NULL. phenotypeCovariance = ''", {
   p <- 4
   n <- 10
   d <- 3
-  pvalues <- matrix(c(0.1659513, 0.6098269, 0.8408090, 0.1122040, 0.2242929, 0.2332077,
-                      0.1349153, 0.4313260, 0.4392573, 0.6874865, 0.8036961, 0.2153343,
-                      0.4798231, 0.2094195, 0.2815634, 0.8230605, 0.5942647, 0.6129602,
-                      0.9890769, 0.4501949, 0.9670782, 0.6231966, 0.3842330, 0.8414133),
-                    nrow = p, ncol = 6)
+  pvalues <- matrix(c(0.1659513, 0.2242929, 0.4392573, 0.4798231, 0.5942647, 0.9670782,
+                      0.6098269, 0.2332077, 0.6874865, 0.2094195, 0.6129602, 0.6231966,
+                      0.8408090, 0.1349153, 0.8036961, 0.2815634, 0.9890769, 0.3842330,
+                      0.1122040, 0.4313260, 0.2153343, 0.8230605, 0.4501949, 0.8414133),
+                    nrow = p, ncol = 6, byrow = TRUE)
   colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3")
   set.seed(853)
   X <- matrix(runif(p * n), ncol = p)
@@ -109,6 +110,7 @@ test_that("hybrid = TRUE, C is not NULL. phenotypeCovariance = ''", {
                    t(Y),
                    C = C,
                    hybrid = TRUE,
+                   accuracy = 1e-5,
                    cores = 1,
                    phenotypeCovariance = '',
                    logLevel = "ERROR")
@@ -148,10 +150,10 @@ test_that("hybrid = FALSE, C is not NULL, test = 'davies'. phenotypeCovariance =
   p <- 4
   n <- 10
   d <- 3
-  pvalues <- matrix(c(0.8917604, 0.0, 0.8697775, 0.9576408, 0.126735221, 0.9264005,
-                      0.6499192, 0.0, 0.5692051, 0.5625198, 0.001621203, 0.5336411,
-                      0.7108441, 0.0001347479, 0.8417294, 0.6122342, 0.978414917, 0.0438949,
-                      0.1439900, 0.5461373817, 0.0, 0.8058156, 0.571396524, 0.8780500),
+  pvalues <- matrix(c(0.8920589, 0.0, 0.8704164, 0.9564335, 0.126598227, 0.92516224,
+                      0.6506412, 5.062263e-07, 0.5693004, 0.5624713, 0.001549884, 0.53344421,
+                      0.7103750, 1.108789e-04, 0.8424372, 0.6119419, 0.977816063, 0.04392313,
+                      0.1437574, 0.5462071, 0.0, 0.8068312, 0.571373467, 0.87882231),
                     nrow = p, ncol = 6, byrow = TRUE)
   colnames(pvalues) <- c("P1*P1", "P2*P1", "P2*P2", "P3*P1", "P3*P2", "P3*P3")
   set.seed(853)
@@ -164,6 +166,35 @@ test_that("hybrid = FALSE, C is not NULL, test = 'davies'. phenotypeCovariance =
                    C = C,
                    hybrid = FALSE,
                    test = 'davies',
+                   accuracy = 1e-5,
+                   cores = 1,
+                   phenotypeCovariance = '',
+                   logLevel = "ERROR")
+  # then
+  expect_equal(mapit$pvalues, pvalues, tolerance = 1e-4)
+})
+
+
+
+test_that("hybrid = FALSE, test = 'davies'. phenotypeCovariance = '', d = 1", {
+  # given
+  p <- 10
+  n <- 4
+  d <- 1
+  pvalues <- matrix(c(7.097669e-01,7.849280e-07, 0.000000e+00, 8.378058e-01, 0.000000e+00, 1.899326e-01,
+                      4.319290e-01, 5.793326e-01, 9.784368e-01, 3.942235e-01),
+                    nrow = p, ncol = 1, byrow = TRUE)
+  set.seed(853)
+  X <- matrix(runif(p * n), ncol = p)
+  Y <- matrix(runif(d * n), ncol = d)
+  C <- matrix(runif(n * n), ncol = n)
+  # when
+  mapit <- MvMAPIT(t(X),
+                   t(Y),
+                   C = C,
+                   hybrid = FALSE,
+                   test = 'davies',
+                   accuracy = 1e-5,
                    cores = 1,
                    phenotypeCovariance = '',
                    logLevel = "ERROR")