From 3980100a92bdff6d552a52bd0b5e19dfd7e4f4c6 Mon Sep 17 00:00:00 2001
From: Julian Stamp <julian.d.stamp@gmail.com>
Date: Thu, 6 Jun 2024 14:19:24 +0200
Subject: [PATCH] MM-136 LT MAPIT support

---
 .gitignore                                |   1 +
 DESCRIPTION                               |   3 +-
 NAMESPACE                                 |   3 +
 NEWS.md                                   |   1 +
 R/binary_to_liability.R                   |  41 +++++++++
 README.md                                 |   4 +-
 _pkgdown.yml                              |   1 +
 man/binary_to_liability.Rd                |  22 +++++
 tests/testthat/test-binary_to_liability.R |  26 ++++++
 vignettes/.gitignore                      |   2 +
 vignettes/tutorial-lt-mapit.Rmd           | 105 ++++++++++++++++++++++
 11 files changed, 207 insertions(+), 2 deletions(-)
 create mode 100644 R/binary_to_liability.R
 create mode 100644 man/binary_to_liability.Rd
 create mode 100644 tests/testthat/test-binary_to_liability.R
 create mode 100644 vignettes/.gitignore
 create mode 100644 vignettes/tutorial-lt-mapit.Rmd

diff --git a/.gitignore b/.gitignore
index 2fd30a4d..add772f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,3 +69,4 @@ src/symbols.rds
 /Meta/
 
 /dev
+inst/doc
diff --git a/DESCRIPTION b/DESCRIPTION
index 791e7c55..8b9e88e3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -47,6 +47,7 @@ Imports:
     Rcpp,
     stats,
     tidyr,
+    truncnorm,
     utils
 Suggests:
     GGally,
@@ -70,4 +71,4 @@ VignetteBuilder:
 Encoding: UTF-8
 LazyData: true
 LazyDataCompression: xz
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/NAMESPACE b/NAMESPACE
index deabf1a2..71268b6f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(binary_to_liability)
 export(cauchy_combined)
 export(fishers_combined)
 export(harmonic_combined)
@@ -18,8 +19,10 @@ importFrom(stats,cor)
 importFrom(stats,pcauchy)
 importFrom(stats,pchisq)
 importFrom(stats,pnorm)
+importFrom(stats,qnorm)
 importFrom(stats,sd)
 importFrom(stats,uniroot)
 importFrom(stats,var)
+importFrom(truncnorm,rtruncnorm)
 importFrom(utils,head)
 useDynLib(mvMAPIT)
diff --git a/NEWS.md b/NEWS.md
index 86f1e002..7e134d04 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # mvMAPIT (development version)
 
 * Fix Bonferroni correction in the `mvMAPIT.Rmd` vignette. 
+* New function `binary_to_liability` for LT-MAPIT support and corresponding vignette
 
 # mvMAPIT 2.0.3 release
 
diff --git a/R/binary_to_liability.R b/R/binary_to_liability.R
new file mode 100644
index 00000000..885c2bf7
--- /dev/null
+++ b/R/binary_to_liability.R
@@ -0,0 +1,41 @@
+#' Convert binary traits to liabilities to run LT-MAPIT (MAPIT on case-control traits)
+#'
+#' This function implements the conversion of binary traits to liabilties as
+#' proposed in the LT-MAPIT model (Crawford and Zhou 2018,
+#' https://doi.org/10.1101/374983).
+#'
+#' @param case_control_trait Case-control trait encoded as binary trait with 0 as control or 1 as case.
+#' @param prevalence Case prevalence between 0 and 1. Proportion of cases in the population.
+#' @returns A trait vector of same length as y with case-control indicators converted
+#' to liabilties.
+#'
+#' @export
+#' @importFrom truncnorm rtruncnorm
+#' @importFrom stats qnorm
+#' @import checkmate
+binary_to_liability <- function(case_control_trait, prevalence) {
+  coll <- makeAssertCollection()
+  assertInteger(
+    case_control_trait,
+    lower = 0,
+    upper = 1,
+    add = coll
+  )
+  assertDouble(prevalence,
+               lower = 0,
+               upper = 1,
+               add = coll)
+  reportAssertions(coll)
+  
+  n_cases = sum(case_control_trait == 1, na.rm = T)
+  n_controls = sum(case_control_trait == 0, na.rm = T)
+  threshold = qnorm(1 - prevalence, mean = 0, sd = 1)
+  
+  liabilities = rep(NA, length(case_control_trait))
+  liabilities[!is.na(case_control_trait) &
+                case_control_trait == 0] = rtruncnorm(n_controls, b = threshold)
+  liabilities[!is.na(case_control_trait) &
+                case_control_trait == 1] = rtruncnorm(n_cases, a = threshold)
+  
+  return(liabilities)
+}
diff --git a/README.md b/README.md
index dd477e9b..ba40a9a7 100644
--- a/README.md
+++ b/README.md
@@ -122,9 +122,11 @@ install.packages(c( 'checkmate',
                     'RcppAlgos', 
                     'RcppArmadillo', 
                     'RcppParallel', 
+                    'RcppProgress', 
                     'RcppSpdlog', 
                     'testthat', 
-                    'tidyr'), 
+                    'tidyr', 
+                    'truncnorm'), 
                     dependencies = TRUE);
 ```
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 082c6a79..4562f981 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -10,6 +10,7 @@ reference:
     Functions for the marginal epistasis analysis.
   contents: 
   - mvmapit
+  - binary_to_liability
 - title: "Combining P-values"
   desc: >
     Functions for combining the variance component p-values.
diff --git a/man/binary_to_liability.Rd b/man/binary_to_liability.Rd
new file mode 100644
index 00000000..7a8209f2
--- /dev/null
+++ b/man/binary_to_liability.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/binary_to_liability.R
+\name{binary_to_liability}
+\alias{binary_to_liability}
+\title{Convert binary traits to liabilities}
+\usage{
+binary_to_liability(case_control_trait, prevalence)
+}
+\arguments{
+\item{case_control_trait}{Case-control trait encoded as binary trait with 0 as control or 1 as case.}
+
+\item{prevalence}{Case prevalence between 0 and 1. Proportion of cases in the population.}
+}
+\value{
+A trait vector of same length as y with case-control indicators converted
+to liabilties.
+}
+\description{
+This function implements the conversion of binary traits to liabilties as
+proposed in the LT-MAPIT model (Crawford and Zhou 2018,
+https://doi.org/10.1101/374983).
+}
diff --git a/tests/testthat/test-binary_to_liability.R b/tests/testthat/test-binary_to_liability.R
new file mode 100644
index 00000000..86cb8f10
--- /dev/null
+++ b/tests/testthat/test-binary_to_liability.R
@@ -0,0 +1,26 @@
+test_that("binary_to_liability api test", {
+  # given
+  n_samples <- 10
+  case_control_trait <- sample(0:1, n_samples, replace = TRUE)
+  prevalence <- 0.1
+  # when
+  liabilities <- binary_to_liability(case_control_trait, prevalence)
+  # then
+  expect_true(is.numeric(liabilities))
+  expect_equal(length(case_control_trait),
+               length(liabilities))
+})
+
+test_that("binary_to_liability NA in case_control_trait", {
+  # given
+  n_samples <- 10
+  n_missing <- 2
+  case_control_trait <- sample(0:1, n_samples, replace = TRUE)
+  prevalence <- 0.1
+  case_control_trait[sample(1:n_samples, n_missing)] <- NA
+  # when
+  liabilities <- binary_to_liability(case_control_trait, prevalence)
+  # then
+  expect_equal(case_control_trait[is.na(case_control_trait)], liabilities[is.na(case_control_trait)])
+  expect_equal(sum(!is.na(liabilities)), n_samples - n_missing)
+  })
\ No newline at end of file
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
new file mode 100644
index 00000000..097b2416
--- /dev/null
+++ b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/tutorial-lt-mapit.Rmd b/vignettes/tutorial-lt-mapit.Rmd
new file mode 100644
index 00000000..83026978
--- /dev/null
+++ b/vignettes/tutorial-lt-mapit.Rmd
@@ -0,0 +1,105 @@
+---
+title: "Liability threshold MAPIT"
+output: rmarkdown::html_vignette
+description: >
+  Learn how to convert binary traits to liabilities.
+vignette: >
+  %\VignetteIndexEntry{Liability threshold MAPIT}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(mvMAPIT)
+```
+In this tutorial we will illustrate how to use the Liability Threshold MArginal ePIstasis Test (LT-MAPIT) by [Crawford and Zhou (2018)](https://doi.org/10.1101/374983)$^1$. For this purpose we will first simulate synthetic data
+and then analyze it.
+
+The data are single nucleotide polymorphisms (SNPs) with simulated genotypes.
+For the simulation we choose the following set of parameters:
+
+  1. **population_size** - # of samples in the population
+  2. **n_snps** - number of SNPs or variants
+  3. **PVE** - phenotypic variance explained/broad-sense heritability ($H^2$)
+  4. **rho** - measures the portion of $H^2$ that is contributed by the marignal (additive) effects
+  5. **disease_prevalence** - assumed disease prevelance in the population
+  6. **sample_size** - # of samples to analyze
+
+```{r parameters, eval = F}
+population_size <- 1e4
+n_snps <- 2e3
+pve <- 0.6
+rho <- 0.5
+disease_prevalence <- 0.3
+sample_size <- 500
+```
+
+## Simulate random genotypes
+
+Simulate the genotypes such that all variants have minor allele frequency (MAF) > 0.05.
+**NOTE:** As in the paper, we center and scale each genotypic vector such that every SNP has mean 0 and standard deviation 1.
+
+```{r random_genotypes, eval = F}
+maf <- 0.05 + 0.45 * runif(n_snps)
+random_genotypes   <- (runif(population_size * n_snps) < maf) + (runif(population_size *
+                                                                         n_snps) < maf)
+random_genotypes   <- matrix(as.double(random_genotypes), population_size, n_snps, byrow = TRUE)
+```
+
+## Simulate liabilities
+
+We can use the mvMAPIT function `simulate_traits` to simulate the liabilities.
+See the tutorial on simulations for more details on that.
+
+```{r simulate_traits, eval = F}
+n_causal <- 100
+n_epistatic <- 10
+simulated_data <- simulate_traits(
+    random_genotypes,
+    n_causal = n_causal,
+    n_trait_specific = n_epistatic,
+    n_pleiotropic = 0,
+    d = 1,
+    H2 = pve,
+    rho = rho
+)
+```
+Now that we have the liabilities, we can assign case-control labels according to the disease prevelance parameter. We will treat this like the LT-MAPIT paper and take an equal number of cases and controls.
+
+```{r case_control, eval = F}
+liabilities <- simulated_data$trait
+threshold <- qnorm(1 - disease_prevalence, mean = 0, sd = 1)
+case_control_trait <- rep(0, population_size)
+case_control_trait[liabilities > threshold] <- 1
+
+# Subsample a particular number of cases and controls
+cases <- sample(which(liabilities > threshold), sample_size / 2, replace = FALSE)
+controls <- sample(which(liabilities <= threshold), sample_size / 2, replace = FALSE)
+y <- as.integer(case_control_trait[c(cases, controls)])
+X <- simulated_data$genotype[c(cases, controls), ]
+```
+
+## Run MAPIT with Case-Control trait
+
+To run MAPIT with case-control traits, we need to convert the traits back to liabilities.
+The function `binary_to_liability` provides this conversion.
+
+```{r mapit, eval = F}
+y_liabilities <- binary_to_liability(y, disease_prevalence)
+
+lt_mapit <- mvmapit(
+        t(X),
+        t(y_liabilities),
+        test = "hybrid"
+)
+```
+
+# References
+1: Lorin Crawford, Xiang Zhou (2018) Genome-wide Marginal Epistatic Association Mapping in Case-Control Studies bioRxiv 374983; <https://doi.org/10.1101/374983>