From 3980100a92bdff6d552a52bd0b5e19dfd7e4f4c6 Mon Sep 17 00:00:00 2001 From: Julian Stamp Date: Thu, 6 Jun 2024 14:19:24 +0200 Subject: [PATCH] MM-136 LT MAPIT support --- .gitignore | 1 + DESCRIPTION | 3 +- NAMESPACE | 3 + NEWS.md | 1 + R/binary_to_liability.R | 41 +++++++++ README.md | 4 +- _pkgdown.yml | 1 + man/binary_to_liability.Rd | 22 +++++ tests/testthat/test-binary_to_liability.R | 26 ++++++ vignettes/.gitignore | 2 + vignettes/tutorial-lt-mapit.Rmd | 105 ++++++++++++++++++++++ 11 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 R/binary_to_liability.R create mode 100644 man/binary_to_liability.Rd create mode 100644 tests/testthat/test-binary_to_liability.R create mode 100644 vignettes/.gitignore create mode 100644 vignettes/tutorial-lt-mapit.Rmd diff --git a/.gitignore b/.gitignore index 2fd30a4d..add772f3 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,4 @@ src/symbols.rds /Meta/ /dev +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index 791e7c55..8b9e88e3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,6 +47,7 @@ Imports: Rcpp, stats, tidyr, + truncnorm, utils Suggests: GGally, @@ -70,4 +71,4 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true LazyDataCompression: xz -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index deabf1a2..71268b6f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(binary_to_liability) export(cauchy_combined) export(fishers_combined) export(harmonic_combined) @@ -18,8 +19,10 @@ importFrom(stats,cor) importFrom(stats,pcauchy) importFrom(stats,pchisq) importFrom(stats,pnorm) +importFrom(stats,qnorm) importFrom(stats,sd) importFrom(stats,uniroot) importFrom(stats,var) +importFrom(truncnorm,rtruncnorm) importFrom(utils,head) useDynLib(mvMAPIT) diff --git a/NEWS.md b/NEWS.md index 86f1e002..7e134d04 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mvMAPIT (development version) * Fix Bonferroni correction in the `mvMAPIT.Rmd` vignette. +* New function `binary_to_liability` for LT-MAPIT support and corresponding vignette # mvMAPIT 2.0.3 release diff --git a/R/binary_to_liability.R b/R/binary_to_liability.R new file mode 100644 index 00000000..885c2bf7 --- /dev/null +++ b/R/binary_to_liability.R @@ -0,0 +1,41 @@ +#' Convert binary traits to liabilities to run LT-MAPIT (MAPIT on case-control traits) +#' +#' This function implements the conversion of binary traits to liabilties as +#' proposed in the LT-MAPIT model (Crawford and Zhou 2018, +#' https://doi.org/10.1101/374983). +#' +#' @param case_control_trait Case-control trait encoded as binary trait with 0 as control or 1 as case. +#' @param prevalence Case prevalence between 0 and 1. Proportion of cases in the population. +#' @returns A trait vector of same length as y with case-control indicators converted +#' to liabilties. +#' +#' @export +#' @importFrom truncnorm rtruncnorm +#' @importFrom stats qnorm +#' @import checkmate +binary_to_liability <- function(case_control_trait, prevalence) { + coll <- makeAssertCollection() + assertInteger( + case_control_trait, + lower = 0, + upper = 1, + add = coll + ) + assertDouble(prevalence, + lower = 0, + upper = 1, + add = coll) + reportAssertions(coll) + + n_cases = sum(case_control_trait == 1, na.rm = T) + n_controls = sum(case_control_trait == 0, na.rm = T) + threshold = qnorm(1 - prevalence, mean = 0, sd = 1) + + liabilities = rep(NA, length(case_control_trait)) + liabilities[!is.na(case_control_trait) & + case_control_trait == 0] = rtruncnorm(n_controls, b = threshold) + liabilities[!is.na(case_control_trait) & + case_control_trait == 1] = rtruncnorm(n_cases, a = threshold) + + return(liabilities) +} diff --git a/README.md b/README.md index dd477e9b..ba40a9a7 100644 --- a/README.md +++ b/README.md @@ -122,9 +122,11 @@ install.packages(c( 'checkmate', 'RcppAlgos', 'RcppArmadillo', 'RcppParallel', + 'RcppProgress', 'RcppSpdlog', 'testthat', - 'tidyr'), + 'tidyr', + 'truncnorm'), dependencies = TRUE); ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index 082c6a79..4562f981 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -10,6 +10,7 @@ reference: Functions for the marginal epistasis analysis. contents: - mvmapit + - binary_to_liability - title: "Combining P-values" desc: > Functions for combining the variance component p-values. diff --git a/man/binary_to_liability.Rd b/man/binary_to_liability.Rd new file mode 100644 index 00000000..7a8209f2 --- /dev/null +++ b/man/binary_to_liability.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/binary_to_liability.R +\name{binary_to_liability} +\alias{binary_to_liability} +\title{Convert binary traits to liabilities} +\usage{ +binary_to_liability(case_control_trait, prevalence) +} +\arguments{ +\item{case_control_trait}{Case-control trait encoded as binary trait with 0 as control or 1 as case.} + +\item{prevalence}{Case prevalence between 0 and 1. Proportion of cases in the population.} +} +\value{ +A trait vector of same length as y with case-control indicators converted +to liabilties. +} +\description{ +This function implements the conversion of binary traits to liabilties as +proposed in the LT-MAPIT model (Crawford and Zhou 2018, +https://doi.org/10.1101/374983). +} diff --git a/tests/testthat/test-binary_to_liability.R b/tests/testthat/test-binary_to_liability.R new file mode 100644 index 00000000..86cb8f10 --- /dev/null +++ b/tests/testthat/test-binary_to_liability.R @@ -0,0 +1,26 @@ +test_that("binary_to_liability api test", { + # given + n_samples <- 10 + case_control_trait <- sample(0:1, n_samples, replace = TRUE) + prevalence <- 0.1 + # when + liabilities <- binary_to_liability(case_control_trait, prevalence) + # then + expect_true(is.numeric(liabilities)) + expect_equal(length(case_control_trait), + length(liabilities)) +}) + +test_that("binary_to_liability NA in case_control_trait", { + # given + n_samples <- 10 + n_missing <- 2 + case_control_trait <- sample(0:1, n_samples, replace = TRUE) + prevalence <- 0.1 + case_control_trait[sample(1:n_samples, n_missing)] <- NA + # when + liabilities <- binary_to_liability(case_control_trait, prevalence) + # then + expect_equal(case_control_trait[is.na(case_control_trait)], liabilities[is.na(case_control_trait)]) + expect_equal(sum(!is.na(liabilities)), n_samples - n_missing) + }) \ No newline at end of file diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/tutorial-lt-mapit.Rmd b/vignettes/tutorial-lt-mapit.Rmd new file mode 100644 index 00000000..83026978 --- /dev/null +++ b/vignettes/tutorial-lt-mapit.Rmd @@ -0,0 +1,105 @@ +--- +title: "Liability threshold MAPIT" +output: rmarkdown::html_vignette +description: > + Learn how to convert binary traits to liabilities. +vignette: > + %\VignetteIndexEntry{Liability threshold MAPIT} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(mvMAPIT) +``` +In this tutorial we will illustrate how to use the Liability Threshold MArginal ePIstasis Test (LT-MAPIT) by [Crawford and Zhou (2018)](https://doi.org/10.1101/374983)$^1$. For this purpose we will first simulate synthetic data +and then analyze it. + +The data are single nucleotide polymorphisms (SNPs) with simulated genotypes. +For the simulation we choose the following set of parameters: + + 1. **population_size** - # of samples in the population + 2. **n_snps** - number of SNPs or variants + 3. **PVE** - phenotypic variance explained/broad-sense heritability ($H^2$) + 4. **rho** - measures the portion of $H^2$ that is contributed by the marignal (additive) effects + 5. **disease_prevalence** - assumed disease prevelance in the population + 6. **sample_size** - # of samples to analyze + +```{r parameters, eval = F} +population_size <- 1e4 +n_snps <- 2e3 +pve <- 0.6 +rho <- 0.5 +disease_prevalence <- 0.3 +sample_size <- 500 +``` + +## Simulate random genotypes + +Simulate the genotypes such that all variants have minor allele frequency (MAF) > 0.05. +**NOTE:** As in the paper, we center and scale each genotypic vector such that every SNP has mean 0 and standard deviation 1. + +```{r random_genotypes, eval = F} +maf <- 0.05 + 0.45 * runif(n_snps) +random_genotypes <- (runif(population_size * n_snps) < maf) + (runif(population_size * + n_snps) < maf) +random_genotypes <- matrix(as.double(random_genotypes), population_size, n_snps, byrow = TRUE) +``` + +## Simulate liabilities + +We can use the mvMAPIT function `simulate_traits` to simulate the liabilities. +See the tutorial on simulations for more details on that. + +```{r simulate_traits, eval = F} +n_causal <- 100 +n_epistatic <- 10 +simulated_data <- simulate_traits( + random_genotypes, + n_causal = n_causal, + n_trait_specific = n_epistatic, + n_pleiotropic = 0, + d = 1, + H2 = pve, + rho = rho +) +``` +Now that we have the liabilities, we can assign case-control labels according to the disease prevelance parameter. We will treat this like the LT-MAPIT paper and take an equal number of cases and controls. + +```{r case_control, eval = F} +liabilities <- simulated_data$trait +threshold <- qnorm(1 - disease_prevalence, mean = 0, sd = 1) +case_control_trait <- rep(0, population_size) +case_control_trait[liabilities > threshold] <- 1 + +# Subsample a particular number of cases and controls +cases <- sample(which(liabilities > threshold), sample_size / 2, replace = FALSE) +controls <- sample(which(liabilities <= threshold), sample_size / 2, replace = FALSE) +y <- as.integer(case_control_trait[c(cases, controls)]) +X <- simulated_data$genotype[c(cases, controls), ] +``` + +## Run MAPIT with Case-Control trait + +To run MAPIT with case-control traits, we need to convert the traits back to liabilities. +The function `binary_to_liability` provides this conversion. + +```{r mapit, eval = F} +y_liabilities <- binary_to_liability(y, disease_prevalence) + +lt_mapit <- mvmapit( + t(X), + t(y_liabilities), + test = "hybrid" +) +``` + +# References +1: Lorin Crawford, Xiang Zhou (2018) Genome-wide Marginal Epistatic Association Mapping in Case-Control Studies bioRxiv 374983;