diff --git a/R/group_genotypes.R b/R/group_genotypes.R index 917017d..5119b43 100644 --- a/R/group_genotypes.R +++ b/R/group_genotypes.R @@ -1,17 +1,19 @@ #' Build grouped genotype #' #' @description Build presence/absence of the grouped genotypes (e.g. gene) from -#' the presence/absence of the ungrouped genotypes (e.g. snps). +#' the presence/absence of the ungrouped genotypes (e.g. snps). Remove columns +#' (genes) where the genotype is too rare or common (presence = 0, 1, N, N-1). #' #' @param geno Matrix. Columns are genotypes. Rows are isolates. #' @param gene_to_snp_lookup_table Matrix. 2 columns. 1st column are the #' ungrouped genotypes 2nd column are the grouped genotypes. The table's 1st #' column must contain only genotypes that are found in the row.names(geno). +#' @param tr Phylogenetic tree. #' #' @return samples_by_genes. Matrix. #' #' @noRd -build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){ +build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table, tr){ # Check input ---------------------------------------------------------------- check_class(geno, "matrix") check_class(gene_to_snp_lookup_table, "matrix") @@ -38,8 +40,10 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){ samples_by_genes <- samples_by_genes > 0 class(samples_by_genes) <- "numeric" + + samples_by_genes <- remove_rare_or_common_geno(samples_by_genes, tr) # Return output -------------------------------------------------------------- - return(samples_by_genes) + return(samples_by_genes$mat) } #' Remove rare and common genotypes from grouped genotypes @@ -51,6 +55,7 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){ #' @param geno Matrix. Binary. Nrow = Ntip(tr). Ncol = number of original #' genotypes. #' @param lookup Matrix. Ncol = 2. Nrow = genotypes with group assignments. +#' @param tr Tree. #' #' @return List of four objects: #' \describe{ @@ -62,7 +67,7 @@ build_gene_genotype_from_snps <- function(geno, gene_to_snp_lookup_table){ #' \item{genotype.}{Matrix.} #' } #' @noRd -prepare_grouped_genotype <- function(geno, lookup){ +prepare_grouped_genotype <- function(geno, lookup, tr){ # Check input ---------------------------------------------------------------- check_dimensions(lookup, min_rows = 1, exact_cols = 2, min_cols = 2) check_dimensions(geno, min_cols = 1, min_rows = 1) @@ -99,7 +104,7 @@ prepare_grouped_genotype <- function(geno, lookup){ } # Do the grouping step ---- - grouped_genotype <- build_gene_genotype_from_snps(genotype, gene_snp_lookup) + grouped_genotype <- build_gene_genotype_from_snps(genotype, gene_snp_lookup, tr) # Return output ---- results <- list("snps_per_gene" = snps_per_gene, @@ -186,7 +191,7 @@ prepare_genotype <- function(group_logical, geno, tr, lookup){ # # Function ------------------------------------------------------------------- if (group_logical) { - prepped_geno <- prepare_grouped_genotype(geno, lookup) + prepped_geno <- prepare_grouped_genotype(geno, lookup, tr) } else { prepped_geno <- prepare_ungrouped_genotype(geno, tr) } diff --git a/tests/testthat/test_group_genotypes.R b/tests/testthat/test_group_genotypes.R index 751efdc..010e94d 100644 --- a/tests/testthat/test_group_genotypes.R +++ b/tests/testthat/test_group_genotypes.R @@ -53,9 +53,9 @@ test_that("prepare_genotype gives expected genotype for a grouped input", { # Test # Expected output matrix - expected_geno <- temp_geno[, c(1, 2, 5, 6, 7)] - expected_geno[, 5] <- rowSums(temp_geno[,7:8]) - colnames(expected_geno) <- paste0("GENE", c(1, 2, 5, 6, 7)) + expected_geno <- temp_geno[, c(1, 5, 6, 7)] + expected_geno[, 4] <- rowSums(temp_geno[, 7:8]) + colnames(expected_geno) <- paste0("GENE", c(1, 5, 6, 7)) expect_equal(temp_result$genotype, expected_geno) expect_equal(temp_result$gene_snp_lookup, temp_lookup[c(1, 2, 5, 6, 7, 8), ]) expect_equal(as.numeric(unname(temp_result$snps_per_gene)), c(1, 1, 1, 1, 2)) @@ -211,12 +211,12 @@ test_that("prepare_grouped_genotype works for valid inputs", { temp_lookup[, 2] <- c("GENE1", "GENE2", "GENE3", "GENE4", "GENE5", "GENE6", "GENE7", "GENE7") - temp_result <- prepare_grouped_genotype(temp_geno, temp_lookup) + temp_result <- prepare_grouped_genotype(temp_geno, temp_lookup, temp_tree) # Test - expected_geno <- temp_geno[, c(1, 2, 5, 6, 7)] - expected_geno[, 5] <- rowSums(temp_geno[,7:8]) - colnames(expected_geno) <- paste0("GENE", c(1, 2, 5, 6, 7)) + expected_geno <- temp_geno[, c(1, 5, 6, 7)] + expected_geno[, 4] <- rowSums(temp_geno[,7:8]) + colnames(expected_geno) <- paste0("GENE", c(1, 5, 6, 7)) expect_equal(temp_result$genotype, expected_geno) expect_equal(temp_result$gene_snp_lookup, temp_lookup[c(1, 2, 5, 6, 7, 8), ]) expect_equal(as.numeric(unname(temp_result$snps_per_gene)), c(1, 1, 1, 1, 2)) @@ -227,22 +227,29 @@ test_that("prepare_grouped_genotype works for valid inputs", { # test build_gene_genotype_from_snps test_that("build_gene_genotype_from_snps works for valid inputs", { # Set up + ntip <- 4 temp_geno <- matrix(c(0, 1), ncol = 4, - nrow = 3) + nrow = ntip) + temp_geno[2, 1] <- temp_geno[2, 3] <- temp_geno[4, 2] <- 0 + temp_geno[1, 3] <- 1 + colnames(temp_geno) <- c("a", "b", "d", "h") - row.names(temp_geno) <- c("sample1", "sample2", "sample3") + row.names(temp_geno) <- c("sample1", "sample2", "sample3", "sample4") temp_key <- matrix(c(letters[1:8], c(rep("One", 3), rep("Two", 3), rep("Three", 2))), ncol = 2, nrow = 8) colnames(temp_key) <- c("snp", "gene") temp_key <- temp_key[temp_key[, 1] %in% colnames(temp_geno),, drop = FALSE] - results <- build_gene_genotype_from_snps(temp_geno, temp_key) + temp_tree <- ape::rcoal(ntip) + temp_tree$node.label <- c(100, 100, 100) + temp_tree$tip.label <- row.names(temp_geno) + results <- build_gene_genotype_from_snps(temp_geno, temp_key, temp_tree) - expected_results <- matrix(c(1, 1, 1, 0, 1, 0, 1, 0, 1), nrow = 3, ncol = 3) + expected_results <- temp_geno[ , c(1, 3:4)] + expected_results[2, 1] <- 1 colnames(expected_results) <- c("One", "Two", "Three") - row.names(expected_results) <- c("sample1", "sample2", "sample3") # Test expect_identical(results, expected_results)