Skip to content

Commit

Permalink
Merge pull request #403 from immunomind/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
vadimnazarov authored Mar 19, 2024
2 parents ac2c840 + 93b6e81 commit 0b6544a
Show file tree
Hide file tree
Showing 27 changed files with 405 additions and 827 deletions.
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Package: immunarch
Type: Package
Title: Bioinformatics Analysis of T-Cell and B-Cell Immune Repertoires
Version: 0.9.0
Version: 0.9.1
Authors@R: c(
person("Vadim I.", "Nazarov", , "[email protected]", c("aut", "cre")),
person("Vasily O.", "Tsvetkov", , role = "aut"),
person("Siarhei", "Fiadziushchanka", , role = "aut"),
person("Eugene", "Rumynskiy", , role = "aut"),
person("Aleksandr A.", "Popov", , role = "aut"),
person("Ivan", "Balashov", , role = "aut"),
Expand All @@ -23,7 +24,7 @@ Description: A comprehensive framework for bioinformatics exploratory analysis o
and gene segments, repertoire diversity analysis, annotation of clonotypes using external immune receptor
databases and clonotype tracking in vaccination and cancer studies. A successor to our
previously published 'tcR' immunoinformatics package (Nazarov 2015) <doi:10.1186/s12859-015-0613-1>.
License: AGPL-3
License: Apache License (== 2.0)
URL: https://immunarch.com/, https://github.com/immunomind/immunarch
BugReports: https://github.com/immunomind/immunarch/issues
Imports:
Expand Down Expand Up @@ -84,6 +85,6 @@ Suggests:
rmarkdown
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.2.2
RoxygenNote: 7.3.1
LazyData: true
LazyDataCompression: xz
862 changes: 201 additions & 661 deletions LICENSE

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Generated by roxygen2: do not edit by hand

S3method(cosine_sim,default)
S3method(cosine_sim,numeric)
S3method(jaccard_index,character)
S3method(jaccard_index,default)
S3method(overlap_coef,character)
S3method(overlap_coef,default)
S3method(tversky_index,character)
S3method(tversky_index,default)
S3method(vis,clonal_family)
S3method(vis,clonal_family_tree)
S3method(vis,immunr_chao1)
Expand Down Expand Up @@ -150,6 +158,7 @@ importFrom(dplyr,n)
importFrom(dplyr,one_of)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,row_number)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,select_)
Expand Down
5 changes: 3 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

fill_vec <- function(read_vec, read_indices) {
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
}

fill_reads <- function(new_reads, new_counts) {
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
}

2 changes: 0 additions & 2 deletions R/explore.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .

res
}

rep.ex <- repExplore
25 changes: 25 additions & 0 deletions R/immunarch-remaster.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# .check_immundata <- function (.object) {
# if (!is.instance(.object, "ImmunData")) {
# stop("Error: the input object is not of class ImmunData. Immunarch works on ImmunData only. Helpful manual: ...")
# }
# }
#
# .repertoire_overlap <- function (.data, .method, .verbose = TRUE, ...) {
# check_immundata(.data)
# }
#
# .gene_usage <- function (.data, .gene, .type, .use_counts, .norm, .gene_vec) {
#
# }
#
# .repertoire_diversity <- function (.data, .method, .verbose = TRUE, ...) {
#
# }
#
# .track_clonotypes <- function () {
#
# }
#
# .public_repertoire <- function () {
#
# }
26 changes: 19 additions & 7 deletions R/io-parsers.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,10 @@ parse_repertoire <- function(.filename, .mode, .nuc.seq, .aa.seq, .count,
.vend, .dstart, .dend, .jstart,
.total.insertions, .vd.insertions, .dj.insertions
)
if (!is.na(.add[1])) {
if (!has_no_data(.add)) {
vec_names <- c(vec_names, .add)
# add missing columns
df %<>% add_empty_columns(.add[!(.add %in% colnames(df))])
}

df <- df[, vec_names]
Expand Down Expand Up @@ -400,7 +402,7 @@ parse_mitcr <- function(.filename, .mode) {
}

parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount")) {
.filename <- .filename
.filename %<>% .as_tsv()
.id <- "cloneid"
.count %<>% tolower()
.sep <- "\t"
Expand Down Expand Up @@ -727,6 +729,11 @@ parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount"))
}
}

# fill the cloneid column if it does not exist
if (!(.id %in% colnames(df))) {
df %<>% mutate("{.id}" := row_number())
}

df <- df[, make.names(df_columns)]
colnames(df) <- df_column_names

Expand Down Expand Up @@ -962,13 +969,18 @@ parse_airr <- function(.filename, .mode) {
.as_tsv() %>%
airr::read_rearrangement()

bcr_pipeline_columns <- c(
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
)
df %<>%
select_(
add_empty_columns(bcr_pipeline_columns[!(bcr_pipeline_columns %in% colnames(df))]) %>%
select(
"sequence", "v_call", "d_call", "j_call", "junction", "junction_aa",
~contains("v_germline_end"), ~contains("d_germline_start"),
~contains("d_germline_end"), ~contains("j_germline_start"),
~contains("np1_length"), ~contains("np2_length"),
~contains("duplicate_count"),
contains("v_germline_end"), contains("d_germline_start"),
contains("d_germline_end"), contains("j_germline_start"),
contains("np1_length"), contains("np2_length"),
contains("duplicate_count"),
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
)
Expand Down
7 changes: 5 additions & 2 deletions R/io-utility.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Reduce file paths to bare sample names.
#
# .str - character vector of file names or paths.
#
# Steps, applied to every element in this order:
#   1. drop everything up to and including the last "/";
#   2. drop everything up to and including the last "\" (Windows-style paths);
#   3. drop one trailing compression suffix: .gz, .bzip, .bzip2 or .bz2;
#   4. drop one trailing table extension: .txt, .tsv or .csv.
# Other dots in the name are kept, e.g. "dir/sample.clones.txt.gz" becomes
# "sample.clones".
#
# Returns the vector produced by the chain of str_replace() calls.
.remove.ext <- function(.str) {
  .str %>%
    str_replace(".*/", "") %>%
    str_replace(".*\\\\", "") %>%
    # the compression suffix must go first so that the table extension
    # becomes the end of the string for the next step
    str_replace("(\\.gz|\\.bzip|\\.bzip2|\\.bz2)$", "") %>%
    str_replace("(\\.txt|\\.tsv|\\.csv)$", "")
}


Expand Down
6 changes: 3 additions & 3 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
#' @importFrom jsonlite read_json
#' @importFrom stringr str_split str_detect str_replace_all str_trim
#' @importFrom methods as
#' @importFrom dplyr contains first select_ group_by_at one_of
#' @importFrom dplyr contains first select_ group_by_at one_of row_number
#' @importFrom utils read.table
#' @importFrom data.table setDF
#'
Expand Down Expand Up @@ -291,13 +291,13 @@ repLoad <- function(.path, .mode = "paired", .coding = TRUE, ...) {
missed_in_metadata <- setdiff(.metadata$Sample, .rep_names)
if (length(missed_in_folders) || length(missed_in_metadata)) {
if (length(missed_in_metadata)) {
message(" -- Samples found in the metadata, but not in the folder:\n ", missed_in_metadata)
message(" -- Samples found in the metadata, but not in the folder:\n ", toString(missed_in_metadata))
message(" Did you correctly specify all the sample names in the metadata file?")

error_flag <- TRUE
}
if (length(missed_in_folders)) {
message(" -- Samples found in the folder, but not in the metadata:\n ", missed_in_folders)
message(" -- Samples found in the folder, but not in the metadata:\n ", toString(missed_in_folders))
message(" Did you add all the necessary samples to the metadata file with correct names?")
message(" Creating dummy sample records in the metadata for now...")

Expand Down
8 changes: 8 additions & 0 deletions R/overlap.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
UseMethod("overlap_coef")
}

# Overlap coefficient for two table-like inputs: the number of rows shared by
# both inputs divided by the row count of the smaller input.
# Both inputs are materialised with collect(n = Inf) before the comparison.
#' @export
overlap_coef.default <- function(.x, .y) {
  left <- collect(.x, n = Inf)
  right <- collect(.y, n = Inf)
  n_shared <- nrow(dplyr::intersect(left, right))
  n_smaller <- min(nrow(left), nrow(right))
  n_shared / n_smaller
}

# Overlap coefficient for two character vectors: the size of their
# intersection divided by the length of the shorter vector.
#' @export
overlap_coef.character <- function(.x, .y) {
  shared <- dplyr::intersect(.x, .y)
  smaller_size <- min(length(.x), length(.y))
  length(shared) / smaller_size
}
Expand All @@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
UseMethod("jaccard_index")
}

# Jaccard index for two table-like inputs: shared rows divided by the size of
# the union, where the union is computed as nrow(x) + nrow(y) - shared.
# Both inputs are materialised with collect(n = Inf) before the comparison.
#' @export
jaccard_index.default <- function(.x, .y) {
  left <- collect(.x, n = Inf)
  right <- collect(.y, n = Inf)
  n_shared <- nrow(dplyr::intersect(left, right))
  n_union <- nrow(left) + nrow(right) - n_shared
  n_shared / n_union
}

#' @export
jaccard_index.character <- function(.x, .y) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (length(.x) + length(.y) - intersection)
Expand All @@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
UseMethod("tversky_index")
}

# Tversky index for two table-like inputs.
#
# .x, .y - inputs that are materialised with collect(n = Inf).
# .a     - weight of the rows found only in .x (default 0.5).
# .b     - weight of the rows found only in .y (default 0.5).
#
# Returns shared / (.a * only_in_x + .b * only_in_y + shared), where every
# term is a row count.
#' @export
tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
  left <- collect(.x, n = Inf)
  right <- collect(.y, n = Inf)
  n_shared <- nrow(dplyr::intersect(left, right))
  n_only_left <- nrow(dplyr::setdiff(left, right))
  n_only_right <- nrow(dplyr::setdiff(right, left))
  n_shared / (.a * n_only_left + .b * n_only_right + n_shared)
}

#' @export
tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
intersection <- length(dplyr::intersect(.x, .y))
intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
Expand All @@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
UseMethod("cosine_sim")
}

#' @export
cosine_sim.default <- function(.x, .y, .quant) {
.x <- collect(.x, n = Inf)
.y <- collect(.y, n = Inf)
Expand All @@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
}

#' @export
cosine_sim.numeric <- function(.x, .y, .quant) {
df <- rbind(.x, .y)
sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]
Expand Down
2 changes: 1 addition & 1 deletion R/sampling.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#'
#' Note: each connection must represent a separate repertoire.
#'
#' @param .method Character. Name of a sampling method. See "Description" for more details. Default value is "downsample"
#' @param .method Character. Name of a sampling method. See "Details" for more details. Default value is "downsample"
#' that downsamples the repertoires to the number of clones (i.e., reads / UMIs) that the smallest repertoire has, if user
#' doesn't set any value to the ".n" argument.
#'
Expand Down
9 changes: 5 additions & 4 deletions R/seqCluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,11 @@ seqCluster <- function(.data, .dist, .perc_similarity, .nt_similarity, .fixed_th
if (!all(is.na(grouping_cols))) {
result_multi %<>% map2_df(., pmap(group_values, data.frame)[!singleseq_flag], ~ cbind(.x, .y))
res <- rbind(result_single, result_multi)
res[grouping_cols] <- str_split(str_split(res[["Cluster"]],
pattern = "_", simplify = TRUE
)[, 1],
pattern = "/", simplify = TRUE
res[grouping_cols] <- str_split(
str_split(res[["Cluster"]],
pattern = "_", simplify = TRUE
)[, 1],
pattern = "/", simplify = TRUE
)[, seq_along(grouping_cols)]
} else {
result_multi %<>% map_df(., ~.x)
Expand Down
1 change: 0 additions & 1 deletion R/shiny.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ fixVis <- function(.plot = NA) {
#
server <- function(input, output, session) {
create_plot <- function(input) {

# TODO: make automatic detection of available themes from ggplot2 and other packages
choose_theme <- function(theme_label) {
switch(theme_label,
Expand Down
11 changes: 11 additions & 0 deletions R/tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,17 @@ add_column_with_first_gene <- function(.data, .original_colname, .target_colname
return(.data)
}

# Append columns filled with NA to a data frame.
#
# .data     - data frame to extend.
# .colnames - character vector with the names of the columns to append. No
#             check is made here for names that are already present in .data.
#
# Returns .data untouched when .colnames is empty; otherwise the result of
# cbind()-ing .data with one NA-filled column per requested name.
add_empty_columns <- function(.data, .colnames) {
  if (length(.colnames) == 0) {
    return(.data)
  }
  # Build NA columns of full length instead of relying on recycling of a scalar
  # NA: recycling fails with "arguments imply differing number of rows" when
  # .data has zero rows (e.g. an empty repertoire file).
  na_column <- rep(NA, NROW(.data))
  new_columns <- rep(list(na_column), length(.colnames))
  names(new_columns) <- .colnames
  do.call(cbind, c(list(.data), new_columns))
}

# used to add sample name to error/warning messages when sample name is available
optional_sample <- function(prefix, sample_name, suffix) {
if (is.na(sample_name) || (sample_name == "")) {
Expand Down
18 changes: 5 additions & 13 deletions R/vis.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ if (getRversion() >= "2.15.1") {
"Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
"Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
"AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", ".SD",
"name", "label", "."
))
}
Expand Down Expand Up @@ -47,15 +47,11 @@ if (getRversion() >= "2.15.1") {


.tweak_fill <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
} else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_fill_hue())
Expand All @@ -65,15 +61,11 @@ if (getRversion() >= "2.15.1") {
}

.tweak_col <- function(.n) {
palette_name <- ""
if (.n == 1) {
palette_name <- "Set2"
} else if (.n == 2) {
palette_name <- "Set1"
}
# else if (.n < 4) { palette_name = "YlGnBu" }
# else if (.n < 6) { palette_name = "RdBu" }
else if (.n < 12) {
} else if (.n < 12) {
palette_name <- "Spectral"
} else {
return(scale_colour_hue())
Expand Down Expand Up @@ -1469,7 +1461,7 @@ vis_box <- function(.data, .by = NA, .meta = NA, .melt = TRUE,
# print(p_df)

p <- p +
stat_compare_means(aes(label = ..p.adj..),
stat_compare_means(aes(label = after_stat(p.adj)),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
Expand Down Expand Up @@ -2188,7 +2180,7 @@ vis_bar <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .
# print(p_df)

p <- p +
stat_compare_means(aes(label = ..p.adj..),
stat_compare_means(aes(label = after_stat(p.adj)),
bracket.size = .5, size = .signif.label.size,
label.y = max(.data$Value, na.rm = TRUE) * 1.07
)
Expand Down
Loading

0 comments on commit 0b6544a

Please sign in to comment.