Skip to content

Commit

Permalink
WIP & version upd
Browse files Browse the repository at this point in the history
  • Loading branch information
mikessh committed Jun 13, 2024
1 parent 85532b9 commit 1286600
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 24 deletions.
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
VDJDB: A curated database of T-cell receptor sequences of known antigen specificity

Copyright 2015-2022 VDJdb Developers
Copyright 2015-2024 VDJdb Developers and Maintainers

Attribution-NoDerivatives 4.0 International

Expand Down
1 change: 1 addition & 0 deletions latest-version.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
https://github.com/antigenomics/vdjdb-db/releases/download/2024-06-13/vdjdb-2024-06-13.zip
https://github.com/antigenomics/vdjdb-db/releases/download/2024-05-23/vdjdb-2024-05-23.zip
https://github.com/antigenomics/vdjdb-db/releases/download/2023-06-01/vdjdb-2023-06-01.zip
https://github.com/antigenomics/vdjdb-db/releases/download/2022-03-30/vdjdb-2022-03-30.zip
Expand Down
52 changes: 29 additions & 23 deletions summary/vdjdb_summary.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,10 @@ dt.vdjdb.s2 = dt.vdjdb.s %>%
mhc_count = length(unique(mhc_key[which(pub_date <= pub_date2)])))
p1=ggplot(dt.vdjdb.s2, aes(x = as.integer(pub_date2), y = tcr_count, color = chains)) +
annotate("segment", x = 2017, xend = 2017, y = 0, yend = 19000, linetype="dotted", size = 0.3) +
annotate("text", x = 2017, y = 21000, label = "AIRR-seq for tet+", hjust = 1, vjust = 1) +
annotate("segment", x = 2019, xend = 2019, y = 0, yend = 27000, linetype="dotted", size = 0.3) +
annotate("text", x = 2019, y = 30000, label = "10X & dCODE", hjust = 1, vjust = 1) +
annotate("segment", x = 2017, xend = 2017, y = 0, yend = 19000, linetype="solid", color = "grey25", size = 0.3) +
annotate("text", x = 2017, y = 21500, label = "AIRR-seq for tet+", hjust = 1, vjust = 1) +
annotate("segment", x = 2019, xend = 2019, y = 0, yend = 27000, linetype="solid", color = "grey25", size = 0.3) +
annotate("text", x = 2019, y = 31000, label = "10X & dCODE", hjust = 1, vjust = 1) +
geom_line() +
geom_point() +
ylab("") +
Expand All @@ -190,7 +190,7 @@ p1=ggplot(dt.vdjdb.s2, aes(x = as.integer(pub_date2), y = tcr_count, color = cha
axis.line = element_line(size = 0.3))
p2=ggplot(dt.vdjdb.s2, aes(x = as.integer(pub_date2), y = epi_count, color = chains)) +
annotate("segment", x = 2021, xend = 2021, y = 0, yend = 1000, linetype="dotted", size = 0.3) +
annotate("segment", x = 2021, xend = 2021, y = 0, yend = 1000, linetype="solid", color = "grey25", size = 0.3) +
annotate("text", x = 2021, y = 1150, label = "COVID-19 studies", hjust = 1, vjust = 1) +
geom_line() +
geom_point() +
Expand Down Expand Up @@ -274,10 +274,12 @@ Summary of antigens and T-cell receptors related to COVID-19 pandemic. Number of
df %>%
filter(species == "HomoSapiens",
startsWith(as.character(antigen.species), "SARS-CoV")) %>%
mutate(mhc.a = str_split_fixed(mhc.a, "[,:]", 2)[,1]) %>%
group_by(antigen.gene, mhc.a, antigen.epitope) %>%
mutate(mhc.a = str_split_fixed(mhc.a, "[,:]", 2)[,1],
mhc.b = str_split_fixed(mhc.b, "[,:]", 2)[,1],
mhc = ifelse(mhc.class == "MHCI", mhc.a, paste0(mhc.a, '/', substr(mhc.b, 7, 15)))) %>%
group_by(antigen.gene, mhc, antigen.epitope) %>%
mutate(publications = length(unique(str_split_fixed(reference.id, ",", n = Inf)[,1]))) %>%
group_by(antigen.gene, mhc.a, antigen.epitope, gene, publications) %>%
group_by(antigen.gene, mhc, antigen.epitope, gene, publications) %>%
summarize(records = n()) -> df.c
colnames(df.c) = c("Gene", "HLA", "Epitope", "TCR chain",
Expand All @@ -293,7 +295,7 @@ ggplot(df.c %>%
y = log2(Records))) +
geom_alluvium(aes(fill = substr(Epitope,1,3) %>% as.factor %>% as.integer),
color = "white", alpha = 0.8, curve_type = "sigmoid") +
geom_stratum(fill = "white", color = "black", size=0.2) +
geom_stratum(fill = "grey95", color = "white", size=1.0) +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
scale_fill_distiller(palette = "Set3", guide=F, "") +
#scale_fill_hue(guide=F, "") +
Expand All @@ -303,35 +305,38 @@ ggplot(df.c %>%
theme_void() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.x = element_text(size = 16, color = "black"),
axis.text.x = element_text(size = 16, color = "black", vjust = -5),
axis.ticks.x = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "bottom")
panel.grid.major.y = element_blank())
```

Summary of SARS-CoV-2 epitopes and corresponding TCR alpha and beta chain specificity records (cases with 10+ records)

```{r message=FALSE, warning=FALSE}
kable(format = "html",
df.c %>% reshape2::dcast(Gene + HLA + Epitope + Studies ~ `TCR chain`, fill = 0) %>%
df.c %>%
reshape2::dcast(Gene + HLA + Epitope + Studies ~ `TCR chain`, fill = 0) %>%
mutate(HLA = gsub("*", ".", HLA, fixed = T)) %>%
filter(TRA+TRB >= 10) %>%
arrange(-(TRB+TRA)))
```

---

#### **Neoantigen** data
#### **Self-antigen** data

Summary of potential neoantigen targets for immunotherapy and T-cell receptors recognizing them. Number of records for neoantigens grouped by mutated gene and HLA plotted using alluvium plot. Neoantigens with less than 10 records in total were not counted.
Summary of T-cell receptors recognizing self-antigens, including antigens linked to autoimmune diseases and potential neoantigen targets for cancer immunotherapy. The number of records for self-antigens, grouped by (mutated) human gene and corresponding HLA, is plotted using an alluvium plot. Only self-antigens with at least 10 records are shown.

```{r message=FALSE, warning=FALSE, fig.width=12, fig.height=10}
df %>%
filter(species == "HomoSapiens",
startsWith(as.character(antigen.species), "HomoSapiens")) %>%
mutate(mhc.a = str_split_fixed(mhc.a, "[,:]", 2)[,1]) %>%
group_by(antigen.gene, mhc.a, antigen.epitope) %>%
mutate(mhc.a = str_split_fixed(mhc.a, "[,:]", 2)[,1],
mhc.b = str_split_fixed(mhc.b, "[,:]", 2)[,1],
mhc = ifelse(mhc.class == "MHCI", mhc.a, paste0(mhc.a, '/', substr(mhc.b, 7, 15)))) %>%
group_by(antigen.gene, mhc, antigen.epitope) %>%
mutate(publications = length(unique(str_split_fixed(reference.id, ",", n = Inf)[,1]))) %>%
group_by(antigen.gene, mhc.a, antigen.epitope, gene, publications) %>%
group_by(antigen.gene, mhc, antigen.epitope, gene, publications) %>%
summarize(records = n()) -> df.n
colnames(df.n) = c("Gene", "HLA", "Epitope", "TCR chain",
Expand All @@ -347,7 +352,7 @@ ggplot(df.n %>%
y = log2(Records))) +
geom_alluvium(aes(fill = substr(Epitope,1,3) %>% as.factor %>% as.integer),
color = "white", alpha = 0.8, curve_type = "sigmoid") +
geom_stratum(fill = "white", color = "black", size=0.2) +
geom_stratum(fill = "grey95", color = "white", size=1.0) +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
scale_fill_distiller(palette = "Accent", guide=F, "") +
#scale_fill_hue(guide=F, "") +
Expand All @@ -357,17 +362,18 @@ ggplot(df.n %>%
theme_void() +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.x = element_text(size = 16, color = "black"),
axis.text.x = element_text(size = 16, color = "black", vjust = -5),
axis.ticks.x = element_blank(),
panel.grid.major.y = element_blank(),
legend.position = "bottom")
panel.grid.major.y = element_blank())
```

Summary of self-antigens (including neoantigens) and corresponding TCR alpha and beta chain specificity records (cases with 5+ records)

```{r message=FALSE, warning=FALSE}
kable(format = "html",
df.n %>% reshape2::dcast(Gene + HLA + Epitope + Studies ~ `TCR chain`, fill = 0) %>%
df.n %>%
reshape2::dcast(Gene + HLA + Epitope + Studies ~ `TCR chain`, fill = 0) %>%
mutate(HLA = gsub("*", ".", HLA, fixed = T)) %>%
filter(TRA+TRB >= 5) %>%
arrange(-(TRB+TRA)))
```
Expand Down

0 comments on commit 1286600

Please sign in to comment.