-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathOlink.R
104 lines (95 loc) · 4.56 KB
/
Olink.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/env Rscript
# Read Olink validation sheets from a workbook and publish each one as a
# data frame in the global environment.
#
# Arguments:
#   xlsx    - path of the Excel workbook passed to openxlsx::read.xlsx().
#   tabs    - character vector of sheet names to read.
#   order   - if TRUE, rows are sorted on the second column before assignment;
#             if FALSE, the sheet order is kept.
#   nlines  - number of rows shown per sheet when verbose is TRUE.
#   verbose - if TRUE, print an underlined sheet title, then the "Target"
#             column (left-aligned), then all remaining columns.
#
# Every sheet is read from rows 3:95 and columns 1:16 and is stored under the
# sheet name with spaces and hyphens replaced by underscores
# (e.g. "CVD II" becomes CVD_II). The function is called for its side effects.
olink_panel <- function(xlsx, tabs, order, nlines, verbose) {
  for (sheet in tabs) {
    if (verbose) {
      rule <- rep("-", nchar(sheet) + 1)
      cat("\n\n", sheet, ":\n", rule, "\n\n", sep = "")
    }
    panel <- openxlsx::read.xlsx(xlsx, sheet = sheet, colNames = TRUE, skipEmptyRows = FALSE,
                                 cols = 1:16, rows = 3:95)
    # The logical argument `order` does not hide base::order() in call position:
    # R skips non-function bindings when it looks up a function name.
    if (order) panel <- panel[order(panel[, 2]), ]
    object_name <- gsub(" |-", "_", sheet)
    assign(object_name, panel, envir = .GlobalEnv)
    if (verbose) {
      other_columns <- setdiff(names(panel), "Target")
      print(head(panel["Target"], nlines), right = FALSE)
      cat("\n")
      print(head(panel[other_columns], nlines))
    }
  }
}
# Session set-up: wide console output and the two root directories, both read
# from environment variables.
options(width = 160)
HOME <- Sys.getenv("HOME")
INF <- Sys.getenv("INF")

# UCSC hgTables: add a UniProt column holding the part of `name` that precedes
# the first "-".
hgTables <- read.delim(file.path(INF, "doc", "hgTables.tsv"), as.is = TRUE)
hgTables[["UniProt"]] <- unlist(lapply(strsplit(hgTables$name, "-"), function(parts) parts[1]))

# Workbook with the Olink validation data for all panels.
xlsx <- file.path(INF, "doc", "Olink validation data all panels.xlsx")

# First pass: load every panel, each sorted on its second column.
tabs <- c("Cardiometabolic", "Cell Regulation", "CVD II", "CVD III", "Development", "Immune Response",
          "Immuno-Oncology", "Inflammation", "Metabolism", "Neurology", "Oncology II", "Organ Damage")
olink_panel(xlsx, tabs, order = TRUE, nlines = 92, verbose = FALSE)

# Second pass: reload Inflammation alone, in sheet order, and print it.
tabs <- "Inflammation"
olink_panel(xlsx, tabs, order = FALSE, nlines = 92, verbose = TRUE)

# TWEAK O43508 <- Q4ACW9. See https://www.uniprot.org/uniprot/ for additional information
Inflammation[["UniProt.No."]][Inflammation[["UniProt.No."]] %in% "Q4ACW9"] <- "O43508"
# alias: alternative accession for three entries, NA for every other row.
# Names of the lookup vector are the accessions now stored in UniProt.No.
Inflammation["alias"] <- unname(
  c(O43508 = "Q4ACW9", Q8NF90 = "P12034", Q8WWJ7 = "P30203")[Inflammation[["UniProt.No."]]]
)
inf.orig <- Inflammation

# inf1: protein list joined to the (tweaked) Inflammation sheet on UniProt accession.
inf <- read.table(file.path(INF, "doc", "inf1.list"), header = FALSE, sep = "\t",
                  col.names = c("prot", "UniProt"), as.is = TRUE)
inf1 <- merge(inf, inf.orig, by.x = "UniProt", by.y = "UniProt.No.")
write.csv(inf1[c("UniProt", "prot", "Target", "alias")], file = "inf1.csv", quote = FALSE, row.names = FALSE)

# inf2: switch the two aliased accessions before the full outer join with
# hgTables, then keep the panel proteins and drop rows whose X.chrom contains "hap".
inf[["UniProt"]][inf[["UniProt"]] %in% "Q8NF90"] <- "P12034"
inf[["UniProt"]][inf[["UniProt"]] %in% "Q8WWJ7"] <- "P30203"
inf <- merge(inf, hgTables, by = "UniProt", all = TRUE)
inf2 <- subset(inf, UniProt %in% c(inf1$UniProt, "P12034", "P30203"))
write.csv(subset(inf2, !grepl("hap", X.chrom)), file = "inf2.csv", quote = FALSE, row.names = FALSE)
# Venn diagram with the SomaLogic panel
# Reload the sheet(s) named in `tabs` (expected to still be "Inflammation" here),
# sorted on the second column; this overwrites the global Inflammation object
# with a fresh copy from the workbook.
olink_panel(xlsx, tabs, TRUE, 92, FALSE)
library(reshape)
# Rename the column with base R rather than an unqualified rename(): once dplyr
# is attached (as happens later in this script) rename() resolves to
# dplyr::rename, which expects new = old and fails on this call when the
# section is re-run in the same session.
names(Inflammation)[names(Inflammation) == "UniProt.No."] <- "UniProt.No"
Olink <- Inflammation[c("Target", "UniProt.No")]
# Show the TWEAK row(s) before recoding.
subset(Olink, UniProt.No %in% c("O43508", "Q4ACW9"))
# %in% yields FALSE for missing accessions, whereas == yields NA, and
# data-frame sub-assignment rejects NA row indices.
toreplace <- with(Olink, UniProt.No %in% "Q4ACW9")
Olink[toreplace, "UniProt.No"] <- "O43508"
# Drop entries without an accession, whether read as a real NA or as the text "NA".
Olink <- within(subset(Olink, !is.na(UniProt.No) & UniProt.No != "NA"), {
  OlinkID <- UniProt.No
  OlinkTarget <- Target
})
somalogic <- read.delim(file.path(HOME, "SomaLogic", "doc", "SOMALOGIC_Master_Table_160410_1129info.tsv"),
                        as.is = TRUE)
SomaLogic <- within(subset(somalogic, !is.na(UniProt) & UniProt != "NA"), {
  SomaLogicID <- UniProt
  SomaLogicTarget <- Target
})
# Inner join on UniProt accession: proteins present on both platforms.
olink_somalogic <- merge(Olink[c("UniProt.No", "OlinkID", "OlinkTarget")],
                         SomaLogic[c("SomaLogicTarget", "UniProt", "SomaLogicID")],
                         by.x = "UniProt.No", by.y = "UniProt")
# TWEAK-O43508 is missing
# NOTE(review): presumably this means O43508 has no match in the SomaLogic table - confirm.
# P23560 is excluded from the overlap and from both Venn sets; the reason is not stated here.
i <- setdiff(unique(with(olink_somalogic, UniProt.No)), "P23560")
write(i, file = "i")
library(VennDiagram)
plist <- list(setdiff(Olink[["UniProt.No"]], "P23560"), setdiff(SomaLogic[["UniProt"]], "P23560"))
cnames <- c("Olink", "SomaLogic")
venn.diagram(x = plist, category.names = cnames, filename = "venn_diagram.png", imagetype = "png", output = TRUE)
## additional validation
# Repeat the overlap using the annotated Olink panel file instead of the workbook.
# NOTE: from here on `Olink` and `SomaLogic` are file paths (character strings);
# any objects previously bound to these names are replaced.
Olink <- file.path(INF, "doc", "olink.inf.panel.annot.tsv")
o <- read.delim(Olink, as.is = TRUE)
SomaLogic <- file.path(HOME, "SomaLogic", "doc", "SOMALOGIC_Master_Table_160410_1129info.tsv")
s <- read.delim(SomaLogic, as.is = TRUE)
library(reshape)
# Rename with base R rather than an unqualified rename(): dplyr is attached a few
# lines below, after which rename() resolves to dplyr::rename (new = old), so
# re-running this section in the same session would fail.
names(s)[names(s) == "UniProt"] <- "uniprot"
# Accessions on both platforms; P23560 is excluded as in the other comparisons in this file.
setdiff(intersect(o[["uniprot"]], s[["uniprot"]]), "P23560")
os <- merge(o, s, by = "uniprot")
u <- setdiff(unique(os[["uniprot"]]), "P23560")
length(u)
# Distinct (uniprot, target.short) pairs among the shared accessions.
p <- unique(subset(os[c("uniprot", "target.short")], uniprot %in% u))
dim(p)
library(dplyr)
# nest_join() keeps every row of `o` and stores the matching rows of `s` in a
# list-column named after the second argument, i.e. `s`.
nj <- nest_join(o, s, by = "uniprot")
nj[order(nj[["uniprot"]]), "uniprot"]
# Element 7 of each nested match table.
# NOTE(review): which SomaLogic column this is depends on the file layout - confirm.
unlist(lapply(nj$s, "[[", 7))
library(VennDiagram)
# Missing SomaLogic accessions are removed along with P23560.
plist <- list(setdiff(o[["uniprot"]], "P23560"), setdiff(s[["uniprot"]], c(NA, "P23560")))
cnames <- c("Olink", "SomaLogic")
venn.diagram(x = plist, category.names = cnames, filename = "Olink-SomaLogic-Venn-diagram.png",
             imagetype = "png", height = 30, width = 30, units = "cm", resolution = 300, output = TRUE,
             scaled = FALSE)