-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsentinels_nold.sh
101 lines (95 loc) · 3.04 KB
/
sentinels_nold.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# 26-8-2019 JHZ
module load bedtools/2.27.1
export tag=_nold
function pgz()
# 1. extract all significant SNPs
{
ls METAL/*-1.tbl.gz | \
sed 's|METAL/||g;s/-1.tbl.gz//g' | \
parallel -j3 -C' ' '
(
# zcat METAL/{}-1.tbl.gz | head -1
zcat METAL/{}-1.tbl.gz | awk "
function abs(x)
{
if (x<0) return -x;
else return x;
}
NR>1 && length(\$4)==1 && length(\$5)==1 && abs(\$10/\$11)>=6.219105" | sort -k1,1n -k2,2n
) | gzip -f > sentinels/{}.p.gz'
}
function _HLA()
# 2. handling HLA
{
for p in $(ls METAL/*-1.tbl.gz | sed 's|METAL/||g;s/-1.tbl.gz//g')
do
(
zcat METAL/${p}-1.tbl.gz | head -1 | awk -vOFS="\t" '{$1="Chrom";$2="Start" "\t" "End";print}'
zcat sentinels/${p}.p.gz | \
awk -vOFS="\t" '{$1="chr" $1; start=$2-1;$2=start "\t" $2;print}' | \
awk '!($1 == "chr6" && $3 >= 25392021 && $3 < 33392022)'
zcat sentinels/${p}.p.gz | \
awk -vOFS="\t" '{$1="chr" $1; start=$2-1;$2=start "\t" $2;print}' | \
awk '$1 == "chr6" && $3 >= 25392021 && $3 < 33392022' | \
sort -k13,13g | \
awk 'NR==1'
) > work/${p}${tag}.p
export lines=$(wc -l work/${p}${tag}.p | cut -d' ' -f1)
if [ $lines -eq 1 ]; then
echo removing ${p}${tag} with $lines lines
rm work/${p}${tag}.p
fi
done
}
function sentinels()
# 3. find sentinels
{
module load intel/redist/2019 intel/perflibs/64/2019 gcc/5.4.0 R/3.3.1-ICC-MKL
export R_LIBS=/home/jinhua/R:$R_LIBS
for prot in $(ls work/*${tag}.p | sed 's|work/||g;s|'"$tag"'.p||g')
do
export prot=${prot}
echo ${prot}${tag}
R --no-save -q < tryggve/sentinels.R > work/${prot}${tag}.o
done
cd work
(
awk -vOFS="," 'BEGIN{print "prot","CHR","BP","SNP","l","u","d","log10p","Groupid", "Type"}'
awk -vFS="," -vOFS="," '!/option/{
SNPID=$2
split(SNPID,a,":")
split(a[2],b,"_")
gsub(/chr/,"",a[1])
$1=$1 "," a[1] "," b[1]
print
}' *${tag}.o
) | sed 's/,/ /g' > INF1${tag}.sentinels
cd -
}
function cvt()
# 4. cis.vs.classification, requiring R-3.5.0 at TRYGGVE
{
cd work
R --no-save -q <<\ \ END
require(gap)
tag <- Sys.getenv("tag")
rt <- paste0("INF1",tag)
clumped <- read.table(paste0(rt,".sentinels"),as.is=TRUE,header=TRUE)
hits <- merge(clumped[c("CHR","BP","SNP","prot","log10p")],inf1[c("prot","uniprot")],by="prot")
names(hits) <- c("prot","Chr","bp","SNP","log10p","uniprot")
cistrans <- cis.vs.trans.classification(hits,inf1,"uniprot")
cis.vs.trans <- with(cistrans,data)
write.table(cis.vs.trans,file=paste0(rt,".sentinels.cis.vs.trans"),row.names=FALSE,quote=TRUE)
cis <- subset(cis.vs.trans,cis.trans=="cis")["SNP"]
write.table(cis,file=paste0(rt,".sentinels.cis"),col.names=FALSE,row.names=FALSE,quote=FALSE)
sink(paste0(rt,".sentinels.out"))
with(cistrans,table)
sink()
with(cistrans,total)
pdf(paste0(rt,".sentinels.circlize.pdf"))
circos.cis.vs.trans.plot(hits=paste0(rt,".sentinels"),inf1,"uniprot")
dev.off()
END
cd -
}
for cmd in pgz _HLA sentinels cvt; do $cmd; done