-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsnpid_rsid.sb
86 lines (80 loc) · 3.45 KB
/
snpid_rsid.sb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
#SBATCH --account=PETERS-SL3-CPU
#SBATCH --ntasks=1
#SBATCH --job-name=_snpid_rsid
#SBATCH --time=12:00:00
#SBATCH --partition=skylake
#SBATCH --array=1-180%15
#SBATCH --mem=128800
#SBATCH --output=/rds/user/jhz22/hpc-work/work/_snpid_rsid_%A_%a.out
#SBATCH --error=/rds/user/jhz22/hpc-work/work/_snpid_rsid_%A_%a.err
#SBATCH --export ALL
export job=${SLURM_ARRAY_TASK_ID}
export TMPDIR=/rds/user/jhz22/hpc-work/work
export INF=/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF
export dir=${INF}/work
export list=${dir}/INF1.merge
export Start=$(awk -v job=${job} 'NR==job+1{print $2}' ${list})
export End=$(awk -v job=${job} 'NR==job+1{print $3}' ${list})
export p=$(awk -v job=${job} 'NR==job+1{print $5}' ${list})
export r=$(awk -v job=${job} 'NR==job+1{print $6}' ${list})
export pr=${p}-${r}
export chr=$(awk -v job=${job} 'NR==job+1{print $8}' ${list})
export pos=$(awk -v job=${job} 'NR==job+1{print $9}' ${list})
export flanking=1e6
export start=$(awk -vpos=${Start} -vflanking=${flanking} 'BEGIN{start=pos-flanking;if(start<0) start=0;print start}')
export end=$(awk -vpos=${End} -vflanking=${flanking} 'BEGIN{print pos+flanking}')
export snpid_rsid=${INF}/work/INTERVAL.rsid
export LDREF=INTERVAL
function prune()
{
module load plink/2.00-alpha
plink2 --bfile ${INF}/${LDREF}/nodup/${pr} --geno 0.1 --mind 0.1 --maf 0.01 --indep-pairwise 1000kb 1 0.1 --out ${dir}/${pr}
export pos=$(awk -v r=${r} 'BEGIN{split(r,snpid,":");split(snpid[2],pos,"_");print pos[1]}')
if [ $(grep ${r} ${dir}/${pr}.prune.in | wc -l) -eq 0 ]; then
export i=$(awk -v pos=${pos} '
function abs(x) {if (x<0) return -x; else return x;}
{split($1,snpid,":");split(snpid[2],p,"_");print $1,abs(p[1]-pos)}' ${dir}/${pr}.prune.in | \
sort -r -k2,2g | awk 'NR==1 {print $1}')
sed -i 's/'"$i"'/'"$r"'/g' ${dir}/${pr}.prune.in
fi
sort -k1,1 ${dir}/${pr}.prune.in > $TMPDIR/${pr}.prune.in
awk 'NR > 1' ${dir}/${p}.ma | \
sort -k1,1 | \
join -j1 - $TMPDIR/${pr}.prune.in | \
awk '{print $1}' > ${dir}/${pr}.prune
join -a2 -e "NA" <(sort -k1,1 ${snpid_rsid}) ${pr}.prune -o2.1,1.2 > ${pr}.prune.rsid
cut -f2 ${pr}.rsid > ${pr}
R --no-save -q <<\ \ END
f <- Sys.getenv("pr")
snpid_rsid <- read.table(paste0(f,".prune.rsid"), as.is=TRUE, col.names=c("rsid","name"), fill=TRUE)
nmiss <- with(snpid_rsid,is.na(name))
snpid_rsid <- within(snpid_rsid, {name[nmiss] <- make.names(rsid[nmiss])})
save(snpid_rsid, file=paste0(f,".prune.rda"))
END
}
function unprune
{
bgenix -g ${LDREF}/nodup/${pr}.bgen -list 2>&1 | awk 'NR>9 && NF==7' | cut -f1,2 | sort -k2,2 > work/${pr}.rsid
cut -f2 ${pr}.rsid > ${pr}
R --no-save -q <<\ \ END
f <- Sys.getenv("pr")
snpid_rsid <- read.table(paste0(f,".rsid"), as.is=TRUE, col.names=c("name","rsid"), fill=TRUE)
nmiss <- with(snpid_rsid,is.na(name))
snpid_rsid <- within(snpid_rsid, {name[nmiss] <- make.names(rsid[nmiss])})
save(snpid_rsid, file=paste0(f,".rda"))
END
}
cd work
prune
cd -
# function snpid_rsid_INTERVAL()
# awk -vchr=$chr -vstart=$start -vend=$end '($1==chr && $4>=start && $4 <=end){print $2}' ${dir}/INTERVAL.bim
# {
# awk '{print $2}' ${INF}/INTERVAL/nodup/${pr}_snpid.bim
# }
# snpid_rsid_INTERVAL | \
# sort -k1,1 > ${pr}
# join -a2 -e "NA" ${snpid_rsid} ${pr} -o2.1,1.2 > ${pr}.rsid
# mkdir snpid_rsid
# sed '1d' INF1.merge | cut -f5,6 | tr '\t' ' ' | parallel -C' ' 'cp {1}-{2}.prune* {1}-{2}.log snpid_rsid'