-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfinemap.sb
129 lines (116 loc) · 4.19 KB
/
finemap.sb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
#SBATCH --account=PETERS-SL3-CPU
#SBATCH --ntasks=1
#SBATCH --job-name=_finemap
#SBATCH --time=12:00:00
#SBATCH --partition=skylake
#SBATCH --array=1-180%15
#SBATCH --mem=128800
#SBATCH --output=/rds/user/jhz22/hpc-work/work/_finemap_%A_%a.out
#SBATCH --error=/rds/user/jhz22/hpc-work/work/_finemap_%A_%a.err
#SBATCH --export ALL
export job=${SLURM_ARRAY_TASK_ID}
export TMPDIR=/rds/user/jhz22/hpc-work/work
export INF=/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF
export list=${INF}/work/INF1.merge
export p=$(awk -v job=${job} 'NR==job+1{print $5}' ${list})
export r=$(awk -v job=${job} 'NR==job+1{print $6}' ${list})
export chr=$(awk -v job=${job} 'NR==job+1{print $8}' ${list})
export pos=$(awk -v job=${job} 'NR==job+1{print $9}' ${list})
export Start=$(awk -v job=${job} 'NR==job+1{print $2}' ${list})
export End=$(awk -v job=${job} 'NR==job+1{print $3}' ${list})
export flanking=1e6
export start=$(awk -vpos=${Start} -vflanking=${flanking} 'BEGIN{start=pos-flanking;if(start<0) start=0;print start}')
export end=$(awk -vpos=${End} -vflanking=${flanking} 'BEGIN{print pos+flanking}')
export pr=${p}-${r}
export sumstats=${INF}/METAL/${p}-1.tbl.gz
export n=10000
export N=4994
export LDREF=INTERVAL
cd work
# z0
function sumstats_z0()
{
(
zcat ${sumstats} | head -1 | awk '{print $3,$1,$2,$5,$4,"MAF",$10,$11}'
zcat ${sumstats} | \
awk 'NR > 1' | \
sort -k3,3 | \
awk -vchr=${chr} -vstart=${start} -vend=${end} -vn=$n '
{
if ($1==chr && $2 >= start && $2 < end) {
if ($6 < 0.5) maf = $6; else maf = 1-$6
if (maf > 0 && maf <= 0.5 && $10 != "NA" && $11 != "NA" && $NF >= n)
print $3, $1, $2, toupper($5), toupper($4), maf, $10, $11
}
} ' | \
join ${pr} -
) > ${pr}.z0
}
function ma_z0()
{
(
echo MarkerName Chromosome Position Allele2 Allele1 MAF Effect StdErr
# SNP A1 A2 freq b se p N
awk -vn=${n} 'NR > 1 {
if ($4 < 0.5) maf = $4; else maf = 1-$4
snpid=$1;
gsub(/chr/,"",snpid);gsub(/:|_/," ",snpid)
split(snpid,chrpos," ");
if (maf > 0 && maf <= 0.5 && $5 != "NA" && $6 != "NA" && $NF >= n)
print $1, chrpos[1], chrpos[2], toupper($3), toupper($2), maf, $5, $6
}' ${INF}/sentinels/${p}.ma | \
sort -k1,1 | \
join ${pr}-NLRP2 -
) > ${pr}.z0
}
sumstats_z0
# bgen and bgi
awk 'NR > 1{print $1} ' ${pr}.z0 > ${pr}.incl
qctool -g ${INF}/${LDREF}/nodup/${pr}.bgen -og ${pr}.bgen -ofiletype bgen -incl-rsids ${pr}.incl
bgenix -g ${pr}.bgen -index -clobber
# z
(
join <(awk 'NR > 1' ${pr}.z0 | cut -d' ' -f1,4,5 | sort -k1,1) \
<(bgenix -g ${pr}.bgen -list 2>&1 | awk 'NR>9 && NF==7'| cut -f2,6,7 | sort -k1,1) | \
awk '{print $1, ($2!=$4)}'
) > ${pr}.flip
(
awk 'BEGIN {print "rsid", "chromosome", "position", "allele1", "allele2", "maf", "beta", "se", "flip"}'
join <(awk 'NR > 1' ${pr}.z0 | sort -k1,1) ${pr}.flip | awk '{if($9==1) {t=$4;$4=$5;$5=t};$7=-$7; print}'
) > ${pr}.z
(
echo "z;ld;snp;config;cred;log;n_samples"
echo "${pr}.z;${pr}.ld;${pr}.snp;${pr}.config;${pr}.cred;${pr}.log;$N"
) > ${pr}.master
(
echo "z;bgen;bgi;bcor;ld;snp;config;cred;log;n_samples"
echo "${pr}.z;${pr}.bgen;${pr}.bgen.bgi;${pr}.bcor;${pr}.ld;${pr}.snp;${pr}.config;${pr}.cred;${pr}.log;$N"
) > ${pr}.master2
# LD-based finemapping
rm -rf ${pr}.cred* ${pr}.log_sss ${pr}.snp ${pr}.config
ldstore_v1.1 --bcor ${pr}-1 --bgen ${pr}.bgen --ld-n-samples-avail-prop 0.95 --ld-thold 0.01 --n-threads 1
ldstore_v1.1 --bcor ${pr}-1 --merge 1
ldstore_v1.1 --bcor ${pr}-1 --matrix ${pr}.ld
rm ${pr}-1_*
mv ${pr}-1 ${pr}.bcor
export n_causal_snps=10
if [ -f slct-${LDREF}.list ]; then
export greped=$(grep ${pr} slct-${LDREF}.list | cut -d' ' -f2)
if [ ${greped} -gt 0 ] && [ ${greped} -lt 10 ]; then
export n_causal_snps=${greped}
fi
fi
function ld_finemap()
## ldstore_v2.0b and finemap_v1.3.1/finemap_v1.4
{
ldstore_v2.0b --in-files ${pr}.master2 --write-bcor
ldstore_v2.0b --in-files ${pr}.master2 --bcor-to-text
finemap_v1.4 --sss --in-files ${pr}.master --log --n-causal-snps ${n_causal_snps} --corr-group 0.7 --group-snps
}
## finemap_v1.3.1
finemap_v1.3.1 --sss --in-files ${pr}.master --log --n-causal-snps ${n_causal_snps} --corr-group 0.7 --group-snps
# xlsx
gzip -f ${pr}.ld
R -q --no-save < ${INF}/csd3/finemap.R > ${pr}-finemap.log
cd -