-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathINTERVAL-fm.sb
82 lines (71 loc) · 2.64 KB
/
INTERVAL-fm.sb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
#SBATCH --ntasks=1
#SBATCH --job-name=_finemap
#SBATCH --time=12:00:00
#SBATCH --partition=medium
#SBATCH --array=1-114%5
#SBATCH --mem=128800
#SBATCH --output=INTERVAL/_finemap_%A_%a.out
#SBATCH --error=INTERVAL/_finemap_%A_%a.err
#SBATCH --export ALL
. /etc/profile.d/modules.sh
module load default-cardio
module load slurm
module load use.own
export TMPDIR=/scratch/jhz22/tmp
export flanking=1e6
export job=${SLURM_ARRAY_TASK_ID}
export p=$(awk 'NR==job+1{print $1}' job=${job} INTERVAL/INTERVAL_nold.sentinels)
export chr=$(awk 'NR==job+1{print $2}' job=${job} INTERVAL/INTERVAL_nold.sentinels)
export pos=$(awk 'NR==job+1{print $3}' job=${job} INTERVAL/INTERVAL_nold.sentinels)
export r=$(awk 'NR==job+1{print $4}' job=${job} INTERVAL/INTERVAL_nold.sentinels)
export start=$(awk -vpos=${pos} -vflanking=${flanking} 'BEGIN{start=pos-flanking;if(start<0) start=0;print start}')
export end=$(awk -vpos=${pos} -vflanking=${flanking} 'BEGIN{print pos+flanking}')
export rt=/scratch/jhz22/data/INTERVAL/INTERVAL
export sumstats=/scratch/jhz22/INF/sumstats/INTERVAL
export pr=${p}-${r}
cd INTERVAL
# z0
(
awk 'BEGIN {print "rsid", "chromosome", "position", "allele1", "allele2", "maf", "beta", "se"}'
zcat ${sumstats}/INTERVAL.${p}.gz | \
awk 'NR > 1' | \
sort -k1,1 | \
awk 'BEGIN {prev="";} {key=$1; if(key==prev) next;print;prev=key}' | \
awk -vchr=${chr} -vstart=${start} -vend=${end} '{
if ($2==chr && $3 >= start && $3 < end) {
if ($8 < 0.5) maf = $8; else maf = 1-$8
if (maf > 0 && maf <= 0.5 && $9 != "NA" && $10 != "NA") print $1, $2, $3, $6, $7, maf, $9, $10
}
} '
) > ${pr}.z0
awk 'NR > 1 {print $1} ' ${pr}.z0 > ${pr}.incl
cp ${pr}.z0 ${pr}.z
# bgen
qctool -g ${rt}.bed -filetype binary_ped -og ${pr}.bgen -ofiletype bgen -incl-rsids ${pr}.incl
# bgi
bgenix -g ${pr}.bgen -index -clobber
ln -sf ${pr}.bgen.bgi ${pr}.bgi
# master
(
echo "z;bgen;bgi;dose;snp;config;cred;log;n_samples"
echo "${pr}.z;${pr}.bgen;${pr}.bgi;${pr}.dose;${pr}.snp;${pr}.config;${pr}.cred;${pr}.log;4994"
) > ${pr}.master
# finemap 1.3
finemap --sss --in-files ${pr}.master | awk 'NR>41{print $6}' | sed "s/'//g" > ${pr}.excl
if [ $(wc -l ${pr}.excl | awk '{print $1}') -gt 0 ]; then
# bgen
qctool -g ${rt}.bed -filetype binary_ped -og ${pr}.bgen -ofiletype bgen -incl-rsids ${pr}.incl -excl-rsids ${pr}.excl
# bgi
bgenix -g ${pr}.bgen -index -clobber
ln -sf ${pr}.bgen.bgi ${pr}.bgi
(
awk 'BEGIN{print "rsid", "chromosome", "position", "allele1", "allele2", "maf", "beta", "se"}'
awk 'NR > 1' ${pr}.z0 | \
sort -k1,1 | \
join -v1 - ${pr}.excl
) > ${pr}.z
# finemap 1.3
finemap --sss --in-files ${pr}.master
fi
cd -