-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathfusion_twas.sb
136 lines (128 loc) · 5.81 KB
/
fusion_twas.sb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/bash
#SBATCH --job-name=FUSION_TWAS
#SBATCH --account CARDIO-SL0-CPU
#SBATCH --partition cardio
#SBATCH --qos=cardio
#SBATCH --array=53
#SBATCH --mem=40800
#SBATCH --time=5-00:00:00
#SBATCH --output=/rds/user/jhz22/hpc-work/work/_fusion_twas_%A_%a.out
#SBATCH --error=/rds/user/jhz22/hpc-work/work/_fusion_twas_%A_%a.err
#SBATCH --export ALL
export FUSION=${HPC_WORK}/fusion_twas
export EWAS_fusion=${HPC_WORK}/EWAS-fusion
export TMPDIR=/rds/user/jhz22/hpc-work/work
export job=$SLURM_ARRAY_TASK_ID
export prot=$(awk 'NR==ENVIRON["job"]' ${INF}/csd3/prot.list)
export trait=ieu-a-31
function init()
{
ln -sf ${FUSION}/gcta_nr_robust ${HPC_WORK}/bin/gcta_nr_robust
ln -sf /usr/local/Cluster-Apps/plink/2.00-alpha/plink2 ${HPC_WORK}/bin/plink2
if [ ! -d ${INF}/FUSION ]; then mkdir ${INF}/FUSION; fi
if [ ! -d ${INF}/FUSION/plink ]; then mkdir ${INF}/FUSION/plink; fi
if [ ! -d ${INF}/FUSION/tmp ]; then mkdir ${INF}/FUSION/tmp; fi
parallel --env FUSION --env INF -C' ' '
(
awk -vOFS="\t" "
{
if (\$5 < \$6) snpid=\"chr\"\$1\":\"\$4\"_\"\$5\"_\"\$6;
else snpid=\"chr\"\$1\":\"\$4\"_\"\$6\"_\"\$5
print snpid, \$2
}" ${FUSION}/LDREF/1000G.EUR.{}.bim > ${INF}/FUSION/1000G.EUR.{}.snpid
cat ${INF}/FUSION/1000G.EUR.{}.snpid
)' ::: $(seq 22) > ${INF}/FUSION/1000G.EUR.snpid
export s=/rds/project/jmmh2/rds-jmmh2-projects/olink_proteomics/scallop/INF/INTERVAL/o5000-inf1-outlier_in-r2.sample
cut -d' ' -f1-2,29- ${s} | sed '1,2d' > ${INF}/FUSION/INTERVAL.pheno
cut -d' ' -f1-2,4-28 ${s} | sed '1,2d' > ${INF}/FUSION/INTERVAL.covars
grep NA ${INF}/FUSION/INTERVAL.pheno | cut -d' ' -f1,2 > ${INF}/FUSION/remove.id
plink2 --bfile ${INF}/INTERVAL/cardio/INTERVAL --pheno ${INF}/FUSION/INTERVAL.pheno \
--extract ${INF}/FUSION/1000G.EUR.snpid --make-bed --out ${INF}/FUSION/INTERVAL
seq 22 | while read chr;
do
echo ${chr}
plink2 --bfile ${INF}/FUSION/INTERVAL --chr ${chr} \
--make-bed --out ${INF}/FUSION/INTERVAL.${chr}
done
Rscript -e "write.table(pQTLtools::inf1,file=file.path(Sys.getenv('INF'),'FUSION','INF1.tsv'),row.names=FALSE,quote=FALSE,sep='\t')"
# sed '1d' ${INF}/FUSION/INF1.tsv | cut -f2,5-8 --output-delimiter=' ' | sort -k2,2 | \
# join -o1.1,1.2,1.3,1.4,2.1,2.2,2.3,2.4 -e NA -a2 -12 -24 - <(sort -k4,4 ${INF}/csd3/glist-hg19)
(
echo SNP A1 A2 N CHISQ Z
bcftools query -f '%CHROM %POS %ALT %REF [%ES] [%SE] [%SS]\n' ${INF}/OpenGWAS/ieu-a-31.vcf.gz | \
awk '
{
if($3<$4) snpid="chr"$1":"$2"_"$3"_"$4;
else snpid="chr"$1":"$2"_"$4"_"$3
print snpid, $3, $4, $7, ($5/$6)^2, $5/$6
}'
) | \
tr ' ' '\t' > ${INF}/FUSION/${trait}.sumstats
bcftools query -f "%ID\t%ALT\t%REF\t[%ES]\t[%SE]\n" ${INF}/METAL/gwas2vcf/IL.12B.vcf.gz | \
awk -v OFS="\t" '{print $1,$2,$3,$4/$5}' > ${EWAS_fusion}/${prot}
}
function compute_weights_parallel()
{
cd ${INF}/FUSION/tmp
ln -sf . output
sed '1d' ${INF}/FUSION/INF1.tsv | cut -f2,5-8 --output-delimiter=' ' | sort -k2,2n | \
parallel --env FUSION --env INF -C' ' '
export prot={1}
export gene={2}
export chr={3}
export P0=$(awk -v start={4} "BEGIN{if(start<1e6) print 1; else print start-1e6}")
export P1=$(expr {5} + 1000000)
export col=$(awk "\$1 == ENVIRON[\"prot\"] {print NR}" ${INF}/csd3/prot.list)
plink2 --bfile ${INF}/FUSION/INTERVAL --pheno ${INF}/FUSION/INTERVAL.pheno --pheno-col-nums ${col} \
--make-bed --out ${INF}/FUSION/plink/${prot}-${chr}-${P0}-${P1}-plink --allow-no-sex \
--chr ${chr} --from-bp ${P0} --to-bp ${P1} --maf 0.01 --remove ${INF}/FUSION/remove.id > /dev/null
Rscript ${FUSION}/FUSION.compute_weights.R --verbose 2 --save_hsq \
--bfile ${INF}/FUSION/plink/${prot}-${chr}-${P0}-${P1}-plink \
--covar ${INF}/FUSION/INTERVAL.covars \
--tmp ${prot}-${chr}-${P0}-${P1} \
--out ${INF}/FUSION/${prot}-${chr}-${P0}-${P1} \
--hsq_set 0.1 --models top1,blup,bslmm,lasso,enet
'
}
function compute_weights()
{
cd ${INF}/FUSION/tmp
ln -sf . output
grep -w ${prot} ${INF}/work/INF1.merge | cut -f8,9 --output-delimiter=' ' | while read chr pos;
do
export P0=$(awk -v pos=${pos} "BEGIN{if(pos<1e6) print 1; else print pos-1e6}")
export P1=$(expr ${pos} + 1000000)
plink2 --bfile ${INF}/FUSION/INTERVAL --pheno ${INF}/FUSION/INTERVAL.pheno --pheno-col-nums ${job} \
--make-bed --out ${INF}/FUSION/plink/${prot}-${chr}-${P0}-${P1}-plink --allow-no-sex \
--chr ${chr} --from-bp ${P0} --to-bp ${P1} --maf 0.01 --remove ${INF}/FUSION/remove.id > /dev/null
Rscript ${FUSION}/FUSION.compute_weights.R --verbose 2 --save_hsq \
--bfile ${INF}/FUSION/plink/${prot}-${chr}-${P0}-${P1}-plink \
--covar ${INF}/FUSION/INTERVAL.covars \
--tmp ${prot}-${chr}-${P0}-${P1} \
--out ${INF}/FUSION/${prot}-${chr}-${P0}-${P1} \
--hsq_set 0.8 --models top1,blup,bslmm,lasso,enet
done
}
function assoc_test()
{
grep -w ${prot} ${INF}/work/INF1.merge | cut -f8,9 --output-delimiter=' ' | while read chr pos;
do
export P0=$(awk -v pos=${pos} "BEGIN{if(pos<1e6) print 1; else print pos-1e6}")
export P1=$(expr ${pos} + 1000000)
(
echo PANEL WGT ID CHR P0 P1 N
echo ${prot} ${prot}-${chr}-${P0}-${P1}.wgt.RDat ${prot} ${chr} ${P0} ${P1} 34652
) | \
tr ' ' '\t' > ${INF}/FUSION/${prot}/${prot}-${chr}-${P0}-${P1}.pos
Rscript ${FUSION}/FUSION.assoc_test.R \
--sumstats ${INF}/FUSION/${trait}.sumstats \
--weights ${INF}/FUSION/${prot}/${prot}-${chr}-${P0}-${P1}.pos \
--weights_dir ${INF}/FUSION/${prot} \
--ref_ld_chr ${INF}/FUSION/INTERVAL. \
--chr ${chr} \
--out ${INF}/FUSION/${trait}.${chr}-${P0}-${P1}.dat
done
cd ${INF}/FUSION
cut -f8- ${trait}*dat | awk 'NR==1 || !/TWAS/' > ${trait}.tsv
}
compute_weights