-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGenerate_GRS.sh
61 lines (45 loc) · 2.07 KB
/
Generate_GRS.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
#
# Generate_GRS.sh
#
# Downloads bgenix, qctool, plink and plink2, builds GNU parallel, pulls the
# score variants out of the UK Biobank imputed BGEN files chromosome by
# chromosome, merges them and runs `plink --score`.
#
# Usage: ./Generate_GRS.sh <score_file>

# Download URL ($1) to file ($2), aborting the script on failure.
#   -f  exit non-zero on an HTTP error instead of saving the error page
#       under the archive name
#   -L  follow redirects
fetch() {
  curl -fL -o "$2" "$1" || {
    printf 'error: could not download %s\n' "$1" >&2
    exit 1
  }
}

fetch https://www.chg.ox.ac.uk/~gav/resources/bgen_v1.1.4-Ubuntu16.04-x86_64.tgz bgen.tgz
fetch https://www.chg.ox.ac.uk/~gav/resources/qctool_v2.0.8-CentOS_Linux7.6.1810-x86_64.tgz qctool.tgz
fetch https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20231211.zip plink.zip
fetch https://s3.amazonaws.com/plink2-assets/plink2_linux_x86_64_20240704.zip plink2.zip

# Unpack the bgen and qctool archives into the current directory.
tar -xvzf bgen.tgz || {
  printf 'error: could not extract bgen.tgz\n' >&2
  exit 1
}
tar -xvzf qctool.tgz || {
  printf 'error: could not extract qctool.tgz\n' >&2
  exit 1
}

# unzip exits with status 1 for mere warnings, so success is judged by the
# presence of the binaries that the rest of the script runs as ./plink and
# ./plink2 rather than by unzip's exit status.
unzip -o plink.zip
unzip -o plink2.zip
for tool in plink plink2; do
  if [[ ! -f "./${tool}" ]]; then
    printf 'error: %s was not extracted from its archive\n' "$tool" >&2
    exit 1
  fi
done
# Build GNU parallel from the upstream source tarball and install it under
# $HOME/.local, then put $HOME/.local/bin at the front of PATH.
curl -fL -o parallel-latest.tar.bz2 \
  https://ftp.gnu.org/gnu/parallel/parallel-latest.tar.bz2 \
  || printf 'warning: could not download the GNU parallel tarball\n' >&2
tar -xjf parallel-latest.tar.bz2 \
  || printf 'warning: could not extract parallel-latest.tar.bz2\n' >&2

# The extracted directory name carries a release stamp; take the first match.
# (Pattern widened from 'parallel-202*' so it keeps matching later releases.)
target_directory=$(find . -type d -name 'parallel-[0-9]*' -print -quit)

if [[ -n "$target_directory" ]]; then
  # Build inside a subshell so the script's own working directory is left
  # untouched: the steps that follow use paths relative to the directory the
  # tool archives were extracted into. (`cd $home` previously expanded to a
  # bare `cd`, i.e. a jump to $HOME, unless a variable named `home` happened
  # to be set.)
  (
    cd "$target_directory" || exit 1
    pwd
    ./configure --prefix="$HOME/.local" && make && make install
  ) || printf 'warning: building GNU parallel in %s failed\n' "$target_directory" >&2
else
  printf 'warning: no parallel-[0-9]* directory found after extraction\n' >&2
fi

export PATH=$HOME/.local/bin:$PATH

# The per-chromosome extraction is driven by `parallel`; stop here if neither
# the fresh build nor an existing installation provides it.
if ! command -v parallel > /dev/null; then
  printf 'error: GNU parallel is not available on PATH\n' >&2
  exit 1
fi
# Fetch the DNAnexus file with this fixed ID into the current directory.
# NOTE(review): presumably this is the score file passed to the script;
# confirm against the project.
dx download file-Gb53yZjJj59bVvzJZy28zK7V

# Put copies of the bgenix and qctool binaries in the current directory so
# they can be invoked as ./bgenix and ./qctool. The qctool directory name
# contains a space, hence the quoting.
cp 'bgen_v1.1.4-Ubuntu16.04-x86_64/bgenix' bgenix
cp 'qctool_v2.0.8-CentOS Linux7.6.1810-x86_64/qctool' qctool

# Shorthand names for the tools. Bash only expands aliases in interactive
# shells (or when `shopt -s expand_aliases` is set), so these matter only if
# the file is sourced; the script itself calls the ./-prefixed binaries.
alias bgenix='bgen_v1.1.4-Ubuntu16.04-x86_64/bgenix'
alias qctool='qctool_v2.0.8-CentOS\ Linux7.6.1810-x86_64/qctool'
alias plink='./plink'
#######################################
# Extract the score-file variants of one chromosome from the UK Biobank
# imputed BGEN file and convert them to a PLINK binary fileset.
# Arguments:
#   $1 - chromosome label; score-file lines that start with this label as a
#        whole word are selected
#   $2 - path to the whitespace-separated score file
# Outputs (written to the current directory):
#   bp<chr>      - field 2 of every selected score-file line
#   ids<chr>     - field 2 of every line of the chromosome's .mfi.txt file
#                  that contains one of the bp<chr> entries as a whole word
#   chr<chr>.bed - qctool output for the variants listed in ids<chr>
# Returns:
#   2 when an argument is missing; otherwise the exit status of the final
#   bgenix | qctool pipeline.
#######################################
process_chromosome() {
  if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
    printf 'usage: process_chromosome <chromosome> <score_file>\n' >&2
    return 2
  fi

  # Locals keep the chromosome and path from leaking into the caller's scope.
  local chrom=$1
  local score_file=$2
  local imputed_prefix="/mnt/project/Bulk/Imputation/UKB imputation from genotype/ukb22828_c${chrom}_b0_v3"

  # -w stops chromosome "1" from also selecting lines that start with "10".
  grep -w -e "^${chrom}" -- "$score_file" | awk '{print $2}' > "bp${chrom}"

  # NOTE(review): -wF matches a bp entry anywhere on the .mfi.txt line, not
  # only in one column, so an entry can also hit an unrelated field; consider
  # a column-restricted join once the file layout is confirmed.
  grep -wFf "bp${chrom}" -- "${imputed_prefix}.mfi.txt" \
    | awk '{print $2}' > "ids${chrom}"

  # bgenix streams the selected variants as BGEN on stdout; qctool reads that
  # stream from stdin (-g -) and writes chr<chr>.bed.
  ./bgenix -g "${imputed_prefix}.bgen" -incl-rsids "ids${chrom}" \
    | ./qctool -g - -filetype bgen -og "chr${chrom}.bed"
}
# Make process_chromosome visible to the shells that GNU parallel spawns.
export -f process_chromosome

# Score file: the first script argument, falling back to the default name
# when none is given. (The default used to be assigned but never read,
# because both consumers below used a bare $1.)
input_file=${1:-Prostate_Cancer_Conti_Score}

# Run chromosomes 1-22, four at a time. parallel joins its command words into
# one string that a shell re-parses, so the path is shell-quoted with %q to
# survive whitespace or glob characters.
seq 1 22 | parallel -j 4 process_chromosome {} "$(printf '%q' "$input_file")"

# One fileset prefix per line (chr1 .. chr22); the file is rewritten from
# scratch on every run.
printf 'chr%s\n' {1..22} > mergelist.txt

./plink --merge-list mergelist.txt --make-bed --out merged_dataset

# Replace the merged .fam: each line after the first two of the chromosome 1
# .sample file becomes "<col1> <col1> 0 0 0 0".
# NOTE(review): this assumes the .sample rows are in the same order as the
# samples in the merged fileset - confirm.
awk 'NR>2 {print $1, $1, 0, 0, 0, 0}' "/mnt/project/Bulk/Imputation/UKB imputation from genotype/ukb22828_c1_b0_v3.sample" > merged_dataset.fam

# Rename every variant with the template @_# so the IDs line up with the
# "<field1>_<field2>" keys written to plink_score below.
./plink2 --bfile merged_dataset --set-all-var-ids '@_#' --make-bed --out merged_dataset_2

# Score table from every line after the first: "<field1>_<field2>", field 4,
# field 5.
awk 'NR>1 {print $1"_"$2,$4,$5}' "$input_file" > plink_score

# Columns 1, 2 and 3 of plink_score are handed to --score in that order.
./plink --bfile merged_dataset_2 --score plink_score 1 2 3 --out score