-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathINTERVAL-lz.awk
60 lines (57 loc) · 1.35 KB
/
INTERVAL-lz.awk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# 30-10-2018 JHZ
# Reformat a whitespace-delimited association-results table into a fixed
# 15-column, tab-separated layout.
#
# Input : first line is a header (discarded and replaced); every later line
#         is one variant record. Column numbers used below follow the header
#         listing in the comments at the end of this file.
# Output: a new header line followed by one line per input record.
#
# Missing or unavailable values are written as "NA".

BEGIN {
    # Set once instead of on every record.
    OFS = "\t"
}

# The input header line is replaced by the output header.
NR == 1 {
    print "SNPID", "CHR", "POS", "STRAND", "N", "EFFECT_ALLELE", "REFERENCE_ALLELE", "CODE_ALL_FQ", "BETA", "SE", "PVAL", "RSQ", "RSQ_IMP", "IMP", "MarkerName"
    next
}

{
    # Chromosome with a single leading zero removed (e.g. "01" -> "1").
    CHR = $3
    sub(/^0/, "", CHR)
    POS = $4

    # Keep identifiers starting with "rs"; otherwise build "chr<CHR>:<POS>".
    if (substr($2, 1, 2) == "rs")
        SNPID = $2
    else
        SNPID = "chr" CHR ":" POS

    STRAND = "NA"

    # Genotype counts (all_AA, all_AB, all_BB) and their total.
    N_AA = $14
    N_AB = $15
    N_BB = $16
    N = N_AA + N_AB + N_BB

    # alleleB is reported as the effect allele, alleleA as the reference.
    EFFECT_ALLELE = $6
    REFERENCE_ALLELE = $5

    # Frequency of alleleB derived from the genotype counts.
    # Guard against N == 0: dividing by zero is a fatal error in gawk/POSIX
    # awk (and yields nan in mawk), which would abort the whole conversion.
    if (N == 0)
        CODE_ALL_FQ = "NA"
    else
        CODE_ALL_FQ = (N_BB + N_AB * 0.5) / N

    BETA = $24
    SE = $25
    PVAL = $22
    RSQ = "NA"
    RSQ_IMP = $23
    IMP = "NA"

    # The original column-2 identifier is kept verbatim as MarkerName.
    print SNPID, CHR, POS, STRAND, N, EFFECT_ALLELE, REFERENCE_ALLELE, CODE_ALL_FQ, BETA, SE, PVAL, RSQ, RSQ_IMP, IMP, $2
}
#1 alternate_ids
#2 rsid, which is treated as missing (.) for non-rsids
#3 chromosome
#4 position
#5 alleleA
#6 alleleB
#7 index
#8 average_maximum_posterior_call
#9 info
#10 cohort_1_AA
#11 cohort_1_AB
#12 cohort_1_BB
#13 cohort_1_NULL
#14 all_AA
#15 all_AB
#16 all_BB
#17 all_NULL
#18 all_total
#19 all_maf
#20 missing_data_proportion
#21 cohort_1_hwe
#22 frequentist_add_pvalue
#23 frequentist_add_info
#24 frequentist_add_beta_1
#25 frequentist_add_se_1
#26 comment
# TODO: add STRAND and IMP (both are currently output as NA)
# The header list above was obtained with the following command:
# gunzip -c /data/jampet/upload-20170920/INTERVAL_inf1_CXCL1___P09341_chr_merged.gz | \
# head -1 | \
# awk '{gsub(/ /, "\n",$0)};1'| awk '{print "#" NR, $1}'