Skip to content

Commit 8682b1b

Browse files
rwk-unilpd3
authored andcommitted
Invert the phase of diploid genotypes with +setGT -t a -n i
1 parent 0ce7689 commit 8682b1b

File tree

3 files changed

+42
-0
lines changed

3 files changed

+42
-0
lines changed

plugins/setGT.c

+24
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ args_t *args = NULL;
8282
#define GT_CUSTOM (1<<10)
8383
#define GT_X_VAF (1<<11)
8484
#define GT_RAND (1<<12)
85+
#define GT_INVPHASE (1<<13)
8586

8687
#define MINOR_ALLELE -1
8788
#define MAJOR_ALLELE -2
@@ -113,6 +114,7 @@ const char *usage(void)
113114
" X .. allele with bigger read depth as determined from FMT/AD\n"
114115
" p .. phase genotype (0/1 becomes 0|1)\n"
115116
" u .. unphase genotype and sort by allele (1|0 becomes 0/1)\n"
117+
" i .. invert the genotype phase (0|1 becomes 1|0)\n"
116118
"Usage: bcftools +setGT [General Options] -- [Plugin Options]\n"
117119
"Options:\n"
118120
" run \"bcftools plugin\" for a list of common options\n"
@@ -235,6 +237,7 @@ int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
235237
if ( strchr(optarg,'X') ) args->new_mask |= GT_X_VAF;
236238
if ( strchr(optarg,'p') ) args->new_mask |= GT_PHASED;
237239
if ( strchr(optarg,'u') ) args->new_mask |= GT_UNPHASED;
240+
if ( strchr(optarg,'i') ) args->new_mask |= GT_INVPHASE;
238241
if ( !strncmp(optarg,"c:",2) ) { args->new_mask |= GT_CUSTOM; args->custom.gt_str = optarg; }
239242
if ( args->new_mask==0 ) error("Unknown parameter to --new-gt: %s\n", optarg);
240243
break;
@@ -364,6 +367,21 @@ static inline int unphase_gt(int32_t *ptr, int ngts)
364367
return changed;
365368
}
366369

370+
// invert the phase for a single sample, ngts is the ploidy
371+
static inline int invert_phase_gt(int32_t *ptr, int ngts)
372+
{
373+
int gt;
374+
if ( ngts!=2 ) return 0; // don't invert the phase for sample ploidy != 2
375+
if ( ptr[0]==bcf_int32_vector_end || ptr[1]==bcf_int32_vector_end ) return 0;
376+
gt = bcf_gt_allele(ptr[1]);
377+
if (bcf_gt_is_phased(ptr[1]))
378+
ptr[1] = bcf_gt_phased(bcf_gt_allele(ptr[0]));
379+
else
380+
ptr[1] = bcf_gt_unphased(bcf_gt_allele(ptr[0]));
381+
ptr[0] = bcf_gt_unphased(gt); // first gt doesn't carry the phased flag per BCF specifications
382+
return 2;
383+
}
384+
367385
// sets GT for a single sample, ngts is the ploidy, allele
368386
static inline int set_gt(int32_t *ptr, int ngts, int allele)
369387
{
@@ -543,6 +561,8 @@ bcf1_t *process(bcf1_t *rec)
543561
}
544562
else if ( args->new_mask & GT_X_VAF )
545563
changed += set_gt(ptr, ngts, args->xarr[i]);
564+
else if ( args->new_mask & GT_INVPHASE )
565+
changed += invert_phase_gt(ptr, ngts);
546566
else
547567
changed += set_gt(ptr, ngts, args->new_gt);
548568
}
@@ -584,6 +604,8 @@ bcf1_t *process(bcf1_t *rec)
584604
}
585605
else if ( args->new_mask & GT_X_VAF )
586606
changed += set_gt(args->gts + i*ngts, ngts, args->xarr[i]);
607+
else if ( args->new_mask & GT_INVPHASE )
608+
changed += invert_phase_gt(args->gts, ngts);
587609
else
588610
changed += set_gt(args->gts + i*ngts, ngts, args->new_gt);
589611
}
@@ -620,6 +642,8 @@ bcf1_t *process(bcf1_t *rec)
620642
}
621643
else if ( args->new_mask & GT_X_VAF )
622644
changed += set_gt(ptr, ngts, args->xarr[i]);
645+
else if ( args->new_mask & GT_INVPHASE )
646+
changed += invert_phase_gt(ptr, ngts);
623647
else
624648
changed += set_gt(ptr, ngts, args->new_gt);
625649
}

test/setGT.2.1.out

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=20,length=81195210>
4+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
5+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depth">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00100 HG00101 HG00102 HG00200 HG00201 HG00202
7+
20 302 . T TA 999 . . GT:AD 1|0:55,5 0|1:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
8+
20 828 . T C 999 . . GT:AD 1|0:55,5 1|1:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
9+
20 834 . G A 999 . . GT:AD 1|0:55,5 1|0:55,5 1|1:5,55 0|1:55,5 0|1:55,5 1|1:5,55
10+
20 1665 . T C 999 . . GT:AD 0|1:55,5 1|0:55,5 1|0:5,55 1|0:55,5 0|1:55,5 1|1:5,55
11+
20 1869 . A T 999 . . GT:AD 1|0:55,5 0|0:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
12+
20 2041 . G A 999 . . GT:AD 1|0:55,5 1|1:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
13+
20 2220 . G A 999 . . GT:AD 1|0:55,5 0|1:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
14+
20 2564 . A G 999 . . GT:AD 1|0:55,5 1|1:55,5 1|1:5,55 1|0:55,5 0|1:55,5 1|1:5,55
15+
20 3104 . C T 999 . . GT:AD 0|0:5,5 0|0:5,5 1|0:5,5 1|0:5,5 0|1:5,5 1|1:5,5
16+
20 3587 . G A 999 . . GT:AD 1|0:5,5 1|0:5,5 1|1:5,5 1|0:5,5 0|1:5,5 1|1:5,5
17+
20 3936 . A G 999 . . GT:AD 1|0:1,2 1|1:3,4 1|1:5,6 1|0:7,8 0|1:9,1 1|1:2,3

test/test.pl

+1
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@
633633
run_test(\&test_vcf_plugin,$opts,in=>'setGT',out=>'setGT.1.out',cmd=>'+setGT --no-version',args=>'-- -t q -n 0 -i \'GT~"." && FMT/DP=30 && GQ=150\'');
634634
run_test(\&test_vcf_plugin,$opts,in=>'setGT.2',out=>'setGT.2.out',cmd=>'+setGT --no-version',args=>'-- -t q -n . -i \'GT[@{QPATH}/setGT.samples.txt]="het"\'');
635635
run_test(\&test_vcf_plugin,$opts,in=>'setGT.2',out=>'setGT.3.out',cmd=>'+setGT --no-version',args=>'-- -t q -n . -i \'GT[@{QPATH}/setGT.samples.txt]="het" & binom(AD[@{QPATH}/setGT.samples.txt])<0.1\'');
636+
run_test(\&test_vcf_plugin,$opts,in=>'setGT.2',out=>'setGT.2.1.out',cmd=>'+setGT --no-version',args=>'-- -t a -n i');
636637
run_test(\&test_vcf_plugin,$opts,in=>'setGT.3',out=>'setGT.3.1.out',cmd=>'+setGT --no-version',args=>'-- -t a -n pM');
637638
run_test(\&test_vcf_plugin,$opts,in=>'setGT.3',out=>'setGT.3.2.out',cmd=>'+setGT --no-version',args=>'-- -t a -n pm');
638639
run_test(\&test_vcf_plugin,$opts,in=>'setGT.3',out=>'setGT.3.3.out',cmd=>'+setGT --no-version',args=>'-- -t a -n c:1');

0 commit comments

Comments
 (0)