Skip to content

Commit 2ebe686

Browse files
daviesrobpd3
authored andcommitted
Fix handling of skipped lines in bcftools norm -c x
Make normalize_line() report when it skips a line due to a ref mismatch so that normalize_vcf() knows that it has to go on to the next one. Failing to do this caused a crash when the first input line was skipped because normalize_vcf() attempted to access data via args->rbuf when nothing had been added. The input file for the `bcftools norm -c x` did not catch this because the line it skipped was towards the end of the file. An extra record has been added at the front, which does trigger the issue on the original code.
1 parent 57bb27b commit 2ebe686

File tree

2 files changed

+17
-10
lines changed

2 files changed

+17
-10
lines changed

test/norm.vcf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
3636
##INFO=<ID=END,Number=1,Type=Integer,Description="End position">
3737
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
38+
1 60 . A T 999 PASS AN=4;AC=4 GT 0/0 0/1
3839
1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
3940
2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2;END=192 GT:DP 1/0:1 1/0:1
4041
2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4;END=114 GT:DP 1/1:1 1/1:1

vcfnorm.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,8 +2328,8 @@ static void destroy_data(args_t *args)
23282328
if ( args->mseq ) free(args->seq);
23292329
}
23302330

2331-
2332-
static void normalize_line(args_t *args, bcf1_t *line)
2331+
// return 0 on success, -1 if line was skipped (due to ref mismatch)
2332+
static int normalize_line(args_t *args, bcf1_t *line)
23332333
{
23342334
if ( args->fai )
23352335
{
@@ -2346,7 +2346,7 @@ static void normalize_line(args_t *args, bcf1_t *line)
23462346
if ( ret==ERR_REF_MISMATCH && args->check_ref & CHECK_REF_SKIP )
23472347
{
23482348
args->nskipped++;
2349-
return;
2349+
return -1;
23502350
}
23512351
if ( ret==ERR_DUP_ALLELE )
23522352
{
@@ -2388,9 +2388,10 @@ static void normalize_line(args_t *args, bcf1_t *line)
23882388
}
23892389
if ( !args->filter_pass || args->atomize!=SPLIT ) break;
23902390
}
2391+
return 0;
23912392
}
23922393

2393-
// return 0 on success, 1 when done
2394+
// return 0 on success, 1 when done, -1 if line skipped
23942395
static int split_and_normalize(args_t *args)
23952396
{
23962397
if ( !bcf_sr_next_line(args->files) ) return 1;
@@ -2408,8 +2409,7 @@ static int split_and_normalize(args_t *args)
24082409
if ( args->mrows_op!=MROWS_SPLIT || line->n_allele<=2 || !args->filter_pass )
24092410
{
24102411
// normal operation, no splitting
2411-
normalize_line(args, line);
2412-
return 0;
2412+
return normalize_line(args, line);
24132413
}
24142414

24152415
// any restrictions on variant types to split?
@@ -2418,8 +2418,7 @@ static int split_and_normalize(args_t *args)
24182418
int type = args->mrows_collapse==COLLAPSE_SNPS ? VCF_SNP : VCF_INDEL;
24192419
if ( !(bcf_get_variant_types(line) & type) )
24202420
{
2421-
normalize_line(args, line);
2422-
return 0;
2421+
return normalize_line(args, line);
24232422
}
24242423
}
24252424

@@ -2428,7 +2427,11 @@ static int split_and_normalize(args_t *args)
24282427

24292428
int j;
24302429
for (j=0; j<args->ntmp_lines; j++)
2431-
normalize_line(args, args->tmp_lines[j]);
2430+
{
2431+
int ret = normalize_line(args, args->tmp_lines[j]);
2432+
if (ret)
2433+
return ret;
2434+
}
24322435

24332436
return 0;
24342437
}
@@ -2453,7 +2456,10 @@ static void normalize_vcf(args_t *args)
24532456
int done = 0;
24542457
while (1)
24552458
{
2456-
done = split_and_normalize(args);
2459+
do
2460+
{
2461+
done = split_and_normalize(args);
2462+
} while (done < 0); // Skipped line
24572463
if ( done ) break; // no more lines available
24582464
int i = args->rbuf.f;
24592465
int j = rbuf_last(&args->rbuf);

0 commit comments

Comments
 (0)