Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions test/norm.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
##INFO=<ID=ISTR,Number=1,Type=String,Description="Test String in INFO">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT XY00001 XY00002
1 60 . A T 999 PASS AN=4;AC=4 GT 0/0 0/1
1 105 . TAAACCCTAAA TAA,TAACCCTAAA 999 PASS INDEL;AN=4;AC=2,2;DP=19;ISTR=SomeString;XRF=1e+06,2e+06,500000;XRI=1111,2222,5555;XRS=AAA,BBB,DDD;XAF=1e+06,500000;XAI=1111,5555;XAS=AAA,DDD;XGF=1e+06,2e+06,3e+06,500000,.,9e+09;XGI=1111,2222,3333,5555,.,9999;XGS=A,B,C,E,.,F GT:PL:DP:FRF:FRI:FRS:FAF:FAI:FAS:FGF:FGI:FGS 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF 1/2:1,2,3,4,5,6:1:1e+06,2e+06,500000:1111,2222,5555:AAAA,BBB,CC:1e+06,500000:1111,5555:A,BB:1e+06,2e+06,3e+06,500000,.,9e+09:1111,2222,3333,5555,.,9999:A,BB,CCC,EEEE,.,FFFFF
2 1 . GGGCGTCTCATAGCTGGAGCAATGGCGAGCGCCTGGACAAGGGAGGGGAAGGGGTTCTTATTACTGACGCGGGTAGCCCCTACTGCTGTGTGGTTCCCCTATTTTTTTTTTTTTCTTTTTGAGACGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCGGCTCACTGCAAGCTCCACCT ACGT 999 PASS INDEL;AN=4;AC=2;END=192 GT:DP 1/0:1 1/0:1
2 101 . ATTTTTTTTTTTTT ATTTTTTTTTTTTTTT 999 PASS INDEL;AN=4;AC=4;END=114 GT:DP 1/1:1 1/1:1
Expand Down
26 changes: 16 additions & 10 deletions vcfnorm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2328,8 +2328,8 @@ static void destroy_data(args_t *args)
if ( args->mseq ) free(args->seq);
}


static void normalize_line(args_t *args, bcf1_t *line)
// return 0 on success, -1 if line was skipped (due to ref mismatch)
static int normalize_line(args_t *args, bcf1_t *line)
{
if ( args->fai )
{
Expand All @@ -2346,7 +2346,7 @@ static void normalize_line(args_t *args, bcf1_t *line)
if ( ret==ERR_REF_MISMATCH && args->check_ref & CHECK_REF_SKIP )
{
args->nskipped++;
return;
return -1;
}
if ( ret==ERR_DUP_ALLELE )
{
Expand Down Expand Up @@ -2388,9 +2388,10 @@ static void normalize_line(args_t *args, bcf1_t *line)
}
if ( !args->filter_pass || args->atomize!=SPLIT ) break;
}
return 0;
}

// return 0 on success, 1 when done
// return 0 on success, 1 when done, -1 if line skipped
static int split_and_normalize(args_t *args)
{
if ( !bcf_sr_next_line(args->files) ) return 1;
Expand All @@ -2408,8 +2409,7 @@ static int split_and_normalize(args_t *args)
if ( args->mrows_op!=MROWS_SPLIT || line->n_allele<=2 || !args->filter_pass )
{
// normal operation, no splitting
normalize_line(args, line);
return 0;
return normalize_line(args, line);
}

// any restrictions on variant types to split?
Expand All @@ -2418,8 +2418,7 @@ static int split_and_normalize(args_t *args)
int type = args->mrows_collapse==COLLAPSE_SNPS ? VCF_SNP : VCF_INDEL;
if ( !(bcf_get_variant_types(line) & type) )
{
normalize_line(args, line);
return 0;
return normalize_line(args, line);
}
}

Expand All @@ -2428,7 +2427,11 @@ static int split_and_normalize(args_t *args)

int j;
for (j=0; j<args->ntmp_lines; j++)
normalize_line(args, args->tmp_lines[j]);
{
int ret = normalize_line(args, args->tmp_lines[j]);
if (ret)
return ret;
}

return 0;
}
Expand All @@ -2453,7 +2456,10 @@ static void normalize_vcf(args_t *args)
int done = 0;
while (1)
{
done = split_and_normalize(args);
do
{
done = split_and_normalize(args);
} while (done < 0); // Skipped line
if ( done ) break; // no more lines available
int i = args->rbuf.f;
int j = rbuf_last(&args->rbuf);
Expand Down