Skip to content

Commit 2191405

Browse files
committed
The -c option can be omitted when a VEP subfield is used in filtering expressions
Note that this is an experimental feature.
1 parent f0ad6aa commit 2191405

File tree

6 files changed

+328
-189
lines changed

6 files changed

+328
-189
lines changed

Diff for: NEWS

+4
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ Changes affecting specific commands:
133133
VEP subfields. The +split-vep plugin can now work with such files, replacing the offending
134134
commas with slash (/) characters. See also https://github.com/Ensembl/ensembl-vep/issues/1351
135135

136+
- The `-c, --columns` option can now be omitted when a subfield is used in an `-i/-e` filtering
137+
expression. Note that `-c` may still have to be given when it is not possible to infer the
138+
type of the subfield. Note that this is an experimental feature.
139+
136140
* bcftools stats
137141

138142
- The per-sample stats (PSC) would not be computed when `-i/-e` filtering options and

Diff for: filter.c

+50-3
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ struct _filter_t
109109
#if ENABLE_PERL_FILTERS
110110
PerlInterpreter *perl;
111111
#endif
112+
char **undef_tag;
113+
int nundef_tag;
114+
int status, exit_on_error;
112115
};
113116

114117

@@ -304,6 +307,28 @@ static int filters_next_token(char **str, int *len)
304307
return TOK_VAL;
305308
}
306309

310+
// Bit flags accumulated in filter_t.status (combined with |=); FILTER_OK means no problem was recorded.
// FILTER_ERR_UNKN_TAGS is set when the expression refers to a tag that is not defined in the VCF header.
// NOTE(review): FILTER_ERR_OTHER is not set anywhere in the visible code - confirm its intended use.
// NOTE(review): the same three values are also defined in filter.h - keep the two copies in sync.
#define FILTER_OK 0
311+
#define FILTER_ERR_UNKN_TAGS 1
312+
#define FILTER_ERR_OTHER 2
313+
314+
/*
    Record the name of a tag which was used in the filtering expression but could
    not be resolved. Each distinct name is stored only once; the stored copy is
    owned by the filter. Calls error() if memory cannot be allocated.
*/
static void filter_add_undef_tag(filter_t *filter, char *str)
{
    int idx = 0;
    while ( idx < filter->nundef_tag )
    {
        if ( strcmp(str,filter->undef_tag[idx])==0 ) return;    // already on the list
        idx++;
    }

    // grow the list by one slot and store a private copy of the name in it
    filter->nundef_tag += 1;
    filter->undef_tag = (char**)realloc(filter->undef_tag, filter->nundef_tag*sizeof(*filter->undef_tag));
    if ( filter->undef_tag==NULL ) error("Could not allocate memory\n");

    char **slot = &filter->undef_tag[filter->nundef_tag-1];
    *slot = strdup(str);
    if ( *slot==NULL ) error("Could not allocate memory\n");
}
326+
/*
    Give read-only access to the names of undefined tags collected while the
    expression was parsed. The number of names is written to *ntags; the returned
    array is the filter's own storage, not a copy.
*/
const char **filter_list_undef_tags(filter_t *filter, int *ntags)
{
    const char **tags = (const char**)filter->undef_tag;
    *ntags = filter->nundef_tag;
    return tags;
}
331+
307332

308333
/*
309334
Simple path expansion, expands ~/, ~user, $var. The result must be freed by the caller.
@@ -3063,15 +3088,20 @@ static int filters_init1(filter_t *filter, char *str, int len, token_t *tok)
30633088
{
30643089
errno = 0;
30653090
tok->threshold = strtod(tmp.s, &end); // float?
3066-
if ( errno!=0 || end!=tmp.s+len ) error("[%s:%d %s] Error: the tag \"%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
3091+
if ( errno!=0 || end!=tmp.s+len )
3092+
{
3093+
if ( filter->exit_on_error )
3094+
error("[%s:%d %s] Error: the tag \"%s\" is not defined in the VCF header\n", __FILE__,__LINE__,__FUNCTION__,tmp.s);
3095+
filter->status |= FILTER_ERR_UNKN_TAGS;
3096+
filter_add_undef_tag(filter,tmp.s);
3097+
}
30673098
}
30683099
tok->is_constant = 1;
30693100

30703101
if ( tmp.s ) free(tmp.s);
30713102
return 0;
30723103
}
30733104

3074-
30753105
static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks)
30763106
{
30773107
int i;
@@ -3221,12 +3251,13 @@ static void perl_destroy(filter_t *filter)
32213251

32223252

32233253
// Parse filter expression and convert to reverse polish notation. Dijkstra's shunting-yard algorithm
3224-
filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
3254+
static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error)
32253255
{
32263256
filter_t *filter = (filter_t *) calloc(1,sizeof(filter_t));
32273257
filter->str = strdup(str);
32283258
filter->hdr = hdr;
32293259
filter->max_unpack |= BCF_UN_STR;
3260+
filter->exit_on_error = exit_on_error;
32303261

32313262
int nops = 0, mops = 0; // operators stack
32323263
int nout = 0, mout = 0; // filter tokens, RPN
@@ -3608,6 +3639,14 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
36083639
filter->flt_stack = (token_t **)malloc(sizeof(token_t*)*nout);
36093640
return filter;
36103641
}
3642+
// Non-exiting variant of filter_init(): an undefined tag is recorded in the filter
// status instead of terminating via error(), see filter_status() and filter_list_undef_tags()
filter_t *filter_parse(bcf_hdr_t *hdr, const char *str)
{
    const int exit_on_error = 0;
    return filter_init_(hdr, str, exit_on_error);
}
3646+
// Original entry point: same parser as filter_parse(), but an undefined tag
// terminates the program via error()
filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
{
    const int exit_on_error = 1;
    return filter_init_(hdr, str, exit_on_error);
}
36113650

36123651
void filter_destroy(filter_t *filter)
36133652
{
@@ -3629,6 +3668,8 @@ void filter_destroy(filter_t *filter)
36293668
free(filter->filters[i].regex);
36303669
}
36313670
}
3671+
for (i=0; i<filter->nundef_tag; i++) free(filter->undef_tag[i]);
3672+
free(filter->undef_tag);
36323673
free(filter->cached_GT.buf);
36333674
free(filter->cached_GT.mask);
36343675
free(filter->filters);
@@ -3642,6 +3683,7 @@ void filter_destroy(filter_t *filter)
36423683

36433684
int filter_test(filter_t *filter, bcf1_t *line, const uint8_t **samples)
36443685
{
3686+
if ( filter->status != FILTER_OK ) error("Error: the caller did not check the filter status\n");
36453687
bcf_unpack(line, filter->max_unpack);
36463688

36473689
int i, nstack = 0;
@@ -3804,3 +3846,8 @@ void filter_set_samples(filter_t *filter, const uint8_t *samples)
38043846
}
38053847
}
38063848

3849+
// Returns the status flags accumulated while the expression was parsed,
// FILTER_OK when no problem was recorded
int filter_status(filter_t *filter)
{
    int status = filter->status;
    return status;
}
3853+

Diff for: filter.h

+20-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* filter.h -- filter expressions.
22
3-
Copyright (C) 2013-2021 Genome Research Ltd.
3+
Copyright (C) 2013-2023 Genome Research Ltd.
44
55
Author: Petr Danecek <[email protected]>
66
@@ -32,6 +32,8 @@ typedef struct _filter_t filter_t;
3232
/**
3333
* @hdr: BCF header file
3434
* @str: see the bcftools filter command help for description
35+
*
36+
* Same as filter_parse() but exits on errors
3537
*/
3638
filter_t *filter_init(bcf_hdr_t *hdr, const char *str);
3739

@@ -61,4 +63,21 @@ const double *filter_get_doubles(filter_t *filter, int *nval, int *nval1);
6163
void filter_expression_info(FILE *fp);
6264
int filter_max_unpack(filter_t *filter);
6365

66+
/**
67+
* Same as filter_init() but may not exit on some types of errors. The caller
68+
* must check if the returned value is not NULL and if the subsequent call
69+
* of filter_status() returns FILTER_OK before filter_test() can be called.
70+
*/
71+
filter_t *filter_parse(bcf_hdr_t *hdr, const char *str);
72+
73+
/*
 *  Values reported by filter_status(). FILTER_OK means no problem was recorded;
 *  the error values are bit flags and may be combined.
 *  FILTER_ERR_UNKN_TAGS: the expression refers to a tag not defined in the VCF header.
 *  NOTE(review): FILTER_ERR_OTHER is not set anywhere in the visible code - confirm its intended use.
 */
#define FILTER_OK 0
74+
#define FILTER_ERR_UNKN_TAGS 1
75+
#define FILTER_ERR_OTHER 2
76+
77+
/**
78+
* Check if filter_parse() was successful
79+
*/
80+
int filter_status(filter_t *filter);
81+
/**
 *  List the tags used in the expression which are not defined in the VCF header
 *  @nundef: filled with the number of returned tags
 *
 *  The returned array and its strings belong to the filter and are released
 *  by filter_destroy(), the caller must not free them.
 */
const char **filter_list_undef_tags(filter_t *filter, int *nundef);
82+
6483
#endif

0 commit comments

Comments
 (0)