Skip to content

Commit e412f74

Browse files
committed
Add smpl_count/scount functions, similar to other per-sample functions
1 parent 2563cad commit e412f74

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

convert.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,13 +1339,15 @@ static char *set_filter_expr(convert_t *convert, char *key, int is_gtf)
13391339
else if ( !strncasecmp(key,"SMPL_AVG(",9) ) { ptr = function(convert,key,is_gtf); } \
13401340
else if ( !strncasecmp(key,"SMPL_STDEV(",11) ) { ptr = function(convert,key,is_gtf); } \
13411341
else if ( !strncasecmp(key,"SMPL_SUM(",9) ) { ptr = function(convert,key,is_gtf); } \
1342+
else if ( !strncasecmp(key,"SMPL_COUNT(",11) ) { ptr = function(convert,key,is_gtf); } \
13421343
else if ( !strncasecmp(key,"sMAX(",5) ) { ptr = function(convert,key,is_gtf); } \
13431344
else if ( !strncasecmp(key,"sMIN(",5) ) { ptr = function(convert,key,is_gtf); } \
13441345
else if ( !strncasecmp(key,"sMEAN(",6) ) { ptr = function(convert,key,is_gtf); } \
13451346
else if ( !strncasecmp(key,"sMEDIAN(",8) ) { ptr = function(convert,key,is_gtf); } \
13461347
else if ( !strncasecmp(key,"sAVG(",5) ) { ptr = function(convert,key,is_gtf); } \
13471348
else if ( !strncasecmp(key,"sSTDEV(",7) ) { ptr = function(convert,key,is_gtf); } \
1348-
else if ( !strncasecmp(key,"sSUM(",5) ) { ptr = function(convert,key,is_gtf); }
1349+
else if ( !strncasecmp(key,"sSUM(",5) ) { ptr = function(convert,key,is_gtf); } \
1350+
else if ( !strncasecmp(key,"sCOUNT(",7) ) { ptr = function(convert,key,is_gtf); }
13491351

13501352
// Record the given type code on the format descriptor.
static void set_type(fmt_t *fmt, int type)
{
    fmt->type = type;
}
13511353
static fmt_t *register_tag(convert_t *convert, char *key, int is_gtf, int type)

doc/bcftools.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4092,8 +4092,8 @@ are intended to select sites, not samples, even when applied on FORMAT tags.
40924092
However, when prefixed with SMPL_ (or "s" for brevity, e.g. SMPL_MAX or sMAX),
40934093
they will evaluate to a vector of per-sample values when applied on FORMAT tags:
40944094

4095-
SMPL_MAX, SMPL_MIN, SMPL_AVG, SMPL_MEAN, SMPL_MEDIAN, SMPL_STDEV, SMPL_SUM,
4096-
sMAX, sMIN, sAVG, sMEAN, sMEDIAN, sSTDEV, sSUM
4095+
SMPL_MAX, SMPL_MIN, SMPL_AVG, SMPL_MEAN, SMPL_MEDIAN, SMPL_STDEV, SMPL_SUM, SMPL_COUNT,
4096+
sMAX, sMIN, sAVG, sMEAN, sMEDIAN, sSTDEV, sSUM, sCOUNT
40974097

40984098
* two-tailed binomial and fisher test. Note that for N=0 the test evaluates to a missing
40994099
value and when FORMAT/GT is used to determine the vector indices, it evaluates to 1 for

filter.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ struct _filter_t
168168
#define TOK_MODULO 40 // %
169169
#define TOK_EXT 41 // external values set before each filter_test_ext() call, can be one of {},{str},{int},{float}
170170
#define TOK_FISHER 42
171+
#define TOK_sCOUNT 43   // returned by filters_next_token() for "SMPL_COUNT(" and "sCOUNT("
171172

172173
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
173174
// ( ) [ < = > ] ! | & + - * / M m a A O ~ ^ S . l f c p b P i s %
@@ -198,13 +199,15 @@ static int filters_next_token(char **str, int *len)
198199
tmp = *str;
199200
}
200201

202+
if ( !strncasecmp(tmp,"SMPL_COUNT(",11) ) { (*str) += 10; return TOK_sCOUNT; }
201203
if ( !strncasecmp(tmp,"SMPL_MAX(",9) ) { (*str) += 8; return TOK_sMAX; }
202204
if ( !strncasecmp(tmp,"SMPL_MIN(",9) ) { (*str) += 8; return TOK_sMIN; }
203205
if ( !strncasecmp(tmp,"SMPL_MEAN(",10) ) { (*str) += 9; return TOK_sAVG; }
204206
if ( !strncasecmp(tmp,"SMPL_MEDIAN(",12) ) { (*str) += 11; return TOK_sMEDIAN; }
205207
if ( !strncasecmp(tmp,"SMPL_AVG(",9) ) { (*str) += 8; return TOK_sAVG; }
206208
if ( !strncasecmp(tmp,"SMPL_STDEV(",11) ) { (*str) += 10; return TOK_sSTDEV; }
207209
if ( !strncasecmp(tmp,"SMPL_SUM(",9) ) { (*str) += 8; return TOK_sSUM; }
210+
if ( !strncasecmp(tmp,"sCOUNT(",7) ) { (*str) += 6; return TOK_sCOUNT; }
208211
if ( !strncasecmp(tmp,"sMAX(",5) ) { (*str) += 4; return TOK_sMAX; }
209212
if ( !strncasecmp(tmp,"sMIN(",5) ) { (*str) += 4; return TOK_sMIN; }
210213
if ( !strncasecmp(tmp,"sMEAN(",6) ) { (*str) += 5; return TOK_sAVG; }
@@ -1992,6 +1995,35 @@ static int func_count(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stac
19921995
rtok->values[0] = cnt;
19931996
return 1;
19941997
}
1998+
/*
 *  Per-sample COUNT, dispatched for SMPL_COUNT(..) and sCOUNT(..) expressions.
 *
 *  The argument is the token on top of the stack. For every sample flagged in
 *  its usmpl mask, the number of values in that sample's slice of tok->values
 *  (tok->nval1 values per sample) that are neither missing nor vector-end is
 *  written to rtok->values[sample]. Entries of samples not flagged in usmpl are
 *  left unwritten. The usmpl mask is copied to rtok.
 *
 *  If the argument carries no per-sample data (tok->nsamples is zero), the call
 *  is delegated to the site-level func_count() and its return value is passed on.
 *
 *  Returns 1 on the per-sample path.
 */
static int func_smpl_count(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
{
    token_t *tok = stack[nstack - 1];

    // Not a per-sample vector: the site-level counterpart of sCOUNT is COUNT
    // (previously this fell through to func_max, which yields a maximum, not a count)
    if ( !tok->nsamples ) return func_count(flt,line,rtok,stack,nstack);

    // One output value per sample
    rtok->nsamples = tok->nsamples;
    rtok->nvalues  = tok->nsamples;
    rtok->nval1    = 1;
    hts_expand(double,rtok->nvalues,rtok->mvalues,rtok->values);

    // Propagate the mask of samples in use
    assert(tok->usmpl);
    if ( !rtok->usmpl ) rtok->usmpl = (uint8_t*) malloc(tok->nsamples);
    memcpy(rtok->usmpl, tok->usmpl, tok->nsamples);

    // Only tag-backed arguments are handled; with assertions compiled out the
    // output values are left untouched, as before
    assert( tok->tag );
    if ( !tok->tag ) return 1;

    // raw number of values per sample in a FMT tag, e.g. sCOUNT(FMT/TAG)
    if ( tok->is_str ) error("todo: Type=String for COUNT on FORMAT fields?\n");

    int i,j;
    for (i=0; i<tok->nsamples; i++)
    {
        if ( !tok->usmpl[i] ) continue;
        int cnt = 0;
        double *ptr = tok->values + i*tok->nval1;
        for (j=0; j<tok->nval1; j++)
        {
            if ( !bcf_double_is_missing_or_vector_end(ptr[j]) ) cnt++;
        }
        rtok->values[i] = cnt;
    }
    return 1;
}
19952027
static int func_strlen(filter_t *flt, bcf1_t *line, token_t *rtok, token_t **stack, int nstack)
19962028
{
19972029
token_t *tok = stack[nstack - 1];
@@ -4088,6 +4120,7 @@ static filter_t *filter_init_(bcf_hdr_t *hdr, const char *str, int exit_on_error
40884120
else if ( out[i].tok_type==TOK_BINOM ) { out[i].func = func_binom; out[i].tok_type = TOK_FUNC; }
40894121
else if ( out[i].tok_type==TOK_FISHER ) { out[i].func = func_fisher; out[i].tok_type = TOK_FUNC; }
40904122
else if ( out[i].tok_type==TOK_PERLSUB ) { out[i].func = perl_exec; out[i].tok_type = TOK_FUNC; }
4123+
else if ( out[i].tok_type==TOK_sCOUNT ) { out[i].func = func_smpl_count; out[i].tok_type = TOK_FUNC; }
40914124
else if ( out[i].tok_type==TOK_sMAX ) { out[i].func = func_smpl_max; out[i].tok_type = TOK_FUNC; }
40924125
else if ( out[i].tok_type==TOK_sMIN ) { out[i].func = func_smpl_min; out[i].tok_type = TOK_FUNC; }
40934126
else if ( out[i].tok_type==TOK_sAVG ) { out[i].func = func_smpl_avg; out[i].tok_type = TOK_FUNC; }

0 commit comments

Comments
 (0)