1
1
/* vcfcall.c -- SNP/indel variant calling from VCF/BCF.
2
2
3
- Copyright (C) 2013-2024 Genome Research Ltd.
3
+ Copyright (C) 2013-2025 Genome Research Ltd.
4
4
5
5
Author: Petr Danecek <[email protected] >
6
6
@@ -112,18 +112,15 @@ typedef struct
112
112
}
113
113
args_t ;
114
114
115
- static char * * add_sample (void * name2idx , char * * lines , int * nlines , int * mlines , char * name , char sex , int * ith )
115
+ static char * * add_sample (void * name2idx , char * * lines , int * nlines , int * mlines , char * name , char * sex , int * ith )
116
116
{
117
117
int ret = khash_str2int_get (name2idx , name , ith );
118
118
if ( ret == 0 ) return lines ;
119
119
120
120
hts_expand (char * ,(* nlines + 1 ),* mlines ,lines );
121
- int len = strlen (name );
122
- lines [* nlines ] = (char * ) malloc (len + 3 );
123
- memcpy (lines [* nlines ],name ,len );
124
- lines [* nlines ][len ] = ' ' ;
125
- lines [* nlines ][len + 1 ] = sex ;
126
- lines [* nlines ][len + 2 ] = 0 ;
121
+ kstring_t str = {0 ,0 ,0 };
122
+ ksprintf (& str ,"%s %s" ,name ,sex );
123
+ lines [* nlines ] = str .s ;
127
124
* ith = * nlines ;
128
125
(* nlines )++ ;
129
126
khash_str2int_set (name2idx , strdup (name ), * ith );
@@ -205,12 +202,14 @@ static ploidy_predef_t ploidy_predefs[] =
205
202
206
203
// only 5 columns are required and the first is ignored:
207
204
// ignored,sample,father(or 0),mother(or 0),sex (a sex declared via --ploidy/--ploidy-file; 1 and 2 fall back to M and F if not declared themselves)
208
- static char * * parse_ped_samples (call_t * call , char * * vals , int nvals , int * nsmpl )
205
+ static char * * parse_ped_samples (args_t * args , call_t * call , char * * vals , int nvals , int * nsmpl )
209
206
{
210
207
int i , j , mlines = 0 , nlines = 0 ;
211
208
kstring_t str = {0 ,0 ,0 }, fam_str = {0 ,0 ,0 };
212
209
void * name2idx = khash_str2int_init ();
213
210
char * * lines = NULL ;
211
+
212
+ char * msex = "M" , * fsex = "F" ;
214
213
for (i = 0 ; i < nvals ; i ++ )
215
214
{
216
215
str .l = 0 ;
@@ -232,10 +231,14 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
232
231
}
233
232
if ( j < 4 ) break ;
234
233
235
- char sex ;
236
- if ( col_ends [3 ][1 ]== '1' ) sex = 'M' ;
237
- else if ( col_ends [3 ][1 ]== '2' ) sex = 'F' ;
238
- else break ;
234
+ char * sex = & col_ends [3 ][1 ];
235
+ if ( ploidy_sex2id (args -> ploidy ,sex )< 0 )
236
+ {
237
+ // this sex is not declared in the ploidy definition; if it is 1 or 2, fall back to M or F when that one is declared
238
+ if ( !strcmp (sex ,"1" ) && ploidy_sex2id (args -> ploidy ,msex )>=0 ) sex = msex ;
239
+ else if ( !strcmp (sex ,"2" ) && ploidy_sex2id (args -> ploidy ,fsex )>=0 ) sex = fsex ;
240
+ else error ("[E::%s] The sex \"%s\" has not been declared in --ploidy/--ploidy-file\n" ,__func__ ,sex );
241
+ }
239
242
240
243
lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [0 ]+ 1 , sex , & j );
241
244
if ( strcmp (col_ends [1 ]+ 1 ,"0" ) && strcmp (col_ends [2 ]+ 1 ,"0" ) ) // father and mother
@@ -248,9 +251,9 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
248
251
fam -> name = strdup (fam_str .s );
249
252
250
253
if ( !khash_str2int_has_key (name2idx , col_ends [1 ]+ 1 ) )
251
- lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [1 ]+ 1 , 'M' , & fam -> sample [FATHER ]);
254
+ lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [1 ]+ 1 , msex , & fam -> sample [FATHER ]);
252
255
if ( !khash_str2int_has_key (name2idx , col_ends [2 ]+ 1 ) )
253
- lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [2 ]+ 1 , 'F' , & fam -> sample [MOTHER ]);
256
+ lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [2 ]+ 1 , fsex , & fam -> sample [MOTHER ]);
254
257
255
258
khash_str2int_get (name2idx , col_ends [0 ]+ 1 , & fam -> sample [CHILD ]);
256
259
khash_str2int_get (name2idx , col_ends [1 ]+ 1 , & fam -> sample [FATHER ]);
@@ -281,7 +284,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
281
284
if ( !lines ) error ("Could not read the file: %s\n" , fn );
282
285
283
286
int nsmpls ;
284
- char * * smpls = parse_ped_samples (& args -> aux , lines , nlines , & nsmpls );
287
+ char * * smpls = parse_ped_samples (args , & args -> aux , lines , nlines , & nsmpls );
285
288
if ( smpls )
286
289
{
287
290
for (i = 0 ; i < nlines ; i ++ ) free (lines [i ]);
0 commit comments