11/* vcfcall.c -- SNP/indel variant calling from VCF/BCF.
22
3- Copyright (C) 2013-2024 Genome Research Ltd.
3+ Copyright (C) 2013-2025 Genome Research Ltd.
44
55 Author: Petr Danecek <[email protected] > 66
@@ -112,18 +112,15 @@ typedef struct
112112}
113113args_t ;
114114
115- static char * * add_sample (void * name2idx , char * * lines , int * nlines , int * mlines , char * name , char sex , int * ith )
115+ static char * * add_sample (void * name2idx , char * * lines , int * nlines , int * mlines , char * name , char * sex , int * ith )
116116{
117117 int ret = khash_str2int_get (name2idx , name , ith );
118118 if ( ret == 0 ) return lines ;
119119
120120 hts_expand (char * ,(* nlines + 1 ),* mlines ,lines );
121- int len = strlen (name );
122- lines [* nlines ] = (char * ) malloc (len + 3 );
123- memcpy (lines [* nlines ],name ,len );
124- lines [* nlines ][len ] = ' ' ;
125- lines [* nlines ][len + 1 ] = sex ;
126- lines [* nlines ][len + 2 ] = 0 ;
121+ kstring_t str = {0 ,0 ,0 };
122+ ksprintf (& str ,"%s %s" ,name ,sex );
123+ lines [* nlines ] = str .s ;
127124 * ith = * nlines ;
128125 (* nlines )++ ;
129126 khash_str2int_set (name2idx , strdup (name ), * ith );
@@ -205,12 +202,14 @@ static ploidy_predef_t ploidy_predefs[] =
205202
206203// only 5 columns are required and the first is ignored:
207204// ignored,sample,father(or 0),mother(or 0),sex(1=M,2=F, or a sex name declared via --ploidy/--ploidy-file)
208- static char * * parse_ped_samples (call_t * call , char * * vals , int nvals , int * nsmpl )
205+ static char * * parse_ped_samples (args_t * args , call_t * call , char * * vals , int nvals , int * nsmpl )
209206{
210207 int i , j , mlines = 0 , nlines = 0 ;
211208 kstring_t str = {0 ,0 ,0 }, fam_str = {0 ,0 ,0 };
212209 void * name2idx = khash_str2int_init ();
213210 char * * lines = NULL ;
211+
212+ char * msex = "M" , * fsex = "F" ;
214213 for (i = 0 ; i < nvals ; i ++ )
215214 {
216215 str .l = 0 ;
@@ -232,10 +231,14 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
232231 }
233232 if ( j < 4 ) break ;
234233
235- char sex ;
236- if ( col_ends [3 ][1 ]== '1' ) sex = 'M' ;
237- else if ( col_ends [3 ][1 ]== '2' ) sex = 'F' ;
238- else break ;
234+ char * sex = & col_ends [3 ][1 ];
235+ if ( ploidy_sex2id (args -> ploidy ,sex )< 0 )
236+ {
237+ // this sex is not declared in the ploidy definition; if it is the PED code 1 or 2, fall back to M or F when that one is declared
238+ if ( !strcmp (sex ,"1" ) && ploidy_sex2id (args -> ploidy ,msex )>=0 ) sex = msex ;
239+ else if ( !strcmp (sex ,"2" ) && ploidy_sex2id (args -> ploidy ,fsex )>=0 ) sex = fsex ;
240+ else error ("[E::%s] The sex \"%s\" has not been declared in --ploidy/--ploidy-file\n" ,__func__ ,sex );
241+ }
239242
240243 lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [0 ]+ 1 , sex , & j );
241244 if ( strcmp (col_ends [1 ]+ 1 ,"0" ) && strcmp (col_ends [2 ]+ 1 ,"0" ) ) // father and mother
@@ -248,9 +251,9 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
248251 fam -> name = strdup (fam_str .s );
249252
250253 if ( !khash_str2int_has_key (name2idx , col_ends [1 ]+ 1 ) )
251- lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [1 ]+ 1 , 'M' , & fam -> sample [FATHER ]);
254+ lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [1 ]+ 1 , msex , & fam -> sample [FATHER ]);
252255 if ( !khash_str2int_has_key (name2idx , col_ends [2 ]+ 1 ) )
253- lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [2 ]+ 1 , 'F' , & fam -> sample [MOTHER ]);
256+ lines = add_sample (name2idx , lines , & nlines , & mlines , col_ends [2 ]+ 1 , fsex , & fam -> sample [MOTHER ]);
254257
255258 khash_str2int_get (name2idx , col_ends [0 ]+ 1 , & fam -> sample [CHILD ]);
256259 khash_str2int_get (name2idx , col_ends [1 ]+ 1 , & fam -> sample [FATHER ]);
@@ -281,7 +284,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
281284 if ( !lines ) error ("Could not read the file: %s\n" , fn );
282285
283286 int nsmpls ;
284- char * * smpls = parse_ped_samples (& args -> aux , lines , nlines , & nsmpls );
287+ char * * smpls = parse_ped_samples (args , & args -> aux , lines , nlines , & nsmpls );
285288 if ( smpls )
286289 {
287290 for (i = 0 ; i < nlines ; i ++ ) free (lines [i ]);
0 commit comments