Skip to content

Commit 0ce7689

Browse files
committed
Do not translate PED 1/2 sexes into M/F unless declared by --ploidy. Resolves #2122
1 parent 1068d45 commit 0ce7689

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

ploidy.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/*
1+
/*
22
Copyright (C) 2014-2016 Genome Research Ltd.
33
44
Author: Petr Danecek <pd3@sanger.ac.uk>
@@ -9,10 +9,10 @@
99
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1010
copies of the Software, and to permit persons to whom the Software is
1111
furnished to do so, subject to the following conditions:
12-
12+
1313
The above copyright notice and this permission notice shall be included in
1414
all copies or substantial portions of the Software.
15-
15+
1616
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1717
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1818
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -212,7 +212,7 @@ int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min
212212
{
213213
int sex = regitr_payload(ploidy->itr,sex_ploidy_t).sex;
214214
int pld = regitr_payload(ploidy->itr,sex_ploidy_t).ploidy;
215-
if ( pld!=ploidy->dflt )
215+
if ( pld!=ploidy->dflt )
216216
{
217217
if ( sex2ploidy ) sex2ploidy[ sex ] = pld;
218218
if ( _min > pld ) _min = pld;

vcfcall.c

+19-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* vcfcall.c -- SNP/indel variant calling from VCF/BCF.
22
3-
Copyright (C) 2013-2024 Genome Research Ltd.
3+
Copyright (C) 2013-2025 Genome Research Ltd.
44
55
Author: Petr Danecek <pd3@sanger.ac.uk>
66
@@ -112,18 +112,15 @@ typedef struct
112112
}
113113
args_t;
114114

115-
static char **add_sample(void *name2idx, char **lines, int *nlines, int *mlines, char *name, char sex, int *ith)
115+
static char **add_sample(void *name2idx, char **lines, int *nlines, int *mlines, char *name, char *sex, int *ith)
116116
{
117117
int ret = khash_str2int_get(name2idx, name, ith);
118118
if ( ret==0 ) return lines;
119119

120120
hts_expand(char*,(*nlines+1),*mlines,lines);
121-
int len = strlen(name);
122-
lines[*nlines] = (char*) malloc(len+3);
123-
memcpy(lines[*nlines],name,len);
124-
lines[*nlines][len] = ' ';
125-
lines[*nlines][len+1] = sex;
126-
lines[*nlines][len+2] = 0;
121+
kstring_t str = {0,0,0};
122+
ksprintf(&str,"%s %s",name,sex);
123+
lines[*nlines] = str.s;
127124
*ith = *nlines;
128125
(*nlines)++;
129126
khash_str2int_set(name2idx, strdup(name), *ith);
@@ -205,12 +202,14 @@ static ploidy_predef_t ploidy_predefs[] =
205202

206203
// only 5 columns are required and the first is ignored:
207204
// ignored,sample,father(or 0),mother(or 0),sex(1=M,2=F)
208-
static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl)
205+
static char **parse_ped_samples(args_t *args, call_t *call, char **vals, int nvals, int *nsmpl)
209206
{
210207
int i, j, mlines = 0, nlines = 0;
211208
kstring_t str = {0,0,0}, fam_str = {0,0,0};
212209
void *name2idx = khash_str2int_init();
213210
char **lines = NULL;
211+
212+
char *msex = "M", *fsex = "F";
214213
for (i=0; i<nvals; i++)
215214
{
216215
str.l = 0;
@@ -232,10 +231,14 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
232231
}
233232
if ( j<4 ) break;
234233

235-
char sex;
236-
if ( col_ends[3][1]=='1' ) sex = 'M';
237-
else if ( col_ends[3][1]=='2' ) sex = 'F';
238-
else break;
234+
char *sex = &col_ends[3][1];
235+
if ( ploidy_sex2id(args->ploidy,sex)<0 )
236+
{
237+
// this gender is not defined, if 1/2, test if M/F is
238+
if ( !strcmp(sex,"1") && ploidy_sex2id(args->ploidy,msex)>=0 ) sex = msex;
239+
else if ( !strcmp(sex,"2") && ploidy_sex2id(args->ploidy,fsex)>=0 ) sex = fsex;
240+
else error("[E::%s] The sex \"%s\" has not been declared in --ploidy/--ploidy-file\n",__func__,sex);
241+
}
239242

240243
lines = add_sample(name2idx, lines, &nlines, &mlines, col_ends[0]+1, sex, &j);
241244
if ( strcmp(col_ends[1]+1,"0") && strcmp(col_ends[2]+1,"0") ) // father and mother
@@ -248,9 +251,9 @@ static char **parse_ped_samples(call_t *call, char **vals, int nvals, int *nsmpl
248251
fam->name = strdup(fam_str.s);
249252

250253
if ( !khash_str2int_has_key(name2idx, col_ends[1]+1) )
251-
lines = add_sample(name2idx, lines, &nlines, &mlines, col_ends[1]+1, 'M', &fam->sample[FATHER]);
254+
lines = add_sample(name2idx, lines, &nlines, &mlines, col_ends[1]+1, msex, &fam->sample[FATHER]);
252255
if ( !khash_str2int_has_key(name2idx, col_ends[2]+1) )
253-
lines = add_sample(name2idx, lines, &nlines, &mlines, col_ends[2]+1, 'F', &fam->sample[MOTHER]);
256+
lines = add_sample(name2idx, lines, &nlines, &mlines, col_ends[2]+1, fsex, &fam->sample[MOTHER]);
254257

255258
khash_str2int_get(name2idx, col_ends[0]+1, &fam->sample[CHILD]);
256259
khash_str2int_get(name2idx, col_ends[1]+1, &fam->sample[FATHER]);
@@ -281,7 +284,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
281284
if ( !lines ) error("Could not read the file: %s\n", fn);
282285

283286
int nsmpls;
284-
char **smpls = parse_ped_samples(&args->aux, lines, nlines, &nsmpls);
287+
char **smpls = parse_ped_samples(args, &args->aux, lines, nlines, &nsmpls);
285288
if ( smpls )
286289
{
287290
for (i=0; i<nlines; i++) free(lines[i]);

0 commit comments

Comments
 (0)