@@ -132,6 +132,7 @@ typedef struct
132
132
int mrec ; // allocated size of buf
133
133
maux1_t * rec ; // buffer to keep reader's lines
134
134
bcf1_t * * lines ; // source buffer: either gvcf or readers' buffer
135
+ bcf_hdr_t * hdr ; // this reader's header
135
136
int var_types ; // reader's variant types in the active [beg,end] window
136
137
}
137
138
buffer_t ;
@@ -871,7 +872,10 @@ maux_t *maux_init(args_t *args)
871
872
ma -> smpl_nGsize = (int * ) malloc (n_smpl * sizeof (int ));
872
873
ma -> buf = (buffer_t * ) calloc (ma -> n ,sizeof (buffer_t ));
873
874
for (i = 0 ; i < ma -> n ; i ++ )
875
+ {
874
876
ma -> buf [i ].rid = -1 ;
877
+ ma -> buf [i ].hdr = files -> readers [i ].header ;
878
+ }
875
879
ma -> str = (kstring_t * ) calloc (n_smpl ,sizeof (kstring_t ));
876
880
if ( args -> local_alleles )
877
881
{
@@ -2925,23 +2929,48 @@ static const int
2925
2929
indel_mask = (VCF_INDEL <<1 ),
2926
2930
ins_mask = VCF_INS <<1 ,
2927
2931
del_mask = VCF_DEL <<1 ,
2928
- ref_mask = 1 ;
2932
+ ref_mask = 1 ,
2933
+ other_mask = VCF_OTHER <<1 ;
2934
+
2935
+ typedef struct // summary of the records selected so far; created in can_merge() and passed to types_compatible()
2936
+ {
2937
+ int types , // bitmask of the variant types selected so far (OR of the *_mask constants above); 0 until something is selected
2938
+ end ; // set on the first call of types_compatible(): INFO/END of that record if it has an other_mask (symbolic) allele and END holds exactly one value, -1 otherwise
2939
+ bcf1_t * rec ; // the first record selected, stored on the first call of types_compatible()
2940
+ }
2941
+ selected_t ;
2929
2942
2930
2943
// Can these types be merged given the -m settings? Despite the function's name, its focus is on
2931
2944
// excluding incompatible records; a finer matching is done later in stage_line()
2932
- static inline int types_compatible (args_t * args , int selected_types , buffer_t * buf , int irec )
2945
+ static inline int types_compatible (args_t * args , selected_t * selected , buffer_t * buf , int irec )
2933
2946
{
2934
2947
int k ;
2935
2948
maux_t * maux = args -> maux ;
2936
2949
bcf1_t * rec = buf -> lines [irec ];
2937
2950
int rec_types = buf -> rec [irec ].var_types ;
2938
2951
2939
- assert ( selected_types ); // this is trivially true, set in can_merge()
2952
+ int end = -1 ;
2953
+ if ( rec_types & other_mask )
2954
+ {
2955
+ int32_t * itmp = NULL , nitmp = 0 ;
2956
+ bcf_get_info_int32 (buf -> hdr ,rec ,"END" ,& itmp ,& nitmp );
2957
+ end = nitmp == 1 ? itmp [0 ] : -1 ;
2958
+ free (itmp );
2959
+ }
2960
+
2961
+ // First time here?
2962
+ if ( !selected -> types )
2963
+ {
2964
+ selected -> end = end ;
2965
+ selected -> rec = rec ;
2966
+ selected -> types = rec_types ;
2967
+ return 1 ;
2968
+ }
2940
2969
2941
2970
if ( args -> collapse & COLLAPSE_ANY ) return 1 ; // can merge anything with anything
2942
2971
2943
2972
// REF and gVCF_REF with no other alleles present can be merged with anything
2944
- if ( (selected_types & ref_mask ) && !(selected_types & (~ref_mask )) ) return 1 ;
2973
+ if ( (selected -> types & ref_mask ) && !(selected -> types & (~ref_mask )) ) return 1 ;
2945
2974
if ( (rec_types & ref_mask ) && !(rec_types & (~ref_mask )) ) return 1 ;
2946
2975
2947
2976
if ( args -> collapse != COLLAPSE_NONE )
@@ -2952,23 +2981,23 @@ static inline int types_compatible(args_t *args, int selected_types, buffer_t *b
2952
2981
// - rec has indel, we already have an indel, and -m both,indels,snp-ins-del
2953
2982
if ( args -> collapse & (COLLAPSE_SNPS |COLLAPSE_SNP_INS_DEL ) )
2954
2983
{
2955
- if ( (rec_types & snp_mask ) && (selected_types & snp_mask ) ) return 1 ;
2984
+ if ( (rec_types & snp_mask ) && (selected -> types & snp_mask ) ) return 1 ;
2956
2985
}
2957
2986
if ( args -> collapse & COLLAPSE_INDELS )
2958
2987
{
2959
- if ( (rec_types & indel_mask ) && (selected_types & indel_mask ) ) return 1 ;
2988
+ if ( (rec_types & indel_mask ) && (selected -> types & indel_mask ) ) return 1 ;
2960
2989
}
2961
2990
if ( args -> collapse & COLLAPSE_SNP_INS_DEL )
2962
2991
{
2963
- if ( (rec_types & ins_mask ) && (selected_types & ins_mask ) ) return 1 ;
2964
- if ( (rec_types & del_mask ) && (selected_types & del_mask ) ) return 1 ;
2992
+ if ( (rec_types & ins_mask ) && (selected -> types & ins_mask ) ) return 1 ;
2993
+ if ( (rec_types & del_mask ) && (selected -> types & del_mask ) ) return 1 ;
2965
2994
}
2966
2995
// Whatever is left, allow to match if the alleles match exactly
2967
2996
}
2968
2997
2969
2998
// The -m none mode or exact matching requested
2970
2999
// Simple test first: are the variants of the same type?
2971
- int x = selected_types ;
3000
+ int x = selected -> types ;
2972
3001
int y = rec_types ;
2973
3002
if ( !(x & y ) ) return 0 ; // no matching type
2974
3003
if ( (x & y )!= x && (x & y )!= y ) return 0 ; // not a subset
@@ -2980,6 +3009,13 @@ static inline int types_compatible(args_t *args, int selected_types, buffer_t *b
2980
3009
if ( vcmp_find_allele (args -> vcmp ,maux -> als + 1 ,maux -> nals - 1 ,rec -> d .allele [k ])>=0 ) break ;
2981
3010
}
2982
3011
if ( k == rec -> n_allele ) return 0 ; // this record has a new allele rec->d.allele[k]
3012
+
3013
+ if ( selected -> types & other_mask && rec_types & other_mask )
3014
+ {
3015
+ // both records have symbolic alleles and the alleles are the same
3016
+ if ( selected -> end != end ) return 0 ;
3017
+ }
3018
+
2983
3019
return 1 ; // all alleles in rec are also in the records selected thus far, perhaps save for gVCF_REF
2984
3020
}
2985
3021
@@ -3106,7 +3142,7 @@ int can_merge(args_t *args)
3106
3142
}
3107
3143
if ( !ntodo ) return 0 ;
3108
3144
3109
- int selected_types = 0 ;
3145
+ selected_t selected = { 0 , 0 , NULL } ;
3110
3146
3111
3147
// In this loop we select from each reader compatible candidate lines.
3112
3148
// (i.e. SNPs or indels). Go through all files and all lines at this
@@ -3121,7 +3157,7 @@ int can_merge(args_t *args)
3121
3157
gaux [i ].line -> d .allele [0 ][0 ] = ref ;
3122
3158
gaux [i ].line -> pos = maux -> pos ;
3123
3159
maux_update_alleles (args , i , buf -> beg );
3124
- selected_types |= ref_mask ;
3160
+ selected . types |= ref_mask ;
3125
3161
continue ;
3126
3162
}
3127
3163
for (j = buf -> beg ; j < buf -> end ; j ++ )
@@ -3136,7 +3172,7 @@ int can_merge(args_t *args)
3136
3172
{
3137
3173
if ( strcmp (id ,line -> d .id ) ) continue ; // matching by ID and it does not match the selected record
3138
3174
}
3139
- else if ( selected_types && !types_compatible (args ,selected_types ,buf ,j ) ) continue ;
3175
+ else if ( !types_compatible (args ,& selected ,buf ,j ) ) continue ;
3140
3176
3141
3177
// This is not good code. It incorrectly assumes that a SNP record is always available.
3142
3178
// However, that is not always the case and prevents the merging of G>GT,T with G>GT (see test/merge.multiallelics.1.*.vcf).
@@ -3154,7 +3190,7 @@ int can_merge(args_t *args)
3154
3190
// ) continue;
3155
3191
// }
3156
3192
3157
- selected_types |= line_types ;
3193
+ selected . types |= line_types ;
3158
3194
3159
3195
buf -> rec [j ].skip = 0 ; // the j-th record from i-th reader can be included. Final decision will be made in stage_line
3160
3196
maux_update_alleles (args , i , j );
0 commit comments