2
2
#include <math.h>
3
3
#include "free.h"
4
4
#include "likelihood.h"
5
+ #include "split_read.h"
5
6
7
+ struct SplitsInfo * all_split_reads = NULL ;
6
8
7
- void get_sample_name (bam_info * in_bam , char * header_text )
9
+
10
+ SplitRow * createSplitRow (int locMapLeftStart , int locMapLeftEnd , char orientationLeft ,
11
+ int locMapRightStart , int locMapRightEnd , char orientationRight , char svType )
8
12
{
9
- char * tmp_header = NULL ;
10
- set_str ( & ( tmp_header ), header_text );
11
- char * p = strtok ( tmp_header , "\t\n" );
12
- char sample_name_buffer [1024 ];
13
+ SplitRow * newRow = (SplitRow * ) getMem (sizeof (SplitRow ));
14
+
15
+ newRow -> next = NULL ;
16
+
17
+ newRow -> locMapLeftEnd = locMapLeftEnd ;
18
+ newRow -> locMapLeftStart = locMapLeftStart ;
19
+ newRow -> locMapRightStart = locMapRightStart ;
20
+ newRow -> locMapRightEnd = locMapRightEnd ;
21
+
22
+ newRow -> orientationLeft = orientationLeft ;
23
+ newRow -> orientationRight = orientationRight ;
24
+
25
+ newRow -> svType = svType ;
26
+
27
+ return newRow ;
28
+ }
29
+
30
+ SplitRow * determine_SvType ( splitRead * ptrSplitRead , posMapSplitRead * ptrPosMapSplit )
31
+ {
32
+ int pos1_1 , pos1_2 , pos2_1 , pos2_2 ;
33
+
34
+ /* Length of read A(left) and read B(right) */
35
+ int lengthRead , lengthSplit ;
36
+
37
+ /* If soft clip is at the end */
38
+ lengthSplit = ptrSplitRead -> read_length - ptrSplitRead -> split_start ;
39
+ lengthRead = ptrSplitRead -> read_length - lengthSplit ;
13
40
14
- while ( p != NULL )
41
+ if ( ptrSplitRead -> pos < ptrPosMapSplit -> posMap )
15
42
{
16
- /* If the current token has "SM" as the first two characters,
17
- we have found our Sample Name */
18
- if ( p [0 ] == 'S' && p [1 ] == 'M' )
19
- {
20
- /* Get the Sample Name */
21
- strncpy ( sample_name_buffer , p + 3 , strlen ( p ) - 3 );
43
+ pos1_1 = ptrSplitRead -> pos ;
44
+ pos1_2 = ptrSplitRead -> pos + lengthRead ;
45
+ pos2_1 = ptrPosMapSplit -> posMap ;
46
+ pos2_2 = ptrPosMapSplit -> posMap + lengthSplit ;
47
+ }
48
+ else if ( ptrPosMapSplit -> posMap < ptrSplitRead -> pos )
49
+ {
50
+ pos1_1 = ptrPosMapSplit -> posMap ;
51
+ pos1_2 = ptrPosMapSplit -> posMap + lengthSplit ;
52
+ pos2_1 = ptrSplitRead -> pos ;
53
+ pos2_2 = ptrSplitRead -> pos + lengthRead ;
54
+ }
22
55
23
- /* Add the NULL terminator */
24
- sample_name_buffer [ strlen ( p ) - 3 ] = '\0' ;
56
+ if ( pos1_2 >= pos2_1 )
57
+ return NULL ;
25
58
26
- /* Exit loop */
27
- break ;
59
+ if ( ptrSplitRead -> orient == FORWARD && ptrPosMapSplit -> orient == FORWARD )
60
+ {
61
+ if ( ( ptrSplitRead -> pos < ptrPosMapSplit -> posMap ))
62
+ {
63
+ SplitRow * newRow = createSplitRow (pos1_1 , pos1_2 , FORWARD , pos2_1 , pos2_2 , FORWARD , DELETION );
64
+ return newRow ;
65
+ }
66
+ else if ( ( ptrPosMapSplit -> posMap < ptrSplitRead -> pos ))
67
+ {
68
+ SplitRow * newRow = createSplitRow (pos1_1 , pos1_2 , FORWARD , pos2_1 , pos2_2 , FORWARD , DUPLICATION );
69
+ return newRow ;
28
70
}
29
- p = strtok ( NULL , "\t\n" );
30
71
}
72
+ return NULL ;
73
+ }
74
+
75
+ int read_SplitReads (splitRead * ptrSoftClip , parameters * params , int chr_index )
76
+ {
77
+ float is_satellite = 0.0 ;
78
+ SplitRow * newRow = NULL ;
79
+ posMapSplitRead * ptrPosMapSoftClip ;
80
+
81
+ all_split_reads = ( SplitsInfo * ) getMem ( sizeof ( struct SplitsInfo ));
82
+ all_split_reads -> size = 0 ;
83
+ all_split_reads -> head = NULL ;
84
+ all_split_reads -> tail = NULL ;
85
+
86
+ while ( ptrSoftClip != NULL )
87
+ {
88
+ ptrPosMapSoftClip = ptrSoftClip -> ptrSplitMap ;
89
+ while ( ptrPosMapSoftClip != NULL )
90
+ {
91
+ is_satellite = sonic_is_satellite ( params -> this_sonic , ptrSoftClip -> chromosome_name , ptrSoftClip -> pos , ptrSoftClip -> pos + 1 ) +
92
+ sonic_is_satellite ( params -> this_sonic , ptrSoftClip -> chromosome_name , ptrPosMapSoftClip -> posMap , ptrPosMapSoftClip -> posMap + 1 );
93
+
94
+ if ( is_satellite == 0 && ptrSoftClip -> qual > params -> mq_threshold && strcmp (ptrSoftClip -> chromosome_name , params -> this_sonic -> chromosome_names [chr_index ]) == 0
95
+ && ptrPosMapSoftClip -> mapq > params -> mq_threshold && ptrSoftClip -> pos > 0 && ptrPosMapSoftClip -> posMap > 0
96
+ && ptrSoftClip -> pos < params -> this_sonic -> chromosome_lengths [chr_index ] && ptrPosMapSoftClip -> posMap < params -> this_sonic -> chromosome_lengths [chr_index ])
97
+ {
98
+ newRow = determine_SvType (ptrSoftClip , ptrPosMapSoftClip );
99
+ if ( newRow != NULL )
100
+ {
101
+ /* For Deletion */
102
+ if ( newRow -> svType == DELETION )
103
+ {
104
+ newRow -> orientationLeft = FORWARD ;
105
+ newRow -> orientationRight = REVERSE ;
106
+ newRow -> locMapLeftStart -= SOFTCLIP_WRONGMAP_WINDOW ;
107
+ newRow -> locMapLeftEnd -= SOFTCLIP_WRONGMAP_WINDOW ;
108
+ newRow -> locMapRightStart += SOFTCLIP_WRONGMAP_WINDOW ;
109
+ newRow -> locMapRightEnd += SOFTCLIP_WRONGMAP_WINDOW ;
110
+ }
111
+ /* For Duplication */
112
+ else if (newRow -> svType == DUPLICATION )
113
+ {
114
+ newRow -> orientationLeft = REVERSE ;
115
+ newRow -> orientationRight = FORWARD ;
116
+ newRow -> locMapLeftStart -= SOFTCLIP_WRONGMAP_WINDOW ;
117
+ newRow -> locMapLeftEnd -= SOFTCLIP_WRONGMAP_WINDOW ;
118
+ newRow -> locMapRightStart += SOFTCLIP_WRONGMAP_WINDOW ;
119
+ newRow -> locMapRightEnd += SOFTCLIP_WRONGMAP_WINDOW ;
120
+ }
121
+ else
122
+ newRow = NULL ;
123
+ }
31
124
32
- set_str ( & ( in_bam -> sample_name ), sample_name_buffer );
33
- free ( tmp_header );
125
+ if ( newRow == NULL )
126
+ ;//fprintf( stderr, "ERROR loading divet from bam (soft clip)\n");
127
+ else
128
+ {
129
+ if ( all_split_reads -> head == NULL || all_split_reads -> tail == NULL )
130
+ {
131
+ all_split_reads -> head = newRow ;
132
+ all_split_reads -> tail = newRow ;
133
+ }
134
+ else
135
+ {
136
+ all_split_reads -> tail -> next = newRow ;
137
+ all_split_reads -> tail = newRow ;
138
+ }
139
+ all_split_reads -> size ++ ;
140
+ }
141
+ }
142
+ ptrPosMapSoftClip = ptrPosMapSoftClip -> next ;
143
+ }
144
+ ptrSoftClip = ptrSoftClip -> next ;
145
+ }
146
+ fprintf (stderr ,"There are %d split reads\n" ,all_split_reads -> size );
147
+ return all_split_reads -> size ;
34
148
}
35
149
36
- void count_reads_bam ( bam_info * in_bam , parameters * params )
150
+ void count_reads_bam ( bam_info * in_bam , parameters * params , int chr_index )
37
151
{
38
152
bam1_core_t bam_alignment_core ;
39
153
bam1_t * bam_alignment = bam_init1 ();
40
154
155
+ int return_type ;
156
+
41
157
while ( sam_itr_next ( in_bam -> bam_file , in_bam -> iter , bam_alignment ) > 0 )
42
158
{
43
159
bam_alignment_core = bam_alignment -> core ;
44
160
161
+ if ( sonic_is_satellite ( params -> this_sonic , params -> this_sonic -> chromosome_names [chr_index ], bam_alignment_core .pos , bam_alignment_core .pos + 20 ) == 0
162
+ && bam_alignment_core .qual > params -> mq_threshold && is_proper ( bam_alignment_core .flag )
163
+ && bam_alignment_core .l_qseq > params -> min_read_length )
164
+ {
165
+ if ( bam_alignment_core .l_qseq > params -> min_read_length )
166
+ return_type = find_split_reads ( in_bam , params , bam_alignment , chr_index );
167
+
168
+ if ( return_type == -1 )
169
+ continue ;
170
+ }
171
+
45
172
/* Increase the read depth and read count for RD filtering */
46
173
in_bam -> read_depth_per_chr [bam_alignment_core .pos ]++ ;
47
174
in_bam -> read_count ++ ;
@@ -60,7 +187,7 @@ void read_bam( bam_info* in_bam, parameters *params)
60
187
sprintf ( svfile , "%s%s_svs.bed" , params -> outdir , params -> outprefix );
61
188
fprintf ( stderr , "\nOutput SV file: %s\n" , svfile );
62
189
fpSVs = safe_fopen ( svfile ,"w" );
63
- fprintf (fpSVs ,"#CHR\tSTART_SV\tEND_SV\tSV_TYPE\tLIKELIHOOD\tCOPY_NUMBER\n" );
190
+ fprintf (fpSVs ,"#CHR\tSTART_SV\tEND_SV\tSV_TYPE\tLIKELIHOOD\tCOPY_NUMBER\tREAD_PAIR\ n" );
64
191
65
192
sprintf ( svfile_del , "%s%s_dels.bed" , params -> outdir , params -> outprefix );
66
193
fprintf ( stderr , "Output Del file: %s\n" , svfile_del );
@@ -112,6 +239,10 @@ void read_bam( bam_info* in_bam, parameters *params)
112
239
exit ( 1 );
113
240
}
114
241
242
+
243
+ /* Extract the Sample Name from the header text */
244
+ get_sample_name ( in_bam , in_bam -> bam_header -> text );
245
+
115
246
fprintf ( stderr , "\n" );
116
247
fprintf ( stderr , "Reading BAM [%s] - Chromosome: %s" , in_bam -> sample_name , in_bam -> bam_header -> target_name [chr_index_bam ]);
117
248
@@ -121,7 +252,15 @@ void read_bam( bam_info* in_bam, parameters *params)
121
252
init_rd_per_chr ( in_bam , params , chr_index );
122
253
123
254
/* Read bam file for this chromosome */
124
- count_reads_bam ( in_bam , params );
255
+ count_reads_bam ( in_bam , params , chr_index );
256
+
257
+
258
+ fprintf ( stderr , "\nReading the Reference Genome" );
259
+ readReferenceSeq (params , chr_index );
260
+
261
+ fprintf ( stderr , "\nMapping the Splits\n" );
262
+ map_split_reads (in_bam , params , chr_index );
263
+
125
264
126
265
/* Mean value (mu) calculation */
127
266
calc_mean_per_chr ( params , in_bam , chr_index );
@@ -141,7 +280,15 @@ void read_bam( bam_info* in_bam, parameters *params)
141
280
if ( not_in_bam == 1 )
142
281
continue ;
143
282
283
+ //Load Split-Reads
284
+ read_SplitReads (in_bam -> listSplitRead , params , chr_index );
285
+
286
+ //fprintf( stderr, "\nLikelihood Estimation\n");
144
287
find_SVs ( in_bam , params , fpDel , fpDup , fpSVs , chr );
288
+ free_hash_table (params );
289
+
290
+ /* Free the read depth array*/
291
+ free ( in_bam -> read_depth_per_chr );
145
292
}
146
293
fprintf ( stderr , "\n" );
147
294
fclose ( fpDel );
0 commit comments