@@ -50,8 +50,7 @@ void free_hash_table_kmer(parameters *params)
50
50
51
51
int is_kmer_valid_likelihood (char * str )
52
52
{
53
-
54
- int i , l ;
53
+ int i , l = -1 ;
55
54
l = (int ) strlen (str );
56
55
57
56
if (l != KMER )
@@ -122,7 +121,6 @@ int read_kmer_jellyfish( parameters *params)
122
121
rewind (fp_reads );
123
122
//fprintf(stderr,"There are %d lines", mer_count);
124
123
125
- //fprintf(stderr,"HEREE %d\n", mer_count);
126
124
hash_size = firstPrime (mer_count * 1.2 );
127
125
params -> hash_size_kmer = hash_size ;
128
126
@@ -225,7 +223,7 @@ char* read_ref_seq( parameters *params, char* chr_name, int start, int end)
225
223
226
224
return ref_seq ;
227
225
}
228
-
226
+ /*
229
227
int query_jellyfish(char* str, int count, char type)
230
228
{
231
229
int k = 0, z, freq = 0, freq_sum = 0;
@@ -266,8 +264,10 @@ int query_jellyfish(char* str, int count, char type)
266
264
all_svs_dup[count].k_mer += freq_sum;
267
265
268
266
return 1;
269
- }
270
- int calculate_kmers_jellyfish (parameters * params , char * chr_name , int count , int total_variant , char type )
267
+ }*/
268
+
269
+
270
+ /*int calculate_kmers_jellyfish(parameters *params, char* chr_name, int count, int total_variant, char type)
271
271
{
272
272
int variation_length, i, j = 0, freq;
273
273
char* seq = NULL, *seq_tmp = NULL;
@@ -321,7 +321,7 @@ int calculate_kmers_jellyfish(parameters *params, char* chr_name, int count, int
321
321
seq = NULL;
322
322
}
323
323
return 1;
324
- }
324
+ }*/
325
325
326
326
327
327
int kmer_count_interval (parameters * params , int start , int end )
@@ -332,8 +332,9 @@ int kmer_count_interval(parameters *params, int start, int end)
332
332
char seq [KMERWINDOWSIZE + 10 ];
333
333
//char* seq = NULL;
334
334
unsigned int hash_val1 , hash_val2 ;
335
- HashInfo * tmp ;
336
- int k_mer_count = 0 ;
335
+ HashInfo * tmp = NULL ;
336
+ int kmer_count_forward = 0 , kmer_count_reverse = 0 ;
337
+ int is_kmer_valid = 0 ;
337
338
338
339
339
340
variation_length = end - start + 1 ;
@@ -345,74 +346,76 @@ int kmer_count_interval(parameters *params, int start, int end)
345
346
j ++ ;
346
347
}
347
348
seq [j ] = '\0' ;
348
- //fprintf(stderr,"%s\n", seq);
349
349
350
- //fprintf(stderr,"%d of %d (size = %d)\n", count, total_variant, variation_length);
351
350
352
351
for (i = 0 ; i < variation_length - KMER ; i += KMERSLIDE )
353
352
{
354
- for (j = 0 ; j < 2 ; j ++ )
353
+ // For forward strand
354
+ seq_tmp = substring (seq , i , KMER );
355
+ //if(seq_tmp == NULL)
356
+ //continue;
357
+
358
+ is_kmer_valid = is_kmer_valid_likelihood (seq_tmp );
359
+ if (!is_kmer_valid )
355
360
{
356
- if (j == 0 )
357
- {
358
- seq_tmp = substring (seq , i , KMER );
359
- if (seq_tmp == NULL )
360
- continue ;
361
+ free (seq_tmp );
362
+ continue ;
363
+ }
361
364
362
- if (! is_kmer_valid_likelihood (seq_tmp ))
363
- continue ;
365
+ MurmurHash3_x86_128 ( seq_tmp , strlen (seq_tmp ), 42 , hash );
366
+ hash_val1 = ( unsigned ) hash [ 0 ] % params -> hash_size_kmer ;
364
367
365
- MurmurHash3_x86_128 (seq_tmp , strlen (seq_tmp ), 42 , hash );
366
- //fprintf(stderr,"%u - \n", hash[0]);
367
- //fprintf(stderr,"%li - \n", params->hash_size_kmer);
368
- hash_val1 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
368
+ MurmurHash3_x86_128 (seq_tmp , strlen (seq_tmp ), 11 , hash );
369
+ hash_val2 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
370
+ //fprintf(stderr,"%u - %u\n", hash_val1, hash_val2);
369
371
370
- MurmurHash3_x86_128 (seq_tmp , strlen (seq_tmp ), 11 , hash );
371
- hash_val2 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
372
- //fprintf(stderr,"%u - %u\n", hash_val1, hash_val2);
373
- }
374
- else
372
+ tmp = hash_table_kmer [hash_val1 ];
373
+ while (tmp != NULL )
374
+ {
375
+ if (tmp -> hash2 == hash_val2 )
375
376
{
376
- seq_tmp_rev = reverseComplement (seq_tmp );
377
-
378
- if (seq_tmp_rev == NULL )
379
- continue ;
377
+ kmer_count_forward += tmp -> freq ;
378
+ break ;
379
+ }
380
+ tmp = tmp -> next ;
381
+ }
380
382
381
- if (!is_kmer_valid_likelihood (seq_tmp_rev ))
382
- continue ;
383
+ //For reverse strand
384
+ seq_tmp_rev = reverseComplement (seq_tmp );
385
+ //if(seq_tmp_rev == NULL)
386
+ //continue;
383
387
384
- MurmurHash3_x86_128 (seq_tmp_rev , strlen (seq_tmp_rev ), 42 , hash );
385
- hash_val1 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
388
+ is_kmer_valid = is_kmer_valid_likelihood (seq_tmp_rev );
389
+ if (!is_kmer_valid )
390
+ {
391
+ free (seq_tmp_rev );
392
+ continue ;
393
+ }
386
394
387
- MurmurHash3_x86_128 (seq_tmp_rev , strlen (seq_tmp_rev ), 11 , hash );
388
- hash_val2 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
395
+ MurmurHash3_x86_128 (seq_tmp_rev , strlen (seq_tmp_rev ), 42 , hash );
396
+ hash_val1 = (unsigned ) hash [0 ] % params -> hash_size_kmer ;
389
397
390
- free ( seq_tmp );
391
- seq_tmp = NULL ;
398
+ MurmurHash3_x86_128 ( seq_tmp_rev , strlen ( seq_tmp_rev ), 11 , hash );
399
+ hash_val2 = ( unsigned ) hash [ 0 ] % params -> hash_size_kmer ;
392
400
393
- free (seq_tmp_rev );
394
- seq_tmp_rev = NULL ;
395
- }
401
+ free (seq_tmp );
402
+ seq_tmp = NULL ;
396
403
397
- tmp = hash_table_kmer [hash_val1 ];
404
+ free (seq_tmp_rev );
405
+ seq_tmp_rev = NULL ;
398
406
399
- while (tmp != NULL )
407
+ tmp = hash_table_kmer [hash_val1 ];
408
+ while (tmp != NULL )
409
+ {
410
+ if (tmp -> hash2 == hash_val2 )
400
411
{
401
- if (tmp -> hash2 == hash_val2 )
402
- {
403
- k_mer_count += tmp -> freq ;
404
- break ;
405
- }
406
- tmp = tmp -> next ;
412
+ kmer_count_reverse += tmp -> freq ;
413
+ break ;
407
414
}
415
+ tmp = tmp -> next ;
408
416
}
409
417
}
410
- /*if(seq != NULL)
411
- {
412
- free(seq);
413
- seq = NULL;
414
- }*/
415
- return k_mer_count ;
418
+ return (kmer_count_forward + kmer_count_reverse );
416
419
}
417
420
418
421
void init_kmer_per_chr ( bam_info * in_bam , parameters * param , int chr_index )
@@ -421,10 +424,11 @@ void init_kmer_per_chr( bam_info* in_bam, parameters* param, int chr_index)
421
424
memset (in_bam -> kmer , 0 , (param -> this_sonic -> chromosome_lengths [chr_index ] * sizeof (short )));
422
425
}
423
426
424
- void calc_kmer_counts (bam_info * in_bam , parameters * params , int chr_index )
427
+ long calc_kmer_counts (bam_info * in_bam , parameters * params , int chr_index )
425
428
{
426
- int i , j , end ;
427
- short tmp ;
429
+ int i , j , end = -1 ;
430
+ short tmp = 0 ;
431
+ long total_kmers = 0 ;
428
432
429
433
for ( i = 0 ; i < params -> this_sonic -> chromosome_lengths [chr_index ]; i += KMERWINDOWSLIDE )
430
434
{
@@ -433,11 +437,13 @@ void calc_kmer_counts(bam_info *in_bam, parameters *params, int chr_index)
433
437
else
434
438
end = params -> this_sonic -> chromosome_lengths [chr_index ];
435
439
440
+ tmp = 0 ;
436
441
tmp = (short ) kmer_count_interval (params , i , end );
437
-
442
+ total_kmers += ( long ) tmp ;
438
443
for (j = 0 ; j < KMERWINDOWSLIDE ; j ++ )
439
444
in_bam -> kmer [i + j ] = tmp ;
440
445
}
446
+ return total_kmers ;
441
447
}
442
448
443
449
void calc_expected_kmer (bam_info * in_bam , parameters * params , int chr_index )
0 commit comments