2
2
3
3
#include < cgranges/IITree.h>
4
4
5
+ #include < algorithm>
5
6
#include < cxxopts.hpp>
7
+ #include < fstream>
6
8
#include < optional>
7
9
#include < random>
8
10
#include < ranges>
9
11
#include < set>
10
12
#include < string>
11
13
#include < variant>
12
14
#include < vector>
13
- #include < fstream>
14
- #include < algorithm>
15
-
16
15
17
16
#include " gtf.h"
18
17
#include " interval.h"
19
18
#include " mdf.h"
20
19
#include " module.h"
21
20
#include " util.h"
22
21
23
-
24
22
using std::set;
25
23
using std::string;
26
24
using std::unordered_map;
@@ -108,31 +106,31 @@ event_type_to_string(EventType type) -> string {
108
106
}
109
107
}
110
108
111
- inline auto string_to_event_type (const string &st) -> EventType {
109
+ inline auto
110
+ string_to_event_type (const string &st) -> EventType {
112
111
string str{st};
113
112
sr::transform (str.begin (), str.end (), str.begin (), ::tolower);
114
- if (str== " INVERSION" ){
113
+ if (str == " INVERSION" ) {
115
114
return EventType::INVERSION;
116
115
}
117
- else if (str== " DELETION" ){
116
+ else if (str == " DELETION" ) {
118
117
return EventType::DELETION;
119
118
}
120
- else if (str== " TRANSLOCATION" ){
119
+ else if (str == " TRANSLOCATION" ) {
121
120
return EventType::TRANSLOCATION;
122
121
}
123
- else if (str== " DUPLICATION" ){
122
+ else if (str == " DUPLICATION" ) {
124
123
return EventType::DUPLICATION;
125
124
}
126
- else if (str== " INSERTION" ){
125
+ else if (str == " INSERTION" ) {
127
126
return EventType::INSERTION;
128
127
}
129
- else if (str== " NONE" ){
128
+ else if (str == " NONE" ) {
130
129
return EventType::NONE;
131
130
}
132
- else {
131
+ else {
133
132
throw runtime_error (" Invalid event type " + str);
134
133
}
135
-
136
134
}
137
135
138
136
class chimeric_event : public ginterval {
@@ -146,12 +144,26 @@ class chimeric_event : public ginterval {
146
144
string orientation2;
147
145
int count;
148
146
// Event
149
- chimeric_event (EventType event_type) : ginterval{}, event_type{event_type}, event_ratio{0.5 }, event_name{" ::" }{}
150
-
151
- chimeric_event (const string &chr, int start, int end, const string &orientation, const string& orientation2, const string &chr2, EventType event_type, const string &event_name, int count)
152
- : ginterval{chr, start, end, orientation}, event_type{event_type}, event_ratio{0.5 }, event_name{event_name}, chr2{chr2}, orientation2{orientation2}, count{count}{}
153
- chimeric_event (const string &chr, int start, int end, const string &orientation, const string& orientation2, const string &chr2, EventType event_type, const string &event_name)
154
- : ginterval{chr, start, end, orientation}, event_type{event_type}, event_ratio{0.5 }, event_name{event_name}, chr2{chr2}, orientation2{orientation2}, count{0 }{}
147
+ chimeric_event (EventType event_type) : ginterval{}, event_type{event_type}, event_ratio{0.5 }, event_name{" ::" } {}
148
+
149
+ chimeric_event (const string &chr, int start, int end, const string &orientation, const string &orientation2,
150
+ const string &chr2, EventType event_type, const string &event_name, int count)
151
+ : ginterval{chr, start, end, orientation},
152
+ event_type{event_type},
153
+ event_ratio{0.5 },
154
+ event_name{event_name},
155
+ chr2{chr2},
156
+ orientation2{orientation2},
157
+ count{count} {}
158
+ chimeric_event (const string &chr, int start, int end, const string &orientation, const string &orientation2,
159
+ const string &chr2, EventType event_type, const string &event_name)
160
+ : ginterval{chr, start, end, orientation},
161
+ event_type{event_type},
162
+ event_ratio{0.5 },
163
+ event_name{event_name},
164
+ chr2{chr2},
165
+ orientation2{orientation2},
166
+ count{0 } {}
155
167
156
168
chimeric_event (const chimeric_event &other) = default ;
157
169
auto cut_transcript (const transcript &t, int cut_position, CUT cut) const
@@ -393,7 +405,8 @@ class chimeric_event : public ginterval {
393
405
locus get_start (bool strand = true ) const { return locus{chr, start, strand}; }
394
406
locus get_end (bool strand = true ) const { return locus{chr, end, strand}; }
395
407
friend ostream &operator <<(ostream &os, const chimeric_event &event) {
396
- os << event.chr << " \t " << event.start << " \t " << event.end << " \t " << event_type_to_string (event.event_type ) << " \t " << event.chr2 << " \t " << event.event_name ;
408
+ os << event.chr << " \t " << event.start << " \t " << event.end << " \t " << event_type_to_string (event.event_type )
409
+ << " \t " << event.chr2 << " \t " << event.event_name ;
397
410
return os;
398
411
}
399
412
};
@@ -409,20 +422,22 @@ read_fusions(std::istream &fusion_file) {
409
422
string chr1, chr2, orientation1, orientation2, event_name;
410
423
int start1, end1;
411
424
double count;
412
- iss >> chr1 >> start1 >> end1 >> orientation1 >> orientation2 >> chr2 >> event_name >> count;
425
+ iss >> chr1 >> start1 >> end1 >> orientation1 >> orientation2 >> chr2 >> event_name >> count;
413
426
std::stringstream oss;
414
427
oss << chr1 << " :" << start1 << " -" << end1 << orientation1;
415
- EventType et = [&] () -> EventType {
428
+ EventType et = [&]() -> EventType {
416
429
if (chr1 == chr2) {
417
430
if (orientation1 == orientation2) {
418
431
return EventType::DELETION;
419
- } else {
432
+ }
433
+ else {
420
434
return EventType::DUPLICATION;
421
435
}
422
- } else {
436
+ }
437
+ else {
423
438
return EventType::TRANSLOCATION;
424
439
}
425
- } ();
440
+ }();
426
441
chimeric_event fusion{chr1, start1, end1, orientation1, orientation2, chr2, et, event_name, count};
427
442
fusions.push_back (fusion);
428
443
}
@@ -578,8 +593,14 @@ class Fusion_submodule : public tksm_submodule {
578
593
579
594
locus g1 = generate_random_breakpoint (gene1);
580
595
locus g2 = generate_random_breakpoint (gene2);
581
- chimeric_event fusion{g1.get_chr (), g1.get_position (), g2.get_position (),
582
- g1.is_plus_strand () ? " +" : " -" ,g2.is_plus_strand () ? " +" : " -" , g2.get_chr (), event_type, gene1.info .at (" gene_name" ) + " ::" + gene2.info .at (" gene_name" )};
596
+ chimeric_event fusion{g1.get_chr (),
597
+ g1.get_position (),
598
+ g2.get_position (),
599
+ g1.is_plus_strand () ? " +" : " -" ,
600
+ g2.is_plus_strand () ? " +" : " -" ,
601
+ g2.get_chr (),
602
+ event_type,
603
+ gene1.info .at (" gene_name" ) + " ::" + gene2.info .at (" gene_name" )};
583
604
logd (" Generated fusion: {}, on genes {} and {}" , fusion, gene1.info [" gene_name" ],
584
605
gene2.info [" gene_name" ]);
585
606
fusions_so_far.push_back (fusion);
@@ -641,7 +662,7 @@ class Fusion_submodule : public tksm_submodule {
641
662
642
663
map<string, std::pair<gtf, vector<gtf>>> expressed_genes;
643
664
for (const auto &[name, gene] : genes) {
644
- if (gene.first .info .at (" gene_biotype" ) != " protein_coding" ) continue ;
665
+ if (gene.first .info .at (" gene_biotype" ) != " protein_coding" ) continue ;
645
666
auto iter = expression_map.find (name);
646
667
if (iter != expression_map.end () && iter->second > 0 ) {
647
668
expressed_genes[name] = gene;
@@ -756,7 +777,7 @@ class Fusion_submodule : public tksm_submodule {
756
777
const auto &genes, auto &gene_expression_map,
757
778
const unordered_map<string, unordered_map<string, double >> &tid_to_tpm,
758
779
const auto &tid_to_gene, const auto &transcript_templates, const auto &args)
759
- -> generator<std::tuple<chimeric_event, transcript>>{
780
+ -> generator<std::tuple<chimeric_event, transcript>> {
760
781
bool do_fall_back_to_expression = args.count (" expression-fallback" ) > 0 ;
761
782
auto tpm_dist = [&]() {
762
783
if (do_fall_back_to_expression) {
@@ -800,7 +821,7 @@ class Fusion_submodule : public tksm_submodule {
800
821
logd (" Skipping empty fusion transcript {}" , t.info .at (" transcript_id" ));
801
822
continue ;
802
823
}
803
- co_yield {event, t};
824
+ co_yield {event, t};
804
825
}
805
826
}
806
827
}
@@ -829,7 +850,7 @@ class Fusion_submodule : public tksm_submodule {
829
850
[[maybe_unused]] double translocation_ratio = args[" translocation-ratio" ].as <double >();
830
851
[[maybe_unused]] bool disable_deletions = args[" disable-deletions" ].as <bool >();
831
852
832
- if (args.count (" fusion-output" ) == 0 ) {
853
+ if (args.count (" fusion-output" ) == 0 ) {
833
854
loge (" No fusion output file specified" );
834
855
return 1 ;
835
856
}
@@ -859,8 +880,8 @@ class Fusion_submodule : public tksm_submodule {
859
880
transcript_id_to_genes, transcript_templates, args)) {
860
881
abundances.emplace_back (t.info .at (" transcript_id" ), t.get_abundance (), t.info .at (" CB" ));
861
882
transcript_templates.emplace (t.info .at (" transcript_id" ), t);
862
- print_tsv (fusion_out, event, t.info .at (" gene_id" ), t.info .at (" gene_name" ), t.info .at (" transcript_id" ), t. info . at ( " transcript_name " ), t. get_abundance ());
863
-
883
+ print_tsv (fusion_out, event, t.info .at (" gene_id" ), t.info .at (" gene_name" ), t.info .at (" transcript_id" ),
884
+ t. info . at ( " transcript_name " ), t. get_abundance ());
864
885
}
865
886
update_expression_of_affected_transcripts (relevant_molecules, abundances);
866
887
return 0 ;
@@ -889,4 +910,3 @@ class Fusion_submodule : public tksm_submodule {
889
910
}
890
911
}
891
912
};
892
-
0 commit comments