@@ -1,23 +1,23 @@
# __main__.py

import sys
- import numpy as np
import pandas as pd
from pathlib import Path
- from fba.utils import open_by_suffix, get_logger
- from fba.parsers import parse_args
- from fba.extract import extract_feature_barcoding
- from fba.polyleven import extract_feature_barcoding_polyleven
+ from fba import __version__
+ from fba.levenshtein import extract_feature_barcoding_fastss
from fba.map import map_feature_barcoding
from fba.filter import filter_matching
from fba.count import generate_matrix
- from fba.demultiplex import demultiplex_feature_barcoding
from fba.qc import (
    summarize_sequence_content,
    summarize_barcode_positions,
    analyze_bulk
)
+ from fba.regex import extract_feature_barcoding_regex
+ from fba.parsers import parse_args
+ from fba.demultiplex import demultiplex_feature_barcoding
from fba.kallisto import run_kallisto
+ from fba.utils import open_by_suffix, get_logger


def main():
@@ -41,6 +41,7 @@ def main():
    logger.info(banner)
    # print(banner)

+     logger.info(f'fba version: {__version__}')
    logger.info('Initiating logging ...')
    logger.info(
        f'Python version: {sys.version_info.major}.{sys.version_info.minor}')
@@ -52,67 +53,35 @@ def main():
    if (args.command == 'extract'):
        logger.info('Using extract subcommand ...')

-         if args.method.lower() == 'regex':
-             with open_by_suffix(file_name=args.output, mode='w') as f:
-
-                 f.write('\t'.join(
-                     [
-                         'read1_seq',
-                         'cell_barcode',
-                         'cb_matching_pos',
-                         'cb_matching_description',
-                         'read2_seq',
-                         'feature_barcode',
-                         'fb_matching_pos',
-                         'fb_matching_description'
-                     ]
-                 ) + '\n')
-
-                 for out in extract_feature_barcoding(
-                         read1_file=args.read1,
-                         read2_file=args.read2,
-                         cb_file=args.whitelist,
-                         fb_file=args.feature_ref,
-                         cb_num_mismatches=args.cell_barcode_mismatches,
-                         fb_num_mismatches=args.feature_barcode_mismatches,
-                         cb_num_n_threshold=args.cb_num_n_threshold,
-                         fb_num_n_threshold=args.fb_num_n_threshold,
-                         read1_coords=args.read1_coords,
-                         read2_coords=args.read2_coords,
-                         num_threads=args.threads,
-                         chunk_size=args.chunk_size):
-
-                     f.write(out + '\n')
-
-         elif args.method == 'polyleven':
-             with open_by_suffix(file_name=args.output, mode='w') as f:
+         with open_by_suffix(file_name=args.output, mode='w') as f:
+
+             f.write('\t'.join(
+                 [
+                     'read1_seq',
+                     'cell_barcode',
+                     'cb_num_mismatches',
+                     'read2_seq',
+                     'feature_barcode',
+                     'fb_num_mismatches'
+                 ]
+             ) + '\n')
+
+             for out in extract_feature_barcoding_fastss(
+                     read1_file=args.read1,
+                     read2_file=args.read2,
+                     cb_file=args.whitelist,
+                     fb_file=args.feature_ref,
+                     cb_num_mismatches=args.cell_barcode_mismatches,
+                     fb_num_mismatches=args.feature_barcode_mismatches,
+                     read1_coords=args.read1_coords,
+                     read2_coords=args.read2_coords,
+                     output_file=args.output,
+                     cb_num_n_threshold=args.cb_num_n_threshold,
+                     fb_num_n_threshold=args.fb_num_n_threshold,
+                     exhaustive=args.exhaustive
+             ):
+                 f.write(out + '\n')

-             f.write('\t'.join(
-                 [
-                     'read1_seq',
-                     'cell_barcode',
-                     'cb_num_mismatches',
-                     'read2_seq',
-                     'feature_barcode',
-                     'fb_num_mismatches'
-                 ]
-             ) + '\n')
-
-             for out in extract_feature_barcoding_polyleven(
-                     read1_file=args.read1,
-                     read2_file=args.read2,
-                     cb_file=args.whitelist,
-                     fb_file=args.feature_ref,
-                     cb_num_mismatches=args.cell_barcode_mismatches,
-                     fb_num_mismatches=args.feature_barcode_mismatches,
-                     read1_coords=args.read1_coords,
-                     read2_coords=args.read2_coords,
-                     cb_num_n_threshold=args.cb_num_n_threshold,
-                     fb_num_n_threshold=args.fb_num_n_threshold,
-                     num_threads=args.threads,
-                     chunk_size=args.chunk_size):
-
-                 f.write(out + '\n')
        logger.info('Done.')

    elif (args.command == 'map'):
@@ -124,22 +93,20 @@ def main():
            cb_file=args.whitelist,
            fb_file=args.feature_ref,
            read1_coords=args.read1_coords,
-             num_n_ref=args.num_n_ref,
            num_mismatches=args.cell_barcode_mismatches,
+             num_n_threshold=args.cb_num_n_threshold,
+             num_n_ref=args.num_n_ref,
            umi_pos_start=args.umi_pos_start,
            umi_length=args.umi_length,
            umi_deduplication_method=args.umi_deduplication_method,
            umi_deduplication_threshold=args.umi_mismatches,
            mapq=args.mapq,
            output_directory=args.output_directory,
-             num_threads=args.threads,
-             chunk_size=args.chunk_size
+             num_threads=args.threads
        )

-         matrix_featurecount.to_csv(
-             path_or_buf=args.output,
-             compression='infer'
-         )
+         matrix_featurecount.to_csv(path_or_buf=args.output,
+                                    compression='infer')
        logger.info('Done.')

    elif (args.command == 'filter'):
@@ -168,8 +135,8 @@ def main():

        matrix_featurecount = generate_matrix(
            matching_file=args.input,
-             umi_length=args.umi_length,
            umi_pos_start=args.umi_pos_start,
+             umi_length=args.umi_length,
            umi_deduplication_method=args.umi_deduplication_method,
            umi_deduplication_threshold=args.umi_mismatches
        )
@@ -194,12 +161,15 @@ def main():
    elif (args.command == 'qc'):
        logger.info('Using qc subcommand ...')

-         if args.num_reads.isdigit():
-             num_reads = int(args.num_reads)
-         elif args.num_reads.upper() == 'NONE':
-             num_reads = None
+         if not isinstance(args.num_reads, int):
+             if args.num_reads.isdigit():
+                 num_reads = int(args.num_reads)
+             elif args.num_reads.upper() == 'NONE':
+                 num_reads = None
+             else:
+                 sys.exit(1)
        else:
-             sys.exit(1)
+             num_reads = args.num_reads

        if args.read1:
            _ = summarize_sequence_content(
@@ -226,15 +196,15 @@ def main():
                    ]
                ) + '\n')

-                 for out in extract_feature_barcoding(
+                 for out in extract_feature_barcoding_regex(
                        read1_file=args.read1,
                        read2_file=args.read2,
                        cb_file=args.whitelist,
                        fb_file=args.feature_ref,
                        cb_num_mismatches=args.cell_barcode_mismatches,
                        fb_num_mismatches=args.feature_barcode_mismatches,
-                         cb_num_n_threshold=np.Inf,
-                         fb_num_n_threshold=np.Inf,
+                         cb_num_n_threshold=args.cb_num_n_threshold,
+                         fb_num_n_threshold=args.fb_num_n_threshold,
                        read1_coords=args.read1_coords,
                        read2_coords=args.read2_coords,
                        num_threads=args.threads,
@@ -259,16 +229,15 @@ def main():
        )

        fb_frequency = analyze_bulk(
-             read2_file=args.read2,
-             read2_coords=args.read2_coords,
+             read_file=args.read2,
+             read_coords=args.read2_coords,
            fb_file=args.feature_ref,
            num_mismatches=args.feature_barcode_mismatches,
-             num_n_threshold=3,
-             num_threads=args.threads,
-             chunk_size=args.chunk_size,
+             num_n_threshold=args.fb_num_n_threshold,
            num_reads=num_reads
        )

+         Path(args.output_directory).mkdir(exist_ok=True)
        OUTPUT_FILE = 'feature_barcode_frequency.csv'
        OUTPUT_FILE = str(Path(args.output_directory) / OUTPUT_FILE)
        logger.info(f'Output file: {OUTPUT_FILE}')
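
For reference, a minimal sketch of driving the new FastSS-based extractor the same way the rewritten extract subcommand does. The function name and keyword arguments mirror the call in the diff above; the file paths, coordinates, mismatch settings, and exhaustive flag value below are hypothetical placeholders, not values from this commit.

# Sketch only: mirrors the extract subcommand's use of extract_feature_barcoding_fastss.
# All file names and numeric settings here are hypothetical placeholders.
from fba.levenshtein import extract_feature_barcoding_fastss
from fba.utils import open_by_suffix

OUT = 'feature_barcoding_output.tsv.gz'

with open_by_suffix(file_name=OUT, mode='w') as f:
    # Same six-column header the subcommand writes.
    f.write('\t'.join([
        'read1_seq', 'cell_barcode', 'cb_num_mismatches',
        'read2_seq', 'feature_barcode', 'fb_num_mismatches'
    ]) + '\n')

    for out in extract_feature_barcoding_fastss(
            read1_file='R1.fastq.gz',
            read2_file='R2.fastq.gz',
            cb_file='cell_barcodes.txt',
            fb_file='feature_ref.tsv',
            cb_num_mismatches=1,
            fb_num_mismatches=1,
            read1_coords=(0, 16),    # cell barcode region on read 1 (coordinate format assumed)
            read2_coords=(10, 25),   # feature barcode region on read 2 (coordinate format assumed)
            output_file=OUT,
            cb_num_n_threshold=3,
            fb_num_n_threshold=3,
            exhaustive=False):
        f.write(out + '\n')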