Skip to content

Commit 3ae4d6b

Browse files
committed
Refactor
1 parent 39a2f66 commit 3ae4d6b

File tree

8 files changed

+172
-128
lines changed

8 files changed

+172
-128
lines changed

fba/__main__.py

Lines changed: 56 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,23 @@
11
# __main__.py
22

33
import sys
4-
import numpy as np
54
import pandas as pd
65
from pathlib import Path
7-
from fba.utils import open_by_suffix, get_logger
8-
from fba.parsers import parse_args
9-
from fba.extract import extract_feature_barcoding
10-
from fba.polyleven import extract_feature_barcoding_polyleven
6+
from fba import __version__
7+
from fba.levenshtein import extract_feature_barcoding_fastss
118
from fba.map import map_feature_barcoding
129
from fba.filter import filter_matching
1310
from fba.count import generate_matrix
14-
from fba.demultiplex import demultiplex_feature_barcoding
1511
from fba.qc import (
1612
summarize_sequence_content,
1713
summarize_barcode_positions,
1814
analyze_bulk
1915
)
16+
from fba.regex import extract_feature_barcoding_regex
17+
from fba.parsers import parse_args
18+
from fba.demultiplex import demultiplex_feature_barcoding
2019
from fba.kallisto import run_kallisto
20+
from fba.utils import open_by_suffix, get_logger
2121

2222

2323
def main():
@@ -41,6 +41,7 @@ def main():
4141
logger.info(banner)
4242
# print(banner)
4343

44+
logger.info(f'fba version: {__version__}')
4445
logger.info('Initiating logging ...')
4546
logger.info(
4647
f'Python version: {sys.version_info.major}.{sys.version_info.minor}')
@@ -52,67 +53,35 @@ def main():
5253
if (args.command == 'extract'):
5354
logger.info('Using extract subcommand ...')
5455

55-
if args.method.lower() == 'regex':
56-
with open_by_suffix(file_name=args.output, mode='w') as f:
57-
58-
f.write('\t'.join(
59-
[
60-
'read1_seq',
61-
'cell_barcode',
62-
'cb_matching_pos',
63-
'cb_matching_description',
64-
'read2_seq',
65-
'feature_barcode',
66-
'fb_matching_pos',
67-
'fb_matching_description'
68-
]
69-
) + '\n')
70-
71-
for out in extract_feature_barcoding(
72-
read1_file=args.read1,
73-
read2_file=args.read2,
74-
cb_file=args.whitelist,
75-
fb_file=args.feature_ref,
76-
cb_num_mismatches=args.cell_barcode_mismatches,
77-
fb_num_mismatches=args.feature_barcode_mismatches,
78-
cb_num_n_threshold=args.cb_num_n_threshold,
79-
fb_num_n_threshold=args.fb_num_n_threshold,
80-
read1_coords=args.read1_coords,
81-
read2_coords=args.read2_coords,
82-
num_threads=args.threads,
83-
chunk_size=args.chunk_size):
84-
85-
f.write(out + '\n')
86-
87-
elif args.method == 'polyleven':
88-
with open_by_suffix(file_name=args.output, mode='w') as f:
56+
with open_by_suffix(file_name=args.output, mode='w') as f:
57+
58+
f.write('\t'.join(
59+
[
60+
'read1_seq',
61+
'cell_barcode',
62+
'cb_num_mismatches',
63+
'read2_seq',
64+
'feature_barcode',
65+
'fb_num_mismatches'
66+
]
67+
) + '\n')
68+
69+
for out in extract_feature_barcoding_fastss(
70+
read1_file=args.read1,
71+
read2_file=args.read2,
72+
cb_file=args.whitelist,
73+
fb_file=args.feature_ref,
74+
cb_num_mismatches=args.cell_barcode_mismatches,
75+
fb_num_mismatches=args.feature_barcode_mismatches,
76+
read1_coords=args.read1_coords,
77+
read2_coords=args.read2_coords,
78+
output_file=args.output,
79+
cb_num_n_threshold=args.cb_num_n_threshold,
80+
fb_num_n_threshold=args.fb_num_n_threshold,
81+
exhaustive=args.exhaustive
82+
):
83+
f.write(out + '\n')
8984

90-
f.write('\t'.join(
91-
[
92-
'read1_seq',
93-
'cell_barcode',
94-
'cb_num_mismatches',
95-
'read2_seq',
96-
'feature_barcode',
97-
'fb_num_mismatches'
98-
]
99-
) + '\n')
100-
101-
for out in extract_feature_barcoding_polyleven(
102-
read1_file=args.read1,
103-
read2_file=args.read2,
104-
cb_file=args.whitelist,
105-
fb_file=args.feature_ref,
106-
cb_num_mismatches=args.cell_barcode_mismatches,
107-
fb_num_mismatches=args.feature_barcode_mismatches,
108-
read1_coords=args.read1_coords,
109-
read2_coords=args.read2_coords,
110-
cb_num_n_threshold=args.cb_num_n_threshold,
111-
fb_num_n_threshold=args.fb_num_n_threshold,
112-
num_threads=args.threads,
113-
chunk_size=args.chunk_size):
114-
115-
f.write(out + '\n')
11685
logger.info('Done.')
11786

11887
elif (args.command == 'map'):
@@ -124,22 +93,20 @@ def main():
12493
cb_file=args.whitelist,
12594
fb_file=args.feature_ref,
12695
read1_coords=args.read1_coords,
127-
num_n_ref=args.num_n_ref,
12896
num_mismatches=args.cell_barcode_mismatches,
97+
num_n_threshold=args.cb_num_n_threshold,
98+
num_n_ref=args.num_n_ref,
12999
umi_pos_start=args.umi_pos_start,
130100
umi_length=args.umi_length,
131101
umi_deduplication_method=args.umi_deduplication_method,
132102
umi_deduplication_threshold=args.umi_mismatches,
133103
mapq=args.mapq,
134104
output_directory=args.output_directory,
135-
num_threads=args.threads,
136-
chunk_size=args.chunk_size
105+
num_threads=args.threads
137106
)
138107

139-
matrix_featurecount.to_csv(
140-
path_or_buf=args.output,
141-
compression='infer'
142-
)
108+
matrix_featurecount.to_csv(path_or_buf=args.output,
109+
compression='infer')
143110
logger.info('Done.')
144111

145112
elif (args.command == 'filter'):
@@ -168,8 +135,8 @@ def main():
168135

169136
matrix_featurecount = generate_matrix(
170137
matching_file=args.input,
171-
umi_length=args.umi_length,
172138
umi_pos_start=args.umi_pos_start,
139+
umi_length=args.umi_length,
173140
umi_deduplication_method=args.umi_deduplication_method,
174141
umi_deduplication_threshold=args.umi_mismatches
175142
)
@@ -194,12 +161,15 @@ def main():
194161
elif (args.command == 'qc'):
195162
logger.info('Using qc subcommand ...')
196163

197-
if args.num_reads.isdigit():
198-
num_reads = int(args.num_reads)
199-
elif args.num_reads.upper() == 'NONE':
200-
num_reads = None
164+
if not isinstance(args.num_reads, int):
165+
if args.num_reads.isdigit():
166+
num_reads = int(args.num_reads)
167+
elif args.num_reads.upper() == 'NONE':
168+
num_reads = None
169+
else:
170+
sys.exit(1)
201171
else:
202-
sys.exit(1)
172+
num_reads = args.num_reads
203173

204174
if args.read1:
205175
_ = summarize_sequence_content(
@@ -226,15 +196,15 @@ def main():
226196
]
227197
) + '\n')
228198

229-
for out in extract_feature_barcoding(
199+
for out in extract_feature_barcoding_regex(
230200
read1_file=args.read1,
231201
read2_file=args.read2,
232202
cb_file=args.whitelist,
233203
fb_file=args.feature_ref,
234204
cb_num_mismatches=args.cell_barcode_mismatches,
235205
fb_num_mismatches=args.feature_barcode_mismatches,
236-
cb_num_n_threshold=np.Inf,
237-
fb_num_n_threshold=np.Inf,
206+
cb_num_n_threshold=args.cb_num_n_threshold,
207+
fb_num_n_threshold=args.fb_num_n_threshold,
238208
read1_coords=args.read1_coords,
239209
read2_coords=args.read2_coords,
240210
num_threads=args.threads,
@@ -259,16 +229,15 @@ def main():
259229
)
260230

261231
fb_frequency = analyze_bulk(
262-
read2_file=args.read2,
263-
read2_coords=args.read2_coords,
232+
read_file=args.read2,
233+
read_coords=args.read2_coords,
264234
fb_file=args.feature_ref,
265235
num_mismatches=args.feature_barcode_mismatches,
266-
num_n_threshold=3,
267-
num_threads=args.threads,
268-
chunk_size=args.chunk_size,
236+
num_n_threshold=args.fb_num_n_threshold,
269237
num_reads=num_reads
270238
)
271239

240+
Path(args.output_directory).mkdir(exist_ok=True)
272241
OUTPUT_FILE = 'feature_barcode_frequency.csv'
273242
OUTPUT_FILE = str(Path(args.output_directory) / OUTPUT_FILE)
274243
logger.info(f'Output file: {OUTPUT_FILE}')

fba/count.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,6 @@ def generate_matrix(matching_file,
7676
i = line.rstrip().split('\t')
7777
line_counter += 1
7878

79-
print(i)
80-
8179
read_seq = i[0]
8280
cell_barcode = i[1]
8381

fba/kallisto.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pandas as pd
55
import scipy.io
66
from pathlib import Path
7-
from .utils import (
7+
from fba.utils import (
88
open_by_suffix,
99
get_binary_path,
1010
get_logger,
@@ -21,7 +21,7 @@ def fb2fa_kallisto(x, fasta_file, t2g_file):
2121
Parameters
2222
----------
2323
x : str
24-
The location of the feature barcode file.
24+
The path and name of feature barcode file.
2525
2626
The example content of the file:
2727
CD3 CTCATTGTAACTCCT
@@ -36,7 +36,7 @@ def fb2fa_kallisto(x, fasta_file, t2g_file):
3636
CD25 TTTGTCCTGTACGCC
3737
3838
fasta_file: str
39-
The location of the generated fasta file. One mismatch at each
39+
The path and name of generated fasta file. One mismatch at each
4040
coordinate.
4141
4242
The example content of the file:
@@ -62,7 +62,7 @@ def fb2fa_kallisto(x, fasta_file, t2g_file):
6262
CTCATTGTAACTCCT
6363
6464
t2g_file: str
65-
The location of the generated t2g file.
65+
The path and name of generated t2g file.
6666
6767
The example content of the file:
6868
CD3_CTCATTGTAACTCCT_0_A CD3_CTCATTGTAACTCCT CD3_CTCATTGTAACTCCT
@@ -134,23 +134,23 @@ def fb2fa_kallisto(x, fasta_file, t2g_file):
134134
def build_kallisto_index(kallisto_index,
135135
kmer,
136136
fasta_file):
137-
"""Builds feature barcoding kallisto index.
137+
"""Builds kallisto index.
138138
139-
A wrapper of `kallisto index [arguments] FASTA-files`
139+
A wrapper of `kallisto index [arguments] FASTA-files`.
140140
141141
Parameters
142142
----------
143143
kallisto_index : str
144-
The location of the feature barcoding kallisto index.
144+
The path and name of kallisto index.
145145
kmer : int
146146
k-mer length, odd number.
147147
fasta_file : str
148-
The location of the feature barcoding fasta file.
148+
The path and name of generated fasta file.
149149
150150
Returns
151151
-------
152152
str
153-
The location of the feature barcoding kallisto index.
153+
The path and name of generated kallisto index.
154154
"""
155155

156156
cmd = [
@@ -174,7 +174,7 @@ def align_reads_kallisto(read1_file,
174174
output_directory,
175175
technology,
176176
num_threads=1):
177-
"""Builds feature barcoding kallisto index."""
177+
"""Aligns reads."""
178178

179179
cmd = [
180180
get_binary_path(binary_name='kallisto'),
@@ -292,8 +292,6 @@ def run_kallisto(read1_file,
292292
num_threads=1):
293293
"""Runs kallisto/bustools."""
294294

295-
logger.info('Initiating logging ...')
296-
297295
output_directory = Path(output_directory)
298296
output_directory.mkdir(exist_ok=True)
299297

0 commit comments

Comments
 (0)