Skip to content

Commit eb8bca8

Browse files
committed
feat: add gc content, read length and N base count
in output for subcli flat
1 parent d995dbb commit eb8bca8

File tree

3 files changed

+38
-1
lines changed

3 files changed

+38
-1
lines changed

src/cli/flatten.rs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ pub fn flatten_fq(
99
out: Option<&String>,
1010
flag: u8,
1111
sep: char,
12+
gap: bool,
13+
len: bool,
14+
gc: bool,
1215
compression_level: u32,
1316
) -> Result<(), Error> {
1417
let start = Instant::now();
@@ -37,11 +40,30 @@ pub fn flatten_fq(
3740

3841
for rec in fq_reader.records().flatten() {
3942
let read = vec![rec.id().as_bytes(), rec.seq(), "+".as_bytes(), rec.qual()];
40-
let res = fields.iter().map(|idx| read[*idx]).collect::<Vec<&[u8]>>();
43+
let res = fields
44+
.iter()
45+
.map(|idx| read[*idx])
46+
.collect::<Vec<&[u8]>>();
47+
4148
let mut out = Vec::new();
4249
for x in res {
4350
out.push(std::str::from_utf8(x)?.to_string());
4451
}
52+
if gap {
53+
out.push(rec.seq().iter().filter(|x| *x == &b'N').count().to_string());
54+
}
55+
if len {
56+
out.push(rec.seq().len().to_string());
57+
}
58+
if gc {
59+
let gc_count = rec
60+
.seq()
61+
.iter()
62+
.filter(|x| *x == &b'G' || *x == &b'C')
63+
.count();
64+
let gc_ratio = format!("{:.2}",gc_count as f64 / rec.seq().len() as f64 * 100.0);
65+
out.push(gc_ratio);
66+
}
4567
out_writer.write_all(out.join(sep.to_string().as_str()).as_bytes())?;
4668
out_writer.write_all("\n".as_bytes())?;
4769
}

src/command.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,15 @@ pub enum Subcli {
435435
/// output separator, can be ",", ";",
436436
#[arg(short = 's', long = "sep", default_value_t = '\t', value_name = "CHAR")]
437437
sep: char,
438+
/// if specified, add N base count in output
439+
#[arg(short = 'n', long = "gap-n", help_heading = Some("FLAGS"))]
440+
gap: bool,
441+
/// if specified, add read length in output
442+
#[arg(short = 'l', long = "length", help_heading = Some("FLAGS"))]
443+
len: bool,
444+
/// if specified, add GC content(%) in output
445+
#[arg(short = 'g', long = "gc-content", help_heading = Some("FLAGS"))]
446+
gc: bool,
438447
/// output file name or write to stdout, file ending in .gz/.bz2/.xz will be compressed automatically
439448
#[arg(short = 'o', long = "out", value_name = "FILE")]
440449
out: Option<String>,

src/main.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,19 @@ fn main() -> Result<(), Error> {
170170
input,
171171
flag,
172172
sep,
173+
gap,
174+
len,
175+
gc,
173176
out,
174177
} => {
175178
flatten_fq(
176179
input.as_ref(),
177180
out.as_ref(),
178181
flag,
179182
sep,
183+
gap,
184+
len,
185+
gc,
180186
arg.compression_level,
181187
)?;
182188
}

0 commit comments

Comments
 (0)