Skip to content

Commit

Permalink
Minor iter fix, better meta_info, simplify
Browse files Browse the repository at this point in the history
* Fix off-by-one error in len() method of InMemoryAlignmentStore this was
previously returning 1 too many elements. Practically, this is unlikely
to result in any difference, but the implementation now should return
the proper lenght. Also, `ExactSizeIterator` has been implemented in
the idiomatic way (providing and exact `size_hint()` in the `Iterator`
implementation and just declaring the iterator as an
`ExactSizeIterator`).

* Output more information in the `meta_info.json` file produced by
`oarfish`.

* Simplify the implementation of the SampleWithReplacementIterator;
removing the old iterator and replacing it with a new, simpler iterator
with direct access to the `InMemoryAlignmentStore`.
  • Loading branch information
Rob Patro committed Mar 3, 2024
1 parent 4e559ee commit b1e4f73
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 91 deletions.
46 changes: 0 additions & 46 deletions src/bootstrap.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,6 @@
use rand::distributions::{Distribution, Uniform};
use rand::Rng;

/// Wraps an iterator `I` to produce a new iterator that samples the original
/// with replacement.
pub struct SampleWithReplaceIter<'b, I: ExactSizeIterator> {
pub inds: &'b [usize],
pub iter: I,
pub idx: usize,
pub e_ptr: usize,
pub prev: Option<I::Item>,
}

impl<'b, I: ExactSizeIterator> Iterator for SampleWithReplaceIter<'b, I>
where
I::Item: Copy,
{
type Item = I::Item;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some(next_ind) = self.inds.get(self.idx) {
// always makes sure we have a previous
// element. This will get the first
// element but wont yet increment
// the pointer (e_ptr)
if self.prev.is_none() {
self.prev = self.iter.next();
}
// next element is not a duplicate
if self.e_ptr != *next_ind {
// otherwise, call next until we have the
// correct element
loop {
self.prev = self.iter.next();
self.e_ptr += 1;
if self.e_ptr == *next_ind {
break;
}
}
}
self.idx += 1;
self.prev
} else {
None
}
}
}

pub fn get_sample_inds<R: Rng + ?Sized>(n: usize, rng: &mut R) -> Vec<usize> {
let dist = Uniform::new(0, n);
let mut inds: Vec<usize> = dist.sample_iter(rng).take(n).collect();
Expand Down
8 changes: 1 addition & 7 deletions src/em.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,13 +237,7 @@ pub fn do_bootstrap(em_info: &EMInfo) -> Vec<f64> {

// function that produces an iterator over the
// scored alignments on demand
let make_iter = || bootstrap::SampleWithReplaceIter {
inds: &inds,
iter: em_info.eq_map.iter(),
idx: 0,
e_ptr: 0,
prev: None,
};
let make_iter = || em_info.eq_map.random_sampling_iter(&inds);

do_em(em_info, make_iter, false)
}
Expand Down
49 changes: 36 additions & 13 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::{
};

use num_format::{Locale, ToFormattedString};
use serde::Serialize;
use serde_json::json;
use tracing::info;
use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter};

Expand All @@ -30,14 +32,14 @@ use crate::util::write_function::{write_infrep_file, write_output};
/// These represent different "meta-options", specific settings
/// for all of the different filters that should be applied in
/// different cases.
#[derive(Clone, Debug, clap::ValueEnum)]
#[derive(Clone, Debug, clap::ValueEnum, Serialize)]
enum FilterGroup {
NoFilters,
NanocountFilters,
}

/// accurate transcript quantification from long-read RNA-seq data
#[derive(Parser, Debug)]
#[derive(Parser, Debug, Serialize)]
#[clap(author, version, about, long_about = None)]
struct Args {
/// be quiet (i.e. don't output log messages that aren't at least warnings)
Expand Down Expand Up @@ -168,6 +170,31 @@ fn get_filter_opts(args: &Args) -> AlignmentFilters {
}
}

fn get_json_info(args: &Args, emi: &EMInfo) -> serde_json::Value {
let prob = if args.model_coverage {
"binomial"
} else {
"no_coverage"
};

json!({
"prob_model" : prob,
"num_bins" : args.bins,
"filter_options" : &emi.eq_map.filter_opts,
"discard_table" : &emi.eq_map.discard_table,
"alignments": &args.alignments,
"output": &args.output,
"verbose": &args.verbose,
"quiet": &args.quiet,
"em_max_iter": &args.max_em_iter,
"em_convergence_thresh": &args.convergence_thresh,
"threads": &args.threads,
"filter_group": &args.filter_group,
"short_quant": &args.short_quant,
"num_bootstraps": &args.num_bootstraps
})
}

fn main() -> anyhow::Result<()> {
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::INFO.into())
Expand Down Expand Up @@ -254,8 +281,8 @@ fn main() -> anyhow::Result<()> {

// if we are seeding the quantification estimates with short read
// abundances, then read those in here.
let init_abundances = args.short_quant.map(|sr_path| {
read_short_quant_vec(&sr_path, &txps_name).unwrap_or_else(|e| panic!("{}", e))
let init_abundances = args.short_quant.as_ref().map(|sr_path| {
read_short_quant_vec(sr_path, &txps_name).unwrap_or_else(|e| panic!("{}", e))
});

// wrap up all of the relevant information we need for estimation
Expand All @@ -274,16 +301,12 @@ fn main() -> anyhow::Result<()> {
em::em(&emi, args.threads)
};

// prepare the JSON object we'll write
// to meta_info.json
let json_info = get_json_info(&args, &emi);

// write the output
write_output(
&args.output,
&args.model_coverage,
args.bins,
args.num_bootstraps,
&emi,
&header,
&counts,
)?;
write_output(&args.output, json_info, &header, &counts)?;

// if the user requested bootstrap replicates,
// compute and write those out now.
Expand Down
59 changes: 53 additions & 6 deletions src/util/oarfish_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,44 @@ pub struct InMemoryAlignmentStore<'h> {
impl<'h> InMemoryAlignmentStore<'h> {
#[inline]
pub fn len(&self) -> usize {
self.boundaries.len()
self.boundaries.len().saturating_sub(1)
}
}

pub struct InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'h, 'b> {
pub store: &'a InMemoryAlignmentStore<'h>,
pub rand_inds: std::slice::Iter<'b, usize>,
}

impl<'a, 'b, 'h> Iterator for InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'b, 'h> {
type Item = (&'a [AlnInfo], &'a [f32], &'a [f64]);

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if let Some(next_ind) = self.rand_inds.next() {
let start = self.store.boundaries[*next_ind];
let end = self.store.boundaries[*next_ind + 1];
Some((
&self.store.alignments[start..end],
&self.store.as_probabilities[start..end],
&self.store.coverage_probabilities[start..end],
))
} else {
None
}
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.rand_inds.len(), Some(self.rand_inds.len()))
}
}

impl<'a, 'b, 'h> ExactSizeIterator
for InMemoryAlignmentStoreSamplingWithReplacementIter<'a, 'b, 'h>
{
}

pub struct InMemoryAlignmentStoreIter<'a, 'h> {
pub store: &'a InMemoryAlignmentStore<'h>,
pub idx: usize,
Expand All @@ -267,15 +301,15 @@ impl<'a, 'h> Iterator for InMemoryAlignmentStoreIter<'a, 'h> {
))
}
}
}

impl<'a, 'h> ExactSizeIterator for InMemoryAlignmentStoreIter<'a, 'h> {
#[inline]
fn len(&self) -> usize {
self.store.boundaries.len()
fn size_hint(&self) -> (usize, Option<usize>) {
(self.store.len(), Some(self.store.len()))
}
}

impl<'a, 'h> ExactSizeIterator for InMemoryAlignmentStoreIter<'a, 'h> {}

impl<'h> InMemoryAlignmentStore<'h> {
pub fn new(fo: AlignmentFilters, header: &'h Header) -> Self {
InMemoryAlignmentStore {
Expand All @@ -296,6 +330,19 @@ impl<'h> InMemoryAlignmentStore<'h> {
}
}

pub fn random_sampling_iter<'a, 'b>(
&'a self,
inds: &'b [usize],
) -> InMemoryAlignmentStoreSamplingWithReplacementIter
where
'b: 'a,
{
InMemoryAlignmentStoreSamplingWithReplacementIter {
store: self,
rand_inds: inds.iter(),
}
}

pub fn add_group<T: sam::alignment::record::Record + std::fmt::Debug>(
&mut self,
txps: &mut [TranscriptInfo],
Expand Down Expand Up @@ -326,7 +373,7 @@ impl<'h> InMemoryAlignmentStore<'h> {

pub fn num_aligned_reads(&self) -> usize {
if !self.boundaries.is_empty() {
self.boundaries.len() - 1
self.len()
} else {
0
}
Expand Down
21 changes: 2 additions & 19 deletions src/util/write_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use arrow2::{
datatypes::{Field, Schema},
};
use path_tools::WithAdditionalExtension;
use serde_json::json;

use std::path::{Path, PathBuf};
use std::{
fs,
Expand All @@ -20,10 +20,7 @@ use std::{
//this part is taken from dev branch
pub fn write_output(
output: &PathBuf,
prob_flag: &bool,
bins: u32,
num_bootstraps: u32,
em_info: &EMInfo,
info: serde_json::Value,
header: &noodles_sam::header::Header,
counts: &[f64],
) -> io::Result<()> {
Expand All @@ -37,21 +34,7 @@ pub fn write_output(
}
}

let prob = if *prob_flag {
"binomial"
} else {
"no_coverage"
};

{
let info = json!({
"prob_model" : prob,
"num_bins" : bins,
"num_bootstraps" : num_bootstraps,
"filter_options" : em_info.eq_map.filter_opts,
"discard_table" : em_info.eq_map.discard_table
});

let info_path = output.with_additional_extension(".meta_info.json");
let write = OpenOptions::new()
.write(true)
Expand Down

0 comments on commit b1e4f73

Please sign in to comment.