Skip to content

Commit

Permalink
Remove dependency from Intel TBB
Browse files Browse the repository at this point in the history
  • Loading branch information
yp committed Jul 1, 2020
1 parent 6f5a499 commit bd93aa9
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 109 deletions.
13 changes: 8 additions & 5 deletions BloomfilterFiller.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,19 @@ class BloomfilterFiller {
public:
BloomfilterFiller(BF *_bf) : bf(_bf) {}

void operator()(vector<uint64_t> *positions) const {
if(positions) {
for(const auto & p : *positions) {
void operator()(vector<uint64_t> *positions) {
{
std::lock_guard<std::mutex> lock(mtx);
for(const auto p : *positions) {
bf->add_at(p % bf->_size);
}
delete positions;
}
delete positions;
}

private:
BF* bf;
BF *const bf;
std::mutex mtx;

};
#endif
19 changes: 10 additions & 9 deletions FastaSplitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,40 +23,41 @@
#define FASTA_SPLITTER_HPP

#include "kseq.h"
#include <zlib.h>
#include <string>
#include <vector>
#include <memory>

#include <tbb/pipeline.h>
#include <mutex>

using namespace std;

class FastaSplitter {
public:
FastaSplitter(kseq_t * const _seq, const int _maxnum)
: seq(_seq), maxnum(_maxnum)
FastaSplitter(kseq_t * const _seq, const int _maxnum, vector<string>* const _ids = nullptr)
: seq(_seq), maxnum(_maxnum), ids(_ids)
{ }

~FastaSplitter() {
}

vector<pair<string, string>>* operator()(tbb::flow_control &fc) const {
vector<pair<string, string>>* operator()() {
std::lock_guard<std::mutex> lock(mtx);
vector<pair<string, string>>* const fasta = new vector<pair<string, string>>();
fasta->reserve(maxnum);
int seq_len;
while(fasta->size() < maxnum && (seq_len = kseq_read(seq)) >= 0) {
if (ids != nullptr) ids->push_back(seq->name.s);
fasta->emplace_back(seq->name.s, seq->seq.s);
}
if(fasta->size() > 0) return fasta;
fc.stop();
if (!fasta->empty()) return fasta;
delete fasta;
return NULL;
return nullptr;
}

private:
kseq_t * const seq;
const size_t maxnum;
vector<string>* const ids;
std::mutex mtx;

};

Expand Down
31 changes: 14 additions & 17 deletions FastqSplitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
#include <string>
#include <vector>
#include <memory>

#include <tbb/pipeline.h>
#include <mutex>

using namespace std;

Expand All @@ -45,22 +44,22 @@ class FastqSplitter {
~FastqSplitter() {
}

output_t* operator()(tbb::flow_control &fc) const {
output_t* const fastq = new output_t();
fastq->reserve(maxnum);
void operator()(output_t& fastq) {
std::lock_guard<std::mutex> lock(mtx);
fastq.reserve(maxnum);
int seq_len1, seq_len2;
if (min_quality == 0) {
if (seq2 == nullptr) {
while (fastq->size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq->push_back({
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq.push_back({
seq1->seq.s,
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
empty_el }
});
}
} else {
while (fastq->size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq->push_back({
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq.push_back({
string(seq1->seq.s) + "N" + string(seq2->seq.s),
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
{ seq2->name.s, full_mode ? seq2->seq.s : "", full_mode ? seq2->qual.s : "" } }
Expand All @@ -70,16 +69,16 @@ class FastqSplitter {
} else {
const char mq = min_quality + 33;
if (seq2 == nullptr) {
while (fastq->size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq->push_back({
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq.push_back({
mask_seq(seq1->seq.s, seq1->qual.s, seq1->qual.l, mq),
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
empty_el }
});
}
} else {
while (fastq->size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq->push_back({
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq.push_back({
mask_seq(
string(seq1->seq.s) + "N" + string(seq2->seq.s),
string(seq1->qual.s) + "\33" + string(seq2->qual.s),
Expand All @@ -91,18 +90,16 @@ class FastqSplitter {
}
}
}
if(fastq->size() > 0) return fastq;
fc.stop();
delete fastq;
return nullptr;
}

private:
kseq_t * const seq1;
kseq_t * const seq2;
const size_t maxnum;
const char min_quality;
const bool full_mode;
const sharseq_t empty_el;
std::mutex mtx;

static string mask_seq(string seq, const char* const qual, const size_t l, const char min_quality) {
for (size_t i = 0; i < l; ++i) {
Expand Down
57 changes: 27 additions & 30 deletions KmerBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,41 @@ class KmerBuilder {
KmerBuilder(size_t _k) : k(_k) {}

vector<uint64_t>* operator()(vector<pair<string, string>> *texts) const {
if(texts) {
vector<uint64_t>* kmer_pos = new vector<uint64_t>();
uint64_t kmer, rckmer, key;
for(const auto & p : *texts) {
if(p.second.size() >= k) {
int _pos = 0;
kmer = build_kmer(p.second, _pos, k);
if(kmer == (uint64_t)-1) continue;
rckmer = revcompl(kmer, k);
key = min(kmer, rckmer);
kmer_pos->push_back(_get_hash(key));
vector<uint64_t>* kmer_pos = new vector<uint64_t>();
uint64_t kmer, rckmer, key;
for(const auto & p : *texts) {
if(p.second.size() >= k) {
int _pos = 0;
kmer = build_kmer(p.second, _pos, k);
if(kmer == (uint64_t)-1) continue;
rckmer = revcompl(kmer, k);
key = min(kmer, rckmer);
kmer_pos->push_back(_get_hash(key));

for (int pos = _pos; pos < (int)p.second.size(); ++pos) {
uint8_t new_char = to_int[p.second[pos]];
if(new_char == 0) { // Found a char different from A, C, G, T
++pos; // we skip this character then we build a new kmer
kmer = build_kmer(p.second, pos, k);
if(kmer == (uint64_t)-1) break;
rckmer = revcompl(kmer, k);
--pos; // p must point to the ending position of the kmer, it will be incremented by the for
} else {
--new_char; // A is 1 but it should be 0
kmer = lsappend(kmer, new_char, k);
rckmer = rsprepend(rckmer, reverse_char(new_char), k);
}
key = min(kmer, rckmer);
kmer_pos->push_back(_get_hash(key));
for (int pos = _pos; pos < (int)p.second.size(); ++pos) {
uint8_t new_char = to_int[p.second[pos]];
if(new_char == 0) { // Found a char different from A, C, G, T
++pos; // we skip this character then we build a new kmer
kmer = build_kmer(p.second, pos, k);
if(kmer == (uint64_t)-1) break;
rckmer = revcompl(kmer, k);
--pos; // p must point to the ending position of the kmer, it will be incremented by the for
} else {
--new_char; // A is 1 but it should be 0
kmer = lsappend(kmer, new_char, k);
rckmer = rsprepend(rckmer, reverse_char(new_char), k);
}
key = min(kmer, rckmer);
kmer_pos->push_back(_get_hash(key));
}
}
delete texts;
return kmer_pos;
}
return NULL;
delete texts;
return kmer_pos;
}

private:
size_t k;
const size_t k;
};

#endif
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CFLAGS = -DNDEBUG -march=native -Wno-char-subscripts -Wall -O3 -std=c++14 -I. -I./include
CFLAGS = -DNDEBUG -UDEBUG -Wno-char-subscripts -Wall -O3 -std=c++14 -I. -I./include
CXXFLAGS= ${CFLAGS}
LIBS = -L./lib -lz -lsdsl -ltbb
LIBS = -L./lib -lz -lsdsl -pthread

.PHONY: all clean

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
Fast tool for mapping-free gene separation of reads, using a Bloom filter.

## Dependencies
Shark requires the following libraries and tools:
- [sdsl-lite v2.1.1](https://github.com/simongog/sdsl-lite/tree/v2.1.1)
- Intel TBB (tested with version 2017 update 7)

Shark requires a C++14-compliant compiler and the [`sdsl-lite v2.1.1`](https://github.com/simongog/sdsl-lite/tree/v2.1.1) library.
For convenience, `sdsl-lite` is included in the repository.

## Download and Installation
To install the tool, run the following steps.
Expand Down
14 changes: 3 additions & 11 deletions ReadAnalyzer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,11 @@ class ReadAnalyzer {
ReadAnalyzer(BF *_bf, const vector<string>& _legend_ID, uint _k, double _c, bool _only_single = false) :
bf(_bf), legend_ID(_legend_ID), k(_k), c(_c), only_single(_only_single) {}

output_t* operator()(vector<elem_t> *reads) const {
output_t* associations = new output_t();
void operator()(const vector<elem_t>& reads, output_t& associations) const {
vector<int> genes_idx;
typedef pair<pair<unsigned int, unsigned int>, unsigned int> gene_cov_t;
map<int, gene_cov_t> classification_id;
for(const auto & p : *reads) {
for(const auto & p : reads) {
classification_id.clear();
const string& read_seq = p.first;
unsigned int len = 0;
Expand Down Expand Up @@ -104,17 +103,10 @@ class ReadAnalyzer {

if(max >= c*len && (!only_single || genes_idx.size() == 1)) {
for(const auto idx : genes_idx) {
associations->push_back({ legend_ID[idx], std::move(get<1>(p)) });
associations.push_back({ legend_ID[idx], std::move(get<1>(p)) });
}
}
}
delete reads;
if(associations->size())
return associations;
else {
delete associations;
return NULL;
}
}

private:
Expand Down
29 changes: 15 additions & 14 deletions ReadOutput.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

#include <iostream>
#include <vector>
#include <mutex>

#include "common.hpp"

class ReadOutput {
Expand All @@ -32,26 +34,25 @@ class ReadOutput {
: out1(_out1), out2(_out2)
{ }

void operator()(std::vector<assoc_t> *associations) const {
if(associations) {
string previd = "";
for(const auto & a : *associations) {
const sharseq_t& s1 = a.second.first;
const sharseq_t& s2 = a.second.second;
printf("%s %s\n", s1.id.c_str(), a.first.c_str());
if (out1 != nullptr && previd != s1.id)
fprintf(out1, "@%s\n%s\n+\n%s\n", s1.id.c_str(), s1.seq.c_str(), s1.qual.c_str());
if (out2 != nullptr && previd != s1.id)
fprintf(out2, "@%s\n%s\n+\n%s\n", s2.id.c_str(), s2.seq.c_str(), s2.qual.c_str());
previd = std::move(s1.id);
}
delete associations;
void operator()(const std::vector<assoc_t>& associations) {
std::lock_guard<std::mutex> lock(mtx);
string previd = "";
for(const auto & a : associations) {
const sharseq_t& s1 = a.second.first;
const sharseq_t& s2 = a.second.second;
printf("%s %s\n", s1.id.c_str(), a.first.c_str());
if (out1 != nullptr && previd != s1.id)
fprintf(out1, "@%s\n%s\n+\n%s\n", s1.id.c_str(), s1.seq.c_str(), s1.qual.c_str());
if (out2 != nullptr && previd != s1.id)
fprintf(out2, "@%s\n%s\n+\n%s\n", s2.id.c_str(), s2.seq.c_str(), s2.qual.c_str());
previd = std::move(s1.id);
}
}

private:
FILE* const out1;
FILE* const out2;
std::mutex mtx;
};

#endif
Loading

0 comments on commit bd93aa9

Please sign in to comment.