diff --git a/Makefile b/Makefile index a5c8a6c..6440be8 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,7 @@ shark: main.o @echo '* Compiling $<' $(CXX) $(CXXFLAGS) -o $@ -c $< -main.o: common.hpp argument_parser.hpp bloomfilter.h BloomfilterFiller.hpp KmerBuilder.hpp FastaSplitter.hpp FastqSplitter.hpp ReadAnalyzer.hpp ReadOutput.hpp kmer_utils.hpp +main.o: common.hpp argument_parser.hpp bloomfilter.h BloomfilterFiller.hpp KmerBuilder.hpp FastaSplitter.hpp FastqSplitter.hpp ReadAnalyzer.hpp ReadOutput.hpp kmer_utils.hpp small_vector.hpp clean: rm -rf *.o diff --git a/bloomfilter.h b/bloomfilter.h index 2f56c8e..590a0c3 100644 --- a/bloomfilter.h +++ b/bloomfilter.h @@ -28,6 +28,7 @@ #include #include "kmer_utils.hpp" +#include "small_vector.hpp" using namespace std; using namespace sdsl; @@ -35,6 +36,7 @@ using namespace sdsl; class KmerBuilder; class BloomfilterFiller; + class BF { friend class KmerBuilder; friend class BloomfilterFiller; @@ -45,7 +47,7 @@ class BF { typedef uint64_t hash_t; typedef bit_vector bit_vector_t; typedef bit_vector_t::rank_1_type rank_t; - typedef vector index_t; + typedef small_vector_t index_t; typedef vector set_index_t; typedef vector index_kmer_t; typedef bit_vector_t::select_1_type select_t; @@ -88,7 +90,7 @@ class BF { for (const auto bf_idx: kmers) { int kmer_rank = _brank(bf_idx); const auto size = _set_index[kmer_rank].size(); - if (size == 0 || _set_index[kmer_rank][size-1] != input_idx) + if (size == 0 || _set_index[kmer_rank].last() != input_idx) _set_index[kmer_rank].push_back(input_idx); } } diff --git a/small_vector.hpp b/small_vector.hpp new file mode 100644 index 0000000..cebfa20 --- /dev/null +++ b/small_vector.hpp @@ -0,0 +1,91 @@ +/** + * shark - Mapping-free filtering of useless RNA-Seq reads + * Copyright (C) 2020 Tamara Ceccato, Luca Denti, Yuri Pirola, Marco Previtali + * + * This file is part of shark. 
/*
 *
 * shark is free software: you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * shark is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with shark; see the file LICENSE.  If not, see
 * <https://www.gnu.org/licenses/>.
 **/

#ifndef SHARK_SMALL_VECTOR_HPP
#define SHARK_SMALL_VECTOR_HPP

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <utility>
#include <vector>

/**
 * Append-only sequence of uint16_t values that keeps up to
 * `inline_capacity` elements inside the object itself and switches to a
 * heap-allocated std::vector when one more element is pushed.
 *
 * Representation: the inline record `v.s` and the heap pointer `v.l` share
 * the same storage. Bit 0 of `v.s.flag` (the first byte of that storage)
 * selects the mode: 1 means "inline", 0 means "`v.l` is a valid pointer".
 * Every accessor therefore reads `v.s.flag` even while `v.l` is the member
 * that was last written.
 * NOTE(review): this relies on union type punning being supported by the
 * compiler and on the pointer byte that overlaps `flag` having its low bit
 * clear (true for aligned pointers on little-endian targets) -- confirm
 * for every platform the project is built on. The invariant is asserted
 * whenever a pointer is stored.
 *
 * The object owns the heap vector, so copy and move operations are defined
 * explicitly: a member-wise copy would make two objects delete the same
 * vector.
 */
struct small_vector_t {
  /// Number of elements that fit in the inline representation.
  static constexpr std::size_t inline_capacity = 3;

  union {
    struct {
      std::uint8_t flag;                    ///< bit 0 set => inline mode
      std::uint8_t size;                    ///< element count (inline mode only)
      std::uint16_t arr[inline_capacity];   ///< inline elements
    } s;
    std::vector<std::uint16_t>* l;          ///< heap storage (flag bit 0 clear)
  } v;

  /// Creates an empty vector in inline mode.
  small_vector_t() noexcept { reset_inline(); }

  /// Deep copy: a heap-backed source gets its own freshly allocated vector.
  small_vector_t(const small_vector_t& other) {
    if (other.is_inline()) {
      v = other.v;
    } else {
      v.l = new std::vector<std::uint16_t>(*other.v.l);
      assert(!is_inline());
    }
  }

  /// Takes over the storage of `other` and leaves `other` empty (inline).
  small_vector_t(small_vector_t&& other) noexcept : v(other.v) {
    other.reset_inline();
  }

  /// Copy assignment; `*this` is left untouched if the copy throws.
  small_vector_t& operator=(const small_vector_t& other) {
    if (this != &other) {
      small_vector_t copy(other);
      swap_storage(copy);  // the old storage is released by copy's destructor
    }
    return *this;
  }

  /// Move assignment; releases the current storage and leaves `other` empty.
  small_vector_t& operator=(small_vector_t&& other) noexcept {
    if (this != &other) {
      release();
      v = other.v;
      other.reset_inline();
    }
    return *this;
  }

  /// Frees the heap vector when one is owned.
  ~small_vector_t() { release(); }

  /**
   * Appends `x` at the end.
   * The push performed while already holding `inline_capacity` inline
   * elements copies them to a new heap vector and switches mode.
   */
  void push_back(std::uint16_t x) {
    if (!is_inline()) {
      v.l->push_back(x);
      return;
    }
    if (v.s.size < inline_capacity) {
      v.s.arr[v.s.size++] = x;
      return;
    }
    // Hold the new vector in a unique_ptr until it is complete so that an
    // exception thrown by push_back cannot leak it.
    std::unique_ptr<std::vector<std::uint16_t>> spill(
        new std::vector<std::uint16_t>(v.s.arr, v.s.arr + inline_capacity));
    spill->push_back(x);
    v.l = spill.release();
    assert(!is_inline());  // the stored pointer must read back as "heap mode"
  }

  /// Returns the number of stored elements.
  std::size_t size() const {
    return is_inline() ? static_cast<std::size_t>(v.s.size) : v.l->size();
  }

  /// Returns the most recently pushed element. Precondition: size() > 0.
  std::uint16_t last() const {
    assert(size() != 0);
    return is_inline() ? v.s.arr[v.s.size - 1] : v.l->back();
  }

  /// Pointer to the first element; equal to end() when empty.
  const std::uint16_t* begin() const {
    return is_inline() ? v.s.arr : v.l->data();
  }

  /// Pointer one past the last element.
  const std::uint16_t* end() const {
    return is_inline() ? v.s.arr + v.s.size : v.l->data() + v.l->size();
  }

private:
  /// True while the elements live in `v.s.arr`.
  bool is_inline() const noexcept { return (v.s.flag & 0x1) != 0; }

  /// Puts the object in the empty inline state without freeing anything.
  void reset_inline() noexcept {
    v.s.flag = 1u;
    v.s.size = 0;
    // Zero the slots so that copying the whole record never reads
    // uninitialised storage.
    for (std::size_t i = 0; i < inline_capacity; ++i) v.s.arr[i] = 0;
  }

  /// Deletes the heap vector if this object is in heap mode.
  void release() noexcept {
    if (!is_inline()) delete v.l;
  }

  /// Exchanges the raw storage of two objects.
  void swap_storage(small_vector_t& other) noexcept {
    const auto mine = v;
    v = other.v;
    other.v = mine;
  }
};

#endif  // SHARK_SMALL_VECTOR_HPP