diff --git a/gfalibs b/gfalibs index 76086a8..c141299 160000 --- a/gfalibs +++ b/gfalibs @@ -1 +1 @@ -Subproject commit 76086a873f3d726be7ba720d1de28105cc5b917a +Subproject commit c1412996b463dbbbb114794fc0502cba314e6650 diff --git a/include/input.h b/include/input.h index e92ceea..8da941c 100644 --- a/include/input.h +++ b/include/input.h @@ -20,16 +20,14 @@ struct UserInputTeloscope : UserInput { unsigned short int minBlockCounts = 2; uint32_t terminalLimit = 50000; + bool outFasta = false; bool outWinRepeats = false; bool outGC = false; bool outEntropy = false; bool outMatches = false; bool outITS = false; - // bool outCanMatches = true; // Jack: Always true for now - // bool outPQonly = true; // bool outDistance = true; - // bool outFasta = true; double maxMem = 0; std::string prefix = ".", outFile = ""; // JACK: CHECK diff --git a/include/teloscope.h b/include/teloscope.h index 940d936..b233f94 100644 --- a/include/teloscope.h +++ b/include/teloscope.h @@ -83,7 +83,6 @@ struct SegmentData { std::vector windows; std::vector terminalBlocks; std::vector interstitialBlocks; - std::unordered_map> mergedBlocks; std::vector canonicalMatches; std::vector nonCanonicalMatches; std::vector segMatches; @@ -153,9 +152,9 @@ class Teloscope { void analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData, WindowData& nextOverlapData, - SegmentData& segmentData, uint32_t segmentSize); + SegmentData& segmentData, uint32_t segmentSize, uint32_t absPos); - SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint64_t absPos); + SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint32_t absPos); void insertWindowData(unsigned int seqPos, const std::string& header, std::vector& pathWindows); diff --git a/src/input.cpp b/src/input.cpp index 72b0b6e..86a0ab1 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -62,7 +62,7 @@ void Input::read(InSequences &inSequences) { bool Teloscope::walkPath(InPath* path, std::vector &inSegments, std::vector &inGaps) { Log threadLog; - uint64_t absPos = 0; + uint32_t absPos = 0; unsigned int cUId = 0, gapLen = 0, seqPos = path->getSeqPos(); std::vector pathComponents = path->getComponents(); // uint64_t pathSize = path->getLen(); diff --git a/src/main.cpp b/src/main.cpp index 4c29943..edc5c2a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -50,6 +50,7 @@ int main(int argc, char **argv) { {"max-block-distance", required_argument, 0, 'd'}, {"terminal-limit", no_argument, 0, 't'}, + {"out-fasta", no_argument, 0, 'a'}, {"out-win-repeats", no_argument, 0, 'r'}, {"out-gc", no_argument, 0, 'g'}, {"out-entropy", no_argument, 0, 'e'}, @@ -66,7 +67,7 @@ int main(int argc, char **argv) { int option_index = 0; - c = getopt_long(argc, argv, "-:f:j:o:p:s:w:c:l:d:t:rgemivh", long_options, &option_index); + c = getopt_long(argc, argv, "-:f:j:o:p:s:w:c:l:d:t:argemivh", long_options, &option_index); // if (optind < argc && !isPipe) { // if pipe wasn't assigned already @@ -249,6 +250,9 @@ int main(int argc, char **argv) { userInput.terminalLimit = std::stoi(optarg); break; + case 'a': + userInput.outFasta = true; + break; case 'r': userInput.outWinRepeats = true; diff --git a/src/teloscope.cpp b/src/teloscope.cpp index 035e4a8..d66d79e 100644 --- a/src/teloscope.cpp +++ b/src/teloscope.cpp @@ -255,7 +255,7 @@ std::vector Teloscope::filterInterstitialBlocks( void Teloscope::analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData, WindowData& nextOverlapData, - SegmentData& segmentData, uint32_t segmentSize) { + SegmentData& segmentData, uint32_t segmentSize, uint32_t absPos) { windowData.windowStart = windowStart; // CHECK: Why is this here? unsigned short int longestPatternSize = this->trie.getLongestPatternSize(); @@ -300,14 +300,14 @@ void Teloscope::analyzeWindow(const std::string &window, uint32_t windowStart, if (!current) break; if (current->isEndOfWord) { - uint32_t matchPos = windowStart + i; std::string_view pattern(window.data() + i, (j - i + 1)); - float densityGain = static_cast(pattern.size()) / window.size(); - bool isForward = (pattern.size() >= 3 && pattern.compare(0, 3, "CCC") == 0); bool isCanonical = (pattern == std::string_view(userInput.canonicalPatterns.first) || pattern == std::string_view(userInput.canonicalPatterns.second)); - bool isTerminal = (matchPos <= terminalLimit || matchPos >= segmentSize - terminalLimit); + bool isTerminal = (windowStart + i <= terminalLimit || + windowStart + i >= segmentSize - terminalLimit); + float densityGain = static_cast(pattern.size()) / window.size(); + uint32_t matchPos = absPos + windowStart + i; // Keep absolute positions only MatchInfo matchInfo; matchInfo.position = matchPos; @@ -356,7 +356,7 @@ void Teloscope::analyzeWindow(const std::string &window, uint32_t windowStart, } -SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint64_t absPos) { +SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint32_t absPos) { uint32_t windowSize = userInput.windowSize; uint32_t step = userInput.step; uint32_t segmentSize = sequence.size(); @@ -372,7 +372,6 @@ SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope WindowData nextOverlapData; // Data for next overlap std::vector windows; - std::unordered_map> mergedBlocks; uint32_t windowStart = 0; uint32_t currentWindowSize = std::min(windowSize, segmentSize); // In case first segment is short std::string window = sequence.substr(0, currentWindowSize); @@ -380,7 +379,7 @@ SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope while (windowStart < segmentSize) { // Prepare and analyze current window WindowData windowData = prevOverlapData; - analyzeWindow(window, windowStart, windowData, nextOverlapData, segmentData, segmentSize); + analyzeWindow(window, windowStart, windowData, nextOverlapData, segmentData, segmentSize, absPos); if (userInput.outGC) { windowData.gcContent = getGCContent(windowData.nucleotideCounts, window.size()); } if (userInput.outEntropy) { windowData.shannonEntropy = getShannonEntropy(windowData.nucleotideCounts, window.size()); }