Skip to content

Commit

Permalink
Jul 29, 2024: Test summary
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Jul 29, 2024
1 parent 7065713 commit 19ea831
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 39 deletions.
30 changes: 23 additions & 7 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <memory>
#include <unordered_map>

std::string cleanString(const std::string& input);
std::string removeCarriageReturns(const std::string& input);

class Trie {
struct TrieNode {
Expand Down Expand Up @@ -41,6 +41,14 @@ class Trie {
return nullptr;
}

// std::shared_ptr<TrieNode> getChild(const std::shared_ptr<TrieNode>& node, char ch) const {
// auto it = node->children.find(ch);
// if (it != node->children.end()) {
// return it->second;
// }
// return nullptr;
// }

// Read-only accessor: returns the current value of the longestPatternSize member.
unsigned short int getLongestPatternSize() const { return longestPatternSize; }
Expand Down Expand Up @@ -70,7 +78,8 @@ class Teloscope {
Trie trie; // Declare trie instance
UserInputTeloscope userInput; // Declare user input instance
std::vector<std::tuple<unsigned int, std::string, std::vector<WindowData>>> allWindows; // Assembly windows

int totalNWindows = 0; // Total windows analyzed
std::map<std::string, int> patternCounts; // Total counts

float getShannonEntropy(const std::unordered_map<char, uint64_t>& nucleotideCounts, uint32_t windowSize);
float getGCContent(const std::unordered_map<char, uint64_t>& nucleotideCounts, uint32_t windowSize);
Expand All @@ -80,7 +89,7 @@ class Teloscope {

// Constructs a Teloscope from the user options and loads every requested
// pattern into the trie.
//
// userInput: options object; it is copied into the `userInput` member by the
//            initializer list. Inside the body the name `userInput` refers to
//            the constructor parameter (it shadows the member); both hold the
//            same pattern list at this point.
Teloscope(UserInputTeloscope userInput) : userInput(userInput) {
    for (const auto& pattern : userInput.patterns) {
        // Insert each pattern exactly once; the previous text repeated this
        // call, performing a redundant second insertion per pattern.
        trie.insertPattern(pattern);
    }
}

Expand All @@ -101,17 +110,14 @@ class Teloscope {
});
}

// Writes a fixed status line to stdout. Despite the name, this function does
// not read or print any window data itself.
void printAllWindows() {
    std::cout << "Printing all windows in BEDs!" << '\n';
}

void generateBEDFile() {
std::ofstream shannonFile; // Declare file streams
std::ofstream gcContentFile;

std::unordered_map<std::string, std::ofstream> patternMatchFiles; // Hold file streams for pattern data
std::unordered_map<std::string, std::ofstream> patternCountFiles;
std::unordered_map<std::string, std::ofstream> patternDensityFiles;
std::cout << "Reporting window matches and metrics in BED/BEDgraphs...\n";

// Only create and write to files if their modes are enabled
if (userInput.modeEntropy) {
Expand All @@ -138,6 +144,7 @@ class Teloscope {
std::tie(seqPos, header, windows) = windowData; // Unpack the tuple

for (const auto& window : windows) {
totalNWindows++; // Update total window count
uint32_t windowEnd = window.windowStart + window.currentWindowSize - 1;

// Write window Shannon entropy if enabled
Expand Down Expand Up @@ -169,6 +176,8 @@ class Teloscope {
patternDensityFiles[pattern] << header << "\t" << window.windowStart << "\t"
<< windowEnd << "\t"
<< data.density << "\n";

patternCounts[pattern] += data.count; // Update total pattern counts
}
}
}
Expand All @@ -192,7 +201,14 @@ class Teloscope {
file.close();
}
}
}

// Prints a run summary to stdout: the value of totalNWindows followed by one
// line per entry of patternCounts (pattern and its accumulated count).
//
// Both members are updated inside generateBEDFile(), so this is meant to be
// called after it; called earlier it reports whatever the members currently
// hold (totalNWindows starts at 0).
// NOTE(review): patternCounts appears to be filled only on the pattern-output
// path of generateBEDFile() - confirm it is populated in every mode.
void printSummary() {
    std::cout << "Total windows analyzed: " << totalNWindows << "\n";
    std::cout << "Total input patterns found: " << '\n';
    for (const auto& [pattern, count] : patternCounts) {
        // '\n' instead of std::endl: avoids flushing stdout once per pattern.
        std::cout << "Pattern: " << pattern << " Count: " << count << '\n';
    }
    // Single flush so the complete summary is visible when this returns.
    std::cout << std::flush;
}
};

Expand Down
6 changes: 4 additions & 2 deletions src/input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ void Input::read(InSequences &inSequences) {
std::cout << "All jobs completed" << "\n";

teloscope.sortWindowsBySeqPos();
teloscope.printAllWindows();
teloscope.generateBEDFile();
teloscope.printSummary();
}


Expand All @@ -60,7 +60,9 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
std::vector<PathComponent> pathComponents = path->getComponents();
uint64_t absPos = 0;
std::vector<WindowData> pathWindows;
std::string header = cleanString(path->getHeader());
std::string header = removeCarriageReturns(path->getHeader());
// std::string header = path->getHeader();
// eraseChar(header, '\r');

for (std::vector<PathComponent>::iterator component = pathComponents.begin(); component != pathComponents.end(); component++) {

Expand Down
28 changes: 0 additions & 28 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,34 +106,6 @@ int main(int argc, char **argv) {
userInput.stats_flag = 1;
break;

// case 'm': {
// std::istringstream modeStream(optarg);
// std::string mode;
// bool allModes = false;

// while (std::getline(modeStream, mode, ',')) {
// if (mode.empty()) continue;

// if (std::any_of(mode.begin(), mode.end(), ::isdigit)) {
// std::cerr << "Error: Mode '" << mode << "' contains numerical characters.\n";
// exit(EXIT_FAILURE);
// }

// if (mode == "all") {
// allModes = true;
// break;
// }
// if (mode == "match") userInput.modeMatch = true;
// else if (mode == "entropy") userInput.modeEntropy = true;
// else if (mode == "gc") userInput.modeGC = true;
// }
// // Set all modes to true if none is specified as a default behavior
// if (allModes || !(userInput.modeMatch || userInput.modeEntropy || userInput.modeGC)) {
// userInput.modeMatch = userInput.modeEntropy = userInput.modeGC = true;
// }
// break;
// }

case 'm': {
std::istringstream modeStream(optarg);
std::string mode;
Expand Down
7 changes: 5 additions & 2 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <sstream> // check
#include <stdint.h> // what's this for?
#include <vector>
#include <algorithm> // cleanString
#include <algorithm> // removeCarriageReturns
#include <array> // check
#include <cmath>
#include <type_traits> // generateBEDFile
Expand Down Expand Up @@ -46,12 +46,15 @@ void Trie::insertPattern(const std::string& pattern) {
}


std::string cleanString(const std::string& input) {
// Returns a copy of `input` with every '\r' character removed; all other
// characters are kept in their original order. `input` itself is not modified.
std::string removeCarriageReturns(const std::string& input) {
    std::string stripped;
    stripped.reserve(input.size());
    for (const char ch : input) {
        if (ch != '\r') {
            stripped.push_back(ch);
        }
    }
    return stripped;
}

// void eraseChar(std::string& input, char rmChar) {
// input.erase(std::remove(input.begin(), input.end(), rmChar), input.end());
// }

float Teloscope::getShannonEntropy(const std::unordered_map<char, uint64_t>& nucleotideCounts, uint32_t windowSize) {
float entropy = 0.0;
Expand Down

0 comments on commit 19ea831

Please sign in to comment.