Skip to content

Commit

Permalink
Jul 30, 2024: Total counts fixed
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Jul 30, 2024
1 parent 6e4f55e commit 404147d
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 6 deletions.
41 changes: 35 additions & 6 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,19 @@ class Teloscope {
Trie trie; // Declare trie instance
UserInputTeloscope userInput; // Declare user input instance (its mode flags, e.g. modeGC, gate which outputs are written)
// Assembly windows: one tuple per entry holding an unsigned int, a string and that entry's WindowData list.
// NOTE(review): presumably (path id, header, windows) — confirm against the code that fills it.
std::vector<std::tuple<unsigned int, std::string, std::vector<WindowData>>> allWindows; // Assembly windows

// Run-wide accumulators reported by printSummary().
int totalNWindows = 0; // Total windows analyzed
std::map<std::string, int> patternCounts; // Total counts, keyed by pattern string
std::vector<float> entropyValues; // Total entropy values: one entry appended each time a window's entropy is written
std::vector<float> gcContentValues; // Total GC content values: one entry appended each time a window's GC content is written

// Per-window metrics computed from the window's nucleotide counts and its size.
float getShannonEntropy(const std::unordered_map<char, uint64_t>& nucleotideCounts, uint32_t windowSize);
float getGCContent(const std::unordered_map<char, uint64_t>& nucleotideCounts, uint32_t windowSize); // Result is scaled by 100 (percentage)

// Summary statistics over a list of floats; each one returns 0 for an empty list.
float getMean(const std::vector<float>& values);
float getMedian(std::vector<float> values); // By value: the implementation sorts its own copy
// NOTE(review): getMin/getMax only read their argument, yet take it by value (a full copy per call).
// Switching to a const reference requires changing these declarations and the definitions together.
float getMin(std::vector<float> values);
float getMax(std::vector<float> values);

public:

Expand Down Expand Up @@ -151,13 +158,15 @@ class Teloscope {
shannonFile << header << "\t" << window.windowStart << "\t"
<< windowEnd << "\t"
<< window.shannonEntropy << "\n";
entropyValues.push_back(window.shannonEntropy); // Update entropy values
}

// Write window GC content if enabled
if (userInput.modeGC) {
gcContentFile << header << "\t" << window.windowStart << "\t"
<< windowEnd << "\t"
<< window.gcContent << "\n";
gcContentValues.push_back(window.gcContent);
}

// Write pattern data if enabled
Expand All @@ -168,15 +177,14 @@ class Teloscope {
<< window.windowStart + pos << "\t"
<< window.windowStart + pos + pattern.length() - 1 << "\t"
<< pattern << "\n";
patternCounts[pattern]++; // Update total pattern counts
}
patternCountFiles[pattern] << header << "\t" << window.windowStart << "\t"
<< windowEnd << "\t"
<< data.count << "\n";
patternDensityFiles[pattern] << header << "\t" << window.windowStart << "\t"
<< windowEnd << "\t"
<< data.density << "\n";

patternCounts[pattern] += data.count; // Update total pattern counts
}
}
}
Expand All @@ -203,13 +211,34 @@ class Teloscope {
}

/// Print the run-wide summary report to stdout.
///
/// Emits, in order: the report banner, the total number of windows analyzed,
/// one line per pattern with its accumulated count, and the max / mean /
/// median / min of the collected Shannon entropy and GC content values.
/// Every line is written exactly once.
void printSummary() {
    std::cout << "\n+++Summary Report+++\n";
    std::cout << "Total windows analyzed:\t" << totalNWindows << "\n";
    std::cout << "Total input patterns found:\n";
    for (const auto& [pattern, count] : patternCounts) {
        std::cout << "Pattern:\t" << pattern << "\t" << count << "\n";
    }

    // For each pattern, print the path header with the highest number of matches - PENDING
    // For each pattern, print the path header with the lowest number of matches - PENDING

    // The statistic helpers return 0 for an empty vector, so these lines show 0
    // when no entropy values were collected.
    std::cout << "Max Shannon Entropy:\t" << getMax(entropyValues) << "\n";
    std::cout << "Mean Shannon Entropy:\t" << getMean(entropyValues) << "\n";
    std::cout << "Median Shannon Entropy:\t" << getMedian(entropyValues) << "\n";
    std::cout << "Min Shannon Entropy:\t" << getMin(entropyValues) << "\n";

    // Same convention as above: 0 is printed when no GC content values were collected.
    std::cout << "Max GC Content:\t" << getMax(gcContentValues) << "\n";
    std::cout << "Mean GC Content:\t" << getMean(gcContentValues) << "\n";
    std::cout << "Median GC Content:\t" << getMedian(gcContentValues) << "\n";
    std::cout << "Min GC Content:\t" << getMin(gcContentValues) << "\n";
}

/// Placeholder for telomere annotation: the body is currently empty and the
/// notes inside record the intended approach.
void teloAnnotation() {
/// For each path we need two telomeric coordinates: p (start) and q (end)
/// For p telomere: Start to last semi-continuous repeat
/// For q telomere: First semi-continuous repeat to end
/// Semi-continuous repeat: 2 or more repeats of the same pattern
}

};

#endif // TELOSCOPE_H
25 changes: 25 additions & 0 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,31 @@ float Teloscope::getGCContent(const std::unordered_map<char, uint64_t>& nucleoti
return float(gcCount) / windowSize * 100.0;
}

// Arithmetic mean of `values`; an empty input yields 0.
float Teloscope::getMean(const std::vector<float>& values) {
    if (values.empty()) {
        return 0.0f;
    }

    // Add the samples up in double precision.
    double total = 0.0;
    for (const float sample : values) {
        total = total + sample;
    }

    // The running total is narrowed to float before the division.
    const float sum = static_cast<float>(total);
    return sum / values.size();
}

// Median of `values`; an empty input yields 0.
// The argument is taken by value so the local copy can be sorted freely.
float Teloscope::getMedian(std::vector<float> values) {
    if (values.empty()) {
        return 0.0f;
    }

    std::sort(values.begin(), values.end());

    const size_t mid = values.size() / 2;
    const bool evenCount = (values.size() % 2 == 0);

    // Even count: average the two central elements; odd count: take the central one.
    return evenCount ? (values[mid - 1] + values[mid]) / 2 : values[mid];
}
// Smallest element of `values`; an empty input yields 0.
// NOTE(review): the vector is received by value, so each call copies it.
float Teloscope::getMin(const std::vector<float> values) {
    // On an empty range min_element returns end(), which selects the 0 fallback.
    const auto smallest = std::min_element(values.begin(), values.end());
    return smallest == values.end() ? 0.0f : *smallest;
}

// Largest element of `values`; an empty input yields 0.
// NOTE(review): the vector is received by value, so each call copies it.
float Teloscope::getMax(const std::vector<float> values) {
    // On an empty range max_element returns end(), which selects the 0 fallback.
    const auto largest = std::max_element(values.begin(), values.end());
    return largest == values.end() ? 0.0f : *largest;
}

void Teloscope::analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData) {

Expand Down

0 comments on commit 404147d

Please sign in to comment.