Skip to content

Commit

Permalink
Updated summary
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Jan 27, 2025
1 parent 16968a3 commit 6f5063e
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 31 deletions.
17 changes: 8 additions & 9 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ struct SegmentData {
struct PathData {
unsigned int seqPos;
std::string header;
uint16_t gaps = 0;
std::vector<WindowData> windows;
std::vector<TelomereBlock> terminalBlocks;
std::vector<TelomereBlock> interstitialBlocks;
Expand All @@ -114,13 +115,12 @@ class Teloscope {
UserInputTeloscope userInput; // Declare user input instance
std::vector<PathData> allPathData; // Assembly data

int totalPaths = 0; // Total paths analyzed
int totalNWindows = 0; // Total windows analyzed
int totalTelomeres = 0; // Total telomeres found
int totalITS = 0; // Total ITS found
int totalCanMatches = 0; // Total canonical matches found
std::vector<float> entropyValues; // Total entropy values
std::vector<float> gcContentValues; // Total GC content values
uint32_t totalPaths = 0; // Total paths analyzed
uint32_t totalNWindows = 0; // Total windows analyzed
uint32_t totalTelomeres = 0; // Total telomeres found
uint32_t totalITS = 0; // Total ITS found
uint32_t totalCanMatches = 0; // Total canonical matches found
uint32_t totalGaps = 0; // Total gaps found


inline float getShannonEntropy(const uint32_t nucleotideCounts[4], uint32_t windowSize) {
Expand Down Expand Up @@ -158,8 +158,7 @@ class Teloscope {
SegmentData& segmentData, uint32_t segmentSize, uint32_t absPos);

SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint32_t absPos);

void insertWindowData(unsigned int seqPos, const std::string& header, std::vector<WindowData>& pathWindows);
// SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope &userInput, uint32_t absPos);

void sortBySeqPos();

Expand Down
1 change: 1 addition & 0 deletions src/input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
gapLen = inGap->getDist(component->start - component->end);

absPos += gapLen;
pathData.gaps++;

} else {
} // need to handle edges, cigars etc
Expand Down
58 changes: 36 additions & 22 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ void Teloscope::analyzeWindow(const std::string_view &window, uint32_t windowSta


SegmentData Teloscope::analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint32_t absPos) {
// SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope &userInput, uint32_t absPos) {
uint32_t windowSize = userInput.windowSize;
uint32_t step = userInput.step;
uint32_t segmentSize = sequence.size();
Expand Down Expand Up @@ -452,19 +453,26 @@ void Teloscope::writeBEDFile(std::ofstream& windowMetricsFile, std::ofstream& wi
windowMetricsFile << "\n";
}

// Report header
std::cout << "pos\theader\ttelomeres\tlabels\tgaps\ttype\tits\tcanonical\twindows\n";

// BED and reports
for (const auto& pathData : allPathData) {
const auto& header = pathData.header;
const auto& windows = pathData.windows;
totalPaths++;
const auto& pos = pathData.seqPos;
const auto& gaps = pathData.gaps;

// Write blocks
std::string labels;
for (const auto& block : pathData.terminalBlocks) {
uint64_t blockEnd = block.start + block.blockLen;
terminalBlocksFile << header << "\t"
<< block.start << "\t"
<< blockEnd << "\t"
<< block.blockLen << "\t"
<< block.blockLabel << "\n";
labels += block.blockLabel;
}

if (userInput.outITS) {
Expand Down Expand Up @@ -495,10 +503,8 @@ void Teloscope::writeBEDFile(std::ofstream& windowMetricsFile, std::ofstream& wi
}
}


// Process window data
for (const auto& window : windows) {
totalNWindows++; // Update total window count
uint32_t windowEnd = window.windowStart + window.currentWindowSize; // Start is already 0-based

// Write window metrics if enabled
Expand All @@ -507,11 +513,9 @@ void Teloscope::writeBEDFile(std::ofstream& windowMetricsFile, std::ofstream& wi

if (userInput.outEntropy) {
windowMetricsFile << "\t" << window.shannonEntropy;
entropyValues.push_back(window.shannonEntropy); // For summary
}
if (userInput.outGC) {
windowMetricsFile << "\t" << window.gcContent;
gcContentValues.push_back(window.gcContent); // For summary
}
windowMetricsFile << "\n";
}
Expand All @@ -526,7 +530,29 @@ void Teloscope::writeBEDFile(std::ofstream& windowMetricsFile, std::ofstream& wi
<< window.nonCanonicalDensity << "\n";
}
}

// Output path summary
std::cout << pos << "\t"
<< header << "\t"
<< pathData.terminalBlocks.size() << "\t"
<< (labels.empty() ? "none" : labels) << "\t"
<< gaps << "\t"
<< (labels == "pq" && gaps == 0 ? "t2t" :
labels == "pq" && gaps > 0 ? "gapped_t2t" :
(labels == "qp" || labels == "pp" || labels == "qq") ? "missasembly" :
(labels == "p" || labels == "q") ? "incomplete" : "na") << "\t"
<< pathData.interstitialBlocks.size() << "\t"
<< pathData.canonicalMatches.size() << "\t"
<< windows.size() << "\n";

// Update assembly summary
totalNWindows += windows.size();
totalTelomeres += pathData.terminalBlocks.size();
totalITS += pathData.interstitialBlocks.size();
totalCanMatches += pathData.canonicalMatches.size();
totalGaps += gaps;
}
totalPaths = allPathData.size();
}


Expand Down Expand Up @@ -585,23 +611,11 @@ void Teloscope::handleBEDFile() {
}

void Teloscope::printSummary() {
std::cout << "\n+++Summary Report+++\n";
std::cout << "\n+++ Assembly Summary Report+++\n";
std::cout << "Total paths analyzed:\t" << totalPaths << "\n";
std::cout << "Total windows analyzed:\t" << totalNWindows << "\n";

if (userInput.outEntropy) {
Stats entropyStats = getStats(entropyValues);
std::cout << "Max Shannon Entropy:\t" << entropyStats.max << "\n";
std::cout << "Mean Shannon Entropy:\t" << entropyStats.mean << "\n";
std::cout << "Median Shannon Entropy:\t" << entropyStats.median << "\n";
std::cout << "Min Shannon Entropy:\t" << entropyStats.min << "\n";
}

if (userInput.outGC) {
Stats gcContentStats = getStats(gcContentValues);
std::cout << "Max GC Content:\t" << gcContentStats.max << "\n";
std::cout << "Mean GC Content:\t" << gcContentStats.mean << "\n";
std::cout << "Median GC Content:\t" << gcContentStats.median << "\n";
std::cout << "Min GC Content:\t" << gcContentStats.min << "\n";
}
std::cout << "Total telomeres found:\t" << totalTelomeres << "\n";
std::cout << "Total ITS found:\t" << totalITS << "\n";
std::cout << "Total canonical matches found:\t" << totalCanMatches << "\n";
std::cout << "Total gaps found:\t" << totalGaps << "\n";
}

0 comments on commit 6f5063e

Please sign in to comment.