Skip to content

Commit

Permalink
Oct 7, 2024: Updated containers, blocks raw
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Oct 7, 2024
1 parent 7cb4855 commit ddc0916
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 46 deletions.
32 changes: 25 additions & 7 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Trie {
return root;
}

bool hasChild(const std::shared_ptr<TrieNode>& node, char ch) const { // Giulio: merge with the following method
// Reports whether `node` has a child entry keyed by `ch`.
// TODO: merge with the following method.
bool hasChild(const std::shared_ptr<TrieNode>& node, char ch) const {
    const auto& kids = node->children;
    const auto hit = kids.find(ch);
    return hit != kids.end();
}

Expand All @@ -55,7 +55,7 @@ struct PatternData {

// Interval stored as a start position plus a length; the end is derived
// (start + blockLen) rather than stored.
// NOTE(review): both `length` and `blockLen` appear below; this looks like a
// rendered rename (length -> blockLen) — confirm that only one of them is
// meant to remain in the header.
struct TelomereBlock {
uint64_t start; // Block start position (coordinate system not visible here — confirm absolute vs. window-relative)
uint16_t length;
uint16_t blockLen; // End = start + blockLen. NOTE(review): uint16_t caps a block at 65535 — confirm merged blocks cannot exceed that
};

struct WindowData {
Expand All @@ -65,7 +65,7 @@ struct WindowData {
float shannonEntropy;
std::unordered_map<char, uint32_t> nucleotideCounts;
std::unordered_map<std::string, PatternData> patternMap; // Condensed pattern data
std::vector<TelomereBlock> telomereBlocks;
std::vector<TelomereBlock> winBlocks;

std::vector<uint32_t> canonicalMatches;
std::vector<uint32_t> nonCanonicalMatches;
Expand All @@ -78,11 +78,24 @@ struct WindowData {
WindowData() : windowStart(0), gcContent(0.0f), shannonEntropy(0.0f), nucleotideCounts{{'A', 0}, {'C', 0}, {'G', 0}, {'T', 0}} {}
};

// Result bundle for one analyzed segment; this is the return type of
// Teloscope::analyzeSegment.
struct SegmentData {
std::vector<WindowData> windows; // Window records produced for the segment
std::unordered_map<std::string, std::vector<TelomereBlock>> mergedBlocks; // Blocks keyed by a string that walkPath treats as a group name
};


// Per-path results: filled in by Teloscope::walkPath and then moved into
// Teloscope::allPathData.
struct PathData {
unsigned int seqPos = 0; // Path position taken from path->getSeqPos(); zero-initialized so a default-constructed PathData never holds an indeterminate value
std::string header; // Path header (walkPath strips '\r' before storing it)
std::vector<WindowData> windows; // Empty unless specified by user (walkPath appends only when userInput.keepWindowData is set)
std::unordered_map<std::string, std::vector<TelomereBlock>> mergedBlocks; // Blocks appended per group name from each '+'-oriented segment of the path
};


class Teloscope {
Trie trie; // Declare trie instance
UserInputTeloscope userInput; // Declare user input instance
std::vector<std::tuple<unsigned int, std::string, std::vector<WindowData>>> allWindows; // Assembly windows
std::vector<PathData> allPathData; // Assembly data

int totalNWindows = 0; // Total windows analyzed
std::unordered_map<std::string, int> patternCounts; // Total counts
Expand Down Expand Up @@ -110,16 +123,21 @@ class Teloscope {

void analyzeWindow(const std::string &window, uint32_t windowStart, WindowData& windowData, WindowData& nextOverlapData);

std::vector<WindowData> analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint64_t absPos);
SegmentData analyzeSegment(std::string &sequence, UserInputTeloscope userInput, uint64_t absPos);

void insertWindowData(unsigned int seqPos, const std::string& header, std::vector<WindowData>& pathWindows);

void sortWindowsBySeqPos();
void sortBySeqPos();

std::vector<TelomereBlock> getTelomereBlocks(const std::vector<uint32_t>& inputMatches, uint64_t windowStart);

std::vector<TelomereBlock> mergeTelomereBlocks(const std::vector<TelomereBlock>& winBlocks);

void writeBEDFile(std::ofstream& shannonFile, std::ofstream& gcContentFile,
std::unordered_map<std::string, std::ofstream>& patternMatchFiles,
std::unordered_map<std::string, std::ofstream>& patternCountFiles,
std::unordered_map<std::string, std::ofstream>& patternDensityFiles);
std::unordered_map<std::string, std::ofstream>& patternDensityFiles,
std::ofstream& telomereBlocksFile);

void handleBEDFile();

Expand Down
30 changes: 19 additions & 11 deletions src/input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void Input::read(InSequences &inSequences) {
lg.verbose("All jobs completed");
std::cout << "All jobs completed" << std::endl;

teloscope.sortWindowsBySeqPos();
teloscope.sortBySeqPos();
lg.verbose("\nPaths sorted by original position");

teloscope.handleBEDFile();
Expand All @@ -60,7 +60,6 @@ void Input::read(InSequences &inSequences) {


bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std::vector<InGap> &inGaps) {

Log threadLog;
uint64_t absPos = 0;
unsigned int cUId = 0, gapLen = 0, seqPos = path->getSeqPos();
Expand All @@ -70,8 +69,10 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
std::string header = path->getHeader();
eraseChar(header, '\r');

std::vector<WindowData> pathWindows;
std::vector<TelomereBlock> pathTelomereBlocks; // CHECK: To implement
// Initialize PathData for this path
PathData pathData;
pathData.seqPos = seqPos;
pathData.header = header;

for (std::vector<PathComponent>::iterator component = pathComponents.begin(); component != pathComponents.end(); component++) {

Expand All @@ -84,10 +85,19 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
unmaskSequence(sequence);

if (component->orientation == '+') {

std::vector<WindowData> segmentWindows = analyzeSegment(sequence, userInput, absPos);
pathWindows.insert(pathWindows.end(), segmentWindows.begin(), segmentWindows.end());

SegmentData segmentData = analyzeSegment(sequence, userInput, absPos);

if (userInput.keepWindowData) {
pathData.windows.insert(pathData.windows.end(), segmentData.windows.begin(), segmentData.windows.end());
}

for (const auto& [groupName, blocks] : segmentData.mergedBlocks) {
pathData.mergedBlocks[groupName].insert(
pathData.mergedBlocks[groupName].end(),
blocks.begin(),
blocks.end()
);
}
} else {
}

Expand All @@ -106,9 +116,7 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
}

std::lock_guard<std::mutex> lck(mtx);
insertWindowData(seqPos, header, pathWindows);

allTelomereBlocks.push_back({seqPos, header, pathTelomereBlocks}); // CHECK: To implement
allPathData.push_back(std::move(pathData));

threadLog.add("\tCompleted walking path:\t" + path->getHeader());
logs.push_back(threadLog);
Expand Down
Loading

0 comments on commit ddc0916

Please sign in to comment.