Skip to content

Commit

Permalink
Sep 18, 2024: README and cleaning
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Sep 18, 2024
1 parent d8cd621 commit 53c302a
Show file tree
Hide file tree
Showing 6 changed files with 18 additions and 33 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ Teloscope is a fast and comprehensive tool for matching, counting and reporting
* GC
* Shannon Entropy

Teloscope reports all these metrics and the final telomere coordinates in BED/BEDgraph files and produces a summary report.
Teloscope reports all these metrics in BED/BEDgraph files and produces a summary report.

## Installation

Either download one of the releases, or run `git clone https://github.com/vgl-hub/teloscope.git --recursive` and then `make -j` in the `teloscope` folder.

## Usage

`teloscope -f input.[fasta][.gz] -p TTAGGG,TTAGGGG -w [window size] -s [step size]`
`teloscope -f input.[fasta][.gz] -o [output/dir] -p TTAGGG,TTAGGGG -w [window size] -s [step size] -k`

To check out all options and flags, please use:
`teloscope -h`
Expand Down
3 changes: 1 addition & 2 deletions include/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ struct UserInputTeloscope : UserInput {
uint32_t step = 500;
double maxMem = 0;
std::string prefix = ".", outFile = "";
bool keepWindowData = false;
bool keepWindowData = false; // Memory intensive
bool modeMatch = true, modeEntropy = true, modeGC = true; // Change to: de novo, user-defined
bool storeWindowData = false; // Memory intensive
float densityThreshold = 0.5f; // Threshold for telomere detection
uint32_t maxGaps = 2; // Allowed gaps in telomere blocks
uint8_t mergeDistance = 6;
Expand Down
15 changes: 8 additions & 7 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <memory>
#include <unordered_map>

std::string removeCarriageReturns(const std::string& input);
// std::string removeCarriageReturns(const std::string& input);

class Trie {
struct TrieNode {
Expand Down Expand Up @@ -61,6 +61,7 @@ struct PatternData {
float density = 0.0f; // Density of the pattern
};


struct WindowData {
uint32_t windowStart;
uint32_t currentWindowSize;
Expand Down Expand Up @@ -124,7 +125,7 @@ class Teloscope {
std::unordered_map<std::string, std::ofstream>& patternCountFiles,
std::unordered_map<std::string, std::ofstream>& patternDensityFiles) {

if (!userInput.storeWindowData) { // If windowData is not stored, return
if (!userInput.keepWindowData) { // If windowData is not stored, return
return;
}

Expand Down Expand Up @@ -202,8 +203,8 @@ class Teloscope {
}

if (userInput.modeMatch) {
telomereBEDFile.open(outRoute + "/telomere_blocks.bed"); // CHECK
telomereCountFile.open(outRoute + "/telomere_block_counts.txt"); // CHECK
// telomereBEDFile.open(outRoute + "/telomere_blocks.bed"); // CHECK
// telomereCountFile.open(outRoute + "/telomere_block_counts.txt"); // CHECK

for (const auto& pattern : userInput.patterns) {
patternMatchFiles[pattern].open(outRoute + "/" + pattern + "_matches.bed");
Expand All @@ -225,8 +226,8 @@ class Teloscope {
gcContentFile.close();
}
if (userInput.modeMatch) {
telomereBEDFile.close(); // CHECK
telomereCountFile.close(); // CHECK
// telomereBEDFile.close(); // CHECK
// telomereCountFile.close(); // CHECK

for (auto& [pattern, file] : patternMatchFiles) {
file.close();
Expand All @@ -251,7 +252,7 @@ class Teloscope {

// For each pattern, print the path header with the highest number of matches - PENDING
// For each pattern, print the path header with the lowest number of matches - PENDING
if (userInput.storeWindowData) {
if (userInput.keepWindowData) {
std::cout << "Max Shannon Entropy:\t" << getMax(entropyValues) << "\n";
std::cout << "Mean Shannon Entropy:\t" << getMean(entropyValues) << "\n";
std::cout << "Median Shannon Entropy:\t" << getMedian(entropyValues) << "\n";
Expand Down
9 changes: 5 additions & 4 deletions src/input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,16 @@ bool Teloscope::walkPath(InPath* path, std::vector<InSegment*> &inSegments, std:
unsigned int cUId = 0, gapLen = 0, seqPos = path->getSeqPos();
std::vector<PathComponent> pathComponents = path->getComponents();

std::string header = removeCarriageReturns(path->getHeader());
// std::string header = removeCarriageReturns(path->getHeader());
std::string header = path->getHeader();
std::cout << "header_before: " << header << std::endl;
eraseChar(header, '\r');
std::cout << "header_after: " << header << std::endl;
threadLog.add("\n\tWalking path:\t" + path->getHeader());

std::vector<WindowData> pathWindows;
std::vector<TelomereBlock> pathTelomereBlocks;

// std::string header = path->getHeader();
// eraseChar(header, '\r');

for (std::vector<PathComponent>::iterator component = pathComponents.begin(); component != pathComponents.end(); component++) {

cUId = component->id;
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ int main(int argc, char **argv) {

int option_index = 0;

c = getopt_long(argc, argv, "-:f:j:m:o:p:s:w:vh", long_options, &option_index);
c = getopt_long(argc, argv, "-:f:j:m:o:p:s:w:kvh", long_options, &option_index);

// if (optind < argc && !isPipe) { // if pipe wasn't assigned already

Expand Down
18 changes: 1 addition & 17 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,16 @@
#include "log.h"
#include "global.h"
#include "uid-generator.h"

#include "bed.h"
#include "struct.h"
#include "functions.h"

#include "gfa-lines.h"
#include "gfa.h"
#include "sak.h"

#include "stream-obj.h"

#include "input-agp.h"
#include "input-filters.h"
#include "input-gfa.h"

#include "teloscope.h"
#include "input.h"

Expand All @@ -46,17 +41,6 @@ void Trie::insertPattern(const std::string& pattern) {
}


// Returns a copy of `input` with every carriage return ('\r') dropped.
// All remaining characters keep their original relative order; the
// argument itself is left untouched.
std::string removeCarriageReturns(const std::string& input) {
    std::string cleaned;
    cleaned.reserve(input.size()); // result can never be longer than the input
    for (const char ch : input) {
        if (ch != '\r') {
            cleaned.push_back(ch);
        }
    }
    return cleaned;
}

// void eraseChar(std::string& input, char rmChar) {
// input.erase(std::remove(input.begin(), input.end(), rmChar), input.end());
// }


float Teloscope::getShannonEntropy(const std::unordered_map<char, uint32_t>& nucleotideCounts, uint32_t windowSize) {
float entropy = 0.0;
for (auto &[nucleotide, count] : nucleotideCounts) {
Expand Down Expand Up @@ -107,7 +91,7 @@ float Teloscope::getMax(const std::vector<float> values) {


void Teloscope::insertWindowData(unsigned int seqPos, const std::string& header, std::vector<WindowData>& pathWindows) {
if (userInput.storeWindowData) {
if (userInput.keepWindowData) {
allWindows.push_back(std::make_tuple(seqPos, header, pathWindows));
}
}
Expand Down

0 comments on commit 53c302a

Please sign in to comment.