-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4eae9f5
commit dd86e72
Showing
7 changed files
with
172 additions
and
64 deletions.
There are no files selected for viewing
Submodule gfalibs
updated
8 files
+2 −2 | include/functions.h | |
+26 −3 | include/output.h | |
+9 −2 | include/struct.h | |
+2 −2 | src/gfa-lines.cpp | |
+1 −2 | src/gfa.cpp | |
+184 −342 | src/output.cpp | |
+0 −2 | src/stream-obj.cpp | |
+16 −3 | src/struct.cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#ifndef FUNCTIONS_H | ||
#define FUNCTIONS_H | ||
|
||
#include <vector> | ||
#include <string> | ||
|
||
/**
 * @brief Expand an IUPAC-coded nucleotide pattern into concrete sequences.
 *
 * Starting at position @p index, every symbol of @p pattern is replaced in
 * turn by each base it stands for in the IUPAC table, and each fully
 * substituted string is appended to @p combinations. A symbol that has no
 * entry in the table produces no combinations.
 *
 * @param pattern       Pattern to expand (e.g. "TTAGGR").
 * @param current       Scratch buffer that is overwritten position by
 *                      position; pass a string at least as long as
 *                      @p pattern (typically a copy of it).
 * @param index         Position at which expansion starts; pass 0 to expand
 *                      the whole pattern.
 * @param combinations  Output vector; results are appended, existing
 *                      elements are kept.
 */
void generate_combinations(const std::string &pattern, std::string &current, size_t index, std::vector<std::string> &combinations);
|
||
#endif // FUNCTIONS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,5 +47,6 @@ | |
#include "input-gfa.h" | ||
|
||
#include "input.h" // was in Mac's code | ||
#include "functions.h" | ||
|
||
#endif /* MAIN_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#include "functions.h" | ||
#include <unordered_map> | ||
|
||
/// Maps each upper-case IUPAC nucleotide code to the concrete bases it can
/// stand for. Plain bases map to themselves; 'N' maps to all four bases.
std::unordered_map<char, std::vector<char>> IUPAC_DNA_map = {
    {'A', {'A'}},
    {'C', {'C'}},
    {'G', {'G'}},
    {'T', {'T'}},
    {'R', {'A', 'G'}},
    {'Y', {'C', 'T'}},
    {'M', {'A', 'C'}},
    {'K', {'G', 'T'}},
    {'S', {'C', 'G'}},
    {'W', {'A', 'T'}},
    {'H', {'A', 'C', 'T'}},
    {'B', {'C', 'G', 'T'}},
    {'V', {'A', 'C', 'G'}},
    {'D', {'A', 'G', 'T'}},
    {'N', {'A', 'C', 'G', 'T'}}
};

/**
 * @brief Expand an IUPAC-coded nucleotide pattern into concrete sequences.
 *
 * Recursively substitutes, from position @p index onward, every base listed
 * for the pattern symbol in IUPAC_DNA_map and appends each fully substituted
 * string to @p combinations. Results are produced in the order the bases are
 * listed in the table, leftmost position varying slowest.
 *
 * A symbol that has no entry in the table (lower-case letters, 'U', digits,
 * ...) produces no combinations for the pattern; the table itself is never
 * modified by the lookup.
 *
 * @param pattern       Pattern to expand (e.g. "TTAGGR").
 * @param current       Scratch buffer overwritten position by position. If it
 *                      is shorter than @p pattern it is padded with the
 *                      corresponding tail of @p pattern before use.
 * @param index         Position at which expansion starts; pass 0 to expand
 *                      the whole pattern. An index at or past the end means
 *                      @p current is already complete.
 * @param combinations  Output vector; results are appended, existing
 *                      elements are kept.
 */
void generate_combinations(const std::string &pattern, std::string &current, size_t index, std::vector<std::string> &combinations) {
    // Writing current[index] below is only valid when the buffer covers the
    // whole pattern, so fill any missing tail from the pattern itself.
    if (current.size() < pattern.size()) {
        current.append(pattern, current.size(), std::string::npos);
    }

    // ">=" rather than "==" so an out-of-range start index terminates
    // instead of reading past the end of the pattern.
    if (index >= pattern.size()) {
        combinations.push_back(current);
        return;
    }

    // find() instead of operator[]: operator[] would insert an empty entry
    // into the global table for every unknown symbol.
    const auto entry = IUPAC_DNA_map.find(pattern[index]);
    if (entry == IUPAC_DNA_map.end()) {
        return; // unknown symbol: nothing to substitute, no combinations
    }

    for (const char base : entry->second) {
        current[index] = base;
        generate_combinations(pattern, current, index + 1, combinations);
    }
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -118,92 +118,118 @@ int main(int argc, char **argv) { | |
break; | ||
|
||
|
||
case 'o': | ||
{ | ||
outRoute = optarg; | ||
|
||
if (outRoute.empty()) { | ||
fprintf(stderr, "Error: Output route is required. Use --output or -o to specify it.\n"); // Jack: we have to define output as default | ||
exit(EXIT_FAILURE); | ||
} | ||
} | ||
break; | ||
|
||
|
||
case 'j': // max threads | ||
maxThreads = atoi(optarg); | ||
userInput.stats_flag = 1; | ||
break; | ||
|
||
|
||
// case 'c': // canonical pattern | ||
// userInput.canPatterns = optarg; | ||
// break; | ||
|
||
|
||
case 'm': { | ||
std::istringstream modeStream(optarg); | ||
std::string mode; | ||
std::set<std::string> providedModes; | ||
|
||
while (std::getline(modeStream, mode, ',')) { | ||
if (mode.empty()) continue; | ||
|
||
if (std::any_of(mode.begin(), mode.end(), ::isdigit)) { | ||
std::cerr << "Error: Mode '" << mode << "' contains numerical characters.\n"; | ||
exit(EXIT_FAILURE); | ||
} | ||
case 'c': { // Handle canonical pattern | ||
std::string canonicalPattern = optarg; | ||
unmaskSequence(canonicalPattern); | ||
|
||
std::transform(mode.begin(), mode.end(), mode.begin(), ::tolower); | ||
if (canonicalPattern.empty()) { | ||
canonicalPattern = "TTAGGG"; | ||
} | ||
|
||
if (mode == "all") { | ||
providedModes = {"match", "entropy", "gc"}; | ||
break; | ||
} else { | ||
providedModes.insert(mode); | ||
} | ||
// Check for numerical characters | ||
if (std::any_of(canonicalPattern.begin(), canonicalPattern.end(), ::isdigit)) { | ||
std::cerr << "Error: Canonical pattern '" << canonicalPattern << "' contains numerical characters.\n"; | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
// Only disable modes that were not provided | ||
if (providedModes.find("match") == providedModes.end()) userInput.modeMatch = false; | ||
if (providedModes.find("entropy") == providedModes.end()) userInput.modeEntropy = false; | ||
if (providedModes.find("gc") == providedModes.end()) userInput.modeGC = false; | ||
// Store canonical pattern and its reverse complement | ||
userInput.canonicalPatterns.first = canonicalPattern; | ||
userInput.canonicalPatterns.second = revCom(canonicalPattern); | ||
|
||
break; | ||
std::cout << "Setting canonical pattern: " << canonicalPattern << " and its reverse complement: " << userInput.canonicalPatterns.second << "\n"; | ||
} | ||
break; | ||
|
||
|
||
case 'o': | ||
{ | ||
outRoute = optarg; | ||
// case 'p': | ||
// { | ||
// std::istringstream patternStream(optarg); | ||
// std::string pattern; | ||
|
||
if (outRoute.empty()) { | ||
fprintf(stderr, "Error: Output route is required. Use --output or -o to specify it.\n"); // Jack: we have to define output as default | ||
exit(EXIT_FAILURE); | ||
} | ||
} | ||
break; | ||
|
||
// while (std::getline(patternStream, pattern, ',')) { | ||
// if (pattern.empty()) continue; | ||
|
||
// if (std::any_of(pattern.begin(), pattern.end(), ::isdigit)) { | ||
// std::cerr << "Error: Pattern '" << pattern << "' contains numerical characters.\n"; | ||
// exit(EXIT_FAILURE); | ||
// } | ||
|
||
// unmaskSequence(pattern); | ||
|
||
// std::cout << "Adding pattern: " << pattern << " and its reverse complement" << "\n"; | ||
// userInput.patterns.emplace_back(pattern); | ||
// userInput.patterns.emplace_back(revCom(pattern)); | ||
// } | ||
|
||
// if (userInput.patterns.empty()) { | ||
// userInput.patterns = {"TTAGGG", "CCCTAA"}; | ||
// std::cout << "No patterns provided. Only scanning for canonical patterns: TTAGGG, CCCTAA" << "\n"; | ||
// } else { | ||
// // Remove duplicates | ||
// std::sort(userInput.patterns.begin(), userInput.patterns.end()); | ||
// auto last = std::unique(userInput.patterns.begin(), userInput.patterns.end()); | ||
// userInput.patterns.erase(last, userInput.patterns.end()); | ||
// } | ||
// } | ||
// break; | ||
|
||
case 'p': | ||
{ | ||
std::istringstream patternStream(optarg); | ||
std::string pattern; | ||
|
||
while (std::getline(patternStream, pattern, ',')) { | ||
if (pattern.empty()) continue; | ||
|
||
if (std::any_of(pattern.begin(), pattern.end(), ::isdigit)) { | ||
std::cerr << "Error: Pattern '" << pattern << "' contains numerical characters.\n"; | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
unmaskSequence(pattern); | ||
|
||
std::cout << "Adding pattern: " << pattern << " and its reverse complement" << "\n"; | ||
userInput.patterns.emplace_back(pattern); | ||
userInput.patterns.emplace_back(revCom(pattern)); | ||
|
||
// Generate all combinations for the pattern based on IUPAC codes | ||
std::vector<std::string> combinations; | ||
std::string current_pattern = pattern; | ||
generate_combinations(pattern, current_pattern, 0, combinations); | ||
|
||
// Add each combination and its reverse complement to userInput.patterns | ||
for (const std::string &comb : combinations) { | ||
std::cout << "Adding pattern: " << comb << " and its reverse complement" << "\n"; | ||
userInput.patterns.emplace_back(comb); | ||
userInput.patterns.emplace_back(revCom(comb)); | ||
} | ||
} | ||
|
||
if (userInput.patterns.empty()) { | ||
userInput.patterns = {"TTAGGG", "CCCTAA"}; | ||
std::cout << "No patterns provided. Using canonical patterns: TTAGGG, CCCTAA" << "\n"; | ||
std::cout << "No patterns provided. Only scanning for canonical patterns: TTAGGG, CCCTAA" << "\n"; | ||
} else { | ||
// Remove duplicates | ||
std::sort(userInput.patterns.begin(), userInput.patterns.end()); | ||
auto last = std::unique(userInput.patterns.begin(), userInput.patterns.end()); | ||
userInput.patterns.erase(last, userInput.patterns.end()); | ||
} | ||
} | ||
break; | ||
break; | ||
|
||
|
||
case 'w': | ||
|
@@ -230,6 +256,38 @@ int main(int argc, char **argv) { | |
break; | ||
|
||
|
||
case 'm': { | ||
std::istringstream modeStream(optarg); | ||
std::string mode; | ||
std::set<std::string> providedModes; | ||
|
||
while (std::getline(modeStream, mode, ',')) { | ||
if (mode.empty()) continue; | ||
|
||
if (std::any_of(mode.begin(), mode.end(), ::isdigit)) { | ||
std::cerr << "Error: Mode '" << mode << "' contains numerical characters.\n"; | ||
exit(EXIT_FAILURE); | ||
} | ||
|
||
std::transform(mode.begin(), mode.end(), mode.begin(), ::tolower); | ||
|
||
if (mode == "all") { | ||
providedModes = {"match", "entropy", "gc"}; | ||
break; | ||
} else { | ||
providedModes.insert(mode); | ||
} | ||
} | ||
|
||
// Only disable modes that were not provided | ||
if (providedModes.find("match") == providedModes.end()) userInput.modeMatch = false; | ||
if (providedModes.find("entropy") == providedModes.end()) userInput.modeEntropy = false; | ||
if (providedModes.find("gc") == providedModes.end()) userInput.modeGC = false; | ||
|
||
break; | ||
} | ||
|
||
|
||
case 'v': // software version | ||
printf("/// Teloscope v%s\n", version.c_str()); | ||
printf("\nDeveloped by:\nGiulio Formenti [email protected]\n"); | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters