Skip to content

Commit

Permalink
correcting some errors
Browse files Browse the repository at this point in the history
  • Loading branch information
juanjo255 committed May 9, 2024
1 parent f163394 commit e28f198
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 29 deletions.
30 changes: 6 additions & 24 deletions mitnanex_cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ while getopts 'i:t:p:m:M:w:c:x:r:s:q:f:g:k:d' opt; do
c)
coverage=$OPTARG
;;
;;
x)
min_num_clusters=$OPTARG
;;
Expand Down Expand Up @@ -149,20 +148,10 @@ subsample(){
echo $timestamp': Step 1: Sampling with seqtk'
echo " "
seqkit seq -g --threads $threads --min-len $min_len --max-len $max_len $input_file | \
seqtk sample $input_file $proportion > $wd$prefix"_sample.sorted.fastq"
seqtk sample - $proportion > $wd$prefix"_sample.sorted.fastq"
echo $timestamp": $(samtools view -c $wd$prefix"_sample.sorted.fastq") reads outputted"
}

trim_adapters(){
    ## Trim sequencing adapters from a reads file with porechop.
    ## Arguments:
    ##   $1  input reads file (passed to porechop -i)
    ##   $2  output reads file (passed to porechop -o)
    ## Globals read:
    ##   $timestamp  prefix printed in front of the log message
    ##   $threads    value passed to porechop -t
    echo " "
    echo $timestamp': Trimming adapters with porechop'
    echo " "
    ## Quote the expansions so that paths containing spaces or glob
    ## characters reach porechop as single, unmodified arguments.
    porechop --verbosity 0 -t "$threads" -i "$1" -o "$2"
}

sort_file(){
## SORT FILE
echo " "
Expand All @@ -178,7 +167,7 @@ reads_overlap(){
echo " "
echo $timestamp': Looking for overlaps with minimap2'
echo " "
minimap2 -x ava-ont -t $threads --dual=yes \
minimap2 -x ava-ont -t $threads --dual=yes --split-prefix $prefix \
$wd$prefix"_sample.sorted.fastq" $wd$prefix"_sample.sorted.fastq" | \
fpa drop --internalmatch --length-lower $min_len > $wd$prefix".paf"
}
Expand Down Expand Up @@ -294,21 +283,14 @@ start=$SECONDS

#### PIPELINE ####

### Trimming adapters with porechop. The calls below are listed in the order in which the reads were extracted.
### It's slow, so I will rely on the dorado feature to remove adapters instead.

##&& trim_adapters $wd$prefix"_sample.sorted.fastq" $wd$prefix"_sample.sorted.fastq" \
##&& trim_adapters $wd$prefix"_collected_reads.fastq" $wd$prefix"_collected_reads.fastq" \
##&& trim_adapters $wd$prefix"_collected_reads.fastq" $wd$prefix"_collected_reads.fastq" \

create_wd && subsample \
&& sort_file && reads_overlap && mt_reads_filt && first_assembly && gfa2fasta \
&& sort_file && reads_overlap && mt_reads_filt \
&& first_assembly && gfa2fasta \
&& collecting_mt_reads $wd$prefix"_first_draft_asm.fasta" $input_file $wd$prefix"_align.bam" $wd$prefix"_collected_reads.fastq" \
&& quality_control && second_assembly && select_contig \
&& collecting_mt_reads $wd$prefix"_second_draft_asm.fasta" $input_file $wd$prefix"_align.bam" $wd$prefix"_collected_reads.fastq" \
&& quality_control && correct_reads #&& polish_asm

#correct_reads
&& quality_control
#&& correct_reads #&& polish_asm

echo ""
echo "### MITNANEX finished ###"
Expand Down
4 changes: 2 additions & 2 deletions src/select_contig.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ def select_contig(flye_metadata: str, flye_asm_file: str) -> str:
flye_metadata_df = pd.read_csv(flye_metadata, delimiter="\t", header=0)
print("")
print("---> metadata of contigs generated by Flye <---")
flye_metadata_df.sort_values(by=["cov.", "length"], inplace=True, ascending=False)
print(flye_metadata_df)
flye_metadata_df.sort_values(by=["cov.", "length"], inplace=True)

## Check if there is a circular genome. If so and there are more than one, select the longest
if flye_metadata_df.empty:
Expand All @@ -24,7 +24,7 @@ def select_contig(flye_metadata: str, flye_asm_file: str) -> str:
print("A circular genome found!!")
flye_metadata_df = flye_metadata_df[
flye_metadata_df["circ."] == "Y"
].sort_values(by=["cov.", "length"])
].sort_values(by=["cov.", "length"], ascending=False)
return get_contig_sequence(flye_metadata_df["#seq_name"].iloc[0], flye_asm_file)

else:
Expand Down
4 changes: 2 additions & 2 deletions src/utils_rs/.gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/target

._target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
Expand Down Expand Up @@ -69,4 +69,4 @@ docs/_build/
.vscode/

# Pyenv
.python-version
.python-version
2 changes: 1 addition & 1 deletion src/utils_rs/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["maturin>=1.3,<2.0"]
build-backend = "maturin"

[project]
name = "utils"
name = "utils_mitnanex"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",
Expand Down

0 comments on commit e28f198

Please sign in to comment.