Skip to content

Commit

Permalink
Update to version 1.3.1 (#15)
Browse files Browse the repository at this point in the history
* Add gene_id to gtf output
* Adjust version checks and allow more recent nhmmer versions
* Handle 'X' and '=' cigar operations
* Fix bug when Q step is run with minimal arguments
* Handle secondary alignments
* Update how reads are split for C steps
* Improve error message when chr from alignment is not found in reference fasta
* Fix bug where start of novel isoform was off by 1 in gtf output
* Add automated tests
* Improve thread error checking
* Add sorting to get deterministic novel isoform IDs
* Use the split .fasta in the C step
* Update expected example output
* Use TMPDIR in S step in snakemake
* Update parsing for new version of snakemake
* Suppress error message if git commit not found
* Handle reused slurm IDs
* Increase default Q step memory
  • Loading branch information
EricKutschera authored Mar 29, 2023
1 parent f7289a7 commit 6629e09
Show file tree
Hide file tree
Showing 41 changed files with 4,746 additions and 588 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ ESPRESSO (Error Statistics PRomoted Evaluator of Splice Site Options) is a novel
* [Visualization](#visualization)
+ [Visualization Arguments](#visualization-arguments)
+ [IGV](#igv)
* [Test](#test)
* [References](#references)

## Dependencies
Expand Down Expand Up @@ -354,6 +355,10 @@ optional arguments:
+ Edit the text on the left axis.
+ Add a text title at the bottom.

## Test

* Run the automated tests in [tests/](tests/) with [./run_tests](./run_tests). To run a single test file, pass its path as an argument: `./run_tests tests/sirv_example/test.py`

## References

1. Li H. Minimap2: pairwise alignment for nucleotide sequences[J]. Bioinformatics, 2018, 34(18): 3094-3100.
25 changes: 25 additions & 0 deletions run_tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash

# Determine the absolute path of the directory containing this script and
# store it in the global SCRIPT_DIR.
#
# The working directory is changed temporarily to resolve the path and is
# restored before returning.
#
# Returns 1 if any step fails.
function set_script_dir() {
  # Declare locals separately from their assignment: the exit status of
  # `local var="$(cmd)"` is that of `local`, which would hide a failure
  # of cmd and make the `|| return 1` check ineffective.
  local orig_dir
  orig_dir="$(pwd)" || return 1

  # Quote BASH_SOURCE so a script path containing whitespace is passed
  # to dirname as a single argument
  local rel_script_dir
  rel_script_dir="$(dirname "${BASH_SOURCE[0]}")" || return 1

  cd "${rel_script_dir}" || return 1
  SCRIPT_DIR="$(pwd)" || return 1
  cd "${orig_dir}" || return 1
}

# Entry point: run the automated tests with PYTHONPATH set to the directory
# containing this script.
#
# With no arguments the full test runner is executed. Otherwise the first
# argument is run as a specific test file:
#   ./run_tests tests/sirv_example/test.py
#
# Returns 1 if setup fails or the python process exits with an error.
function main() {
  set_script_dir || return 1
  export PYTHONPATH="${SCRIPT_DIR}" || return 1

  # Default to the full runner; a first argument selects a single test file
  local test_script="${SCRIPT_DIR}/tests/runner.py"
  if [[ "$#" -gt 0 ]]; then
    test_script="$1"
  fi

  python "${test_script}" || return 1
}

# Forward all command-line arguments to main. As the last command in the
# script, its exit status becomes the exit status of the script.
main "$@"
9 changes: 7 additions & 2 deletions snakemake/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ rule run_espresso_s:
script=os.path.join('scripts', 'espresso_s_wrapper.py'),
espresso_s_path=os.path.join(config['espresso_path'], 'ESPRESSO_S.pl'),
out_dir=lambda wildcards, output: os.path.dirname(output.updated_tsv),
tmp_dir='espresso_out',
threads: config['espresso_s_threads']
resources:
mem_mb=config['espresso_s_mem_gb'] * 1024,
Expand All @@ -440,6 +441,7 @@ rule run_espresso_s:
'{params.conda_wrapper} python {params.script}'
' --s-input {input.s_input}'
' --out-tsv {output.samples_tsv}'
' --tmp-dir {params.tmp_dir}'
' --command'
' perl {params.espresso_s_path}'
' -A {input.gtf}'
Expand Down Expand Up @@ -467,6 +469,7 @@ checkpoint split_s_for_c:
new_base_dir=os.path.join('espresso_out', 'c_work_dir'),
target_reads_per_c=config['target_reads_per_espresso_c_job'],
sort_memory_buffer_size='2G',
num_threads_per_c=config['espresso_c_threads'],
resources:
mem_mb=config['split_s_for_c_mem_gb'] * 1024,
time_hours=config['split_s_for_c_time_hr'],
Expand All @@ -475,6 +478,7 @@ checkpoint split_s_for_c:
' --orig-work-dir {params.orig_work_dir}'
' --new-base-dir {params.new_base_dir}'
' --target-reads-per-c {params.target_reads_per_c}'
' --num-threads-per-c {params.num_threads_per_c}'
' --genome-fasta {input.fasta}'
' --sort-memory-buffer-size {params.sort_memory_buffer_size}'
' 1> {log.out}'
Expand All @@ -490,7 +494,6 @@ rule run_espresso_c:
input:
updated_tsv=os.path.join('espresso_out', 'c_work_dir',
'samples.tsv.updated'),
fasta=os.path.join('references', config['fasta_name']),
output:
done=touch(os.path.join('espresso_out', 'c_work_dir',
'run_espresso_c_{updated_tsv_i}.done')),
Expand All @@ -505,6 +508,8 @@ rule run_espresso_c:
keep_temp='-K' if config['keep_espresso_c_temp'] else '',
out_dir=run_espresso_c_out_dir_param,
sub_dir_i='0',
fasta=os.path.join('espresso_out', 'c_work_dir', 'fastas',
'{updated_tsv_i}.fa'),
threads: config['espresso_c_threads']
resources:
mem_mb=config['espresso_c_mem_gb'] * 1024,
Expand All @@ -513,7 +518,7 @@ rule run_espresso_c:
'{params.conda_wrapper}'
' perl {params.espresso_c_path}'
' -I {params.out_dir}'
' -F {input.fasta}'
' -F {params.fasta}'
' -X {params.sub_dir_i}'
' -T {threads}'
' {params.keep_temp}'
Expand Down
2 changes: 1 addition & 1 deletion snakemake/conda_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
beautifulsoup4=4.8.2
blast=2.10.1
hmmer=3.3.1
hmmer=3.3.2
minimap2=2.17
numpy=1.22.4
perl-storable=3.15
Expand Down
10 changes: 10 additions & 0 deletions snakemake/scripts/espresso_s_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import argparse
import os
import os.path
import subprocess


Expand All @@ -7,11 +9,17 @@ def parse_args():
description=('Wrapper to write the tsv needed to run ESPRESSO_S'))
parser.add_argument(
'--s-input',
required=True,
help=('a txt file where 1st line is comma separated paths of sam files'
' for each input, and 2nd line is comma separated sample names'
' for each input'))
parser.add_argument('--out-tsv',
required=True,
help='the path of the sample tsv to write')
parser.add_argument(
'--tmp-dir',
required=True,
help='the path of the directory to use for temporary files')
parser.add_argument('--command',
nargs=argparse.REMAINDER,
help='the espresso_s command to add the sample tsv to')
Expand Down Expand Up @@ -41,6 +49,8 @@ def run_espresso_s(args):
sams, names = read_s_input(args.s_input)
write_sample_tsv(sams, names, args.out_tsv)
espresso_command = args.command + ['-L', args.out_tsv]
abs_tmp_path = os.path.abspath(args.tmp_dir)
os.environ['TMPDIR'] = abs_tmp_path
subprocess.run(espresso_command, check=True)


Expand Down
Loading

0 comments on commit 6629e09

Please sign in to comment.