From f9dd8615763433bab6713abba1434883fa058a16 Mon Sep 17 00:00:00 2001 From: jahn Date: Wed, 6 Nov 2024 15:41:50 +0100 Subject: [PATCH] feat: added global env directive, closes #11 --- .gitignore | 4 ++ README.md | 108 +++++++++++++++++++-------------------- workflow/Snakefile | 5 +- workflow/envs/global.yml | 6 +++ 4 files changed, 67 insertions(+), 56 deletions(-) create mode 100644 workflow/envs/global.yml diff --git a/.gitignore b/.gitignore index 23a8550..0cf8249 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ resources/** !.gitignore !.gitattributes !.editorconfig +**.conda +**.condarc +**.cache +**.java # Custom additions Notes.md diff --git a/README.md b/README.md index 5ed7df8..a01ba01 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # snakemake-bacterial-riboseq ![Platform](https://img.shields.io/badge/platform-all-green) -[![Snakemake](https://img.shields.io/badge/snakemake-≥7.0.0-brightgreen.svg)](https://snakemake.github.io) +[![Snakemake](https://img.shields.io/badge/snakemake-≥8.0.0-brightgreen.svg)](https://snakemake.github.io) [![Tests](https://github.com/MPUSP/snakemake-bacterial-riboseq/actions/workflows/main.yml/badge.svg)](https://github.com/MPUSP/snakemake-bacterial-riboseq/actions/workflows/main.yml) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1D355C.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -133,69 +133,69 @@ snakemake --dry-run To run the complete workflow with test files using **`conda`**, execute the following command. The definition of the number of compute cores is mandatory. 
```bash -snakemake --cores 10 --use-conda --directory .test +snakemake --cores 10 --sdm conda --directory .test ``` -To run the workflow with **singularity**, use: +To run the workflow with **singularity** / **apptainer**, use: ```bash -snakemake --cores 10 --use-singularity --use-conda --directory .test +snakemake --cores 10 --sdm conda apptainer --directory .test ``` ### Parameters This table lists all parameters that can be used to run the workflow. -| parameter | type | details | default | -| ---------------------------- | ---- | ------------------------------------------- | ---------------------------------------------------- | -| **samplesheet** | | | | -| path | str | path to samplesheet, mandatory | "config/samples.tsv" | -| **get_genome** | | | | -| database | str | one of `manual`, `ncbi` | `ncbi` | -| assembly | str | RefSeq ID | `GCF_000006785.2` | -| fasta | str | optional path to fasta file | Null | -| gff | str | optional path to gff file | Null | -| gff_source_type | str | list of name/value pairs for GFF source | see config file | -| **cutadapt** | | | | -| fivep_adapter | str | sequence of the 5' adapter | Null | -| threep_adapter | str | sequence of the 3' adapter | `ATCGTAGATCGGAAGAGCACACGTCTGAA` | -| default | str | additional options passed to `cutadapt` | [`-q 10 `, `-m 22 `, `-M 52`, `--overlap=3`] | -| **umi_extraction** | | | | -| method | str | one of `string` or `regex`, see manual | `regex` | -| pattern | str | string or regular expression | `^(?P<umi_1>.{5}).*(?P<umi_2>.{2})$` | -| **umi_dedup** | | | | -| options | str | default options for deduplication | see config file | -| **star** | | | | -| index | str | location of genome index; if Null, is made | Null | -| genomeSAindexNbases | num | length of pre-indexing string, see STAR man | 9 | -| multi | num | max number of loci read is allowed to map | 10 | -| sam_multi | num | max number of alignments reported for read | 1 | -| intron_max | num | max length of intron; 0 = automatic choice | 1 | -| 
default | str | default options for STAR aligner | see config file | -| **extract_features** | | | | -| biotypes | str | biotypes to exclude from mapping | [`rRNA`, `tRNA`] | -| CDS | str | CDS type to include for mapping | [`protein_coding`] | -| **bedtools_intersect** | | | | -| defaults | str | remove hits, sense strand, min overlap 20% | [`-v `, `-s `, `-f 0.2`] | -| **annotate_orfs** | | | | -| window_size | num | size of 5'-UTR added to CDS | 30 | -| **shift_reads** | | | | -| window_size | num | start codon window to determine shift | 30 | -| read_length | num | size range of reads to use for shifting | [27, 45] | -| end_alignment | str | end used for alignment of RiboSeq reads | `3prime` | -| shift_table | str | optional table with offsets per read length | Null | -| export_bigwig | str | export shifted reads as bam file | True | -| export_ofst | str | export shifted reads as ofst file | False | -| skip_shifting | str | skip read shifting entirely | False | -| skip_length_filter | str | skip filtering reads by length | False | -| **multiqc** | | | | -| config | str | path to multiqc config | `config/multiqc_config.yml` | -| **report** | | | | -| export_figures | bool | export figures as `.svg` and `.png` | True | -| export_dir | str | sub-directory for figure export | `figures/` | -| figure_width | num | standard figure width in px | 875 | -| figure_height | num | standard figure height in px | 500 | -| figure_resolution | num | standard figure resolution in dpi | 125 | +| parameter | type | details | default | +| ---------------------- | ---- | ------------------------------------------- | -------------------------------------------- | +| **samplesheet** | | | | +| path | str | path to samplesheet, mandatory | "config/samples.tsv" | +| **get_genome** | | | | +| database | str | one of `manual`, `ncbi` | `ncbi` | +| assembly | str | RefSeq ID | `GCF_000006785.2` | +| fasta | str | optional path to fasta file | Null | +| gff | str | optional path to gff file 
| Null | +| gff_source_type | str | list of name/value pairs for GFF source | see config file | +| **cutadapt** | | | | +| fivep_adapter | str | sequence of the 5' adapter | Null | +| threep_adapter | str | sequence of the 3' adapter | `ATCGTAGATCGGAAGAGCACACGTCTGAA` | +| default | str | additional options passed to `cutadapt` | [`-q 10 `, `-m 22 `, `-M 52`, `--overlap=3`] | +| **umi_extraction** | | | | +| method | str | one of `string` or `regex`, see manual | `regex` | +| pattern | str | string or regular expression | `^(?P<umi_1>.{5}).*(?P<umi_2>.{2})$` | +| **umi_dedup** | | | | +| options | str | default options for deduplication | see config file | +| **star** | | | | +| index | str | location of genome index; if Null, is made | Null | +| genomeSAindexNbases | num | length of pre-indexing string, see STAR man | 9 | +| multi | num | max number of loci read is allowed to map | 10 | +| sam_multi | num | max number of alignments reported for read | 1 | +| intron_max | num | max length of intron; 0 = automatic choice | 1 | +| default | str | default options for STAR aligner | see config file | +| **extract_features** | | | | +| biotypes | str | biotypes to exclude from mapping | [`rRNA`, `tRNA`] | +| CDS | str | CDS type to include for mapping | [`protein_coding`] | +| **bedtools_intersect** | | | | +| defaults | str | remove hits, sense strand, min overlap 20% | [`-v `, `-s `, `-f 0.2`] | +| **annotate_orfs** | | | | +| window_size | num | size of 5'-UTR added to CDS | 30 | +| **shift_reads** | | | | +| window_size | num | start codon window to determine shift | 30 | +| read_length | num | size range of reads to use for shifting | [27, 45] | +| end_alignment | str | end used for alignment of RiboSeq reads | `3prime` | +| shift_table | str | optional table with offsets per read length | Null | +| export_bigwig | str | export shifted reads as bam file | True | +| export_ofst | str | export shifted reads as ofst file | False | +| skip_shifting | str | skip read shifting entirely 
| False | +| skip_length_filter | str | skip filtering reads by length | False | +| **multiqc** | | | | +| config | str | path to multiqc config | `config/multiqc_config.yml` | +| **report** | | | | +| export_figures | bool | export figures as `.svg` and `.png` | True | +| export_dir | str | sub-directory for figure export | `figures/` | +| figure_width | num | standard figure width in px | 875 | +| figure_height | num | standard figure height in px | 500 | +| figure_resolution | num | standard figure resolution in dpi | 125 | ## Authors diff --git a/workflow/Snakefile b/workflow/Snakefile index ab52982..f1551e0 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -8,13 +8,14 @@ # may apply. # # ----------------------------------------------------- # +conda: + "envs/global.yml" + import os import pandas as pd from datetime import date from snakemake.utils import min_version -# min_version("7.0") - __author__ = "Rina Ahmed-Begrich, Michael Jahn" __year__ = str(date.today()).split("-")[0] diff --git a/workflow/envs/global.yml b/workflow/envs/global.yml new file mode 100644 index 0000000..8c334a1 --- /dev/null +++ b/workflow/envs/global.yml @@ -0,0 +1,6 @@ +name: global +channels: + - conda-forge + - bioconda +dependencies: + - pandas=2.2.2