From f9dd8615763433bab6713abba1434883fa058a16 Mon Sep 17 00:00:00 2001
From: jahn <jahn@mpusp.mpg.de>
Date: Wed, 6 Nov 2024 15:41:50 +0100
Subject: [PATCH] feat: added global env directive, closes #11

---
 .gitignore               |   4 ++
 README.md                | 108 +++++++++++++++++++--------------------
 workflow/Snakefile       |   5 +-
 workflow/envs/global.yml |   6 +++
 4 files changed, 67 insertions(+), 56 deletions(-)
 create mode 100644 workflow/envs/global.yml

diff --git a/.gitignore b/.gitignore
index 23a8550..0cf8249 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,10 @@ resources/**
 !.gitignore
 !.gitattributes
 !.editorconfig
+**.conda
+**.condarc
+**.cache
+**.java
 
 # Custom additions
 Notes.md
diff --git a/README.md b/README.md
index 5ed7df8..a01ba01 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # snakemake-bacterial-riboseq
 
 ![Platform](https://img.shields.io/badge/platform-all-green)
-[![Snakemake](https://img.shields.io/badge/snakemake-≥7.0.0-brightgreen.svg)](https://snakemake.github.io)
+[![Snakemake](https://img.shields.io/badge/snakemake-≥8.0.0-brightgreen.svg)](https://snakemake.github.io)
 [![Tests](https://github.com/MPUSP/snakemake-bacterial-riboseq/actions/workflows/main.yml/badge.svg)](https://github.com/MPUSP/snakemake-bacterial-riboseq/actions/workflows/main.yml)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1D355C.svg?labelColor=000000)](https://sylabs.io/docs/)
@@ -133,69 +133,69 @@ snakemake --dry-run
 To run the complete workflow with test files using **`conda`**, execute the following command. The definition of the number of compute cores is mandatory.
 
 ```bash
-snakemake --cores 10 --use-conda --directory .test
+snakemake --cores 10 --sdm conda --directory .test
 ```
 
-To run the workflow with **singularity**, use:
+To run the workflow with **singularity** / **apptainer**, use:
 
 ```bash
-snakemake --cores 10 --use-singularity --use-conda --directory .test
+snakemake --cores 10 --sdm conda apptainer --directory .test
 ```
 
 ### Parameters
 
 This table lists all parameters that can be used to run the workflow.
 
-| parameter                    | type | details                                     | default                                              |
-| ---------------------------- | ---- | ------------------------------------------- | ---------------------------------------------------- |
-| **samplesheet**        |      |                                             |                                                      |
-| path                         | str  | path to samplesheet, mandatory              | "config/samples.tsv"                                 |
-| **get_genome**         |      |                                             |                                                      |
-| database                     | str  | one of `manual`, `ncbi`                 | `ncbi`                                             |
-| assembly                     | str  | RefSeq ID                                   | `GCF_000006785.2`                                  |
-| fasta                        | str  | optional path to fasta file                 | Null                                                 |
-| gff                          | str  | optional path to gff file                   | Null                                                 |
-| gff_source_type              | str  | list of name/value pairs for GFF source     | see config file                                      |
-| **cutadapt**           |      |                                             |                                                      |
-| fivep_adapter                | str  | sequence of the 5' adapter                  | Null                                                 |
-| threep_adapter               | str  | sequence of the 3' adapter                  | `ATCGTAGATCGGAAGAGCACACGTCTGAA`                    |
-| default                      | str  | additional options passed to `cutadapt`   | [`-q 10 `, `-m 22 `, `-M 52`, `--overlap=3`] |
-| **umi_extraction**     |      |                                             |                                                      |
-| method                       | str  | one of `string` or `regex`, see manual  | `regex`                                            |
-| pattern                      | str  | string or regular expression                | `^(?P<umi_0>.{5}).*(?P<umi_1>.{2})$`               |
-| **umi_dedup**          |      |                                             |                                                      |
-| options                      | str  | default options for deduplication           | see config file                                      |
-| **star**               |      |                                             |                                                      |
-| index                        | str  | location of genome index; if Null, is made  | Null                                                 |
-| genomeSAindexNbases          | num  | length of pre-indexing string, see STAR man | 9                                                    |
-| multi                        | num  | max number of loci read is allowed to map   | 10                                                   |
-| sam_multi                    | num  | max number of alignments reported for read  | 1                                                    |
-| intron_max                   | num  | max length of intron; 0 = automatic choice  | 1                                                    |
-| default                      | str  | default options for STAR aligner            | see config file                                      |
-| **extract_features**   |      |                                             |                                                      |
-| biotypes                     | str  | biotypes to exclude from mapping            | [`rRNA`, `tRNA`]                                 |
-| CDS                          | str  | CDS type to include for mapping             | [`protein_coding`]                                 |
-| **bedtools_intersect** |      |                                             |                                                      |
-| defaults                     | str  | remove hits, sense strand, min overlap 20%  | [`-v `, `-s `, `-f 0.2`]                       |
-| **annotate_orfs**      |      |                                             |                                                      |
-| window_size                  | num  | size of 5'-UTR added to CDS                 | 30                                                   |
-| **shift_reads**        |      |                                             |                                                      |
-| window_size                  | num  | start codon window to determine shift       | 30                                                   |
-| read_length                  | num  | size range of reads to use for shifting     | [27, 45]                                             |
-| end_alignment                | str  | end used for alignment of RiboSeq reads     | `3prime`                                           |
-| shift_table                  | str  | optional table with offsets per read length | Null                                                 |
-| export_bigwig                | str  | export shifted reads as bam file            | True                                                 |
-| export_ofst                  | str  | export shifted reads as ofst file           | False                                                |
-| skip_shifting                | str  | skip read shifting entirely                 | False                                                |
-| skip_length_filter           | str  | skip filtering reads by length              | False                                                |
-| **multiqc**            |      |                                             |                                                      |
-| config                       | str  | path to multiqc config                      | `config/multiqc_config.yml`                        |
-| **report**             |      |                                             |                                                      |
-| export_figures               | bool | export figures as `.svg` and `.png`     | True                                                 |
-| export_dir                   | str  | sub-directory for figure export             | `figures/`                                         |
-| figure_width                 | num  | standard figure width in px                 | 875                                                  |
-| figure_height                | num  | standard figure height in px                | 500                                                  |
-| figure_resolution            | num  | standard figure resolution in dpi           | 125                                                  |
+| parameter              | type | details                                     | default                                      |
+| ---------------------- | ---- | ------------------------------------------- | -------------------------------------------- |
+| **samplesheet**        |      |                                             |                                              |
+| path                   | str  | path to samplesheet, mandatory              | "config/samples.tsv"                         |
+| **get_genome**         |      |                                             |                                              |
+| database               | str  | one of `manual`, `ncbi`                     | `ncbi`                                       |
+| assembly               | str  | RefSeq ID                                   | `GCF_000006785.2`                            |
+| fasta                  | str  | optional path to fasta file                 | Null                                         |
+| gff                    | str  | optional path to gff file                   | Null                                         |
+| gff_source_type        | str  | list of name/value pairs for GFF source     | see config file                              |
+| **cutadapt**           |      |                                             |                                              |
+| fivep_adapter          | str  | sequence of the 5' adapter                  | Null                                         |
+| threep_adapter         | str  | sequence of the 3' adapter                  | `ATCGTAGATCGGAAGAGCACACGTCTGAA`              |
+| default                | str  | additional options passed to `cutadapt`     | [`-q 10 `, `-m 22 `, `-M 52`, `--overlap=3`] |
+| **umi_extraction**     |      |                                             |                                              |
+| method                 | str  | one of `string` or `regex`, see manual      | `regex`                                      |
+| pattern                | str  | string or regular expression                | `^(?P<umi_0>.{5}).*(?P<umi_1>.{2})$`         |
+| **umi_dedup**          |      |                                             |                                              |
+| options                | str  | default options for deduplication           | see config file                              |
+| **star**               |      |                                             |                                              |
+| index                  | str  | location of genome index; if Null, is made  | Null                                         |
+| genomeSAindexNbases    | num  | length of pre-indexing string, see STAR man | 9                                            |
+| multi                  | num  | max number of loci read is allowed to map   | 10                                           |
+| sam_multi              | num  | max number of alignments reported for read  | 1                                            |
+| intron_max             | num  | max length of intron; 0 = automatic choice  | 1                                            |
+| default                | str  | default options for STAR aligner            | see config file                              |
+| **extract_features**   |      |                                             |                                              |
+| biotypes               | str  | biotypes to exclude from mapping            | [`rRNA`, `tRNA`]                             |
+| CDS                    | str  | CDS type to include for mapping             | [`protein_coding`]                           |
+| **bedtools_intersect** |      |                                             |                                              |
+| defaults               | str  | remove hits, sense strand, min overlap 20%  | [`-v `, `-s `, `-f 0.2`]                     |
+| **annotate_orfs**      |      |                                             |                                              |
+| window_size            | num  | size of 5'-UTR added to CDS                 | 30                                           |
+| **shift_reads**        |      |                                             |                                              |
+| window_size            | num  | start codon window to determine shift       | 30                                           |
+| read_length            | num  | size range of reads to use for shifting     | [27, 45]                                     |
+| end_alignment          | str  | end used for alignment of RiboSeq reads     | `3prime`                                     |
+| shift_table            | str  | optional table with offsets per read length | Null                                         |
+| export_bigwig          | bool | export shifted reads as bigwig file         | True                                         |
+| export_ofst            | bool | export shifted reads as ofst file           | False                                        |
+| skip_shifting          | bool | skip read shifting entirely                 | False                                        |
+| skip_length_filter     | bool | skip filtering reads by length              | False                                        |
+| **multiqc**            |      |                                             |                                              |
+| config                 | str  | path to multiqc config                      | `config/multiqc_config.yml`                  |
+| **report**             |      |                                             |                                              |
+| export_figures         | bool | export figures as `.svg` and `.png`         | True                                         |
+| export_dir             | str  | sub-directory for figure export             | `figures/`                                   |
+| figure_width           | num  | standard figure width in px                 | 875                                          |
+| figure_height          | num  | standard figure height in px                | 500                                          |
+| figure_resolution      | num  | standard figure resolution in dpi           | 125                                          |
 
 ## Authors
 
diff --git a/workflow/Snakefile b/workflow/Snakefile
index ab52982..f1551e0 100644
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -8,13 +8,14 @@
 #          may apply.                                   #
 # ----------------------------------------------------- #
 
+conda:
+    "envs/global.yml"
+
 import os
 import pandas as pd
 from datetime import date
 from snakemake.utils import min_version
 
-# min_version("7.0")
-
 __author__ = "Rina Ahmed-Begrich, Michael Jahn"
 __year__ = str(date.today()).split("-")[0]
 
diff --git a/workflow/envs/global.yml b/workflow/envs/global.yml
new file mode 100644
index 0000000..8c334a1
--- /dev/null
+++ b/workflow/envs/global.yml
@@ -0,0 +1,6 @@
+name: global
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - pandas=2.2.2