Skip to content

Commit

Permalink
fix lints
Browse files Browse the repository at this point in the history
  • Loading branch information
johanneskoester committed Jan 14, 2021
1 parent 98c3bb2 commit d937905
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 46 deletions.
6 changes: 3 additions & 3 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ rule all:
"qc/multiqc_report.html",


##### setup singularity #####
##### setup container #####


# this container defines the underlying OS for each job when using the workflow
# this container image defines the underlying OS for each job when using the workflow
# with --use-conda --use-singularity
singularity: "docker://continuumio/miniconda3"
container: "docker://continuumio/miniconda3"


##### setup report #####
Expand Down
15 changes: 0 additions & 15 deletions rules/align.smk
Original file line number Diff line number Diff line change
@@ -1,18 +1,3 @@
def get_fq(wildcards):
    """Return the FASTQ input(s) for the given sample/unit wildcards.

    When ``config["trimming"]["skip"]`` is truthy, the non-null ``fq1``/``fq2``
    entries of the ``units`` table are returned. Otherwise the paths of the
    trimmed reads are returned: two files (groups 1 and 2) for paired-end
    units, a single path for single-end units.
    """
    if config["trimming"]["skip"]:
        # no trimming, use raw reads
        row_key = (wildcards.sample, wildcards.unit)
        return units.loc[row_key, ["fq1", "fq2"]].dropna()

    # yes trimming, use trimmed data
    if is_single_end(**wildcards):
        # single end sample
        return "trimmed/{sample}-{unit}.fastq.gz".format(**wildcards)

    # paired-end sample
    return expand(
        "trimmed/{sample}-{unit}.{group}.fastq.gz", group=[1, 2], **wildcards
    )


rule align:
input:
sample=get_fq,
Expand Down
41 changes: 41 additions & 0 deletions rules/common.smk
Original file line number Diff line number Diff line change
@@ -1,2 +1,43 @@
def is_single_end(sample, unit):
    """Return whether the ``fq2`` entry of ``units`` for (sample, unit) is null.

    NOTE(review): presumably ``units`` is a pandas DataFrame indexed by
    (sample, unit) that is defined elsewhere in the workflow -- confirm.
    """
    second_mate = units.loc[(sample, unit), "fq2"]
    return pd.isnull(second_mate)


def get_fastq(wildcards):
    """Return the non-null ``fq1``/``fq2`` entries of ``units`` for a unit.

    The row is selected by ``(wildcards.sample, wildcards.unit)``; missing
    values are dropped from the result.
    """
    row_key = (wildcards.sample, wildcards.unit)
    fastq_columns = ["fq1", "fq2"]
    return units.loc[row_key, fastq_columns].dropna()


def get_fq(wildcards):
    """Return the FASTQ input(s) to use for the given sample/unit wildcards.

    If ``config["trimming"]["skip"]`` is truthy, the raw reads are looked up
    in the ``units`` table (non-null ``fq1``/``fq2`` entries). Otherwise the
    trimmed read paths are returned: an expanded list for groups 1 and 2 when
    the unit is paired-end, or one formatted path when it is single-end.
    """
    if config["trimming"]["skip"]:
        # no trimming, use raw reads
        row_key = (wildcards.sample, wildcards.unit)
        return units.loc[row_key, ["fq1", "fq2"]].dropna()

    # yes trimming, use trimmed data
    if is_single_end(**wildcards):
        # single end sample
        return "trimmed/{sample}-{unit}.fastq.gz".format(**wildcards)

    # paired-end sample
    return expand(
        "trimmed/{sample}-{unit}.{group}.fastq.gz", group=[1, 2], **wildcards
    )


def get_strandedness(units):
    """Return one strandedness value per row of ``units``.

    If ``units`` has a ``strandedness`` column, its values are returned as a
    list; otherwise a list of ``"none"`` with one entry per row is returned.
    """
    if "strandedness" not in units.columns:
        # no column given: fall back to "none" for every row
        row_count = units.shape[0]
        return ["none"] * row_count
    return units["strandedness"].tolist()


def get_deseq2_threads(wildcards=None):
    """Return the thread count to use for DESeq2: 1 or 6.

    One thread is returned when there are fewer than 100 entries in
    ``samples``, or when ``wildcards`` is given and its contrast has fewer
    than 10 entries; otherwise 6 is returned.
    """
    # https://twitter.com/mikelove/status/918770188568363008
    if wildcards is None:
        few_coeffs = False
    else:
        few_coeffs = len(get_contrast(wildcards)) < 10

    if len(samples) < 100 or few_coeffs:
        return 1
    return 6


def strip_suffix(pattern, suffix):
    """Return ``pattern`` with a trailing ``suffix`` removed.

    Args:
        pattern: The string (e.g. an output file path) to strip.
        suffix: The trailing text to remove.

    Returns:
        ``pattern`` without the trailing ``suffix``. If ``suffix`` is empty or
        ``pattern`` does not end with it, ``pattern`` is returned unchanged.
    """
    # Guard the slice: ``pattern[:-0]`` would yield "" for an empty suffix, and
    # an unchecked slice would chop unrelated characters on a mismatch.
    if suffix and pattern.endswith(suffix):
        return pattern[: -len(suffix)]
    return pattern


def get_contrast(wildcards):
    """Return the configured contrast entry named by ``wildcards.contrast``.

    The entry is read from ``config["diffexp"]["contrasts"]``.
    """
    contrasts = config["diffexp"]["contrasts"]
    return contrasts[wildcards.contrast]
22 changes: 3 additions & 19 deletions rules/diffexp.smk
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
def get_strandness(units):
    """Return one strandedness value per row of ``units``.

    The values of the ``strandedness`` column are returned as a list when the
    column exists; otherwise ``"none"`` is repeated once per row.
    """
    if "strandedness" not in units.columns:
        # no column given: fall back to "none" for every row
        row_count = units.shape[0]
        return ["none"] * row_count
    return units["strandedness"].tolist()


rule count_matrix:
input:
expand(
Expand All @@ -14,21 +6,17 @@ rule count_matrix:
),
output:
"counts/all.tsv",
log:
"logs/count-matrix.log",
params:
samples=units["sample"].tolist(),
strand=get_strandness(units),
strand=get_strandedness(units),
conda:
"../envs/pandas.yaml"
script:
"../scripts/count-matrix.py"


def get_deseq2_threads(wildcards=None):
    """Return the thread count to use for DESeq2: 1 or 6.

    Returns 1 when ``samples`` has fewer than 100 entries, or when
    ``wildcards`` is given and its contrast has fewer than 10 entries;
    returns 6 otherwise.
    """
    # https://twitter.com/mikelove/status/918770188568363008
    if wildcards is None:
        few_coeffs = False
    else:
        few_coeffs = len(get_contrast(wildcards)) < 10

    if len(samples) < 100 or few_coeffs:
        return 1
    return 6


rule deseq2_init:
input:
counts="counts/all.tsv",
Expand Down Expand Up @@ -60,10 +48,6 @@ rule pca:
"../scripts/plot-pca.R"


def get_contrast(wildcards):
    """Look up the contrast named by ``wildcards.contrast`` in the config.

    The lookup is done in ``config["diffexp"]["contrasts"]``.
    """
    contrasts = config["diffexp"]["contrasts"]
    return contrasts[wildcards.contrast]


rule deseq2:
input:
"deseq2/all.rds",
Expand Down
12 changes: 7 additions & 5 deletions rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ rule rseqc_junction_annotation:
"logs/rseqc/rseqc_junction_annotation/{sample}-{unit}.log",
params:
extra=r"-q 255", # STAR uses 255 as a score for unique mappers
prefix="qc/rseqc/{sample}-{unit}.junctionanno",
prefix=lambda w, output: strip_suffix(output[0], ".junction.bed"),
conda:
"../envs/rseqc.yaml"
shell:
Expand All @@ -45,7 +45,9 @@ rule rseqc_junction_saturation:
"logs/rseqc/rseqc_junction_saturation/{sample}-{unit}.log",
params:
extra=r"-q 255",
prefix="qc/rseqc/{sample}-{unit}.junctionsat",
prefix=lambda w, output: strip_suffix(
output[0], ".junctionSaturation_plot.pdf"
),
conda:
"../envs/rseqc.yaml"
shell:
Expand Down Expand Up @@ -92,7 +94,7 @@ rule rseqc_innerdis:
log:
"logs/rseqc/rseqc_innerdis/{sample}-{unit}.log",
params:
prefix="qc/rseqc/{sample}-{unit}.inner_distance_freq",
prefix=lambda w, output: strip_suffix(output[0], ".inner_distance.txt"),
conda:
"../envs/rseqc.yaml"
shell:
Expand Down Expand Up @@ -123,7 +125,7 @@ rule rseqc_readdup:
log:
"logs/rseqc/rseqc_readdup/{sample}-{unit}.log",
params:
prefix="qc/rseqc/{sample}-{unit}.readdup",
prefix=lambda w, output: strip_suffix(output[0], ".DupRate_plot.pdf"),
conda:
"../envs/rseqc.yaml"
shell:
Expand All @@ -139,7 +141,7 @@ rule rseqc_readgc:
log:
"logs/rseqc/rseqc_readgc/{sample}-{unit}.log",
params:
prefix="qc/rseqc/{sample}-{unit}.readgc",
prefix=lambda w, output: strip_suffix(output[0], ".GC_plot.pdf"),
conda:
"../envs/rseqc.yaml"
shell:
Expand Down
4 changes: 0 additions & 4 deletions rules/trim.smk
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
def get_fastq(wildcards):
    """Return the non-null ``fq1``/``fq2`` entries of ``units`` for a unit.

    The row is addressed by ``(wildcards.sample, wildcards.unit)``.
    """
    row_key = (wildcards.sample, wildcards.unit)
    fastq_columns = ["fq1", "fq2"]
    return units.loc[row_key, fastq_columns].dropna()


rule cutadapt_pe:
input:
get_fastq,
Expand Down
4 changes: 4 additions & 0 deletions scripts/count-matrix.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import sys
# logging
sys.stderr = open(snakemake.log[0], "w")

import pandas as pd

def get_column(strandedness):
Expand Down

0 comments on commit d937905

Please sign in to comment.