-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpermutation.snakefile
148 lines (122 loc) · 4.47 KB
/
permutation.snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# permcutter - permutation testing for leafcutter
# number of permutations of the condition column to perform differential splicing on
nPerm = 1000
# snakefile for leafcutter pipeline
# dependencies:
# samtools
# regtools
# python - pandas
# R - leafcutter
# R - a bunch of packages
import pandas as pd
import os
import socket
# get variables out of config.yaml
leafcutterPath = config['leafcutterPath']
python2Path = config['python2Path']
python3Path = config['python3Path']
dataCode = config['dataCode']
print(dataCode)
inFolder = config['inFolder']
# create outFolder path using dataCode
outFolder = config['outFolder'] + config['dataCode'] + '/'
permFolder = config['outFolder'] + config['dataCode'] + '/permutation/'
stranded = config['stranded']
juncSuffix = config['juncSuffix']
if stranded != 0:
strandParam = "--strand"
else:
strandParam = ""
print(outFolder)
metadata = config['metadata']
refCondition = config['refCondition']
altCondition = config['altCondition']
# leafcutter options
leafcutterOpt = config['leafcutter']
# ds options
samplesPerIntron = leafcutterOpt["samplesPerIntron"]
samplesPerGroup = leafcutterOpt["samplesPerGroup"]
minCoverage = leafcutterOpt["minCoverage"]
# get sample information of support file
# using pandas
samples = pd.read_csv(metadata, sep = '\t')['sample']
# Chimera specific options
isChimera = "hpc.mssm.edu" in socket.getfqdn()
# not sure if this works when running in serial on interactive node
if isChimera:
shell.prefix('export PS1="";source activate leafcutter-pipeline;ml R/3.6.0;')
#else:
#shell.prefix('conda activate leafcutterpipeline;')
# default R is now 3.6 - doesn't support leafcutter yet
#shell.prefix('ml R/3.6.0;')
clusterRegtools = config["clusterRegtools"]
if clusterRegtools == True:
clusterScript = python3Path + " scripts/leafcutter_cluster_regtools.py"
junctionMode = "regtools"
strandParam = "" # strandParam only needed for normal clustering
else:
clusterScript = python2Path + " scripts/leafcutter_cluster.py"
junctionMode = "RAPiD"
localrules: copyConfig, writeJunctionList
rule all:
#input: outFolder + "junctionList.txt"
#input: outFolder + dataCode + "_perind_numers.counts.gz"
#input: outFolder + dataCode + "_ds_support.tsv"
input:
outFolder + "config.yaml",
permFolder + dataCode + "_all_permutation_results.tsv"
#expand(permFolder + dataCode + "_permutation_{permutation}_cluster_significance.txt", permutation = range(1,nPerm+1) )
#outFolder + dataCode + "_classifications.tsv",
#outFolder + dataCode + "_summary.tsv",
#outFolder + dataCode + "_cassette_inclusion.tsv",
#outFolder + dataCode + "_residual.counts.gz"
# prepare support table the way leafcutter likes it
# be wary of sample names - sometimes the juncSuffix will be appended
rule permuteMeta:
input: metadata
output: support = permFolder + dataCode + "_permutation_{permutation}_ds_support.tsv"
params: js = juncSuffix,
permDataCode = dataCode + "_permutation_{permutation}"
shell:
"Rscript scripts/sort_support.R "
" --metadata {metadata} "
" --dataCode {params.permDataCode} "
" --refCondition {refCondition} "
" --altCondition {altCondition} "
" --outFolder {permFolder} "
" --junctionMode {junctionMode} "
" --permutation "
" --juncSuffix \"{params.js}\" "
";"
# run differential splicing
rule leafcutterDS:
input:
support = permFolder + dataCode + "_permutation_{permutation}_ds_support.tsv",
clusters = outFolder + dataCode + "_filtered_perind_numers.counts.gz"
output:
sigClusters = permFolder + dataCode + "_permutation_{permutation}_cluster_significance.txt",
effectSizes = permFolder + dataCode + "_permutation_{permutation}_effect_sizes.txt"
params:
perm = "{permutation}",
n_threads = leafcutterOpt['n_threads']
shell:
'Rscript {leafcutterPath}/scripts/leafcutter_ds.R '
' --output_prefix {permFolder}{dataCode}_permutation_{params.perm} '
' --num_threads {params.n_threads} '
' --min_samples_per_intron {samplesPerIntron} '
' --min_samples_per_group {samplesPerGroup} '
' --min_coverage {minCoverage} '
' {input.clusters} '
' {input.support} '
rule combinePermutationResults:
input:
expand(permFolder + dataCode + "_permutation_{permutation}_cluster_significance.txt", permutation = range(1,nPerm+1) )
output:
permFolder + dataCode + "_all_permutation_results.tsv"
params:
script = "scripts/combine_permutation_results.R"
shell:
"ml R/3.6.0;"
"Rscript {params.script} "
" --outFolder {permFolder} "
" --dataCode {dataCode} "