Skip to content

Commit 4a251ef

Browse files
committed
Restructure for nextflow pipeline
1 parent 4bd8a16 commit 4a251ef

18 files changed

+251
-119
lines changed

.editorconfig

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# http://editorconfig.org
2+
root = true
3+
4+
[*]
5+
indent_style = space
6+
indent_size = 4
7+
end_of_line = lf
8+
charset = utf-8
9+
trim_trailing_whitespace = true
10+
insert_final_newline = true
11+
12+
# Use 4 spaces for the Python files
13+
[*.py]
14+
indent_size = 4
15+
max_line_length = 80
16+
17+
# The JSON files contain newlines inconsistently
18+
[*.json]
19+
insert_final_newline = ignore
20+
21+
# Minified JavaScript files shouldn't be changed
22+
[**.min.js]
23+
indent_style = ignore
24+
insert_final_newline = ignore
25+
26+
# Makefiles always use tabs for indentation
27+
[Makefile]
28+
indent_style = tab
29+
30+
# Batch files use tabs for indentation
31+
[*.bat]
32+
indent_style = tab
33+
34+
[*.md]
35+
trim_trailing_whitespace = false
36+

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* text=auto eol=lf

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
[submodule "single-cell-analysis-nf"]
2-
path = single-cell-analysis-nf
3-
url = [email protected]:icbi-lab/single-cell-analysis-nf.git
41
[submodule "lib/scanpy_helper_submodule"]
52
path = lib/scanpy_helper_submodule
63
url = [email protected]:icbi-lab/tools/scanpy_helpers.git

analyses/10_integrate_scrnaseq_data/11_merge_all.ipynb renamed to analyses/20_integrate_scrnaseq_data/21_merge_all.ipynb

Lines changed: 27 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,23 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 2,
66
"metadata": {},
7-
"outputs": [],
7+
"outputs": [
8+
{
9+
"name": "stdout",
10+
"output_type": "stream",
11+
"text": [
12+
"The autoreload extension is already loaded. To reload it, use:\n",
13+
" %reload_ext autoreload\n"
14+
]
15+
}
16+
],
817
"source": [
918
"%load_ext autoreload\n",
1019
"%autoreload 2\n",
1120
"\n",
21+
"from nxfvars import nxfvars\n",
1222
"import scanpy as sc\n",
1323
"import numpy as np\n",
1424
"import itertools\n",
@@ -37,6 +47,15 @@
3747
"import re"
3848
]
3949
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 3,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"out_dir = nxfvars.get(\"outdir\", \"/tmp\")"
57+
]
58+
},
4059
{
4160
"cell_type": "code",
4261
"execution_count": 2,
@@ -54,7 +73,7 @@
5473
}
5574
],
5675
"source": [
57-
"threadpool_limits(8)"
76+
"threadpool_limits(int(nxfvars.get(\"cpus\", \"8\")))"
5877
]
5978
},
6079
{
@@ -72,7 +91,7 @@
7291
"metadata": {},
7392
"outputs": [],
7493
"source": [
75-
"dataset_table = pd.read_csv(\"../../tables/samplesheet_scrnaseq_preprocessing.csv\")"
94+
"dataset_table = pd.read_csv(nxfvars.get(\"samplesheet\", \"../../tables/samplesheet_scrnaseq_preprocessing.csv\"))"
7695
]
7796
},
7897
{
@@ -478,9 +497,10 @@
478497
}
479498
],
480499
"source": [
500+
"dataset_path = nxfvars.get(\"dataset_path\", \"../../data/20_qc_norm_scrnaseq/01_qc_and_filtering\")\n",
481501
"datasets = {\n",
482502
" dataset_id: sc.read_h5ad(\n",
483-
" f\"../../data/20_qc_norm_scrnaseq/01_qc_and_filtering/{dataset_id}/{dataset_id}.qc.h5ad\"\n",
503+
" f\"{dataset_id}.qc.h5ad\" if dataset_path == \".\" else f\"{dataset_path}/{dataset_id}/{dataset_id}.qc.h5ad\"\n",
484504
" )\n",
485505
" for dataset_id in tqdm(dataset_table[\"id\"])\n",
486506
"}"
@@ -2621,7 +2641,7 @@
26212641
"metadata": {},
26222642
"outputs": [],
26232643
"source": [
2624-
"obs_all.to_excel(\"../../data/50_integrate_scrnaseq_data/51_merge_all/obs_all.xlsx\")"
2644+
"obs_all.to_excel(f\"{out_dir}/obs_all.xlsx\")"
26252645
]
26262646
},
26272647
{
@@ -2921,64 +2941,9 @@
29212941
],
29222942
"source": [
29232943
"merged_all.write_h5ad(\n",
2924-
" \"../../data/50_integrate_scrnaseq_data/51_merge_all/merged_all.h5ad\"\n",
2944+
" f\"{out_dir}/merged_all.h5ad\"\n",
29252945
")"
29262946
]
2927-
},
2928-
{
2929-
"cell_type": "markdown",
2930-
"metadata": {},
2931-
"source": [
2932-
"## Export for NSCLC heterogeneity\n",
2933-
" * only tumor samples (no controls, no metastases)\n",
2934-
" * all NSCLC subtypes"
2935-
]
2936-
},
2937-
{
2938-
"cell_type": "code",
2939-
"execution_count": 33,
2940-
"metadata": {},
2941-
"outputs": [],
2942-
"source": [
2943-
"# datasets_nsclc_heterogeneity = dict()\n",
2944-
"# for dataset_id, dataset in datasets.items():\n",
2945-
"# if \"tumor_primary\" in dataset.obs[\"origin\"].values:\n",
2946-
"# datasets_nsclc_heterogeneity[dataset_id] = dataset[\n",
2947-
"# dataset.obs[\"origin\"] == \"tumor_primary\", :\n",
2948-
"# ].copy()\n",
2949-
"# del datasets_nsclc_heterogeneity[\"Pircher_batch1_NSCLC\"]"
2950-
]
2951-
},
2952-
{
2953-
"cell_type": "code",
2954-
"execution_count": 34,
2955-
"metadata": {},
2956-
"outputs": [],
2957-
"source": [
2958-
"# merged_nsclc_heterogeneity = merge_datasets(\n",
2959-
"# datasets_nsclc_heterogeneity.values(), symbol_in_n_datasets=5\n",
2960-
"# )"
2961-
]
2962-
},
2963-
{
2964-
"cell_type": "code",
2965-
"execution_count": 35,
2966-
"metadata": {},
2967-
"outputs": [],
2968-
"source": [
2969-
"# merged_nsclc_heterogeneity.shape"
2970-
]
2971-
},
2972-
{
2973-
"cell_type": "code",
2974-
"execution_count": 36,
2975-
"metadata": {},
2976-
"outputs": [],
2977-
"source": [
2978-
"# merged_nsclc_heterogeneity.write_h5ad(\n",
2979-
"# \"../../data/50_integrate_scrnaseq_data/51_merge_all/merged_nsclc_heterogeneity.h5ad\"\n",
2980-
"# )"
2981-
]
29822947
}
29832948
],
29842949
"metadata": {

analyses/10_integrate_scrnaseq_data/11_merge_all.py renamed to analyses/20_integrate_scrnaseq_data/21_merge_all.py

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# %load_ext autoreload
1717
# %autoreload 2
1818

19+
from nxfvars import nxfvars
1920
import scanpy as sc
2021
import numpy as np
2122
import itertools
@@ -44,21 +45,25 @@
4445
import re
4546

4647
# %%
47-
threadpool_limits(8)
48+
out_dir = nxfvars.get("outdir", "/tmp")
49+
50+
# %%
51+
threadpool_limits(int(nxfvars.get("cpus", "8")))
4852

4953
# %%
5054
sc.set_figure_params(figsize=(5, 5))
5155

5256
# %%
53-
dataset_table = pd.read_csv("../../tables/samplesheet_scrnaseq_preprocessing.csv")
57+
dataset_table = pd.read_csv(nxfvars.get("samplesheet", "../../tables/samplesheet_scrnaseq_preprocessing.csv"))
5458

5559
# %%
5660
dataset_table
5761

5862
# %%
63+
dataset_path = nxfvars.get("dataset_path", "../../data/20_qc_norm_scrnaseq/01_qc_and_filtering")
5964
datasets = {
6065
dataset_id: sc.read_h5ad(
61-
f"../../data/20_qc_norm_scrnaseq/01_qc_and_filtering/{dataset_id}/{dataset_id}.qc.h5ad"
66+
f"{dataset_id}.qc.h5ad" if dataset_path == "." else f"{dataset_path}/{dataset_id}/{dataset_id}.qc.h5ad"
6267
)
6368
for dataset_id in tqdm(dataset_table["id"])
6469
}
@@ -288,7 +293,7 @@
288293
)
289294

290295
# %%
291-
obs_all.to_excel("../../data/50_integrate_scrnaseq_data/51_merge_all/obs_all.xlsx")
296+
obs_all.to_excel(f"{out_dir}/obs_all.xlsx")
292297

293298
# %%
294299
merged_all = merge_datasets(datasets.values(), symbol_in_n_datasets=17)
@@ -301,32 +306,5 @@
301306

302307
# %%
303308
merged_all.write_h5ad(
304-
"../../data/50_integrate_scrnaseq_data/51_merge_all/merged_all.h5ad"
309+
"{out_dir}/merged_all.h5ad"
305310
)
306-
307-
# %% [markdown]
308-
# ## Export for NSCLC heterogeneity
309-
# * only tumor samples (no controls, no metastases)
310-
# * all NSCLC subtypes
311-
312-
# %%
313-
# datasets_nsclc_heterogeneity = dict()
314-
# for dataset_id, dataset in datasets.items():
315-
# if "tumor_primary" in dataset.obs["origin"].values:
316-
# datasets_nsclc_heterogeneity[dataset_id] = dataset[
317-
# dataset.obs["origin"] == "tumor_primary", :
318-
# ].copy()
319-
# del datasets_nsclc_heterogeneity["Pircher_batch1_NSCLC"]
320-
321-
# %%
322-
# merged_nsclc_heterogeneity = merge_datasets(
323-
# datasets_nsclc_heterogeneity.values(), symbol_in_n_datasets=5
324-
# )
325-
326-
# %%
327-
# merged_nsclc_heterogeneity.shape
328-
329-
# %%
330-
# merged_nsclc_heterogeneity.write_h5ad(
331-
# "../../data/50_integrate_scrnaseq_data/51_merge_all/merged_nsclc_heterogeneity.h5ad"
332-
# )

conf/modules.config

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,17 @@
2020
params {
2121
modules {
2222
'SCQC' {
23-
publish_dir = '01_qc_and_filtering'
23+
publish_dir = '20_qc_and_filtering'
2424
publish_by_id = true
2525
}
26+
'SCQC_MERGE_STATS' {
27+
publish_dir = '20_qc_and_filtering'
28+
}
29+
'P11_MERGE_ALL' {
30+
publish_dir = '21_merge_all'
31+
}
32+
'SCVI' {
33+
publish_dir = '21_merge_all'
34+
}
2635
}
27-
}
36+
}

integrate_single_cell.config

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
includeConfig 'conf/modules.config'
2+
3+
4+
params {
5+
input = "tables/samplesheet_scrnaseq_preprocessing.csv"
6+
outdir = "data/20_integrate_scrnaseq_data"
7+
publish_dir_mode = "link"
8+
}
9+

integrate_single_cell.nf

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env nextflow
2+
3+
nextflow.enable.dsl = 2
4+
5+
def modules = params.modules.clone()
6+
assert params.input: "Input samplesheet not specified!"
7+
8+
include { check_samplesheet } from './modules/local/check_samplesheet' params(params)
9+
10+
include { SCQC } from "./modules/local/scqc/main.nf" addParams(
11+
options: modules['SCQC']
12+
)
13+
include { SCQC_MERGE_STATS } from "./modules/local/scqc_merge_stats/main.nf" addParams(
14+
options: modules['SCQC_MERGE_STATS']
15+
)
16+
include { P11_MERGE_ALL } from "./modules/local/analyses/20_integrate_scrnaseq_data.nf" addParams (
17+
options: modules["P11_MERGE_ALL"]
18+
)
19+
20+
21+
workflow {
22+
ch_samples = Channel.from(check_samplesheet(params.input))
23+
24+
SCQC(ch_samples)
25+
SCQC_MERGE_STATS(SCQC.out.qc_stats.collect())
26+
27+
P11_MERGE_ALL(
28+
Channel.fromPath(params.input),
29+
SCQC.out.adata.flatMap{ meta, adata -> adata }.collect() // gather every per-sample h5ad into one emission so the single merge task receives all of them
30+
)
31+
}
32+

0 commit comments

Comments
 (0)