Skip to content

Commit

Permalink
generalizing filter to two classes for both lefse and ancombc
Browse files Browse the repository at this point in the history
  • Loading branch information
adamcantor22 committed Aug 14, 2024
1 parent 82b3919 commit 8d80f94
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 30 deletions.
62 changes: 39 additions & 23 deletions mmeds/snakemake/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -5,43 +5,59 @@ from mmeds.config import TOOLS_DIR

metadata = pd.read_csv("tables/qiime_mapping_file.tsv", sep='\t', header=[0], skiprows=[1])

def lefse_splits(wildcards):
def pairwise_splits(wildcards, tool, vars):
if "tables" in config:
tables = config["tables"]
else:
tables = [f"taxa_table_L{x}" for x in config["taxa_levels"]]

subclasses = None
if tool == "lefse" and "subclasses" in config and config["subclasses"]:
subclasses = deepcopy(config["subclasses"])

splits = []
for lefse_class in config["classes"]:
categories = list(metadata[lefse_class].unique())
for var in vars:
categories = list(metadata[var].unique())
categories = [c for c in categories if str(c) != "nan"]
value_counts = metadata[lefse_class].value_counts()

subclasses = []
if "subclasses" in config and config["subclasses"]:
subclasses = deepcopy(config["subclasses"])
value_counts = metadata[var].value_counts()

if len(categories) < 2:
continue

if len(categories) < 3:
if not sufficient_values(value_counts, categories[0], categories[1]):
continue
splits += expand("results/{lefse_class}/lefse_plot.{feature_table}.{lefse_class}.NA.pdf",
feature_table=config["tables"], lefse_class=lefse_class)
if subclasses:
splits += expand("results/{lefse_class}/lefse_plot.{feature_table}.{lefse_class}.{subclass}.pdf",
feature_table=config["tables"], lefse_class=lefse_class, subclass=subclasses)
if tool == "lefse":
splits += expand("results/{var}/lefse_plot.{feature_table}.{var}.NA.pdf",
feature_table=tables, var=var)
if subclasses:
splits += expand("results/{var}/lefse_plot.{feature_table}.{var}.{subclass}.pdf",
feature_table=tables, var=var, subclass=subclasses)
elif tool == "ancombc":
splits += expand("differential_abundance/{var}/ancom-bc_barplot.{feature_table}.{var}.qzv",
feature_table=tables, var=var)
continue


splits += expand("results/{lefse_class}/lefse_plot_strict.{feature_table}.{lefse_class}.{subclass}.pdf",
feature_table=config["tables"], lefse_class=lefse_class, subclass=subclasses)

for i in range(len(categories)-1):
for j in range(i+1, len(categories)):
if not sufficient_values(value_counts, categories[i], categories[j]):
continue
splits += expand("results/{lefse_class}/lefse_plot.{feature_table}_{lefse_class}_{cat1}_or_{cat2}.{lefse_class}.NA.pdf",
feature_table=config["tables"], lefse_class=lefse_class, cat1=categories[i], cat2=categories[j])
if not sufficient_values(value_counts, categories[i], categories[j]):
continue
if tool == "lefse":
splits += expand("results/{var}/lefse_plot.{feature_table}.{var}_{cat1}_or_{cat2}.{var}.NA.pdf",
feature_table=tables, var=var, cat1=categories[i], cat2=categories[j])
if subclasses:
splits += expand("results/{lefse_class}/lefse_plot.{feature_table}_{lefse_class}_{cat1}_or_{cat2}.{lefse_class}.{subclass}.pdf",
feature_table=config["tables"], lefse_class=lefse_class, cat1=categories[i], cat2=categories[j], subclass=subclasses)
splits += expand("results/{var}/lefse_plot.{feature_table}.{var}_{cat1}_or_{cat2}.{var}.{subclass}.pdf",
feature_table=tables, var=var, cat1=categories[i], cat2=categories[j], subclass=subclasses)
elif tool == "ancombc":
splits += expand("differential_abundance/{var}/ancom-bc_barplot.{feature_table}.{var}_{cat1}_or_{cat2}.{var}.qzv",
feature_table=tables, var=var, cat1=categories[i], cat2=categories[j])
return splits

def ancombc_splits(wildcards):
return pairwise_splits(wildcards, "ancombc", config["metadata"])

def lefse_splits(wildcards):
splits = pairwise_splits(wildcards, "lefse", config["classes"])
formatted_splits = []
for s in splits:
separated = s.split(".")
Expand Down
4 changes: 2 additions & 2 deletions mmeds/snakemake/rules/differential_abundance.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ rule differential_abundance_ancom_bc:
feature_table = "tables/{table}.qza",
mapping_file = "tables/qiime_mapping_file.tsv"
output:
diffs = "differential_abundance/{var}/ancom-bc_{table}_{var}_diffs.qza",
barplot = "differential_abundance/{var}/ancom-bc_{table}_{var}_barplot.qzv"
diffs = "differential_abundance/{var}/ancom-bc_diffs.{table}.{var}.qza",
barplot = "differential_abundance/{var}/ancom-bc_barplot.{table}.{var}.qzv"
conda:
"qiime2-2023.9"
shell:
Expand Down
2 changes: 1 addition & 1 deletion mmeds/snakemake/rules/table_filtering.smk
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ rule filter_table_to_two_classes:
feature_table = "tables/{table}.qza",
mapping_file = "tables/qiime_mapping_file.tsv"
output:
"tables/{table}_{category}_{class1}_or_{class2}.qza"
"tables/{table}.{category}_{class1}_or_{class2}.qza"
conda:
"qiime2-2020.8.0"
shell:
Expand Down
2 changes: 1 addition & 1 deletion mmeds/snakemake/rules/taxonomy.smk
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ rule taxonomy_collapse:
feature_table = "tables/asv_table.qza",
taxonomy = "tables/taxonomy.qza"
output:
"tables/taxa_table_L{level}.qza"
"tables/taxa_table_L{level,[1-8]}.qza"
conda:
"qiime2-2020.8.0"
shell:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ rule results:
expand("diversity/PERMANOVA/{{var}}/{{metric}}_{{var}}_PERMANOVA.qzv", metric=config['beta_metrics'], var=config['metadata']),
"diversity/alpha_rarefaction.qzv",
"tables/taxa_barplot.qzv",
expand("differential_abundance/{{var}}/ancom-bc_{{table}}_{{var}}_barplot.qzv", table=expand("taxa_table_L{{level}}", level=config['taxa_levels']), var=config['metadata'])
ancombc_splits
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ rule results:
"diversity/alpha_rarefaction.qzv",
"tables/taxa_barplot.qzv",
expand("tables/taxa_table_L{level}.qza", level=config['taxa_levels']),
expand("differential_abundance/{var}/ancom-bc_{table}_{var}_barplot.qzv", table=expand("taxa_table_L{level}", level=config['taxa_levels']), var=config['metadata'])

ancombc_splits
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ sequencing_runs:
metadata:
- Sex
- Group
- IgAsort
alpha_metrics:
- faith_pd
- observed_features
Expand Down

0 comments on commit 8d80f94

Please sign in to comment.