Run snakefmt to destroy the formatting

lczech · lczech · commit 135b1937586b · 2025-05-05T19:46:17.000-03:00
diff --git a/workflow/rules/calling-gatk-genotype-db-split.smk b/workflow/rules/calling-gatk-genotype-db-split.smk
@@ -2,6 +2,7 @@
 #     Make Contig Shards
 # =================================================================================================
 
+
 # Special rule for the case that we want to split the contigs for the genomics db import,
 # but are not using contig groups. In that case, we could pretend to have contig groups,
 # and create fake ones that each contain a single chromosome or contig of the reference.
@@ -13,9 +14,9 @@ rule make_contig_bed:
     input:
         fai=get_fai,
     output:
-        bed="calling/contig-shards/{contig}/contig.bed"
+        bed="calling/contig-shards/{contig}/contig.bed",
     params:
-        contig="{contig}"
+        contig="{contig}",
     run:
         # Produce only the matching contig and write 0–length interval
         contig_lengths = get_contig_lengths(input.fai)
@@ -37,20 +38,20 @@ localrules:
 # Why that then is an option at all is beyond my comprehension. GATK, WTF.
 checkpoint preprocess_contig_shard:
     input:
-        dict = genome_dict(),
-        ref  = config["data"]["reference-genome"],
+        dict=genome_dict(),
+        ref=config["data"]["reference-genome"],
         # contigs=contigs_groups_input,
-        contig = (
+        contig=(
             "calling/contig-groups/{contig}.bed"
             if config["settings"].get("contig-group-size", 0) > 0
             else "calling/contig-shards/{contig}/contig.bed"
-        )
+        ),
     output:
-        interval_list = "calling/contig-shards/{contig}/contig.interval_list",
-        shard_list = "calling/contig-shards/{contig}/shards.interval_list",
+        interval_list="calling/contig-shards/{contig}/contig.interval_list",
+        shard_list="calling/contig-shards/{contig}/shards.interval_list",
     params:
-        bin_length = config["params"]["gatk"]["GenomicsDBImport-interval-size"],
-        padding    = config["params"]["gatk"]["GenomicsDBImport-interval-padding"],
+        bin_length=config["params"]["gatk"]["GenomicsDBImport-interval-size"],
+        padding=config["params"]["gatk"]["GenomicsDBImport-interval-padding"],
     log:
         "logs/calling/preprocess-contig-shard/{contig}.log",
     conda:
@@ -85,18 +86,18 @@ localrules:
 rule extract_contig_shard:
     input:
         # shard_list=checkpoints.preprocess_contig_shard.output.shard_list
-        shard_list="calling/contig-shards/{contig}/shards.interval_list"
+        shard_list="calling/contig-shards/{contig}/shards.interval_list",
     output:
-        shard = "calling/contig-shards/{contig}/shard-{shard}.interval_list"
+        shard="calling/contig-shards/{contig}/shard-{shard}.interval_list",
     run:
         header, data = [], []
         for line in open(input.shard_list):
-            if line.startswith('@'):
+            if line.startswith("@"):
                 header.append(line)
             else:
                 data.append(line)
         i = int(wildcards.shard)
-        with open(output.shard, 'w') as out:
+        with open(output.shard, "w") as out:
             out.writelines(header + [data[i]])
 
 
@@ -108,7 +109,7 @@ localrules:
 def get_shard_indices(wc):
     r = checkpoints.preprocess_contig_shard.get(contig=wc.contig)
     # skip header lines
-    shards = [l for l in open(r.output.shard_list) if not l.startswith('@')]
+    shards = [l for l in open(r.output.shard_list) if not l.startswith("@")]
     return [i for i in range(len(shards))]
     # return [{"shard": i} for i in range(len(shards))]
 
@@ -208,8 +209,7 @@ rule genotype_variants:
         + " "
         + config["params"]["gatk"]["GenotypeGVCFs-extra"],
         java_opts=config["params"]["gatk"]["GenotypeGVCFs-java-opts"],
-    threads:
-        get_rule_threads("genotype_variants")
+    threads: get_rule_threads("genotype_variants")
     log:
         "logs/calling/gatk-genotype-gvcfs/{contig}/shard-{shard}.log",
     benchmark:
@@ -241,6 +241,7 @@ rule genotype_variants:
 #         shard=get_shard_indices(wildcards)
 #     )
 
+
 def merge_vcfs_vcfs_input(wc):
     cp = checkpoints.preprocess_contig_shard.get(**wc)
     # cp = checkpoints.preprocess_contig_shard.get(contig=wc.contig)
@@ -250,9 +251,10 @@ def merge_vcfs_vcfs_input(wc):
     return expand(
         "calling/genotyped/{contig}/shard-{shard}.vcf.gz",
         contig=wc.contig,
-        shard=list(range(len(shards)))
+        shard=list(range(len(shards))),
     )
 
+
 def merge_vcfs_done_input(wc):
     cp = checkpoints.preprocess_contig_shard.get(**wc)
     # cp = checkpoints.preprocess_contig_shard.get(contig=wc.contig)
@@ -261,13 +263,14 @@ def merge_vcfs_done_input(wc):
     return expand(
         "calling/genotyped/{contig}/shard-{shard}.vcf.gz.done",
         contig=wc.contig,
-        shard=list(range(len(shards)))
+        shard=list(range(len(shards))),
     )
 
+
 rule merge_shard_vcfs:
     input:
-        dict = genome_dict(),
-        ref  = config["data"]["reference-genome"],
+        dict=genome_dict(),
+        ref=config["data"]["reference-genome"],
         contigs=contigs_groups_input,
         # vcfs = expand("calling/genotyped/{contig}/shard-{shard}.vcf.gz", get_shard_indices),
         # done = expand("calling/genotyped/{contig}/shard-{shard}.vcf.gz.done", get_shard_indices),
@@ -288,8 +291,7 @@ rule merge_shard_vcfs:
             if platform.system() == "Darwin"
             else ""
         ),
-    threads:
-        get_rule_threads("merge_shard_vcfs")
+    threads: get_rule_threads("merge_shard_vcfs")
     log:
         "logs/calling/picard-merge-vcfs/{contig}.log",
     benchmark:
diff --git a/workflow/rules/calling-gatk.smk b/workflow/rules/calling-gatk.smk
@@ -142,6 +142,7 @@ else:
 #     Merging Variants
 # =================================================================================================
 
+
 # Need an input function to work with the fai checkpoint
 def merge_variants_vcfs_input(wildcards):
     fai = checkpoints.samtools_faidx.get().output[0]
diff --git a/workflow/rules/calling.smk b/workflow/rules/calling.smk
@@ -2,6 +2,7 @@
 #     Grouping of (Small) Contigs
 # =================================================================================================
 
+
 # Get the list of chromosome names that are present in the fai file,
 # and their length, with a length filter if needed.
 def read_contigs_from_fai(fai, min_contig_size=0):
@@ -23,7 +24,6 @@ def read_contigs_from_fai(fai, min_contig_size=0):
 # If we want to combine contigs into groups, use the rules and functions for this.
 if config["settings"].get("contig-group-size", 0) > 0:
 
-
     include: "calling-contig-groups.smk"
 
 
diff --git a/workflow/rules/initialize.smk b/workflow/rules/initialize.smk
@@ -48,6 +48,7 @@ extra_logfile = os.path.abspath(
 )
 logger_manager.logger.addHandler(logging.FileHandler(extra_logfile))
 
+
 # For now, we define our own wrapper around the wrapper of the snakemake logging...
 # That allows us to use this as a single point of modification should they finally
 # manage to get the logging to work properly.
@@ -62,12 +63,14 @@ def fix_log_info(message):
     sys.__stdout__.write(message + "\n")
     sys.__stdout__.flush()
 
+
 def fix_log_warn(message):
     logger.warning(message)
     # print(message, file=sys.stdout)
     sys.__stdout__.write(message + "\n")
     sys.__stdout__.flush()
 
+
 # =================================================================================================
 #     Basic Configuration
 # =================================================================================================
@@ -248,33 +251,33 @@ if resources_file:
 cfgfiles = "\n                        ".join(cfgfiles)
 
 # Main grenepipe header, helping with debugging etc for user issues
-fix_log_info( "=====================================================================================")
-fix_log_info( r"       _____         _______ __   __   _______ ______  ___   ______   _______ ")
-fix_log_info( r"      /  ___\ ____  /  ____//  \ /  / /  ____/|   _  \ \  \ |   _  \ /  ____/ ")
-fix_log_info( r"     /  /____|  _ \|  |___  |   \|  ||  |___  |  |_]  ||  | |  |_]  |  |___   ")
-fix_log_info( r"    |  /|__  | |_) |   ___| |       ||   ___| |   ___/ |  | |   ___/|   ___|  ")
-fix_log_info( r"    \  \__|  |  _ <|  |____ |  |\   ||  |____ |  |     |  | |  |    |  |____  ")
-fix_log_info( r"     \______/|_| \_\_______\/__| \__|\_______\|__|     \___\|__|    \_______\ ")
-fix_log_info( "")
-fix_log_info( "    Date:               " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
-fix_log_info( "    Platform:           " + pltfrm)
-fix_log_info( "    Host:               " + hostname)
-fix_log_info( "    User:               " + username)
-fix_log_info( "    Conda:              " + str(conda_ver))
-fix_log_info( "    Mamba:              " + str(mamba_ver))
-fix_log_info( "    Python:             " + str(sys.version.split(" ")[0]))
-fix_log_info( "    Snakemake:          " + str(snakemake.__version__))
-fix_log_info( "    Grenepipe:          " + str(grenepipe_version))
-fix_log_info( "    Conda env:          " + str(conda_env))
-fix_log_info( "    Command:            " + cmdline)
-fix_log_info( "")
-fix_log_info( "    Base directory:     " + workflow.basedir)
-fix_log_info( "    Working directory:  " + os.getcwd())
-fix_log_info( "    Config file(s):     " + cfgfiles)
-fix_log_info( "    Samples:            " + get_sample_units_print())
-fix_log_info( "")
-fix_log_info( "=====================================================================================")
-fix_log_info( "")
+fix_log_info("=====================================================================================")
+fix_log_info(r"       _____         _______ __   __   _______ ______  ___   ______   _______ ")
+fix_log_info(r"      /  ___\ ____  /  ____//  \ /  / /  ____/|   _  \ \  \ |   _  \ /  ____/ ")
+fix_log_info(r"     /  /____|  _ \|  |___  |   \|  ||  |___  |  |_]  ||  | |  |_]  |  |___   ")
+fix_log_info(r"    |  /|__  | |_) |   ___| |       ||   ___| |   ___/ |  | |   ___/|   ___|  ")
+fix_log_info(r"    \  \__|  |  _ <|  |____ |  |\   ||  |____ |  |     |  | |  |    |  |____  ")
+fix_log_info(r"     \______/|_| \_\_______\/__| \__|\_______\|__|     \___\|__|    \_______\ ")
+fix_log_info("")
+fix_log_info("    Date:               " + datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+fix_log_info("    Platform:           " + pltfrm)
+fix_log_info("    Host:               " + hostname)
+fix_log_info("    User:               " + username)
+fix_log_info("    Conda:              " + str(conda_ver))
+fix_log_info("    Mamba:              " + str(mamba_ver))
+fix_log_info("    Python:             " + str(sys.version.split(" ")[0]))
+fix_log_info("    Snakemake:          " + str(snakemake.__version__))
+fix_log_info("    Grenepipe:          " + str(grenepipe_version))
+fix_log_info("    Conda env:          " + str(conda_env))
+fix_log_info("    Command:            " + cmdline)
+fix_log_info("")
+fix_log_info("    Base directory:     " + workflow.basedir)
+fix_log_info("    Working directory:  " + os.getcwd())
+fix_log_info("    Config file(s):     " + cfgfiles)
+fix_log_info("    Samples:            " + get_sample_units_print())
+fix_log_info("")
+fix_log_info("=====================================================================================")
+fix_log_info("")
 
 
 # No need to have these output vars available in the rest of the snakefiles
diff --git a/workflow/rules/prepare-reference.smk b/workflow/rules/prepare-reference.smk
@@ -331,7 +331,7 @@ def check_fai_contig_names(fai_file):
                     "Problematic reference genome names:"
                 )
                 printed_warning_header = True
-            fix_log_warn( " - " + contig)
+            fix_log_warn(" - " + contig)
     checked_fai_contig_names = True
 
 

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -331,7 +331,7 @@ def check_fai_contig_names(fai_file):` |
| `331` | `331` | `"Problematic reference genome names:"` |
| `332` | `332` | `)` |
| `333` | `333` | `printed_warning_header = True` |
| `334` | | `- fix_log_warn( " - " + contig)` |
| | `334` | `+ fix_log_warn(" - " + contig)` |
| `335` | `335` | `checked_fai_contig_names = True` |
| `336` | `336` | |
| `337` | `337` | |