|
# Genome "parts" handled by the spike-in workflow, plus per-part lookup tables.
part = ["host", "spikein"]
# Blacklist BED file per part; a value may be empty/None when no blacklist is
# configured (callers test it for truthiness before building the CLI flag).
blacklist_dict = dict(host=blacklist_bed, spikein=spikein_blacklist_bed)
# Space-separated chromosome names per part, used to restrict tools to one genome.
region_dict = {
    name: " ".join(chroms.keys())
    for name, chroms in (("host", host_chr), ("spikein", spikein_chr))
}
| 4 | + |
| 5 | + |
def get_scaling_factor(sample, input):
    """Look up the scaling factor for *sample* in a multiBamSummary output file.

    Parameters
    ----------
    sample : str
        Sample name as it appears in the first (tab-separated) column of the
        scaling-factors file.
    input : str
        Path of the scaling-factors file, relative to the global ``outdir``.
        (Parameter name kept for backward compatibility even though it
        shadows the ``input`` builtin.)

    Returns
    -------
    float
        The factor from the second column of the matching row, or ``1.0``
        when the file does not exist yet (e.g. on a dry run before
        multiBamSummary has produced it).

    Raises
    ------
    KeyError
        If the file exists but contains no row for *sample* (unchanged from
        the original behavior).
    """
    sf_path = os.path.join(outdir, input)
    if not os.path.isfile(sf_path):
        # No scaling-factors file yet -> neutral scaling.
        return 1.0
    sf_dict = {}
    with open(sf_path) as f:
        next(f, None)  # skip the header line
        for line in f:
            # Column 0: sample name, column 1: scaling factor.
            fields = line.rstrip("\n").split("\t")
            sf_dict[fields[0]] = fields[1]
    return float(sf_dict[sample])
| 21 | + |
| 22 | + |
# Split each filtered BAM into per-genome BAMs ("host" / "spikein") by
# extracting only the chromosomes listed in region_dict for that part.
rule split_bamfiles_by_genome:
    input:
        bam = "filtered_bam/{sample}.filtered.bam",
        bai = "filtered_bam/{sample}.filtered.bam.bai"
    output:
        bam = "split_bam/{sample}_{part}.bam",
        bai = "split_bam/{sample}_{part}.bam.bai"
    params:
        # Space-separated chromosome names belonging to this part.
        region = lambda wildcards: region_dict[wildcards.part]
    conda: CONDA_SAMBAMBA_ENV
    threads: 4
    shell: """
    sambamba slice -o {output.bam} {input.bam} {params.region};
    sambamba index -t {threads} {output.bam}
    """
| 38 | + |
# Run deepTools multiBamSummary over all samples' split BAMs of one part,
# producing a coverage matrix and a scaling-factors table that
# bamCoverage_by_part later reads via get_scaling_factor().
rule multiBamSummary_by_part:
    input:
        bams = lambda wildcards: expand("split_bam/{sample}_{part}.bam", sample=samples,part=wildcards.part),
        bais = lambda wildcards: expand("split_bam/{sample}_{part}.bam.bai", sample=samples,part=wildcards.part)
    output:
        npz = "split_deepTools_qc/multiBamSummary/{part}_read_coverage.bins.npz",
        scale_factors = "split_deepTools_qc/multiBamSummary/{part}.scaling_factors.txt"
    params:
        labels = " ".join(samples),
        # Only emit --blackListFileName when a blacklist is configured for this part.
        blacklist = lambda wildcards: "--blackListFileName {}".format(blacklist_dict[wildcards.part]) if blacklist_dict[wildcards.part] else "",
        # Paired-end reads are extended to the observed fragment size;
        # single-end reads need an explicit fragment length.
        read_extension = "--extendReads" if pairedEnd
                         else "--extendReads {}".format(fragmentLength),
        scaling_factors = "--scalingFactors split_deepTools_qc/multiBamSummary/{part}.scaling_factors.txt",
        # A custom bin size / region restriction only applies to the spike-in genome.
        binSize = lambda wildcards: " --binSize "+str(spikein_bin_size) if wildcards.part=="spikein" else "",
        spikein_region = lambda wildcards: " --region "+spikein_region if ((wildcards.part=="spikein") and (spikein_region != "")) else ""
    benchmark:
        "split_deepTools_qc/.benchmark/{part}_multiBamSummary.benchmark"
    # Cap at 24 threads, or fewer when the workflow-wide limit is lower.
    threads: lambda wildcards: 24 if 24<max_thread else max_thread
    conda: CONDA_SHARED_ENV
    # multiBamSummary_cmd is a shell template defined elsewhere in the workflow.
    shell: multiBamSummary_cmd
| 59 | + |
| 60 | + |
# Produce a scaled bigWig for the HOST BAM of each sample.  Note the
# deliberate asymmetry: the input BAM is always the host split, while the
# scaling factor comes from the {part} summary — the output name records
# which part's factors were applied ("BY{part}").
rule bamCoverage_by_part:
    input:
        bam = "split_bam/{sample}_host.bam" ,
        bai = "split_bam/{sample}_host.bam.bai",
        scale_factors = "split_deepTools_qc/multiBamSummary/{part}.scaling_factors.txt"
    output:
        "bamCoverage/{sample}.host_scaled.BY{part}.bw"
    params:
        bwBinSize = bwBinSize,
        genome_size = int(genome_size),
        ignoreForNorm = "--ignoreForNormalization {}".format(ignoreForNormalization) if ignoreForNormalization else "",
        # Paired-end: extend to observed fragment size; single-end: explicit length.
        read_extension = "--extendReads" if pairedEnd
                         else "--extendReads {}".format(fragmentLength),
        blacklist = "--blackListFileName {}".format(blacklist_bed) if blacklist_bed
                    else "",
        # Pull just this sample's factor out of the scaling-factors table
        # (falls back to 1.0 while the table does not exist yet).
        scaling_factors = lambda wildcards,input: "--scaleFactor {}".format(get_scaling_factor(wildcards.sample,input.scale_factors)) ## subset for the one factor needed
    benchmark:
        "bamCoverage/.benchmark/bamCoverage.{sample}.BY{part}.filtered.benchmark"
    # Cap at 16 threads, or fewer when the workflow-wide limit is lower.
    threads: lambda wildcards: 16 if 16<max_thread else max_thread # 4GB per core
    conda: CONDA_SHARED_ENV
    # bamcov_spikein_cmd is a shell template defined elsewhere in the workflow.
    shell: bamcov_spikein_cmd
| 82 | + |
| 83 | + |
# Collect fragment-size metrics (deepTools bamPEFragmentSize) over all
# samples' split BAMs of one genome part.
rule bamPE_fragment_size_by_part:
    input:
        bams = lambda wildcards: expand("split_bam/{sample}_{part}.bam", sample=samples,part=wildcards.part),
        bais = lambda wildcards: expand("split_bam/{sample}_{part}.bam.bai", sample=samples,part=wildcards.part)
    output:
        "split_deepTools_qc/bamPEFragmentSize/{part}.fragmentSize.metric.tsv"
    params:
        # Skip the plot entirely when plotFormat is 'None'; otherwise write
        # a per-part figure next to the metrics table.
        plotcmd = lambda wildcards: "" if plotFormat == 'None' else
                  "-o split_deepTools_qc/bamPEFragmentSize/" + wildcards.part + ".fragmentSizes.{}".format(plotFormat)
    # Cap at 24 threads, or fewer when the workflow-wide limit is lower.
    threads: lambda wildcards: 24 if 24<max_thread else max_thread
    conda: CONDA_SHARED_ENV
    # bamPEFragmentSize_cmd is a shell template defined elsewhere in the workflow.
    shell: bamPEFragmentSize_cmd
0 commit comments