From eae7c09c16192186a674d1e2daa8cd58860b4deb Mon Sep 17 00:00:00 2001 From: Shettland Date: Fri, 26 Jan 2024 11:07:36 +0100 Subject: [PATCH 1/3] Small changes in variables names for clarification --- .../viralrecon/ANALYSIS/create_summary_report.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bu_isciii/templates/viralrecon/ANALYSIS/create_summary_report.sh b/bu_isciii/templates/viralrecon/ANALYSIS/create_summary_report.sh index 4ed9b1929..f57349842 100644 --- a/bu_isciii/templates/viralrecon/ANALYSIS/create_summary_report.sh +++ b/bu_isciii/templates/viralrecon/ANALYSIS/create_summary_report.sh @@ -22,18 +22,18 @@ do reads_hostR1=$(cat ${arr[1]}*/kraken2/${arr[0]}.kraken2.report.txt | grep -v 'unclassified' | cut -f3 | awk '{s+=$1}END{print s}') reads_host_x2=$(echo $((reads_hostR1 * 2)) ) - perc_mapped=$(echo $(awk -v v1=$total_reads -v v2=$reads_host_x2 'BEGIN {print (v2*100)/v1}') ) + perc_host=$(echo $(awk -v v1=$total_reads -v v2=$reads_host_x2 'BEGIN {print (v2*100)/v1}') ) reads_virus=$(cat ${arr[1]}*/variants/bowtie2/samtools_stats/${arr[0]}.sorted.bam.flagstat | grep '+ 0 mapped' | cut -d ' ' -f1) unmapped_reads=$(echo $((total_reads - (reads_host_x2+reads_virus))) ) perc_unmapped=$(echo $(awk -v v1=$total_reads -v v2=$unmapped_reads 'BEGIN {print (v2/v1)*100}') ) - n_count=$(cat %Ns.tab | grep -w ${arr[0]} | grep ${arr[1]} | cut -f2) + ns_10x_perc=$(cat %Ns.tab | grep -w ${arr[0]} | grep ${arr[1]} | cut -f2) missense=$(LC_ALL=C awk -F, '{if($10 >= 0.75)print $0}' ${arr[1]}*/variants/ivar/variants_long_table.csv | grep ^${arr[0]}, | grep 'missense' | wc -l) - Ns_10x_perc=$(zcat ${arr[1]}*/variants/ivar/consensus/bcftools/${arr[0]}.filtered.vcf.gz | grep -v '^#' | wc -l) + vars_in_cons10x=$(zcat ${arr[1]}*/variants/ivar/consensus/bcftools/${arr[0]}.filtered.vcf.gz | grep -v '^#' | wc -l) lineage=$(cat ${arr[1]}*/variants/ivar/consensus/bcftools/pangolin/${arr[0]}.pangolin.csv | tail -n1 | cut -d ',' -f2) @@ -47,5 +47,5 @@ do analysis_date=$(date '+%Y%m%d') # Introduce data row into output file - echo -e "${RUN}\t${USER}\t${HOST}\t${arr[1]}\t${arr[0]}\t$total_reads\t$reads_hostR1\t$reads_host_x2\t$perc_mapped\t$reads_virus\t$reads_virus_perc\t$unmapped_reads\t$perc_unmapped\t$medianDPcov\t$cov10x\t$Ns_10x_perc\t$missense\t$n_count\t$lineage\t$read_length\t$analysis_date" >> mapping_illumina_$(date '+%Y%m%d').tab + echo -e "${RUN}\t${USER}\t${HOST}\t${arr[1]}\t${arr[0]}\t$total_reads\t$reads_hostR1\t$reads_host_x2\t$perc_host\t$reads_virus\t$reads_virus_perc\t$unmapped_reads\t$perc_unmapped\t$medianDPcov\t$cov10x\t$vars_in_cons10x\t$missense\t$ns_10x_perc\t$lineage\t$read_length\t$analysis_date" >> mapping_illumina_$(date '+%Y%m%d').tab done From 2f133269726dd3eb2b2e4ececfd235f0203cfcc3 Mon Sep 17 00:00:00 2001 From: Shettland Date: Fri, 26 Jan 2024 11:08:00 +0100 Subject: [PATCH 2/3] Fixed small typo in regex from last PR --- bu_isciii/templates/viralrecon/RESULTS/excel_generator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py b/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py index b554a324a..c9243324e 100755 --- a/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py +++ b/bu_isciii/templates/viralrecon/RESULTS/excel_generator.py @@ -104,7 +104,7 @@ def excel_generator(csv_files: List[str]): table["analysis_date"] = pd.to_datetime( table["analysis_date"].astype(str), format="%Y%m%d" ) - elif "assembly" in str(file) or "tsv" in str(file) or "tab" in str(file): + elif "assembly" in str(file) or ".tsv" in str(file) or ".tab" in str(file): table = pd.read_csv(file, sep="\t", header=0) else: try: @@ -163,6 +163,7 @@ def main(args): ) except FileNotFoundError as e: print("Not variants_long_table found for ", str(e)) + # Create excel files for individual tables valid_extensions = [".csv", ".tsv", ".tab"] rest_of_csvs = [ From fb17677fdfacab9143fb89200c317b142e07cea0 Mon Sep 17 00:00:00 2001 From: Shettland Date: Fri, 26 Jan 2024 11:16:30 +0100 Subject: [PATCH 3/3] Updated linting --- bu_isciii/autoclean_sftp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bu_isciii/autoclean_sftp.py b/bu_isciii/autoclean_sftp.py index fd77380c8..85c735c9a 100644 --- a/bu_isciii/autoclean_sftp.py +++ b/bu_isciii/autoclean_sftp.py @@ -121,9 +121,9 @@ def get_sftp_services(self): # Get sftp-service last modification service_finder = LastMofdificationFinder(sftp_service_fullPath) service_last_modification = service_finder.find_last_modification() - self.sftp_services[ - sftp_service_fullPath - ] = service_last_modification + self.sftp_services[sftp_service_fullPath] = ( + service_last_modification + ) if len(self.sftp_services) == 0: sys.exit(f"No services found in {self.path}")