Merge pull request #67 from artic-network/symposiumRelease

Will Rowe · web-flow · commit d5a0cb89ec46 · 2021-01-12T14:40:00.000Z
Final changes for 1.2.0
diff --git a/artic/deprecated/plot_amplicon_depth.py b/artic/deprecated/plot_amplicon_depth.py
diff --git a/artic/minion.py b/artic/minion.py
@@ -90,6 +90,11 @@ def get_scheme(scheme_name, scheme_directory, scheme_version="1"):
 
 def run(parser, args):
 
+    # check for medaka-model
+    if args.medaka and (args.medaka_model is None):
+        print(colored.red('Must specify --medaka-model if using the --medaka workflow.'))
+        raise SystemExit(1)
+
     # 1) check the parameters and set up the filenames
     ## find the primer scheme, reference sequence and confirm scheme version
     bed, ref, _ = get_scheme(args.scheme, args.scheme_directory, args.scheme_version)
@@ -204,7 +209,6 @@ def run(parser, args):
 
     # 9) get the depth of coverage for each readgroup, create a coverage mask and plots, and add failed variants to the coverage mask (artic_mask must be run before bcftools consensus)
     cmds.append("artic_make_depth_mask --store-rg-depths %s %s.primertrimmed.rg.sorted.bam %s.coverage_mask.txt" % (ref, args.sample, args.sample))
-    cmds.append("artic_plot_amplicon_depth --primerScheme %s --sampleID %s --outFilePrefix %s %s*.depths" % (bed, args.sample, args.sample, args.sample))
     cmds.append("artic_mask %s %s.coverage_mask.txt %s.fail.vcf %s.preconsensus.fasta" % (ref, args.sample, args.sample, args.sample))
 
     # 10) generate the consensus sequence
diff --git a/artic/minion_validator.py b/artic/minion_validator.py
@@ -191,6 +191,8 @@ def genCommand(sampleID, workflow):
     ]
     if workflow=="medaka":
         cmd.append("--medaka")
+        cmd.append("--medaka-model")
+        cmd.append("r941_min_high_g351")
     if sampleID in extraFlags[workflow]:
         for flag in extraFlags[workflow][sampleID]:
             cmd.append(flag)
diff --git a/artic/pipeline.py b/artic/pipeline.py
@@ -97,8 +97,7 @@ def init_pipeline_parser():
         'sample', metavar='sample', help='The name of the sample')
     parser_minion.add_argument('--medaka', dest='medaka', action='store_true',
                                help='Use medaka instead of nanopolish for variants')
-    parser_minion.add_argument('--medaka-model', metavar='medaka_model',
-                                default='r941_min_high_g351', help='The model to use for medaka (default: %(default)s)')
+    parser_minion.add_argument('--medaka-model', metavar='medaka_model', help='The model to use for medaka (required if using --medaka)')
     parser_minion.add_argument('--no-longshot', dest='no_longshot', action='store_true', help='Do not use Longshot for variant filtering after medaka')
     parser_minion.add_argument('--minimap2', dest='minimap2', default=True,
                                action='store_true', help='Use minimap2 (default)')
diff --git a/artic/version.py b/artic/version.py
@@ -1 +1 @@
-__version__ = "1.1.3"
+__version__ = "1.2.0"
diff --git a/docs/commands.md b/docs/commands.md
@@ -234,6 +234,7 @@ artic minion <scheme> <sample>
 | scheme               | Y        | NA             | The name of the primer scheme                                                                |
 | sample               | Y        | NA             | The name of the sample                                                                       |
 | --medaka             | N        | False          | Use medaka instead of nanopolish for variants                                                |
+| --medaka-model       | -        | NA             | Medaka model to use (required if --medaka set)                                               |
 | --minimap2           | N        | True           | Use minimap2                                                                                 |
 | --bwa                | N        | False          | Use bwa instead of minimap2                                                                  |
 | --normalise          | N        | 100            | Normalise down to moderate coverage to save runtime                                          |
diff --git a/docs/minion.md b/docs/minion.md
@@ -16,6 +16,8 @@ This page describes the core pipeline which is run via the `artic minion` comman
 There are **2 workflows** baked into the core pipeline, one which uses signal data (via [nanopolish](https://github.com/jts/nanopolish)) and one that does not (via [medaka](https://github.com/nanoporetech/medaka)). As the workflows are identical in many ways, this page will describe the pipeline as a whole and notify the reader when there is different behaviour between the two workflows.
 It should be noted here that by default the `nanopolish` workflow is selected; you need to specify `--medaka` (and `--medaka-model`) if you want the medaka workflow enabled.
 
+> **NOTE**: It is very important that you select the appropriate value for `--medaka-model`.
+
 At the end of each stage, we list here the "useful" stage output files which are kept. There will also be some additional files leftover at the end of the pipeline but these can be ignored (and are hopefully quite intuitively named).
 
 ## Stages
@@ -100,7 +102,7 @@ Finally, we use the `artic_vcf_filter` module to filter the merged variant file
 
 ### Consensus building
 
-Prior to building a consensus, we use the post-processed alignment from the previous step to check each position of the reference sequence for sample coverage. Any poition that is not covered by at least 20 reads from either read group are marked as low coverage. We use the `artic_make_depth_mask` module for this, which produces coverage information for each read group and also produces a coverage mask to tell us which coordinates in the reference sequence failed the coverage threshold. We use `artic_plot_amplicon_depth` to take the read group depth data and plot amplicon coverage.
+Prior to building a consensus, we use the post-processed alignment from the previous step to check each position of the reference sequence for sample coverage. Any position that is not covered by at least 20 reads from either read group is marked as low coverage. We use the `artic_make_depth_mask` module for this, which produces coverage information for each read group and also produces a coverage mask to tell us which coordinates in the reference sequence failed the coverage threshold.
 
 Next, to build a consensus sequence for a sample, we require a pre-consensus sequence based on the input reference sequence. The preconsensus has low quality sites masked out with `N`'s using the coverage mask and the `$SAMPLE.fail.vcf` file. We then use `bcftools consensus` to combine the preconsensus with the `$SAMPLE.pass.vcf` variants to produce a consensus sequence for the sample. The consensus sequence has the artic workflow written to its header.
 
@@ -122,6 +124,13 @@ Finally, the consensus sequence is aligned against the reference sequence using
 | artic_vcf_merge           | combines VCF files from multiple read groups                                                         |
 | artic_vcf_filter          | filters a combined VCF into PASS and FAIL variant files                                              |
 | artic_make_depth_mask     | create a coverage mask from the post-processed alignment                                             |
-| artic_plot_amplicon_depth | plots per amplicon coverage                                                                          |
 | artic_mask                | combines the reference sequence, FAIL variants and coverage mask to produce a pre-consensus sequence |
 | artic_fasta_header        | applies the artic workflow and identifier to the consensus sequence header                           |
+
+## Optional pipeline report
+
+As of version 1.2.0, you can run the artic fork of MultiQC (which should be installed as part of the artic conda environment) and this will produce a report containing amplicon coverage plots and variant call information. To generate a report from within your pipeline output directory:
+
+```
+multiqc .
+```
diff --git a/environment.yml b/environment.yml
@@ -25,3 +25,5 @@ dependencies:
   - pyvcf=0.6.8
   - samtools=1.10
   - tqdm
+  - pip:
+      - git+https://github.com/will-rowe/MultiQC.git@artic
diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,8 @@
 biopython
 clint
-matplotlib
 pandas
 pysam
 pytest
 pyvcf
 requests
-seaborn
 tqdm
diff --git a/setup.py b/setup.py
@@ -38,7 +38,6 @@
             'artic_vcf_merge=artic.vcf_merge:main',
             'artic_vcf_filter=artic.vcf_filter:main',
             'artic_make_depth_mask=artic.make_depth_mask:main',
-            'artic_plot_amplicon_depth=artic.plot_amplicon_depth:main',
             'artic_fasta_header=artic.fasta_header:main',
             'artic_mask=artic.mask:main',
         ],
diff --git a/test-runner.sh b/test-runner.sh
@@ -72,6 +72,7 @@ minionCmd_m="artic minion \
             --scheme-directory ${primerSchemes} \
             --read-file ${prefix}_guppyplex_fastq_pass-NB${barcode}.fastq \
             --medaka \
+            --medaka-model r941_min_high_g351 \
             ${primerScheme} \
             ${prefix}"
 

Original file line number	Diff line number	Diff line change
`@@ -191,6 +191,8 @@ def genCommand(sampleID, workflow):`
`191`	`191`	`]`
`192`	`192`	`if workflow=="medaka":`
`193`	`193`	`cmd.append("--medaka")`
	`194`	`+ cmd.append("--medaka-model")`
	`195`	`+ cmd.append("r941_min_high_g351")`
`194`	`196`	`if sampleID in extraFlags[workflow]:`
`195`	`197`	`for flag in extraFlags[workflow][sampleID]:`
`196`	`198`	`cmd.append(flag)`
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "1.1.3"`
	`1`	`+__version__ = "1.2.0"`