Skip to content

Commit 163d122

Browse files
committed
Experimental plasmid support
1 parent 3240538 commit 163d122

File tree

21 files changed

+200
-61
lines changed

21 files changed

+200
-61
lines changed

lib/miga/cli/objects_helper.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ def load_and_filter_datasets(silent = false)
6060
o &&= (d.ref? == self[:ref]) unless self[:ref].nil?
6161
o &&= (d.active? == self[:active]) unless self[:active].nil?
6262
o &&= (self[:multi] ? d.multi? : d.nonmulti?) unless self[:multi].nil?
63+
unless self[:markers].nil?
64+
o &&= (self[:markers] ? d.markers? : !d.markers?)
65+
end
6366
unless self[:taxonomy].nil?
6467
o &&= !d.metadata[:tax].nil? && d.metadata[:tax].in?(self[:taxonomy])
6568
end

lib/miga/cli/opt_helper.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,11 @@ def opt_object(opt, what = %i[project dataset])
120120
# as determined by +what+ an Array with any combination of:
121121
# - :ref To filter by reference (--ref) or query (--no-ref)
122122
# - :multi To filter by multiple (--multi) or single (--no-multi) species
123+
# - :markers To filter by datasets with (--markers) or without (--no-markers) markers
123124
# - :active To filter by active (--active) or inactive (--no-active)
124125
# - :taxonomy To filter by taxonomy (--taxonomy)
125126
# The "k-th" filter (--dataset-k) is always included
126-
def opt_filter_datasets(opt, what = %i[ref multi active taxonomy])
127+
def opt_filter_datasets(opt, what = %i[ref multi markers active taxonomy])
127128
what.each do |w|
128129
case w
129130
when :ref
@@ -136,6 +137,11 @@ def opt_filter_datasets(opt, what = %i[ref multi active taxonomy])
136137
'--[no-]multi',
137138
'Use only multi-species (or only single-species) datasets'
138139
) { |v| self[:multi] = v }
140+
when :markers
141+
opt.on(
142+
'--[no-]markers',
143+
'Use only datasets with (or without) markers'
144+
) { |v| self[:markers] = v }
139145
when :active
140146
opt.on(
141147
'--[no-]active',

lib/miga/dataset.rb

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
require 'miga/metadata'
77
require 'miga/dataset/result'
88
require 'miga/dataset/status'
9+
require 'miga/dataset/type'
910
require 'miga/dataset/hooks'
1011

1112
# This library is only required by +#closest_relatives+, so it is now
@@ -18,6 +19,7 @@
1819
class MiGA::Dataset < MiGA::MiGA
1920
include MiGA::Dataset::Result
2021
include MiGA::Dataset::Status
22+
include MiGA::Dataset::Type
2123
include MiGA::Dataset::Hooks
2224

2325
# Class-level
@@ -56,6 +58,7 @@ def initialize(project, name, is_ref = true, metadata = {})
5658
name.to_s
5759
@project, @name, @metadata = project, name, nil
5860
metadata[:ref] = is_ref
61+
metadata[:type] ||= :empty
5962
@metadata_future = [
6063
File.join(project.path, 'metadata', "#{name}.json"),
6164
metadata
@@ -89,12 +92,6 @@ def save
8992
# +Project+ interface
9093
alias :save! :save
9194

92-
##
93-
# Get the type of dataset as Symbol
94-
def type
95-
metadata[:type]
96-
end
97-
9895
##
9996
# Delete the dataset with all its contents (including results) and returns
10097
# nil
@@ -146,22 +143,6 @@ def query?
146143
!metadata[:ref]
147144
end
148145

149-
##
150-
# Is this dataset known to be multi-organism?
151-
def multi?
152-
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
153-
154-
@@KNOWN_TYPES[type][:multi]
155-
end
156-
157-
##
158-
# Is this dataset known to be single-organism?
159-
def nonmulti?
160-
return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?
161-
162-
!@@KNOWN_TYPES[type][:multi]
163-
end
164-
165146
##
166147
# Is this dataset active?
167148
def active?

lib/miga/dataset/base.rb

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ def EXCLUDE_NOREF_TASKS
3232
@@EXCLUDE_NOREF_TASKS
3333
end
3434

35+
##
36+
# Tasks to be excluded from datasets without markers
37+
def EXCLUDE_NOMARKER_TASKS
38+
@@EXCLUDE_NOMARKER_TASKS
39+
end
40+
3541
##
3642
# Tasks to be executed only in datasets that are single-organism. These
3743
# tasks are ignored for multi-organism datasets or for unknown types
@@ -81,45 +87,67 @@ module MiGA::Dataset::Base
8187
# Supported dataset types
8288
@@KNOWN_TYPES = {
8389
genome: {
84-
description: 'The genome from an isolate', multi: false
90+
description: 'The genome from an isolate',
91+
multi: false, markers: true,
92+
project_types: %i[mixed genomes clade]
8593
},
8694
scgenome: {
87-
description: 'A Single-cell Amplified Genome (SAG)', multi: false
95+
description: 'A Single-cell Amplified Genome (SAG)',
96+
multi: false, markers: true,
97+
project_types: %i[mixed genomes clade]
8898
},
8999
popgenome: {
90-
description: 'A Metagenome-Assembled Genome (MAG)', multi: false
100+
description: 'A Metagenome-Assembled Genome (MAG)',
101+
multi: false, markers: true,
102+
project_types: %i[mixed genomes clade]
91103
},
92104
metagenome: {
93-
description: 'A metagenome (excluding viromes)', multi: true
105+
description: 'A metagenome (excluding viromes)',
106+
multi: true, markers: true,
107+
project_types: %i[mixed metagenomes]
94108
},
95109
virome: {
96-
description: 'A viral metagenome', multi: true
110+
description: 'A viral metagenome',
111+
multi: true,
112+
markers: true, # <- We don't expect, but can be useful for contamination
113+
project_types: %i[mixed metagenomes]
114+
},
115+
plasmid: {
116+
description: 'An individual plasmid',
117+
multi: false, markers: false,
118+
project_types: %i[mixed plasmids]
97119
}
98120
}
99121

100122
##
101123
# Returns an Array of tasks (Symbols) to be executed before project-wide tasks
102-
@@PREPROCESSING_TASKS = [
103-
:raw_reads, :trimmed_reads, :read_quality, :trimmed_fasta,
104-
:assembly, :cds, :essential_genes, :mytaxa, :mytaxa_scan,
105-
:taxonomy, :distances, :ssu, :stats
124+
@@PREPROCESSING_TASKS = %i[
125+
raw_reads trimmed_reads read_quality trimmed_fasta
126+
assembly cds essential_genes mytaxa mytaxa_scan
127+
taxonomy distances ssu stats
106128
]
107129

108130
##
109131
# Tasks to be excluded from query datasets
110-
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan, :taxonomy]
132+
@@EXCLUDE_NOREF_TASKS = %i[mytaxa_scan taxonomy]
111133
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map { |i| [i, true] }]
112134

135+
##
136+
# Tasks to be excluded from datasets without markers
137+
@@EXCLUDE_NOMARKER_TASKS = %i[essential_genes ssu]
138+
@@_EXCLUDE_NOMARKER_TASKS_H =
139+
Hash[@@EXCLUDE_NOMARKER_TASKS.map { |i| [i, true] }]
140+
113141
##
114142
# Tasks to be executed only in datasets that are single-organism. These
115143
# tasks are ignored for multi-organism datasets or for unknown types
116-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :taxonomy, :distances]
144+
@@ONLY_NONMULTI_TASKS = %i[mytaxa_scan taxonomy distances]
117145
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map { |i| [i, true] }]
118146

119147
##
120148
# Tasks to be executed only in datasets that are multi-organism. These
121149
# tasks are ignored for single-organism datasets or for unknown types
122-
@@ONLY_MULTI_TASKS = [:mytaxa]
150+
@@ONLY_MULTI_TASKS = %i[mytaxa]
123151
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map { |i| [i, true] }]
124152

125153
##

lib/miga/dataset/hooks.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
# Supported hooks:
1616
# - run_lambda(lambda, args...)
1717
# - recalculate_status()
18+
# - check_type()
1819
# - clear_run_counts()
1920
# - run_cmd(cmd)
2021
# Internal hooks:
@@ -27,6 +28,7 @@ module MiGA::Dataset::Hooks
2728
def default_hooks
2829
{
2930
on_create: [[:recalculate_status]],
31+
on_save: [[:check_type]],
3032
on_activate: [[:clear_run_counts], [:recalculate_status]],
3133
on_inactivate: [[:recalculate_status]],
3234
on_result_ready: [[:_pull_result_hooks]],
@@ -51,6 +53,12 @@ def hook_recalculate_status(_hook_args, _event_args)
5153
recalculate_status
5254
end
5355

56+
##
57+
# Ensure that the dataset type exists and is compatible with the project type
58+
def hook_check_type(_hook_args, _event_args)
59+
check_type
60+
end
61+
5462
##
5563
# Run +cmd+ in the command-line with {{variables}}:
5664
# dataset, project, project_name, miga, object (if defined for the event)

lib/miga/dataset/result/ignore.rb

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,14 @@ def ignore_task?(task)
1717
# - project: incompatible project
1818
# - noref: incompatible dataset, only for reference
1919
# - multi: incompatible dataset, only for multi
20+
# - nomarkers: incompatible dataset, only for markers
2021
# - nonmulti: incompatible dataset, only for nonmulti
2122
# - complete: the task is already complete
2223
def ignore_reasons
23-
%i[empty inactive upstream force project noref multi nonmulti complete]
24+
%i[
25+
empty inactive upstream force project
26+
noref multi nonmulti nomarkers complete
27+
]
2428
end
2529

2630
##
@@ -91,9 +95,15 @@ def ignore_nonmulti?(task)
9195
ignore_by_type?(task, :nonmulti)
9296
end
9397

98+
##
99+
# Ignore +task+ because it's not a markers dataset
100+
def ignore_nomarkers?(task)
101+
ignore_by_type?(task, :nomarkers)
102+
end
103+
94104
##
95105
# Ignore +task+ by +type+ of dataset, one of: +:noref+, +:multi+, or
96-
# +:nonmulti+
106+
# +:nonmulti+, +:nomarkers+
97107
def ignore_by_type?(task, type)
98108
return false if force_task?(task)
99109

@@ -105,6 +115,8 @@ def ignore_by_type?(task, type)
105115
[:multi?, self.class.ONLY_MULTI_TASKS]
106116
when :nonmulti
107117
[:nonmulti?, self.class.ONLY_NONMULTI_TASKS]
118+
when :nomarkers
119+
[:markers?, self.class.EXCLUDE_NOMARKER_TASKS]
108120
else
109121
raise "Unexpected error, unknown type reason: #{type}"
110122
end

lib/miga/dataset/type.rb

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
##
2+
# Helper module including specific functions for dataset type
3+
module MiGA::Dataset::Type
4+
##
5+
# Get the type of dataset as Symbol
6+
def type
7+
metadata[:type]
8+
end
9+
10+
##
11+
# Is this dataset known to be multi-organism?
12+
def multi?
13+
self.class.KNOWN_TYPES.dig(type, :multi)
14+
end
15+
16+
##
17+
# Is this dataset known to be single-organism?
18+
def nonmulti?
19+
y = self.class.KNOWN_TYPES.dig(type, :multi)
20+
y.nil? ? nil : !y
21+
end
22+
23+
##
24+
# Are universal marker genes expected to be found in this dataset?
25+
def markers?
26+
self.class.KNOWN_TYPES.dig(type, :markers)
27+
end
28+
29+
##
30+
# Check that the dataset type is defined, known, and compatible with the
31+
# project type and raise an exception if any of these checks fail
32+
#
33+
# If the dataset type is +:empty+, it returns +false+ without raising an
34+
# exception; otherwise it returns +true+ (provided that none of the checks fail)
35+
def check_type
36+
raise MiGA::Error.new('Undefined dataset type') unless type
37+
return false if type == :empty
38+
39+
unless self.class.KNOWN_TYPES[type]
40+
raise MiGA::Error.new("Unknown dataset type: #{type}")
41+
end
42+
unless self.class.KNOWN_TYPES[type][:project_types].include? project.type
43+
raise MiGA::Error.new(
44+
"Dataset type (#{type}) incompatible with project (#{project.type})"
45+
)
46+
end
47+
48+
true
49+
end
50+
51+
end

lib/miga/project.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def type
9898
##
9999
# Is this a clade project?
100100
def clade?
101-
type == :clade
101+
%i[clade plasmids].include? type
102102
end
103103

104104
##
@@ -115,6 +115,12 @@ def multi?
115115
# Same as multi? For backward compatibility
116116
alias is_multi? multi?
117117

118+
##
119+
# Does the project support the use of universal markers?
120+
def markers?
121+
@@KNOWN_TYPES[type][:markers]
122+
end
123+
118124
##
119125
# Is this project active? Currently a dummy function, returns
120126
# always true.

lib/miga/project/base.rb

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -89,32 +89,36 @@ module MiGA::Project::Base
8989
@@KNOWN_TYPES = {
9090
mixed: {
9191
description: 'Mixed collection of genomes, metagenomes, and viromes',
92-
single: true, multi: true
92+
single: true, multi: true, markers: true
9393
},
9494
genomes: {
9595
description: 'Collection of genomes',
96-
single: true, multi: false
96+
single: true, multi: false, markers: true
9797
},
9898
clade: {
9999
description: 'Collection of closely-related genomes (ANI >= 90%)',
100-
single: true, multi: false
100+
single: true, multi: false, markers: true
101101
},
102102
metagenomes: {
103103
description: 'Collection of metagenomes and/or viromes',
104-
single: false, multi: true
104+
single: false, multi: true, markers: true
105+
},
106+
plasmids: {
107+
description: 'Collection of plasmids',
108+
single: true, multi: false, markers: false
105109
}
106110
}
107111

108112
##
109113
# Project-wide distance estimations
110-
@@DISTANCE_TASKS = [
111-
:project_stats, :haai_distances, :aai_distances, :ani_distances,
112-
:clade_finding
114+
@@DISTANCE_TASKS = %i[
115+
project_stats haai_distances aai_distances ani_distances
116+
clade_finding
113117
]
114118

115119
##
116120
# Project-wide tasks for :clade projects
117-
@@INCLADE_TASKS = [:subclades, :ogs]
121+
@@INCLADE_TASKS = %i[subclades ogs]
118122

119123
##
120124
# Options supported by projects
@@ -131,7 +135,9 @@ module MiGA::Project::Base
131135
},
132136
haai_p: {
133137
desc: 'Value of aai.rb -p on hAAI', type: String,
134-
default: proc { |project| project.clade? ? 'no' : 'fastaai' },
138+
default: proc { |project|
139+
project.clade? || !project.markers? ? 'no' : 'fastaai'
140+
},
135141
in: %w[blast+ blast blat diamond fastaai no]
136142
},
137143
aai_p: {

0 commit comments

Comments
 (0)