Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions lib/galaxy/managers/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ def create_dataset_collection(
# else if elements is set, it better be an ordered dict!

if elements is not self.ELEMENTS_UNINITIALIZED:
self._validate_nested_collection_elements(collection_type_description, elements)
type_plugin = collection_type_description.rank_type_plugin()
dataset_collection = builder.build_collection(
type_plugin, elements, fields=fields, column_definitions=column_definitions, rows=rows
Expand Down Expand Up @@ -577,6 +578,32 @@ def __persist(
session.commit()
return dataset_collection_instance

def _validate_nested_collection_elements(self, collection_type_description, elements) -> None:
"""For nested collection types (e.g. ``list:paired_or_unpaired``), verify that
every element value is a :class:`DatasetCollection` whose ``collection_type``
matches the expected sub-collection type. Otherwise creating a
``list:paired_or_unpaired`` from raw HDAs would silently produce a
structurally invalid collection that downstream tools cannot handle.
"""
if not collection_type_description.has_subcollections():
return
if not isinstance(elements, dict):
return
expected_sub = collection_type_description.subcollection_type_description().collection_type
for identifier, value in elements.items():
if isinstance(value, DatasetCollection) and value.collection_type == expected_sub:
continue
if isinstance(value, DatasetCollection):
actual = f"a collection of type '{value.collection_type}'"
elif getattr(value, "history_content_type", None) == "dataset":
actual = "a dataset"
else:
actual = type(value).__name__
raise RequestParameterInvalidException(
f"Element '{identifier}' of collection type '{collection_type_description.collection_type}' "
f"must be a sub-collection of type '{expected_sub}', got {actual}."
)

def __recursively_create_collections_for_identifiers(
self, trans, element_identifiers, hide_source_items: bool, copy_elements: bool, history=None
):
Expand Down
20 changes: 17 additions & 3 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
ToolRequest,
)
from galaxy.model.dataset_collections.matching import MatchingCollections
from galaxy.model.dataset_collections.types.paired_or_unpaired import SINGLETON_IDENTIFIER
from galaxy.model.dataset_collections.types.sample_sheet_workbook import _sample_sheet_to_list_collection_type
from galaxy.objectstore import ObjectStorePopulator
from galaxy.schema.credentials import CredentialsContext
Expand Down Expand Up @@ -4036,14 +4037,26 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history

def _handle_unpaired(dce):
element_identifier = dce.element_identifier
assert getattr(dce.element_object, "history_content_type", None) == "dataset"
copied_value = dce.element_object.copy(copy_tags=dce.element_object.tags, flush=False)
element_object = dce.element_object
# In list:paired_or_unpaired collections, unpaired elements are
# wrapped in a 1-element sub-collection. Unwrap to get the dataset.
if getattr(element_object, "elements", None):
inner_element = element_object.elements[0]
assert inner_element.element_identifier == SINGLETON_IDENTIFIER
element_object = inner_element.element_object
assert element_object.history_content_type == "dataset"
copied_value = element_object.copy(copy_tags=element_object.tags, flush=False)
unpaired_dce_copies[element_identifier] = copied_value
unpaired_dce_columns[element_identifier] = dce.columns

def _handle_paired(dce):
element_identifier = dce.element_identifier
copied_value = dce.element_object.copy(flush=False)
# Normalize to 'paired' for list:paired output, since a
# list:paired_or_unpaired input may contain 2-element
# paired_or_unpaired sub-collections that are structurally
# equivalent to paired but carry the wider collection_type.
copied_value.collection_type = "paired"
paired_dce_copies[element_identifier] = copied_value
paired_datasets.append(copied_value.elements[0].element_object)
paired_datasets.append(copied_value.elements[1].element_object)
Expand All @@ -4057,7 +4070,8 @@ def _handle_paired(dce):
_handle_paired(element)
elif collection_type == "list:paired_or_unpaired":
for element in collection.elements:
if getattr(element.element_object, "history_content_type", None) == "dataset":
sub_collection = element.element_object
if len(sub_collection.elements) == 1:
_handle_unpaired(element)
else:
_handle_paired(element)
Expand Down
12 changes: 9 additions & 3 deletions lib/galaxy/tools/split_paired_and_unpaired.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,20 @@
<param name="input">
<collection type="list:paired_or_unpaired">
<element name="el1">
<collection type="paired">
<collection type="paired_or_unpaired">
<element name="forward" value="simple_line.txt" />
<element name="reverse" value="simple_line_alternative.txt" />
</collection>
</element>
<element name="el2" value="simple_line.txt">
<element name="el2">
<collection type="paired_or_unpaired">
<element name="unpaired" value="simple_line.txt" />
</collection>
</element>
<element name="el3" value="simple_line_alternative.txt">
<element name="el3">
<collection type="paired_or_unpaired">
<element name="unpaired" value="simple_line_alternative.txt" />
</collection>
</element>
</collection>
</param>
Expand Down
25 changes: 25 additions & 0 deletions lib/galaxy_test/api/test_dataset_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from galaxy.tool_util_models.sample_sheet import SampleSheetColumnDefinitions
from galaxy.util import galaxy_root_path
from galaxy_test.base.api_asserts import (
assert_error_message_contains,
assert_has_key,
assert_object_id_error,
assert_status_code_is,
Expand Down Expand Up @@ -152,6 +153,30 @@ def test_create_paried_or_unpaired(self, history_id):
returned_collections = dataset_collection["elements"]
assert len(returned_collections) == 1, dataset_collection

def test_create_list_paired_or_unpaired_rejects_raw_hda_child(self, history_id):
element_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents=[("el1", "hi")])
payload = dict(
instance_type="history",
history_id=history_id,
element_identifiers=element_identifiers,
collection_type="list:paired_or_unpaired",
)
create_response = self._post("dataset_collections", payload, json=True)
assert_status_code_is(create_response, 400)
assert_error_message_contains(create_response, "sub-collection of type 'paired_or_unpaired'")

def test_create_list_paired_rejects_raw_hda_child(self, history_id):
element_identifiers = self.dataset_collection_populator.list_identifiers(history_id, contents=[("el1", "hi")])
payload = dict(
instance_type="history",
history_id=history_id,
element_identifiers=element_identifiers,
collection_type="list:paired",
)
create_response = self._post("dataset_collections", payload, json=True)
assert_status_code_is(create_response, 400)
assert_error_message_contains(create_response, "sub-collection of type 'paired'")

def test_create_record(self, history_id):
contents = [
("condition", "1\t2\t3"),
Expand Down
48 changes: 48 additions & 0 deletions lib/galaxy_test/workflow/split_paired_and_unpaired.gxwf-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
- doc: |
Test splitting a list:paired_or_unpaired collection with mixed paired
and unpaired elements into separate paired and unpaired output collections.
job:
input_collection:
class: Collection
collection_type: list:paired_or_unpaired
elements:
- identifier: el1
class: Collection
type: paired_or_unpaired
elements:
- identifier: forward
class: File
contents: "forward content"
- identifier: reverse
class: File
contents: "reverse content"
- identifier: el2
class: Collection
type: paired_or_unpaired
elements:
- identifier: unpaired
class: File
contents: "unpaired content"
outputs:
output_unpaired:
class: Collection
collection_type: list
elements:
el2:
asserts:
- that: has_text
text: "unpaired content"
output_paired:
class: Collection
collection_type: list:paired
elements:
el1:
elements:
forward:
asserts:
- that: has_text
text: "forward content"
reverse:
asserts:
- that: has_text
text: "reverse content"
15 changes: 15 additions & 0 deletions lib/galaxy_test/workflow/split_paired_and_unpaired.gxwf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
class: GalaxyWorkflow
inputs:
input_collection:
type: collection
collection_type: list:paired_or_unpaired
outputs:
output_unpaired:
outputSource: split/output_unpaired
output_paired:
outputSource: split/output_paired
steps:
split:
tool_id: '__SPLIT_PAIRED_AND_UNPAIRED__'
in:
input: input_collection
Loading