Skip to content

Commit

Permalink
Add DataFile accession as datafile comment
Browse files Browse the repository at this point in the history
  • Loading branch information
kdp-cloud committed Jan 3, 2025
1 parent 07f0fcd commit 8ab6950
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 15 deletions.
95 changes: 80 additions & 15 deletions mars-cli/mars_lib/isa_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MaterialAttributeValue,
Study,
OntologyAnnotation,
Data as DataFile,
)
from pydantic import ValidationError
from mars_lib.target_repo import TARGET_REPO_KEY, TargetRepository
Expand Down Expand Up @@ -143,6 +144,20 @@ def apply_filter(filter: Filter, nodes: Union[List[Study], List[Assay]]) -> Any:
)


def accession_data_file_comment_present(node: DataFile) -> bool:
accession_data_file_comments = [
comment for comment in node.comments if comment.name == "accession"
]
if len(accession_data_file_comments) > 1:
raise AttributeError(
"There should not be more than 1 data file comment with the name 'accession'."
)
elif len(accession_data_file_comments) > 0:
return True
else:
return False


def accession_characteristic_category_present(node: Union[Study, Assay]) -> bool:
"""
Checks if the node has an accession characteristic category.
Expand Down Expand Up @@ -212,7 +227,7 @@ def accession_characteristic_present(
return False


def add_accession_to_node(
def add_accession_to_material_node(
node: Any, accession_number: str, material_type_path: Path
) -> None:
"""
Expand All @@ -231,7 +246,7 @@ def add_accession_to_node(
updated_material = apply_filter(material_type_path.where, node_materials)
else:
raise ValueError(
f"'where' atribute is missing in path {material_type_path.key}."
f"'where' attribute is missing in path {material_type_path.key}."
)

accession_characteristics_category = next(
Expand Down Expand Up @@ -271,6 +286,23 @@ def add_accession_to_node(
updated_material.characteristics.append(updated_material_accession_characteristic)


def add_accession_to_data_file_node(node: DataFile, accession_number: str) -> None:
accession_comment = next(
(
comment
for comment in node.comments
if comment.name and comment.name.lower() == "accession"
),
None,
)
if not accession_comment:
raise ValueError(
f"Accession comment not found in data file'{node.id} - {node.name}'."
)

accession_comment.value = accession_number


def create_accession_characteristic_category(
node: Union[Study, Assay]
) -> Tuple[str, MaterialAttribute]:
Expand All @@ -292,7 +324,27 @@ def create_accession_characteristic_category(
category.characteristicType = OntologyAnnotation(annotationValue="accession")
node.characteristicCategories.append(category)

return (accession_id, category)
return accession_id, category


def create_accession_data_file_comment(node: DataFile) -> Comment:
comment = Comment()
accession_id = str(uuid.uuid4())
comment.id = f"#comment/accession_{accession_id}"
comment.name = "accession"
node.comments.append(comment)
return comment


def fetch_existing_accession_data_file_comment(node: DataFile) -> Comment:
accession_comment = next(
comment
for comment in node.comments
if comment.name and comment.name.lower() == "accession"
)
if not accession_comment:
raise ValueError(f"Accession comment not found in {node.id}.")
return accession_comment


def fetch_existing_characteristic_category(
Expand Down Expand Up @@ -372,30 +424,34 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa
has_materials_in_path = (
len([p for p in accession.path if p.key == "materials"]) > 0
)
has_data_files_in_path = (
len([p for p in accession.path if p.key == "dataFiles"]) > 0
)
target_level = "assay" if has_assay_in_path else "study"

study_filter = get_filter_for_accession_key(accession, "studies")
if not study_filter:
raise ValueError(f"Study filter is not present in {accession.path}.")

################################################################################################################
# Filter the study or assay based on the filter
################################################################################################################
updated_node = apply_filter(study_filter, investigation.studies)
if target_level == "assay":
assay_filter = get_filter_for_accession_key(accession, "assays")
if not assay_filter:
raise ValueError(f"Assay filter is not present in {accession.path}.")

updated_node = apply_filter(assay_filter, updated_node.assays)
################################################################################################################

if has_materials_in_path:
material_type_path = next(
p
for p in accession.path
if p.key in ["sources", "samples", "otherMaterials"]
)

updated_node = apply_filter(study_filter, investigation.studies)

if target_level == "assay":
assay_filter = get_filter_for_accession_key(accession, "assays")
if not assay_filter:
raise ValueError(
f"Assay filter is not present in {accession.path}."
)

updated_node = apply_filter(assay_filter, updated_node.assays)

if not updated_node:
raise ValueError(f"Node not found for {accession.value}.")
if not accession_characteristic_category_present(updated_node):
Expand All @@ -412,7 +468,16 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa
updated_node, material_type_path, category, accession_id
)

add_accession_to_node(updated_node, accession.value, material_type_path)
add_accession_to_material_node(
updated_node, accession.value, material_type_path
)
elif has_data_files_in_path:
data_file_filter = get_filter_for_accession_key(accession, "dataFiles")
updated_node = apply_filter(data_file_filter, updated_node.dataFiles)
if not accession_data_file_comment_present(updated_node):
create_accession_data_file_comment(updated_node)

add_accession_to_data_file_node(updated_node, accession.value)
else:
updated_study = apply_filter(study_filter, investigation.studies)

Expand Down
17 changes: 17 additions & 0 deletions mars-cli/tests/test_isa_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,23 @@ def test_update_study_with_ena_study_accession_comment():
assert next(accession_comment).value == ena_study_accession_number


def test_update_datafile_comment_with_accession_comment_present():
json_path = "tests/fixtures/isa_jsons/1_after_biosamples.json"
isa_json = load_isa_json(json_path, False)
response_file_path = "tests/fixtures/mars_receipts/ena_success_response.json"
ena_response = RepositoryResponse.from_json_file(response_file_path)
data_file_accession_number = "ERN00000001"

updated_isa_json = update_isa_json(isa_json, ena_response)
data_file_comments = (
updated_isa_json.investigation.studies[0].assays[0].dataFiles[0].comments
)
accession_comment = filter(lambda x: x.name == "accession", data_file_comments)
assert next(accession_comment).value == data_file_accession_number


def test_update_datafile_comment_without_accession_comment_present():
pass


def test_filename_validation():
Expand Down

0 comments on commit 8ab6950

Please sign in to comment.