Commit ab1b2b9

Merge remote-tracking branch 'origin/main' into cfm-submission_page

crfmc committed Feb 27, 2025
2 parents c044e30 + 820b818 commit ab1b2b9
Showing 13 changed files with 272 additions and 35 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.rst
@@ -7,13 +7,24 @@ smaht-portal
Change Log
----------

0.139.1
0.140.1
=======
`PR 361: fix: comment out unused Submissions page <https://github.com/smaht-dac/smaht-portal/pull/361>`_

* Comment out the submissions page calculated property


0.140.0
=======
`PR 354: SN Release Tracker title <https://github.com/smaht-dac/smaht-portal/pull/354>`_

* 2025-02-21 / dmichaels
- Branch: dmichaels-20250221-release-tracker-api-title | PR-355
- Derived from branch: sn_release_tracker_title (commit: d202c1c55b69389d031070ada85ce180b1ed603d)
- Changes to the release tracker API (i.e. /recent_files_summary) to use the new (calculated)
property release_tracker_title (created by Sarah in branch: sn_release_tracker_title);
the old behavior remains accessible via the legacy=true URL query argument.
* Add a calculated property to File, `release_tracker_title`, which displays, in order of priority, `override_release_tracker_title`, `CellCultureMixture.code`, `CellLine.code`, or `Tissue.display_title`, for use as a header in the Release Tracker on the home page (a sketch of the fallback order follows below)
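
A minimal sketch of that fallback order (a hypothetical standalone helper using flattened
field names; the actual calcprop resolves the linked CellCultureMixture/CellLine/Tissue
items through the request)::

    from typing import Optional

    def release_tracker_title(file_item: dict) -> Optional[str]:
        # 1. An explicit override always wins.
        if title := file_item.get("override_release_tracker_title"):
            return title
        # 2. Otherwise fall back through sample-source-derived values, in priority order.
        for candidate in ("cell_culture_mixture_code", "cell_line_code", "tissue_display_title"):
            if value := file_item.get(candidate):
                return value
        return None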

0.139.0
=======
`PR 360: fix: add resources to hard-coded disabled breadcrumbs <https://github.com/smaht-dac/smaht-portal/pull/360>`_
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "encoded"
version = "0.139.1"
version = "0.140.1"
description = "SMaHT Data Analysis Portal"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
87 changes: 78 additions & 9 deletions src/encoded/commands/release_file.py
@@ -63,8 +63,11 @@ class AnnotatedFilenameInfo:


# dataset is required but comes in through input args for now
REQUIRED_FILE_PROPS = [file_constants.SEQUENCING_CENTER, "release_tracker_description"]

REQUIRED_FILE_PROPS = [file_constants.SEQUENCING_CENTER]
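# These are checked only after the initial metadata patch (see validate_file_after_patch
# below), presumably because release_tracker_title is a calculated property whose value
# may not resolve until the File's metadata has been patched.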
SECONDARY_REQUIRED_FILE_PROPS = [
"release_tracker_description",
"release_tracker_title"
]

class FileRelease:

@@ -238,6 +241,8 @@ def prepare(
) -> None:
self.validate_file()
self.add_file_patchdict(dataset)
self.add_release_file_patchdict(dataset)

self.add_release_items_to_patchdict(self.quality_metrics, "QualityMetric")
self.add_release_items_to_patchdict(
self.quality_metrics_zips, "Compressed QC metrics file"
@@ -259,7 +264,6 @@ def prepare(
if obsolete_file_identifier:
obsolete_file = self.get_metadata(obsolete_file_identifier)
self.add_obsolete_file_patchdict(obsolete_file)

print("\nThe following metadata patches will be carried out in the next step:")
for info in self.patch_infos:
print(info)
@@ -270,23 +274,44 @@ def prepare(
for warning in self.warnings:
print(warning)

def execute_initial(self) -> None:
print("Validating file patch dictionary...")
try:
self.validate_patch(self.patch_dicts[0])
except Exception as e:
print(str(e))
self.print_error_and_exit("Validation failed.")

print("Validation done. Patching file metadata...")
try:
self.patch_metadata(self.patch_dicts[0])
except Exception as e:
print(str(e))
self.print_error_and_exit("Patching failed.")

to_print = f"Patching of File {self.file_accession} completed."
print(ok_green_text(to_print))

def execute(self) -> None:
print("Validating all patch dictionaries...")
self.file = self.get_metadata(item_utils.get_uuid(self.file))
self.validate_file_after_patch()
try:
for patch_dict in self.patch_dicts:
for patch_dict in self.patch_dicts[1:]:
self.validate_patch(patch_dict)
except Exception as e:
print(str(e))
self.print_error_and_exit("Validation failed.")

print("Validation done. Patching...")
try:
for patch_dict in self.patch_dicts:
for patch_dict in self.patch_dicts[1:]:
self.patch_metadata(patch_dict)
except Exception as e:
print(str(e))
self.print_error_and_exit("Patching failed.")

to_print = f"Release of File {self.file_accession} completed."
print(ok_green_text(to_print))
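
# Taken together, execute_initial and execute give a two-phase release: patch_dicts[0]
# (the File metadata patch) is validated and applied first, the File is re-fetched so the
# secondary required properties (release_tracker_description, release_tracker_title) can
# be re-validated, and only then are the remaining patch dictionaries applied.
# A hypothetical driver (constructor arguments assumed, not from this module):
#
#   release = FileRelease(file_identifier, auth_key)
#   release.prepare(dataset)      # builds patch_infos/patch_dicts
#   release.execute_initial()     # phase 1: apply the File metadata patch
#   release.execute()             # phase 2: re-validate, then apply the rest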

@@ -396,6 +421,21 @@ def add_file_patchdict(self, dataset: str) -> None:
)
self.patch_dicts.append(patch_body)

def add_release_file_patchdict(self, dataset: str) -> None:
patch_body = {
item_constants.UUID: item_utils.get_uuid(self.file),
item_constants.STATUS: item_constants.STATUS_RELEASED,
}
self.patch_infos.extend(
[
f"\nFile ({self.file_accession}):",
self.get_okay_message(
item_constants.STATUS, item_constants.STATUS_RELEASED
),
]
)
self.patch_dicts.append(patch_body)

def get_annotated_filename_info(self) -> AnnotatedFilenameInfo:
annotated_filename = file_utils.get_annotated_filename(self.file)
if annotated_filename:
@@ -583,6 +623,9 @@ def validate_file(self) -> None:
self.validate_file_output_status()
self.validate_file_status()

def validate_file_after_patch(self) -> None:
self.validate_secondary_required_file_props()

def validate_required_file_props(self) -> None:
for prop in REQUIRED_FILE_PROPS:
if prop not in self.file:
@@ -591,6 +634,14 @@ def validate_required_file_props(self) -> None:
f" `{prop}`."
)

def validate_secondary_required_file_props(self) -> None:
for prop in SECONDARY_REQUIRED_FILE_PROPS:
if prop not in self.file:
self.print_error_and_exit(
f"File {self.file_accession} does not have the required property"
f" `{prop}`."
)

def validate_existing_file_sets(self) -> None:
existing_file_sets = file_utils.get_file_sets(self.file)
if output_file_utils.is_output_file(self.file) and existing_file_sets:
@@ -751,7 +802,7 @@ def main() -> None:

while True:
resp = input(
f"\nDo you want to proceed with release and execute patches above? "
f"\nDo you want to proceed with file patching above? "
f"Data will be patched on {warning_text(server)}."
f"\nYou have the following options: "
f"\ny - Proceed with release"
@@ -761,8 +812,26 @@
)

if resp in ["y", "yes"]:
file_release.execute()
break
file_release.execute_initial()
resp = input(
f"\nDo you want to proceed with release and execute all patches above? "
f"Data will be patched on {warning_text(server)}."
f"\nYou have the following options: "
f"\ny - Proceed with release"
f"\np - Show patch dictionaries "
f"\nn - Abort "
f"\n(y,p,n): "
)

if resp in ["y", "yes"]:
file_release.execute()
break
elif resp in ["p"]:
file_release.show_patch_dicts()
continue
else:
print(f"{warning_text('Aborted by user.')}")
exit()
elif resp in ["p"]:
file_release.show_patch_dicts()
continue
47 changes: 39 additions & 8 deletions src/encoded/endpoints/recent_files_summary/recent_files_summary.py
@@ -18,6 +18,7 @@
from encoded.endpoints.recent_files_summary.recent_files_summary_fields import (
AGGREGATION_FIELD_RELEASE_DATE,
AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR,
AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE,
AGGREGATION_FIELD_CELL_MIXTURE,
AGGREGATION_FIELD_DONOR,
AGGREGATION_FIELD_DSA_DONOR,
@@ -29,12 +30,20 @@
from snovault.search.search import search as snovault_search
from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq

# N.B. This implementation has undergone a number of iterations/changes. Currently (2025-02-24) it is much simpler
# than it originally was due to the introduction of the (calculated) property "release_tracker_title"; prior to
# that we used special code ("painless") in the ElasticSearch aggregation query to choose from among three possible
# properties (AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_DSA_DONOR); but now the
# query is much more straightforward. So some of this code can/should eventually be elided, though for the time
# being it is still here and even accessible using the "legacy=true" URL query argument to the API.
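#
# For intuition, a sketch of the difference (aggregation bodies are assumptions for
# illustration, not the verbatim output of create_field_aggregation below):
#
#   # New style: a plain terms aggregation on the one calculated property.
#   {"terms": {"field": "embedded.release_tracker_title.raw", "size": 100}}
#
#   # Legacy style: a scripted terms source choosing the first of the three possible
#   # properties present on each document (painless script elided here).
#   {"terms": {"script": {"lang": "painless", "source": "..."}, "size": 100}}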

QUERY_FILE_TYPES = ["OutputFile", "SubmittedFile"]
QUERY_FILE_STATUSES = ["released"]
QUERY_FILE_CATEGORIES = ["!Quality Control"]
QUERY_RECENT_MONTHS = 3
QUERY_INCLUDE_CURRENT_MONTH = True
BASE_SEARCH_QUERY = "/search/"
LEGACY_DEFAULT = False
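
# Example invocations (endpoint path per the N.B. above; boolean argument names taken
# from the request_arg_bool calls below; the meaning of "raw" is an assumption):
#   GET /recent_files_summary               -> default: group on release_tracker_title
#   GET /recent_files_summary?legacy=true   -> legacy grouping across the three fields
#   GET /recent_files_summary?raw=true      -> raw (un-normalized) aggregation results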


def recent_files_summary_endpoint(context, request):
@@ -48,12 +57,14 @@ def recent_files_summary_endpoint(context, request):
text_query = request_arg_bool(request, "text_query")
text_verbose = request_arg_bool(request, "text_verbose")
text_debug = request_arg_bool(request, "text_debug")
legacy = request_arg_bool(request, "legacy", LEGACY_DEFAULT)
results = get_normalized_aggregation_results_as_html_for_troublehshooting(results,
uuids=text_uuids,
uuid_details=text_uuid_details,
query=text_query,
verbose=text_verbose,
debug=text_debug)
debug=text_debug,
legacy=legacy)
results = PyramidResponse(f"<pre>{results}</pre>", content_type='text/html')
return results

@@ -100,6 +111,7 @@ def recent_files_summary(request: PyramidRequest,
troubleshoot = request_arg_bool(request, "troubleshoot")
troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch")
raw = request_arg_bool(request, "raw")
legacy = request_arg_bool(request, "legacy", LEGACY_DEFAULT)

if troubleshooting is True:
debug = True
@@ -111,7 +123,11 @@ def get_aggregation_field_grouping_cell_or_donor() -> List[str]:
# and then alternatively (if a cell-line field does not exist) by the donor field.
# For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively
# look first for the donor field and then secondarily for the cell-line field.
nonlocal legacy
aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR)
if not legacy:
# 2025-02-21: This is now the default (using release_tracker_title).
aggregation_field_grouping_cell_or_donor = [AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE]
return aggregation_field_grouping_cell_or_donor

def create_base_query_arguments(request: PyramidRequest) -> dict:
@@ -157,7 +173,13 @@ def create_query_arguments(request: PyramidRequest, base_query_arguments: Option
return query_arguments

def create_query(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str:
nonlocal legacy
query_arguments = create_query_arguments(request, base_query_arguments)
if not legacy:
if AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE not in query_arguments:
query_arguments[AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE] = f"!{AGGREGATION_NO_VALUE}"
if AGGREGATION_FIELD_FILE_DESCRIPTOR not in query_arguments:
query_arguments[AGGREGATION_FIELD_FILE_DESCRIPTOR] = f"!{AGGREGATION_NO_VALUE}"
query_string = create_query_string(query_arguments)
return f"{BASE_SEARCH_QUERY}?{query_string}"
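# Illustration only (names and encoding assumed, not verbatim output): the two
# "!" arguments exclude files lacking either tracker property, so a generated
# non-legacy query might look roughly like:
#   /search/?...&release_tracker_title!=No value&release_tracker_description!=No value
# (this assumes AGGREGATION_NO_VALUE == "No value" and that
# AGGREGATION_FIELD_FILE_DESCRIPTOR is the release_tracker_description field).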

@@ -175,7 +197,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict:
return {}

def create_field_aggregation(field: str) -> Optional[dict]: # noqa
nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, multi
nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, legacy, multi
if field == date_property_name:
return {
"date_histogram": {
@@ -186,7 +208,7 @@ def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
"order": {"_key": "desc"}
}
}
elif field == AGGREGATION_FIELD_CELL_MIXTURE:
elif legacy and (field == AGGREGATION_FIELD_CELL_MIXTURE):
# Note how we prefix the result with the aggregation field name;
# this is so later we can tell which grouping/field was matched;
# see fixup_names_values_for_normalized_results for this fixup.
@@ -294,7 +316,7 @@ def hoist_items_additional_value_up_one_level(data: dict,
results at the inner-most (hit) level. The normalize_elasticsearch_aggregation_results function puts these
in the additional_value property there, but we don't want these values at that inner-most level, i.e. at the
level of release_tracker_description, but rather one level up at the donor level (i.e. donors.display_title).
So this hoists these up to that level, but only if their values are all the same, which is practice they are.
So this hoists these up to that level, but only if their values are all the same, which in practice they are.
"""
if not isinstance(items_property_name, str):
items_property_name = "items"
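# Illustration (result shapes assumed): before hoisting, every inner item repeats the
# same additional_value, e.g.
#   {"name": "donors.display_title", "value": "SMHT001", "items": [
#       {"name": "release_tracker_description", "value": "...", "additional_value": "lung"},
#       {"name": "release_tracker_description", "value": "...", "additional_value": "lung"}]}
# whereas after hoisting the shared value appears once, one level up:
#   {"name": "donors.display_title", "value": "SMHT001", "additional_value": "lung", "items": [...]}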
@@ -322,7 +344,7 @@ def hoist_items_additional_value_up_one_level(data: dict,

def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None:
global BASE_SEARCH_QUERY
nonlocal date_property_name
nonlocal date_property_name, legacy
if isinstance(normalized_results, dict):
if name := normalized_results.get("name"):
if value := normalized_results.get("value"):
@@ -337,7 +359,13 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
f"{name}.from": from_date, f"{name}.to": thru_date}
else:
base_query_arguments = {**base_query_arguments, name: value}
normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY)
query_arguments = deepcopy(base_query_arguments)
if not legacy:
if AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE not in query_arguments:
query_arguments[AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE] = f"!{AGGREGATION_NO_VALUE}"
if AGGREGATION_FIELD_FILE_DESCRIPTOR not in query_arguments:
query_arguments[AGGREGATION_FIELD_FILE_DESCRIPTOR] = f"!{AGGREGATION_NO_VALUE}"
normalized_results["query"] = create_query_string(query_arguments, BASE_SEARCH_QUERY)
if isinstance(items := normalized_results.get("items"), list):
for element in items:
add_queries_to_normalized_results(element, base_query_arguments)
@@ -352,7 +380,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
aggregate_by_cell_line_property_name = "aggregate_by_cell_line"
aggregate_by_cell_line = [
date_property_name,
AGGREGATION_FIELD_CELL_MIXTURE,
AGGREGATION_FIELD_CELL_MIXTURE if legacy else AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE,
AGGREGATION_FIELD_FILE_DESCRIPTOR
]
aggregation_query = {
@@ -453,7 +481,10 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
remove_empty_items=not include_missing)

if not exclude_tissue_info:
hoist_items_additional_value_up_one_level(normalized_results)
if False:
# 2025-02-24: No longer hoist this (sample_summary.tissues) property up one level;
# we no longer recall why this was originally done this way; in any case it is no longer desired.
hoist_items_additional_value_up_one_level(normalized_results)

fixup_names_values_for_normalized_results(normalized_results)
add_queries_to_normalized_results(normalized_results, base_query_arguments)
src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py
@@ -6,6 +6,7 @@

AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released"
AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code"
AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE = "release_tracker_title" # 2025-02-21: new/default (if NOT legacy=true)
AGGREGATION_FIELD_DONOR = "donors.display_title"
AGGREGATION_FIELD_DSA_DONOR = "donor_specific_assembly.donors.display_title" # 2025-02-04
AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" # unused by default