Commit ab1b2b9

Merge remote-tracking branch 'origin/main' into cfm-submission_page

crfmc committed Feb 27, 2025
2 parents c044e30 + 820b818 commit ab1b2b9
Showing 13 changed files with 272 additions and 35 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.rst
@@ -7,13 +7,24 @@ smaht-portal
Change Log
----------

0.139.1
0.140.1
=======
`PR 361: fix: comment out unused Submissions page <https://github.com/smaht-dac/smaht-portal/pull/361>`_

* Comment out the submissions page calculated property


0.140.0
=======
`PR 354: SN Release Tracker title <https://github.com/smaht-dac/smaht-portal/pull/354>`_

* 2025-02-21 / dmichaels
- Branch: dmichaels-20250221-release-tracker-api-title | PR-355
- Derived from branch: sn_release_tracker_title (commit: d202c1c55b69389d031070ada85ce180b1ed603d)
- Changes to the release tracker API (i.e. /recent_files_summary) to use the new (calculated)
property release_tracker_title (created by Sarah in branch: sn_release_tracker_title);
the old behavior remains accessible via the legacy=true URL query argument.
* Add a calculated property to File, `release_tracker_title`, which displays, in order of priority, `override_release_tracker_title`, `CellCultureMixture.code`, `CellLine.code`, or `Tissue.display_title`, for use as a header in the Release Tracker on the home page (a sketch of the fallback order follows below)
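
A minimal sketch of that fallback order (a hypothetical standalone helper using flattened
field names; the actual calcprop resolves the linked CellCultureMixture/CellLine/Tissue
items through the request)::

    from typing import Optional

    def release_tracker_title(file_item: dict) -> Optional[str]:
        # 1. An explicit override always wins.
        if title := file_item.get("override_release_tracker_title"):
            return title
        # 2. Otherwise fall back through sample-source-derived values, in priority order.
        for candidate in ("cell_culture_mixture_code", "cell_line_code", "tissue_display_title"):
            if value := file_item.get(candidate):
                return value
        return None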

0.139.0
=======
`PR 360: fix: add resources to hard-coded disabled breadcrumbs <https://github.com/smaht-dac/smaht-portal/pull/360>`_
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "encoded"
version = "0.139.1"
version = "0.140.1"
description = "SMaHT Data Analysis Portal"
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
87 changes: 78 additions & 9 deletions src/encoded/commands/release_file.py
@@ -63,8 +63,11 @@ class AnnotatedFilenameInfo:


# dataset is required but comes in through input args for now
REQUIRED_FILE_PROPS = [file_constants.SEQUENCING_CENTER, "release_tracker_description"]

REQUIRED_FILE_PROPS = [file_constants.SEQUENCING_CENTER]
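# These are checked only after the initial metadata patch (see validate_file_after_patch
# below), presumably because release_tracker_title is a calculated property whose value
# may not resolve until the File's metadata has been patched.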
SECONDARY_REQUIRED_FILE_PROPS = [
"release_tracker_description",
"release_tracker_title"
]

class FileRelease:

@@ -238,6 +241,8 @@ def prepare(
) -> None:
self.validate_file()
self.add_file_patchdict(dataset)
self.add_release_file_patchdict(dataset)

self.add_release_items_to_patchdict(self.quality_metrics, "QualityMetric")
self.add_release_items_to_patchdict(
self.quality_metrics_zips, "Compressed QC metrics file"
@@ -259,7 +264,6 @@ def prepare(
if obsolete_file_identifier:
obsolete_file = self.get_metadata(obsolete_file_identifier)
self.add_obsolete_file_patchdict(obsolete_file)

print("\nThe following metadata patches will be carried out in the next step:")
for info in self.patch_infos:
print(info)
@@ -270,23 +274,44 @@ def prepare(
for warning in self.warnings:
print(warning)

def execute_initial(self) -> None:
print("Validating file patch dictionary...")
try:
self.validate_patch(self.patch_dicts[0])
except Exception as e:
print(str(e))
self.print_error_and_exit("Validation failed.")

print("Validation done. Patching file metadata...")
try:
self.patch_metadata(self.patch_dicts[0])
except Exception as e:
print(str(e))
self.print_error_and_exit("Patching failed.")

to_print = f"Patching of File {self.file_accession} completed."
print(ok_green_text(to_print))

def execute(self) -> None:
print("Validating all patch dictionaries...")
self.file = self.get_metadata(item_utils.get_uuid(self.file))
self.validate_file_after_patch()
try:
for patch_dict in self.patch_dicts:
for patch_dict in self.patch_dicts[1:]:
self.validate_patch(patch_dict)
except Exception as e:
print(str(e))
self.print_error_and_exit("Validation failed.")

print("Validation done. Patching...")
try:
for patch_dict in self.patch_dicts:
for patch_dict in self.patch_dicts[1:]:
self.patch_metadata(patch_dict)
except Exception as e:
print(str(e))
self.print_error_and_exit("Patching failed.")

to_print = f"Release of File {self.file_accession} completed."
print(ok_green_text(to_print))
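
# Taken together, execute_initial and execute give a two-phase release: patch_dicts[0]
# (the File metadata patch) is validated and applied first, the File is re-fetched so the
# secondary required properties (release_tracker_description, release_tracker_title) can
# be re-validated, and only then are the remaining patch dictionaries applied.
# A hypothetical driver (constructor arguments assumed, not from this module):
#
#   release = FileRelease(file_identifier, auth_key)
#   release.prepare(dataset)      # builds patch_infos/patch_dicts
#   release.execute_initial()     # phase 1: apply the File metadata patch
#   release.execute()             # phase 2: re-validate, then apply the rest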

@@ -396,6 +421,21 @@ def add_file_patchdict(self, dataset: str) -> None:
)
self.patch_dicts.append(patch_body)

def add_release_file_patchdict(self, dataset: str) -> None:
patch_body = {
item_constants.UUID: item_utils.get_uuid(self.file),
item_constants.STATUS: item_constants.STATUS_RELEASED,
}
self.patch_infos.extend(
[
f"\nFile ({self.file_accession}):",
self.get_okay_message(
item_constants.STATUS, item_constants.STATUS_RELEASED
),
]
)
self.patch_dicts.append(patch_body)

def get_annotated_filename_info(self) -> AnnotatedFilenameInfo:
annotated_filename = file_utils.get_annotated_filename(self.file)
if annotated_filename:
@@ -583,6 +623,9 @@ def validate_file(self) -> None:
self.validate_file_output_status()
self.validate_file_status()

def validate_file_after_patch(self) -> None:
self.validate_secondary_required_file_props()

def validate_required_file_props(self) -> None:
for prop in REQUIRED_FILE_PROPS:
if prop not in self.file:
@@ -591,6 +634,14 @@ def validate_required_file_props(self) -> None:
f" `{prop}`."
)

def validate_secondary_required_file_props(self) -> None:
for prop in SECONDARY_REQUIRED_FILE_PROPS:
if prop not in self.file:
self.print_error_and_exit(
f"File {self.file_accession} does not have the required property"
f" `{prop}`."
)

def validate_existing_file_sets(self) -> None:
existing_file_sets = file_utils.get_file_sets(self.file)
if output_file_utils.is_output_file(self.file) and existing_file_sets:
@@ -751,7 +802,7 @@ def main() -> None:

while True:
resp = input(
f"\nDo you want to proceed with release and execute patches above? "
f"\nDo you want to proceed with file patching above? "
f"Data will be patched on {warning_text(server)}."
f"\nYou have the following options: "
f"\ny - Proceed with release"
@@ -761,8 +812,26 @@
)

if resp in ["y", "yes"]:
file_release.execute()
break
file_release.execute_initial()
resp = input(
f"\nDo you want to proceed with release and execute all patches above? "
f"Data will be patched on {warning_text(server)}."
f"\nYou have the following options: "
f"\ny - Proceed with release"
f"\np - Show patch dictionaries "
f"\nn - Abort "
f"\n(y,p,n): "
)

if resp in ["y", "yes"]:
file_release.execute()
break
elif resp in ["p"]:
file_release.show_patch_dicts()
continue
else:
print(f"{warning_text('Aborted by user.')}")
exit()
elif resp in ["p"]:
file_release.show_patch_dicts()
continue
47 changes: 39 additions & 8 deletions src/encoded/endpoints/recent_files_summary/recent_files_summary.py
@@ -18,6 +18,7 @@
from encoded.endpoints.recent_files_summary.recent_files_summary_fields import (
AGGREGATION_FIELD_RELEASE_DATE,
AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR,
AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE,
AGGREGATION_FIELD_CELL_MIXTURE,
AGGREGATION_FIELD_DONOR,
AGGREGATION_FIELD_DSA_DONOR,
@@ -29,12 +30,20 @@
from snovault.search.search import search as snovault_search
from snovault.search.search_utils import make_search_subreq as snovault_make_search_subreq

# N.B. This implementation has undergone a number of iterations/changes. Currently (2025-02-24) it is much simpler
# than it originally was due to the introduction of the (calculated) property "release_tracker_title"; prior to
# that we used special code ("painless") in the ElasticSearch aggregation query to choose from among three possible
# properties (AGGREGATION_FIELD_CELL_MIXTURE, AGGREGATION_FIELD_DONOR, AGGREGATION_FIELD_DSA_DONOR); but now the
# query is much more straightforward. So some of this code can/should eventually be elided, though for the time
# being it is still here and even accessible using the "legacy=true" URL query argument to the API.
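#
# For intuition, a sketch of the difference (aggregation bodies are assumptions for
# illustration, not the verbatim output of create_field_aggregation below):
#
#   # New style: a plain terms aggregation on the one calculated property.
#   {"terms": {"field": "embedded.release_tracker_title.raw", "size": 100}}
#
#   # Legacy style: a scripted terms source choosing the first of the three possible
#   # properties present on each document (painless script elided here).
#   {"terms": {"script": {"lang": "painless", "source": "..."}, "size": 100}}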

QUERY_FILE_TYPES = ["OutputFile", "SubmittedFile"]
QUERY_FILE_STATUSES = ["released"]
QUERY_FILE_CATEGORIES = ["!Quality Control"]
QUERY_RECENT_MONTHS = 3
QUERY_INCLUDE_CURRENT_MONTH = True
BASE_SEARCH_QUERY = "/search/"
LEGACY_DEFAULT = False
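
# Example invocations (endpoint path per the N.B. above; boolean argument names taken
# from the request_arg_bool calls below; the meaning of "raw" is an assumption):
#   GET /recent_files_summary               -> default: group on release_tracker_title
#   GET /recent_files_summary?legacy=true   -> legacy grouping across the three fields
#   GET /recent_files_summary?raw=true      -> raw (un-normalized) aggregation results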


def recent_files_summary_endpoint(context, request):
@@ -48,12 +57,14 @@ def recent_files_summary_endpoint(context, request):
text_query = request_arg_bool(request, "text_query")
text_verbose = request_arg_bool(request, "text_verbose")
text_debug = request_arg_bool(request, "text_debug")
legacy = request_arg_bool(request, "legacy", LEGACY_DEFAULT)
results = get_normalized_aggregation_results_as_html_for_troublehshooting(results,
uuids=text_uuids,
uuid_details=text_uuid_details,
query=text_query,
verbose=text_verbose,
debug=text_debug)
debug=text_debug,
legacy=legacy)
results = PyramidResponse(f"<pre>{results}</pre>", content_type='text/html')
return results

@@ -100,6 +111,7 @@ def recent_files_summary(request: PyramidRequest,
troubleshoot = request_arg_bool(request, "troubleshoot")
troubleshoot_elasticsearch = request_arg_bool(request, "troubleshoot_elasticsearch")
raw = request_arg_bool(request, "raw")
legacy = request_arg_bool(request, "legacy", LEGACY_DEFAULT)

if troubleshooting is True:
debug = True
@@ -111,7 +123,11 @@ def get_aggregation_field_grouping_cell_or_donor() -> List[str]:
# and then alternatively (if a cell-line field does not exist) by the donor field.
# For troubleshooting/testing/or-maybe-if-we-change-our-minds we can alternatively
# look first for the donor field and then secondarily for the cell-line field.
nonlocal legacy
aggregation_field_grouping_cell_or_donor = deepcopy(AGGREGATION_FIELD_GROUPING_CELL_OR_DONOR)
if not legacy:
# 2025-02-21: This is now the default (using release_tracker_title).
aggregation_field_grouping_cell_or_donor = [AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE]
return aggregation_field_grouping_cell_or_donor

def create_base_query_arguments(request: PyramidRequest) -> dict:
@@ -157,7 +173,13 @@ def create_query_arguments(request: PyramidRequest, base_query_arguments: Option
return query_arguments

def create_query(request: PyramidRequest, base_query_arguments: Optional[dict] = None) -> str:
nonlocal legacy
query_arguments = create_query_arguments(request, base_query_arguments)
if not legacy:
if AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE not in query_arguments:
query_arguments[AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE] = f"!{AGGREGATION_NO_VALUE}"
if AGGREGATION_FIELD_FILE_DESCRIPTOR not in query_arguments:
query_arguments[AGGREGATION_FIELD_FILE_DESCRIPTOR] = f"!{AGGREGATION_NO_VALUE}"
query_string = create_query_string(query_arguments)
return f"{BASE_SEARCH_QUERY}?{query_string}"
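# Illustration only (names and encoding assumed, not verbatim output): the two
# "!" arguments exclude files lacking either tracker property, so a generated
# non-legacy query might look roughly like:
#   /search/?...&release_tracker_title!=No value&release_tracker_description!=No value
# (this assumes AGGREGATION_NO_VALUE == "No value" and that
# AGGREGATION_FIELD_FILE_DESCRIPTOR is the release_tracker_description field).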

@@ -175,7 +197,7 @@ def create_aggregation_query(aggregation_fields: List[str]) -> dict:
return {}

def create_field_aggregation(field: str) -> Optional[dict]: # noqa
nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, multi
nonlocal aggregation_field_grouping_cell_or_donor, date_property_name, legacy, multi
if field == date_property_name:
return {
"date_histogram": {
@@ -186,7 +208,7 @@ def create_field_aggregation(field: str) -> Optional[dict]:  # noqa
"order": {"_key": "desc"}
}
}
elif field == AGGREGATION_FIELD_CELL_MIXTURE:
elif legacy and (field == AGGREGATION_FIELD_CELL_MIXTURE):
# Note how we prefix the result with the aggregation field name;
# this is so later we can tell which grouping/field was matched;
# see fixup_names_values_for_normalized_results for this fixup.
@@ -294,7 +316,7 @@ def hoist_items_additional_value_up_one_level(data: dict,
results at the inner-most (hit) level. The normalize_elasticsearch_aggregation_results function puts these
in the additional_value property there, but we don't want these values at that inner-most level, i.e. at the
level of release_tracker_description, but rather one level up at the donor level (i.e. donors.display_title).
So this hoists these up to that level, but only if their values are all the same, which is practice they are.
So this hoists these up to that level, but only if their values are all the same, which in practice they are.
"""
if not isinstance(items_property_name, str):
items_property_name = "items"
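# Illustration (result shapes assumed): before hoisting, every inner item repeats the
# same additional_value, e.g.
#   {"name": "donors.display_title", "value": "SMHT001", "items": [
#       {"name": "release_tracker_description", "value": "...", "additional_value": "lung"},
#       {"name": "release_tracker_description", "value": "...", "additional_value": "lung"}]}
# whereas after hoisting the shared value appears once, one level up:
#   {"name": "donors.display_title", "value": "SMHT001", "additional_value": "lung", "items": [...]}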
@@ -322,7 +344,7 @@ def hoist_items_additional_value_up_one_level(data: dict,

def add_queries_to_normalized_results(normalized_results: dict, base_query_arguments: dict) -> None:
global BASE_SEARCH_QUERY
nonlocal date_property_name
nonlocal date_property_name, legacy
if isinstance(normalized_results, dict):
if name := normalized_results.get("name"):
if value := normalized_results.get("value"):
@@ -337,7 +359,13 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
f"{name}.from": from_date, f"{name}.to": thru_date}
else:
base_query_arguments = {**base_query_arguments, name: value}
normalized_results["query"] = create_query_string(base_query_arguments, BASE_SEARCH_QUERY)
query_arguments = deepcopy(base_query_arguments)
if not legacy:
if AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE not in query_arguments:
query_arguments[AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE] = f"!{AGGREGATION_NO_VALUE}"
if AGGREGATION_FIELD_FILE_DESCRIPTOR not in query_arguments:
query_arguments[AGGREGATION_FIELD_FILE_DESCRIPTOR] = f"!{AGGREGATION_NO_VALUE}"
normalized_results["query"] = create_query_string(query_arguments, BASE_SEARCH_QUERY)
if isinstance(items := normalized_results.get("items"), list):
for element in items:
add_queries_to_normalized_results(element, base_query_arguments)
@@ -352,7 +380,7 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
aggregate_by_cell_line_property_name = "aggregate_by_cell_line"
aggregate_by_cell_line = [
date_property_name,
AGGREGATION_FIELD_CELL_MIXTURE,
AGGREGATION_FIELD_CELL_MIXTURE if legacy else AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE,
AGGREGATION_FIELD_FILE_DESCRIPTOR
]
aggregation_query = {
@@ -453,7 +481,10 @@ def add_queries_to_normalized_results(normalized_results: dict, base_query_argum
remove_empty_items=not include_missing)

if not exclude_tissue_info:
hoist_items_additional_value_up_one_level(normalized_results)
if False:
# 2025-02-24: No longer hoist this (sample_summary.tissues) property up one level;
# we no longer recall why this was originally done this way; in any case it is no longer desired.
hoist_items_additional_value_up_one_level(normalized_results)

fixup_names_values_for_normalized_results(normalized_results)
add_queries_to_normalized_results(normalized_results, base_query_arguments)
src/encoded/endpoints/recent_files_summary/recent_files_summary_fields.py
@@ -6,6 +6,7 @@

AGGREGATION_FIELD_RELEASE_DATE = "file_status_tracking.released"
AGGREGATION_FIELD_CELL_MIXTURE = "file_sets.libraries.analytes.samples.sample_sources.code"
AGGREGATION_FIELD_RELEASE_TRACKER_FILE_TITLE = "release_tracker_title" # 2025-02-21: new/default (if NOT legacy=true)
AGGREGATION_FIELD_DONOR = "donors.display_title"
AGGREGATION_FIELD_DSA_DONOR = "donor_specific_assembly.donors.display_title" # 2025-02-04
AGGREGATION_FIELD_CELL_LINE = "file_sets.libraries.analytes.samples.sample_sources.cell_line.code" # unused by default