DataBiosphere
diff --git a/lambdas/service/app.py b/lambdas/service/app.py
Lines changed: 37 additions & 20 deletions
diff --git a/lambdas/service/openapi.json b/lambdas/service/openapi.json
Lines changed: 76 additions & 3 deletions
@@ -87,6 +87,7 @@
 from azul.service.app_controller import (
     validate_catalog,
     validate_params,
+    validate_wait,
 )
 from azul.service.catalog_controller import (
     CatalogController,
@@ -1096,6 +1097,26 @@ def get_summary():
                                              authentication=request.authentication)
 
 
+def wait_parameter_spec(*, default: int) -> JSON:
+    valid_values = [0, 1]
+    assert default in valid_values, default
+    return params.query(
+        'wait',
+        schema.optional(schema.default(default,
+                                       form=schema.enum(*valid_values))),
+        description=fd('''
+            If 0, the client is responsible for honoring the waiting period
+            specified in the `Retry-After` response header. If 1, the server
+            will delay the response in order to consume as much of that waiting
+            period as possible. This parameter should only be set to 1 by
+            clients who can't honor the `Retry-After` header, preventing them
+            from quickly exhausting the maximum number of redirects. If the
+            server cannot wait the full amount, any amount of wait time left
+            will still be returned in the `Retry-After` header of the response.
+        ''')
+    )
+
+
 post_manifest_example_url = (
     f'{app.base_url}/manifest/files'
     f'?catalog={list(config.catalogs.keys())[0]}'
@@ -1130,7 +1151,8 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
             'parameters': [
                 params.path('token', str, description=fd('''
                     An opaque string representing the manifest preparation job
-                '''))
+                ''')),
+                *([] if fetch else [wait_parameter_spec(default=0)])
             ]
         },
         spec={
@@ -1248,6 +1270,7 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
             '''),
             'parameters': [
                 catalog_param_spec,
+                *([wait_parameter_spec(default=1)] if curl else []),
                 filters_param_spec,
                 params.query(
                     'format',
@@ -1443,24 +1466,32 @@ def _file_manifest(fetch: bool, token_or_key: str | None = None):
         and request.headers.get('content-type') == 'application/x-www-form-urlencoded'
         and request.raw_body != b''
     ):
-        raise BRE('The body must be empty for a POST request of content-type '
-                  '`application/x-www-form-urlencoded` to this endpoint')
+        raise BRE('POST requests to this endpoint must have an empty body if '
+                  'they specify a `Content-Type` header of '
+                  '`application/x-www-form-urlencoded`')
     query_params = request.query_params or {}
     _hoist_parameters(query_params, request)
     if token_or_key is None:
         query_params.setdefault('filters', '{}')
+        if post:
+            query_params.setdefault('wait', '1')
         # We list the `catalog` validator first so that the catalog is validated
         # before any other potentially catalog-dependent validators are invoked
         validate_params(query_params,
                         catalog=validate_catalog,
                         format=validate_manifest_format,
-                        filters=validate_filters)
+                        filters=validate_filters,
+                        **({'wait': validate_wait} if post else {}))
         # Now that the catalog is valid, we can provide the default format that
         # depends on it
         default_format = app.metadata_plugin.manifest_formats[0].value
         query_params.setdefault('format', default_format)
     else:
-        validate_params(query_params)
+        validate_params(query_params,
+                        # If the initial request was a POST to the non-fetch
+                        # endpoint, the 'wait' param will be carried over to
+                        # each subsequent GET request to the non-fetch endpoint.
+                        **({'wait': validate_wait} if not fetch else {}))
     return app.manifest_controller.get_manifest_async(query_params=query_params,
                                                       token_or_key=token_or_key,
                                                       fetch=fetch,
@@ -1507,21 +1538,7 @@ def generate_manifest(event: AnyJSON, _context: LambdaContext):
                 made. If that fails, the UUID of the file will be used instead.
             ''')
         ),
-        params.query(
-            'wait',
-            schema.optional(schema.default(0)),
-            description=fd('''
-                If 0, the client is responsible for honoring the waiting period
-                specified in the Retry-After response header. If 1, the server
-                will delay the response in order to consume as much of that
-                waiting period as possible. This parameter should only be set to
-                1 by clients who can't honor the `Retry-After` header,
-                preventing them from quickly exhausting the maximum number of
-                redirects. If the server cannot wait the full amount, any amount
-                of wait time left will still be returned in the Retry-After
-                header of the response.
-            ''')
-        ),
+        wait_parameter_spec(default=0),
         params.query(
             'replica',
             schema.optional(str),
 
@@ -8799,6 +8799,22 @@
                                             ],
                                             "additionalProperties": false
                                         },
+                                        "dataUseRestriction": {
+                                            "type": "object",
+                                            "properties": {
+                                                "is": {
+                                                    "type": "array",
+                                                    "items": {
+                                                        "type": "string",
+                                                        "nullable": true
+                                                    }
+                                                }
+                                            },
+                                            "required": [
+                                                "is"
+                                            ],
+                                            "additionalProperties": false
+                                        },
                                         "developmentStage": {
                                             "type": "object",
                                             "properties": {
@@ -8873,6 +8889,22 @@
                                             ],
                                             "additionalProperties": false
                                         },
+                                        "duosId": {
+                                            "type": "object",
+                                            "properties": {
+                                                "is": {
+                                                    "type": "array",
+                                                    "items": {
+                                                        "type": "string",
+                                                        "nullable": true
+                                                    }
+                                                }
+                                            },
+                                            "required": [
+                                                "is"
+                                            ],
+                                            "additionalProperties": false
+                                        },
                                         "effectiveCellCount": {
                                             "oneOf": [
                                                 {
@@ -9842,7 +9874,7 @@
                                 }
                             }
                         },
-                        "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
+                        "description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. `{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
                     },
                     {
                         "name": "format",
@@ -9892,6 +9924,9 @@
                                 }
                             }
                         }
+                    },
+                    "504": {
+                        "description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
                     }
                 }
             },
@@ -9916,6 +9951,21 @@
                         },
                         "description": "The name of the catalog to query."
                     },
+                    {
+                        "name": "wait",
+                        "in": "query",
+                        "required": false,
+                        "schema": {
+                            "type": "integer",
+                            "format": "int64",
+                            "enum": [
+                                0,
+                                1
+                            ],
+                            "default": 1
+                        },
+                        "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
+                    },
                     {
                         "name": "filters",
                         "in": "query",
@@ -11308,6 +11358,21 @@
                         "type": "string"
                     },
                     "description": "\nAn opaque string representing the manifest preparation job\n"
+                },
+                {
+                    "name": "wait",
+                    "in": "query",
+                    "required": false,
+                    "schema": {
+                        "type": "integer",
+                        "format": "int64",
+                        "enum": [
+                            0,
+                            1
+                        ],
+                        "default": 0
+                    },
+                    "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
                 }
             ],
             "get": {
@@ -12902,9 +12967,13 @@
                         "schema": {
                             "type": "integer",
                             "format": "int64",
+                            "enum": [
+                                0,
+                                1
+                            ],
                             "default": 0
                         },
-                        "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
+                        "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
                     },
                     {
                         "name": "replica",
@@ -13041,9 +13110,13 @@
                         "schema": {
                             "type": "integer",
                             "format": "int64",
+                            "enum": [
+                                0,
+                                1
+                            ],
                             "default": 0
                         },
-                        "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
+                        "description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
                     },
                     {
                         "name": "replica",