Skip to content

Commit 16f80de

Browse files
committed
[2/2] [a] Fix: Can't use curl to download a single manifest in one invocation (#5918)
Add a wait parameter option to the manifest endpoint
1 parent c4075b3 commit 16f80de

File tree

6 files changed

+150
-33
lines changed

6 files changed

+150
-33
lines changed

lambdas/service/app.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
from azul.service.app_controller import (
8888
validate_catalog,
8989
validate_params,
90+
validate_wait,
9091
)
9192
from azul.service.catalog_controller import (
9293
CatalogController,
@@ -1096,6 +1097,26 @@ def get_summary():
10961097
authentication=request.authentication)
10971098

10981099

1100+
def wait_parameter_spec(*, default: int) -> JSON:
    """
    Build the specification of the optional `wait` query parameter.

    :param default: the value advertised as the parameter's default; it must
                    be one of the permitted values, 0 or 1

    :return: the result of `params.query` for a parameter named `wait` whose
             schema is restricted to the permitted values
    """
    permitted = [0, 1]
    assert default in permitted, default
    # The schema is optional, enumerates the permitted values and carries the
    # caller-supplied default.
    wait_schema = schema.optional(
        schema.default(default, form=schema.enum(*permitted))
    )
    wait_description = fd('''
        If 0, the client is responsible for honoring the waiting period
        specified in the `Retry-After` response header. If 1, the server
        will delay the response in order to consume as much of that waiting
        period as possible. This parameter should only be set to 1 by
        clients who can't honor the `Retry-After` header, preventing them
        from quickly exhausting the maximum number of redirects. If the
        server cannot wait the full amount, any amount of wait time left
        will still be returned in the `Retry-After` header of the response.
    ''')
    return params.query('wait', wait_schema, description=wait_description)
1118+
1119+
10991120
post_manifest_example_url = (
11001121
f'{app.base_url}/manifest/files'
11011122
f'?catalog={list(config.catalogs.keys())[0]}'
@@ -1130,7 +1151,8 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
11301151
'parameters': [
11311152
params.path('token', str, description=fd('''
11321153
An opaque string representing the manifest preparation job
1133-
'''))
1154+
''')),
1155+
*([] if fetch else [wait_parameter_spec(default=0)])
11341156
]
11351157
},
11361158
spec={
@@ -1248,6 +1270,7 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
12481270
'''),
12491271
'parameters': [
12501272
catalog_param_spec,
1273+
*([wait_parameter_spec(default=1)] if curl else []),
12511274
filters_param_spec,
12521275
params.query(
12531276
'format',
@@ -1443,24 +1466,32 @@ def _file_manifest(fetch: bool, token_or_key: str | None = None):
14431466
and request.headers.get('content-type') == 'application/x-www-form-urlencoded'
14441467
and request.raw_body != b''
14451468
):
1446-
raise BRE('The body must be empty for a POST request of content-type '
1447-
'`application/x-www-form-urlencoded` to this endpoint')
1469+
raise BRE('POST requests to this endpoint must have an empty body if '
1470+
'they specify a `Content-Type` header of '
1471+
'`application/x-www-form-urlencoded`')
14481472
query_params = request.query_params or {}
14491473
_hoist_parameters(query_params, request)
14501474
if token_or_key is None:
14511475
query_params.setdefault('filters', '{}')
1476+
if post:
1477+
query_params.setdefault('wait', '1')
14521478
# We list the `catalog` validator first so that the catalog is validated
14531479
# before any other potentially catalog-dependent validators are invoked
14541480
validate_params(query_params,
14551481
catalog=validate_catalog,
14561482
format=validate_manifest_format,
1457-
filters=validate_filters)
1483+
filters=validate_filters,
1484+
**({'wait': validate_wait} if post else {}))
14581485
# Now that the catalog is valid, we can provide the default format that
14591486
# depends on it
14601487
default_format = app.metadata_plugin.manifest_formats[0].value
14611488
query_params.setdefault('format', default_format)
14621489
else:
1463-
validate_params(query_params)
1490+
validate_params(query_params,
1491+
# If the initial request was a POST to the non-fetch
1492+
# endpoint, the 'wait' param will be carried over to
1493+
# each subsequent GET request to the non-fetch endpoint.
1494+
**({'wait': validate_wait} if not fetch else {}))
14641495
return app.manifest_controller.get_manifest_async(query_params=query_params,
14651496
token_or_key=token_or_key,
14661497
fetch=fetch,
@@ -1507,21 +1538,7 @@ def generate_manifest(event: AnyJSON, _context: LambdaContext):
15071538
made. If that fails, the UUID of the file will be used instead.
15081539
''')
15091540
),
1510-
params.query(
1511-
'wait',
1512-
schema.optional(schema.default(0)),
1513-
description=fd('''
1514-
If 0, the client is responsible for honoring the waiting period
1515-
specified in the Retry-After response header. If 1, the server
1516-
will delay the response in order to consume as much of that
1517-
waiting period as possible. This parameter should only be set to
1518-
1 by clients who can't honor the `Retry-After` header,
1519-
preventing them from quickly exhausting the maximum number of
1520-
redirects. If the server cannot wait the full amount, any amount
1521-
of wait time left will still be returned in the Retry-After
1522-
header of the response.
1523-
''')
1524-
),
1541+
wait_parameter_spec(default=0),
15251542
params.query(
15261543
'replica',
15271544
schema.optional(str),

lambdas/service/openapi.json

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8799,6 +8799,22 @@
87998799
],
88008800
"additionalProperties": false
88018801
},
8802+
"dataUseRestriction": {
8803+
"type": "object",
8804+
"properties": {
8805+
"is": {
8806+
"type": "array",
8807+
"items": {
8808+
"type": "string",
8809+
"nullable": true
8810+
}
8811+
}
8812+
},
8813+
"required": [
8814+
"is"
8815+
],
8816+
"additionalProperties": false
8817+
},
88028818
"developmentStage": {
88038819
"type": "object",
88048820
"properties": {
@@ -8873,6 +8889,22 @@
88738889
],
88748890
"additionalProperties": false
88758891
},
8892+
"duosId": {
8893+
"type": "object",
8894+
"properties": {
8895+
"is": {
8896+
"type": "array",
8897+
"items": {
8898+
"type": "string",
8899+
"nullable": true
8900+
}
8901+
}
8902+
},
8903+
"required": [
8904+
"is"
8905+
],
8906+
"additionalProperties": false
8907+
},
88768908
"effectiveCellCount": {
88778909
"oneOf": [
88788910
{
@@ -9842,7 +9874,7 @@
98429874
}
98439875
}
98449876
},
9845-
"description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
9877+
"description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
98469878
},
98479879
{
98489880
"name": "format",
@@ -9892,6 +9924,9 @@
98929924
}
98939925
}
98949926
}
9927+
},
9928+
"504": {
9929+
"description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
98959930
}
98969931
}
98979932
},
@@ -9916,6 +9951,21 @@
99169951
},
99179952
"description": "The name of the catalog to query."
99189953
},
9954+
{
9955+
"name": "wait",
9956+
"in": "query",
9957+
"required": false,
9958+
"schema": {
9959+
"type": "integer",
9960+
"format": "int64",
9961+
"enum": [
9962+
0,
9963+
1
9964+
],
9965+
"default": 1
9966+
},
9967+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
9968+
},
99199969
{
99209970
"name": "filters",
99219971
"in": "query",
@@ -11308,6 +11358,21 @@
1130811358
"type": "string"
1130911359
},
1131011360
"description": "\nAn opaque string representing the manifest preparation job\n"
11361+
},
11362+
{
11363+
"name": "wait",
11364+
"in": "query",
11365+
"required": false,
11366+
"schema": {
11367+
"type": "integer",
11368+
"format": "int64",
11369+
"enum": [
11370+
0,
11371+
1
11372+
],
11373+
"default": 0
11374+
},
11375+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1131111376
}
1131211377
],
1131311378
"get": {
@@ -12902,9 +12967,13 @@
1290212967
"schema": {
1290312968
"type": "integer",
1290412969
"format": "int64",
12970+
"enum": [
12971+
0,
12972+
1
12973+
],
1290512974
"default": 0
1290612975
},
12907-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
12976+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1290812977
},
1290912978
{
1291012979
"name": "replica",
@@ -13041,9 +13110,13 @@
1304113110
"schema": {
1304213111
"type": "integer",
1304313112
"format": "int64",
13113+
"enum": [
13114+
0,
13115+
1
13116+
],
1304413117
"default": 0
1304513118
},
13046-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
13119+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1304713120
},
1304813121
{
1304913122
"name": "replica",

0 commit comments

Comments
 (0)