From 961ad5b1351d086a00327997a81905a6f254eae2 Mon Sep 17 00:00:00 2001 From: Volker Theile Date: Thu, 23 Nov 2023 11:36:25 +0100 Subject: [PATCH 1/2] UI: The prefix handling is buggy at several places in the REST API Fixes: https://github.com/aquarist-labs/s3gw/issues/831 Signed-off-by: Volker Theile --- src/backend/api/objects.py | 56 ++++----- src/backend/api/types.py | 10 +- .../tests/unit/api/test_api_objects.py | 108 +++++++++++++++--- ...object-version-datatable-page.component.ts | 10 +- .../pages/user/user-pages-routing.module.ts | 4 +- .../shared/services/api/s3-bucket.service.ts | 6 +- 6 files changed, 143 insertions(+), 51 deletions(-) diff --git a/src/backend/api/objects.py b/src/backend/api/objects.py index 6791fa1c..e2432f1a 100644 --- a/src/backend/api/objects.py +++ b/src/backend/api/objects.py @@ -273,6 +273,12 @@ async def list_object_versions( key_marker = s3_res["NextKeyMarker"] except s3.exceptions.ClientError: return None + + if params.Strict: + # Return only that object versions that exactly match the given + # prefix. + res = [obj for obj in res if obj.Key == params.Prefix] + return res @@ -551,22 +557,22 @@ async def restore_object( https://repost.aws/knowledge-center/s3-undelete-configuration https://www.middlewareinventory.com/blog/recover-s3/ """ + # Remove existing deletion markers. + api_res: Optional[List[ObjectVersion]] = await list_object_versions( + conn, + bucket, + ListObjectVersionsRequest(Prefix=params.Key, Strict=True), + ) + del_objects: List[ObjectIdentifierTypeDef] = [ + parse_obj_as(ObjectIdentifierTypeDef, obj) + for obj in api_res or [] + if obj.IsDeleted + ] + if del_objects: + await delete_objects(conn, bucket, del_objects) + + # Make a copy of the object to restore. async with conn.conn() as s3: - # Remove existing deletion markers. - s3_res = await s3.list_object_versions(Bucket=bucket, Prefix=params.Key) - del_objects: List[ObjectIdentifierTypeDef] = [] - dm = DeleteMarkerEntryTypeDef - for dm in s3_res.get("DeleteMarkers", []): - if dm["IsLatest"]: - del_objects.append( - {"Key": dm["Key"], "VersionId": dm["VersionId"]} - ) - if del_objects: - await s3.delete_objects( - Bucket=bucket, - Delete={"Objects": del_objects, "Quiet": True}, - ) - # Make a copy of the object to restore. copy_source: CopySourceTypeDef = { "Bucket": bucket, "Key": params.Key, @@ -600,18 +606,13 @@ async def collect_objects() -> List[ObjectIdentifierTypeDef]: api_res: Optional[List[ObjectVersion]] = await list_object_versions( conn, bucket, - ListObjectVersionsRequest(Prefix=params.Key, Delimiter=""), + ListObjectVersionsRequest(Prefix=params.Key, Strict=True), ) - obj: ObjectVersion - res_objects: List[ObjectIdentifierTypeDef] = [] - for obj in api_res or []: - # Skip "virtual folders" and objects that do not match - # the given key. - if obj.Type != "OBJECT" or obj.Key != params.Key: - continue - version_id: str = obj.VersionId if obj.VersionId else "" - res_objects.append({"Key": obj.Key, "VersionId": version_id}) - return res_objects + return [ + parse_obj_as(ObjectIdentifierTypeDef, obj) + for obj in api_res or [] + if obj.Type == "OBJECT" + ] objects: List[ObjectIdentifierTypeDef] if params.AllVersions: @@ -678,6 +679,9 @@ async def collect_objects(prefix: str) -> List[ObjectIdentifierTypeDef]: async def delete_objects( conn: S3GWClientDep, bucket: str, objects: List[ObjectIdentifierTypeDef] ) -> List[DeletedObject]: + """ + Helper function to delete the specified objects. + """ async with conn.conn() as s3: s3_res: DeleteObjectsOutputTypeDef = await s3.delete_objects( Bucket=bucket, Delete={"Objects": objects} diff --git a/src/backend/api/types.py b/src/backend/api/types.py index 6fd673b6..97a72a1b 100644 --- a/src/backend/api/types.py +++ b/src/backend/api/types.py @@ -135,7 +135,11 @@ class ListObjectsRequest(BaseModel): class ListObjectVersionsRequest(ListObjectsRequest): - pass + Strict: bool = Field( + default=False, + description="If `True`, then only the objects whose key " + "exactly match the specified prefix are returned.", + ) class ObjectVersion(Object): @@ -165,7 +169,7 @@ class RestoreObjectRequest(ObjectIdentifier): class DeleteObjectRequest(ObjectIdentifier): AllVersions: bool = Field( - False, + default=False, description="If `True`, all versions will be deleted, otherwise " "only the specified one.", ) @@ -180,7 +184,7 @@ class DeleteObjectByPrefixRequest(BaseModel): ) Delimiter: str = "/" AllVersions: bool = Field( - False, + default=False, description="If `True`, all versions will be deleted, otherwise " "the latest one.", ) diff --git a/src/backend/tests/unit/api/test_api_objects.py b/src/backend/tests/unit/api/test_api_objects.py index b1a01fbd..dedf628b 100644 --- a/src/backend/tests/unit/api/test_api_objects.py +++ b/src/backend/tests/unit/api/test_api_objects.py @@ -20,6 +20,7 @@ from botocore.exceptions import ClientError from fastapi import HTTPException, Response, UploadFile, status from pytest_mock import MockerFixture +from types_aiobotocore_s3.type_defs import ObjectIdentifierTypeDef from backend.api import S3GWClient, objects from backend.api.objects import ObjectBodyStreamingResponse @@ -1124,7 +1125,19 @@ async def test_restore_object( } ), ) - s3api_mock.patch("delete_objects", return_value=async_return(None)) + s3api_mock.patch( + "delete_objects", + return_value=async_return( + { + "Deleted": [ + { + "Key": "a/b/file2.txt", + "VersionId": "453bhjb6553hb34j53j5bj34jhjh", + } + ] + } + ), + ) s3api_mock.patch( "copy_object", return_value=async_return( @@ -1152,19 +1165,6 @@ async def test_restore_object( Key="a/b/file2.txt", VersionId="F2b5Z0ezvNExjWH3lolr1BUfOn17Zw6" ), ) - s3api_mock.mocked_fn["list_object_versions"].assert_called() - s3api_mock.mocked_fn["delete_objects"].assert_called_with( - Bucket="test01", - Delete={ - "Objects": [ - { - "Key": "a/b/file2.txt", - "VersionId": "453bhjb6553hb34j53j5bj34jhjh", - } - ], - "Quiet": True, - }, - ) s3api_mock.mocked_fn["copy_object"].assert_called() @@ -1678,3 +1678,83 @@ async def test_upload_object( UploadFile(file=io.BytesIO(), filename="file2.txt"), ) s3api_mock.mocked_fn["upload_fileobj"].assert_called() + + +@pytest.mark.anyio +async def test_delete_objects_1( + s3_client: S3GWClient, mocker: MockerFixture +) -> None: + objs = [ + { + "Key": "a/b/file2.txt", + "VersionId": "453bhjb6553hb34j53j5bj34jhjh", + } + ] + s3api_mock = S3ApiMock(s3_client, mocker) + s3api_mock.patch( + "delete_objects", + return_value=async_return({"Deleted": objs}), + ) + + res: List[DeletedObject] = await objects.delete_objects( + s3_client, "test01", [ObjectIdentifierTypeDef(**obj) for obj in objs] + ) + + s3api_mock.mocked_fn["delete_objects"].assert_called_once_with( + Bucket="test01", Delete={"Objects": objs} + ) + assert len(res) == 1 + assert res[0].Key == "a/b/file2.txt" + assert res[0].VersionId == "453bhjb6553hb34j53j5bj34jhjh" + + +@pytest.mark.anyio +async def test_delete_objects_2( + s3_client: S3GWClient, mocker: MockerFixture +) -> None: + objs = [ + { + "Key": "a/b/file2.txt", + "VersionId": "vXrrVkZNXIbprzSpR4hdGt", + }, + { + "Key": "a/b/file3.txt", + "VersionId": "HkW2UWxXxASjRRlBhEfEsCL", + }, + ] + s3api_mock = S3ApiMock(s3_client, mocker) + s3api_mock.patch( + "delete_objects", + return_value=async_return( + { + "Errors": [ + { + "Key": "a/b/file2.txt", + "VersionId": "vXrrVkZNXIbprzSpR4hdGt", + "Code": "AuthorizationHeaderMalformed", + "Message": "The authorization header ...", + }, + { + "Key": "a/b/file3.txt", + "VersionId": "HkW2UWxXxASjRRlBhEfEsCL-", + "Code": "AccessDenied", + "Message": "AccessDenied", + }, + ] + } + ), + ) + + with pytest.raises(HTTPException) as e: + await objects.delete_objects( + s3_client, + "test01", + [ObjectIdentifierTypeDef(**obj) for obj in objs], + ) + + assert e.value.status_code == 500 + assert ( + e.value.detail == "Could not delete object(s) " + "a/b/file2.txt (AuthorizationHeaderMalformed), " + "a/b/file3.txt (AccessDenied)" + ) diff --git a/src/frontend/src/app/pages/user/object/object-version-datatable-page/object-version-datatable-page.component.ts b/src/frontend/src/app/pages/user/object/object-version-datatable-page/object-version-datatable-page.component.ts index 8e6231c7..1ef328fb 100644 --- a/src/frontend/src/app/pages/user/object/object-version-datatable-page/object-version-datatable-page.component.ts +++ b/src/frontend/src/app/pages/user/object/object-version-datatable-page/object-version-datatable-page.component.ts @@ -20,9 +20,9 @@ import { PageAction } from '~/app/shared/models/page-action.type'; import { S3BucketName, S3BucketService, + S3ObjectKey, S3ObjectVersion, - S3ObjectVersionList, - S3Prefix + S3ObjectVersionList } from '~/app/shared/services/api/s3-bucket.service'; import { BlockUiService } from '~/app/shared/services/block-ui.service'; import { ModalDialogService } from '~/app/shared/services/modal-dialog.service'; @@ -40,7 +40,7 @@ export class ObjectVersionDatatablePageComponent implements OnInit { private subscriptions: Subscription = new Subscription(); public bid: S3BucketName = ''; - public prefix: S3Prefix = ''; + public key: S3ObjectKey = ''; public objects: S3ObjectVersionList = []; public datatableActions: DatatableAction[] = []; public datatableColumns: DatatableColumn[] = []; @@ -62,7 +62,7 @@ export class ObjectVersionDatatablePageComponent implements OnInit { return; } this.bid = decodeURIComponent(value['bid']); - this.prefix = decodeURIComponent(value['prefix']); + this.key = decodeURIComponent(value['key']); this.loadData(); }); this.datatableActions = [ @@ -169,7 +169,7 @@ export class ObjectVersionDatatablePageComponent implements OnInit { this.objects = []; this.pageStatus = PageStatus.loading; this.subscriptions.add( - this.s3BucketService.listObjectVersions(this.bid, this.prefix).subscribe({ + this.s3BucketService.listObjectVersions(this.bid, this.key, true).subscribe({ next: (objects: S3ObjectVersionList) => { this.objects = [...this.objects, ...objects]; }, diff --git a/src/frontend/src/app/pages/user/user-pages-routing.module.ts b/src/frontend/src/app/pages/user/user-pages-routing.module.ts index 26d50b6b..3ff1ade1 100644 --- a/src/frontend/src/app/pages/user/user-pages-routing.module.ts +++ b/src/frontend/src/app/pages/user/user-pages-routing.module.ts @@ -54,9 +54,9 @@ const routes: Routes = [ component: ObjectDatatablePageComponent }, { - path: 'versions/:prefix', + path: 'versions/:key', data: { - subTitle: '{{ bid }}/{{ prefix | decodeUriComponent }} - Versions', + subTitle: '{{ bid }}/{{ key | decodeUriComponent }} - Versions', title: TEXT('Object:'), url: '../..' }, diff --git a/src/frontend/src/app/shared/services/api/s3-bucket.service.ts b/src/frontend/src/app/shared/services/api/s3-bucket.service.ts index 030b9d47..3f3c0034 100644 --- a/src/frontend/src/app/shared/services/api/s3-bucket.service.ts +++ b/src/frontend/src/app/shared/services/api/s3-bucket.service.ts @@ -525,6 +525,8 @@ export class S3BucketService { * @param bucket The name of the bucket. * @param prefix Limits the response to objects with keys that begin * with the specified prefix. + * @param strict if `true`, then only the objects whose key corresponds + * to the specified prefix are returned. Defaults to `false`. * @param credentials The AWS credentials to sign requests with. * Defaults to the credentials of the currently logged-in user. * @@ -534,12 +536,14 @@ export class S3BucketService { public listObjectVersions( bucket: S3BucketName, prefix?: S3Prefix, + strict?: boolean, credentials?: Credentials ): Observable { credentials = credentials ?? this.authSessionService.getCredentials(); const body: Record = { /* eslint-disable @typescript-eslint/naming-convention */ - Delimiter: this.delimiter + Delimiter: this.delimiter, + Strict: strict /* eslint-enable @typescript-eslint/naming-convention */ }; if (_.isString(prefix)) { From 83c4fe728997c273cbfcc5ce1015dd43c65d8bb5 Mon Sep 17 00:00:00 2001 From: Volker Theile Date: Fri, 24 Nov 2023 08:56:34 +0100 Subject: [PATCH 2/2] Remove exception handling in functions Error handling will be done in the upper layer. Signed-off-by: Volker Theile --- src/backend/api/objects.py | 182 +++++++++--------- .../tests/unit/api/test_api_objects.py | 9 +- 2 files changed, 95 insertions(+), 96 deletions(-) diff --git a/src/backend/api/objects.py b/src/backend/api/objects.py index e2432f1a..dfb204f6 100644 --- a/src/backend/api/objects.py +++ b/src/backend/api/objects.py @@ -138,14 +138,14 @@ async def stream_response(self, send: Send) -> None: @router.post( "/{bucket}", - response_model=Optional[List[Object]], + response_model=List[Object], responses=s3gw_client_responses(), ) async def list_objects( conn: S3GWClientDep, bucket: str, params: ListObjectsRequest = ListObjectsRequest(), -) -> Optional[List[Object]]: +) -> List[Object]: """ Note that this is a POST request instead of a usual GET request because the parameters specified in `ListObjectsRequest` need to @@ -160,55 +160,52 @@ async def list_objects( https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/list_objects_v2.html """ async with conn.conn() as s3: - try: - res: List[Object] = [] - continuation_token: str = "" - while True: - s3_res: ListObjectsV2OutputTypeDef = await s3.list_objects_v2( - Bucket=bucket, - Prefix=params.Prefix, - Delimiter=params.Delimiter, - ContinuationToken=continuation_token, - ) - content: ObjectTypeDef - for content in s3_res.get("Contents", []): - res.append( - parse_obj_as( - Object, - { - "Name": split_key(content["Key"]).pop(), - "Type": "OBJECT", - **content, - }, - ) + res: List[Object] = [] + continuation_token: str = "" + while True: + s3_res: ListObjectsV2OutputTypeDef = await s3.list_objects_v2( + Bucket=bucket, + Prefix=params.Prefix, + Delimiter=params.Delimiter, + ContinuationToken=continuation_token, + ) + content: ObjectTypeDef + for content in s3_res.get("Contents", []): + res.append( + parse_obj_as( + Object, + { + "Name": split_key(content["Key"]).pop(), + "Type": "OBJECT", + **content, + }, ) - cp: CommonPrefixTypeDef - for cp in s3_res.get("CommonPrefixes", []): - res.append( - Object( - Key=build_key(cp["Prefix"]), - Name=split_key(cp["Prefix"]).pop(), - Type="FOLDER", - ) + ) + cp: CommonPrefixTypeDef + for cp in s3_res.get("CommonPrefixes", []): + res.append( + Object( + Key=build_key(cp["Prefix"]), + Name=split_key(cp["Prefix"]).pop(), + Type="FOLDER", ) - if not s3_res.get("IsTruncated", False): - break - continuation_token = s3_res["NextContinuationToken"] - except s3.exceptions.ClientError: - return None + ) + if not s3_res.get("IsTruncated", False): + break + continuation_token = s3_res["NextContinuationToken"] return res @router.post( "/{bucket}/versions", - response_model=Optional[List[ObjectVersion]], + response_model=List[ObjectVersion], responses=s3gw_client_responses(), ) async def list_object_versions( conn: S3GWClientDep, bucket: str, params: ListObjectVersionsRequest = ListObjectVersionsRequest(), -) -> Optional[List[ObjectVersion]]: +) -> List[ObjectVersion]: """ Note that this is a POST request instead of a usual GET request because the parameters specified in `ListObjectVersionsRequest` @@ -219,60 +216,57 @@ async def list_object_versions( https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/list_object_versions.html """ async with conn.conn() as s3: - try: - res: List[ObjectVersion] = [] - key_marker: str = "" - while True: - s3_res: ListObjectVersionsOutputTypeDef = ( - await s3.list_object_versions( - Bucket=bucket, - Prefix=params.Prefix, - Delimiter=params.Delimiter, - KeyMarker=key_marker, - ) + res: List[ObjectVersion] = [] + key_marker: str = "" + while True: + s3_res: ListObjectVersionsOutputTypeDef = ( + await s3.list_object_versions( + Bucket=bucket, + Prefix=params.Prefix, + Delimiter=params.Delimiter, + KeyMarker=key_marker, ) - version: ObjectVersionTypeDef - for version in s3_res.get("Versions", []): - res.append( - parse_obj_as( - ObjectVersion, - { - "Name": split_key(version["Key"]).pop(), - "Type": "OBJECT", - "IsDeleted": False, - **version, - }, - ) + ) + version: ObjectVersionTypeDef + for version in s3_res.get("Versions", []): + res.append( + parse_obj_as( + ObjectVersion, + { + "Name": split_key(version["Key"]).pop(), + "Type": "OBJECT", + "IsDeleted": False, + **version, + }, ) - cp: CommonPrefixTypeDef - for cp in s3_res.get("CommonPrefixes", []): - res.append( - ObjectVersion( - Key=build_key(cp["Prefix"]), - Name=split_key(cp["Prefix"]).pop(), - Type="FOLDER", - IsDeleted=False, - IsLatest=True, - ) + ) + cp: CommonPrefixTypeDef + for cp in s3_res.get("CommonPrefixes", []): + res.append( + ObjectVersion( + Key=build_key(cp["Prefix"]), + Name=split_key(cp["Prefix"]).pop(), + Type="FOLDER", + IsDeleted=False, + IsLatest=True, ) - dm: DeleteMarkerEntryTypeDef - for dm in s3_res.get("DeleteMarkers", []): - res.append( - ObjectVersion.parse_obj( - { - "Name": split_key(dm["Key"]).pop(), - "Type": "OBJECT", - "Size": 0, - "IsDeleted": True, - **dm, - } - ) + ) + dm: DeleteMarkerEntryTypeDef + for dm in s3_res.get("DeleteMarkers", []): + res.append( + ObjectVersion.parse_obj( + { + "Name": split_key(dm["Key"]).pop(), + "Type": "OBJECT", + "Size": 0, + "IsDeleted": True, + **dm, + } ) - if not s3_res.get("IsTruncated", False): - break - key_marker = s3_res["NextKeyMarker"] - except s3.exceptions.ClientError: - return None + ) + if not s3_res.get("IsTruncated", False): + break + key_marker = s3_res["NextKeyMarker"] if params.Strict: # Return only that object versions that exactly match the given @@ -558,14 +552,14 @@ async def restore_object( https://www.middlewareinventory.com/blog/recover-s3/ """ # Remove existing deletion markers. - api_res: Optional[List[ObjectVersion]] = await list_object_versions( + api_res: List[ObjectVersion] = await list_object_versions( conn, bucket, ListObjectVersionsRequest(Prefix=params.Key, Strict=True), ) del_objects: List[ObjectIdentifierTypeDef] = [ parse_obj_as(ObjectIdentifierTypeDef, obj) - for obj in api_res or [] + for obj in api_res if obj.IsDeleted ] if del_objects: @@ -603,14 +597,14 @@ async def delete_object( """ async def collect_objects() -> List[ObjectIdentifierTypeDef]: - api_res: Optional[List[ObjectVersion]] = await list_object_versions( + api_res: List[ObjectVersion] = await list_object_versions( conn, bucket, ListObjectVersionsRequest(Prefix=params.Key, Strict=True), ) return [ parse_obj_as(ObjectIdentifierTypeDef, obj) - for obj in api_res or [] + for obj in api_res if obj.Type == "OBJECT" ] @@ -641,7 +635,7 @@ async def delete_object_by_prefix( """ async def collect_objects(prefix: str) -> List[ObjectIdentifierTypeDef]: - api_res: Optional[List[ObjectVersion]] = await list_object_versions( + api_res: List[ObjectVersion] = await list_object_versions( conn, bucket, ListObjectVersionsRequest( @@ -650,7 +644,7 @@ async def collect_objects(prefix: str) -> List[ObjectIdentifierTypeDef]: ) obj: ObjectVersion res_objects: List[ObjectIdentifierTypeDef] = [] - for obj in api_res or []: + for obj in api_res: if not (params.AllVersions or (obj.IsLatest and not obj.IsDeleted)): continue if obj.Type == "OBJECT": diff --git a/src/backend/tests/unit/api/test_api_objects.py b/src/backend/tests/unit/api/test_api_objects.py index dedf628b..1bfd8f9c 100644 --- a/src/backend/tests/unit/api/test_api_objects.py +++ b/src/backend/tests/unit/api/test_api_objects.py @@ -260,8 +260,13 @@ async def test_get_object_list_truncated( async def test_get_object_list_failure( s3_client: S3GWClient, ) -> None: - res = await objects.list_objects(s3_client, bucket="not-exists") - assert res is None + with pytest.raises(HTTPException) as e: + await objects.list_objects(s3_client, bucket="not-exists") + assert e.value.status_code == 404 + assert e.value.detail in [ + "No such bucket", + "The specified bucket does not exist", + ] @pytest.mark.anyio