From b187df4435082fc60e36e5daca0bd6247d9570d9 Mon Sep 17 00:00:00 2001 From: Mike Perez Date: Wed, 28 Aug 2024 18:43:38 -0700 Subject: [PATCH] Add s3 upload storage method Signed-off-by: Mike Perez --- README.rst | 12 +++++++ chacra/controllers/binaries/archs.py | 50 ++++++++++++++++++++++++++-- chacra/models/binaries.py | 26 --------------- config/dev.py | 6 ++++ requirements.txt | 1 + 5 files changed, 66 insertions(+), 29 deletions(-) diff --git a/README.rst b/README.rst index 32c055c0..79868459 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,18 @@ the service as follows:: api_key = 'secret' +storage_method +^^^^^^^^^^^^^^ +The ``storage_method`` is a required configuration item; it defines where the +binaries should be stored. The two available method values are ``local`` and +``s3``. + +s3_bucket +^^^^^^^^^ +The ``s3_bucket`` is required if the ``storage_method`` configuration is set to +``s3``. This defines which bucket the binaries should be stored in. + + Self-discovery -------------- The API provides informational JSON at every step of the URL about what is diff --git a/chacra/controllers/binaries/archs.py b/chacra/controllers/binaries/archs.py index f2c63ec5..570f7da7 100644 --- a/chacra/controllers/binaries/archs.py +++ b/chacra/controllers/binaries/archs.py @@ -1,5 +1,8 @@ +import hashlib import logging import os +import boto3 +from botocore.exceptions import ClientError import pecan from pecan import response from pecan.secure import secure @@ -26,6 +29,7 @@ def __init__(self, arch): self.distro_version = request.context['distro_version'] self.ref = request.context['ref'] self.sha1 = request.context['sha1'] + self.checksum = None request.context['arch'] = self.arch @expose(generic=True, template='json') @@ -89,7 +93,7 @@ def index_post(self): if request.POST.get('force', False) is False: error('/errors/invalid', 'resource already exists and "force" key was not used') - full_path = self.save_file(file_obj) + full_path, size = 
self.save_file(file_obj) if self.binary is None: path = full_path @@ -102,14 +106,21 @@ def index_post(self): self.binary = Binary( self.binary_name, self.project, arch=arch, distro=distro, distro_version=distro_version, - ref=ref, sha1=sha1, path=path, size=os.path.getsize(path) + ref=ref, sha1=sha1, path=path, size=size, + checksum=self.checksum ) else: self.binary.path = full_path + self.binary.checksum = self.checksum # check if this binary is interesting for other configured projects, # and if so, then mark those other repos so that they can be re-built self.mark_related_repos() + + # Remove the local file after S3 upload + if pecan.conf.storage_method == 's3': + os.remove(full_path) + return dict() def mark_related_repos(self): @@ -175,8 +186,41 @@ def save_file(self, file_obj): for chunk in file_iterable: f.write(chunk) + self.checksum = self.generate_checksum(destination) + + if pecan.conf.storage_method == 's3': + bucket = pecan.conf.bucket + object_destination = os.path.relpath(destination, pecan.conf.binary_root) + + s3_client = boto3.client('s3') + try: + with open(destination, 'rb') as f: + s3_client.put_object(Body=f, + Bucket=bucket, + Key=object_destination, + ChecksumAlgorithm='sha256', + ChecksumSHA256=self.checksum + ) + except ClientError as e: + error('/errors/error/', 'file object upload to S3 failed with error %s' % e) + + size = os.path.getsize(destination) + # return the full path to the saved object: - return destination + return destination, size + + def generate_checksum(self, binary): + # S3 requires SHA256 + chsum = None + if pecan.conf.storage_method == 's3': + chsum = hashlib.sha256() + else: + chsum = hashlib.sha512() + + with open(binary, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + chsum.update(chunk) + return chsum.hexdigest() @expose() def _lookup(self, name, *remainder): diff --git a/chacra/models/binaries.py b/chacra/models/binaries.py index b8c58dd0..382d173f 100644 --- a/chacra/models/binaries.py +++ 
b/chacra/models/binaries.py @@ -168,27 +168,6 @@ def __json__(self): # Listeners - -def generate_checksum(mapper, connection, target): - try: - target.path - except AttributeError: - target.checksum = None - return - - # FIXME - # sometimes we can accept binaries without a path and that is probably something - # that should not happen. The core purpose of this binary is that it works with - # paths and files, this should be required. - if not target.path: - return - chsum = hashlib.sha512() - with open(target.path, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b''): - chsum.update(chunk) - target.checksum = chsum.hexdigest() - - def update_repo(mapper, connection, target): try: if target.repo.is_generic: @@ -206,11 +185,6 @@ def update_repo(mapper, connection, target): # triggered it because there is nothing we need to do pass -# listen for checksum changes -listen(Binary, 'before_insert', generate_checksum) -listen(Binary, 'before_update', generate_checksum) - - def add_timestamp_listeners(): # listen for timestamp modifications listen(Binary, 'before_insert', update_timestamp) diff --git a/config/dev.py b/config/dev.py index 917556d1..6c5b0425 100644 --- a/config/dev.py +++ b/config/dev.py @@ -63,11 +63,17 @@ 'encoding': 'utf-8' } +# Where to store the data. Options are 's3' or 'local' +storage_method = 'local' + # location for storing uploaded binaries binary_root = '%(confdir)s/public' repos_root = '%(confdir)s/repos' distributions_root = '%(confdir)s/distributions' +# If storage method is s3, provide a bucket name +bucket = '' + # When True it will set the headers so that Nginx can serve the download # instead of Pecan. delegate_downloads = False diff --git a/requirements.txt b/requirements.txt index 42b61cde..63454df6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ alembic ipython python-statsd requests +boto3 importlib_metadata<=3.6; python_version<'3.8'