From 4941122ebdc1f4942aad9e38d251b06cdc8921be Mon Sep 17 00:00:00 2001 From: Mike Perez Date: Wed, 28 Aug 2024 18:43:38 -0700 Subject: [PATCH] Add s3 upload storage method Signed-off-by: Mike Perez --- README.rst | 12 +++++++++ chacra/controllers/binaries/archs.py | 38 +++++++++++++++++++++++++--- chacra/models/binaries.py | 30 ++++++++-------------- config/dev.py | 6 +++++ requirements.txt | 1 + 5 files changed, 63 insertions(+), 24 deletions(-) diff --git a/README.rst b/README.rst index 32c055c0..99af2706 100644 --- a/README.rst +++ b/README.rst @@ -41,6 +41,18 @@ the service as follows:: api_key = 'secret' +storage_method +^^^^^^^^^^^^^^ +The ``storage_method`` setting is required; it defines where the +binaries are stored. The two available values are ``local`` and +``s3``. + +bucket +^^^^^^ +The ``bucket`` setting is required if ``storage_method`` is set to +``s3``. It defines which S3 bucket the binaries are stored in. + + Self-discovery -------------- The API provides informational JSON at every step of the URL about what is diff --git a/chacra/controllers/binaries/archs.py b/chacra/controllers/binaries/archs.py index f2c63ec5..e925e146 100644 --- a/chacra/controllers/binaries/archs.py +++ b/chacra/controllers/binaries/archs.py @@ -1,11 +1,13 @@ import logging import os +import boto3 +from botocore.exceptions import ClientError import pecan from pecan import response from pecan.secure import secure from pecan import expose, abort, request from webob.static import FileIter -from chacra.models import Binary +from chacra.models.binaries import Binary, generate_checksum from chacra import models, util from chacra.controllers import error from chacra.controllers.util import repository_is_automatic @@ -26,6 +28,7 @@ def __init__(self, arch): self.distro_version = request.context['distro_version'] self.ref = request.context['ref'] self.sha1 = request.context['sha1'] + self.checksum = None request.context['arch'] = 
self.arch @expose(generic=True, template='json') @@ -89,7 +92,7 @@ def index_post(self): if request.POST.get('force', False) is False: error('/errors/invalid', 'resource already exists and "force" key was not used') - full_path = self.save_file(file_obj) + full_path, size = self.save_file(file_obj) if self.binary is None: path = full_path @@ -102,14 +105,17 @@ def index_post(self): self.binary = Binary( self.binary_name, self.project, arch=arch, distro=distro, distro_version=distro_version, - ref=ref, sha1=sha1, path=path, size=os.path.getsize(path) + ref=ref, sha1=sha1, path=path, size=size, + checksum=self.checksum ) else: self.binary.path = full_path + self.binary.checksum = self.checksum # check if this binary is interesting for other configured projects, # and if so, then mark those other repos so that they can be re-built self.mark_related_repos() + return dict() def mark_related_repos(self): @@ -175,8 +181,32 @@ def save_file(self, file_obj): for chunk in file_iterable: f.write(chunk) + size = os.path.getsize(destination) + self.checksum = generate_checksum(destination) + + if pecan.conf.storage_method == 's3': + bucket = pecan.conf.bucket + object_destination = os.path.relpath(destination, pecan.conf.binary_root) + + s3_client = boto3.client('s3') + try: + with open(destination, 'rb') as f: + s3_client.put_object(Body=f, + Bucket=bucket, + Key=object_destination, + ChecksumAlgorithm='sha256', + ChecksumSHA256=self.checksum + ) + except ClientError as e: + error('/errors/error/', 'file object upload to S3 failed with error %s' % e) + + # Remove the local file after S3 upload + os.remove(destination) + + destination = 's3://' + object_destination[1:] + # return the full path to the saved object: - return destination + return destination, size @expose() def _lookup(self, name, *remainder): diff --git a/chacra/models/binaries.py b/chacra/models/binaries.py index b8c58dd0..5f4e0de3 100644 --- a/chacra/models/binaries.py +++ b/chacra/models/binaries.py @@ -1,5 
def generate_checksum(binary, *, algorithm=None):
    """Return the hexadecimal checksum of the file at ``binary``.

    This is a plain module-level helper (it is no longer registered as a
    SQLAlchemy listener), so it takes only the path of the file to hash;
    the controller calls it as ``generate_checksum(destination)``.

    :param binary: filesystem path of the file to hash.
    :param algorithm: optional ``hashlib`` algorithm name (for example
        ``'sha256'``). When omitted, the algorithm follows
        ``pecan.conf.storage_method``: SHA256 for ``'s3'`` and SHA512 for
        anything else.
    :returns: the digest as a lowercase hexadecimal string.
    :raises OSError: if ``binary`` cannot be opened or read.
    :raises ValueError: if ``algorithm`` is not known to ``hashlib``.
    """
    if algorithm is None:
        # S3 requires SHA256
        if pecan.conf.storage_method == 's3':
            algorithm = 'sha256'
        else:
            algorithm = 'sha512'
    chsum = hashlib.new(algorithm)

    # Hash in fixed-size chunks so large binaries are never loaded into
    # memory in one piece.
    with open(binary, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b''):
            chsum.update(chunk)

    # NOTE(review): this is a *hex* digest. The S3 ``ChecksumSHA256`` request
    # parameter is documented as the base64-encoded digest -- confirm that the
    # upload code converts before passing this value to ``put_object``.
    return chsum.hexdigest()
Options are 's3' or 'local' +storage_method = 'local' + # location for storing uploaded binaries binary_root = '%(confdir)s/public' repos_root = '%(confdir)s/repos' distributions_root = '%(confdir)s/distributions' +# If storage method is s3, provide a bucket name +bucket = '' + # When True it will set the headers so that Nginx can serve the download # instead of Pecan. delegate_downloads = False diff --git a/requirements.txt b/requirements.txt index 42b61cde..63454df6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ alembic ipython python-statsd requests +boto3 importlib_metadata<=3.6; python_version<'3.8'