From e2633b02f3c702959ae0eb4ace92815c844c8444 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 3 Oct 2020 14:14:07 +0100 Subject: [PATCH 01/11] s3 logs prototype --- repo2docker/app.py | 45 +++++++++++++++++++++++++++++++++++++++++++-- setup.py | 1 + 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 937e74793..4701fc101 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -11,6 +11,7 @@ import sys import logging import os +import boto3 import getpass import shutil import tempfile @@ -326,6 +327,12 @@ def _user_name_default(self): config=True, ) + s3_logs_endpoint = Unicode("", help="S3 endpoint", config=True) + s3_logs_access_key = Unicode("", help="S3 access key ", config=True) + s3_logs_secret_key = Unicode("", help="S3 secret key", config=True) + s3_logs_bucket = Unicode("", help="S3 bucket", config=True) + s3_logs_region = Unicode("", help="S3 region", config=True) + all_ports = Bool( False, help=""" @@ -625,7 +632,7 @@ def find_image(self): return True return False - def build(self): + def build(self, logfile=None): """ Build docker image """ @@ -714,6 +721,8 @@ def build(self): bp.__class__.__name__, extra=dict(phase="building"), ) + if logfile: + logfile.write("Using %s builder\n" % bp.__class__.__name__) for l in picked_buildpack.build( docker_client, @@ -725,8 +734,12 @@ def build(self): ): if "stream" in l: self.log.info(l["stream"], extra=dict(phase="building")) + if logfile: + logfile.write(l["stream"]) elif "error" in l: self.log.info(l["error"], extra=dict(phase="failure")) + if logfile: + logfile.write(l["error"]) raise docker.errors.BuildError(l["error"], build_log="") elif "status" in l: self.log.info( @@ -741,7 +754,35 @@ def build(self): shutil.rmtree(checkout_path, ignore_errors=True) def start(self): - self.build() + logfile = None + if ( + self.s3_logs_endpoint + and self.s3_logs_access_key + and self.s3_logs_secret_key + and self.s3_logs_bucket + ): + logfile = tempfile.NamedTemporaryFile("w", delete=False) + try: + self.build(logfile=logfile) + finally: + if logfile: + logfile.close() + if os.stat(logfile.name).st_size: + dest = f"buildlogs/{self.output_image_spec}/repo2docker.log" + self.log.info(f"Uploading log to {self.s3_logs_bucket}/{dest}") + s3 = boto3.resource( + "s3", + endpoint_url=self.s3_logs_endpoint, + aws_access_key_id=self.s3_logs_access_key, + aws_secret_access_key=self.s3_logs_secret_key, + config=boto3.session.Config(signature_version="s3v4"), + region_name=self.s3_logs_region, + ) + s3.Bucket(self.s3_logs_bucket).upload_file( + logfile.name, + dest, + ExtraArgs={"ContentType": "text/plain"}, + ) if self.push: self.push_image() diff --git a/setup.py b/setup.py index 5084b486c..9de162d56 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ def get_identifier(json): name="jupyter-repo2docker", version=versioneer.get_version(), install_requires=[ + "boto3", "docker", "traitlets", "python-json-logger", From fff010bc797e46686f7c576ba477bea9b3aec662 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Thu, 8 Oct 2020 22:10:42 +0000 Subject: [PATCH 02/11] Remove ANSI terminal escapes from logs --- repo2docker/app.py | 4 ++-- repo2docker/utils.py | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 4701fc101..e58d4115a 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -40,7 +40,7 @@ RBuildPack, ) from . import contentproviders -from .utils import ByteSpecification, chdir +from .utils import ByteSpecification, chdir, remove_ansi_escape class Repo2Docker(Application): @@ -735,7 +735,7 @@ def build(self, logfile=None): if "stream" in l: self.log.info(l["stream"], extra=dict(phase="building")) if logfile: - logfile.write(l["stream"]) + logfile.write(remove_ansi_escape(l["stream"])) elif "error" in l: self.log.info(l["error"], extra=dict(phase="failure")) if logfile: diff --git a/repo2docker/utils.py b/repo2docker/utils.py index d6b4c97ac..c2e4c7632 100644 --- a/repo2docker/utils.py +++ b/repo2docker/utils.py @@ -508,3 +508,10 @@ def is_local_pip_requirement(line): return True return False + + +def remove_ansi_escape(s): + """Remove all ANSI escape codes from a string + https://superuser.com/a/380778 + """ + return re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", s) From 83d0f287b8d3b78c6698b42b0500afa78a5bd5e5 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 9 Oct 2020 20:27:19 +0000 Subject: [PATCH 03/11] Move S3 log upload into a Configurable class --- repo2docker/app.py | 66 ++++++++-------------- repo2docker/logstore.py | 122 ++++++++++++++++++++++++++++++++++++++++ repo2docker/utils.py | 7 --- setup.py | 4 +- 4 files changed, 148 insertions(+), 51 deletions(-) create mode 100644 repo2docker/logstore.py diff --git a/repo2docker/app.py b/repo2docker/app.py index e58d4115a..9196321ca 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -11,7 +11,6 @@ import sys import logging import os -import boto3 import getpass import shutil import tempfile @@ -24,7 +23,7 @@ import escapism from pythonjsonlogger import jsonlogger -from traitlets import Any, Dict, Int, List, Unicode, Bool, default +from traitlets import Any, Dict, Instance, Int, List, Type, Unicode, Bool, default from traitlets.config import Application from . import __version__ @@ -40,7 +39,8 @@ RBuildPack, ) from . import contentproviders -from .utils import ByteSpecification, chdir, remove_ansi_escape +from .logstore import LogStore +from .utils import ByteSpecification, chdir class Repo2Docker(Application): @@ -327,12 +327,6 @@ def _user_name_default(self): config=True, ) - s3_logs_endpoint = Unicode("", help="S3 endpoint", config=True) - s3_logs_access_key = Unicode("", help="S3 access key ", config=True) - s3_logs_secret_key = Unicode("", help="S3 secret key", config=True) - s3_logs_bucket = Unicode("", help="S3 bucket", config=True) - s3_logs_region = Unicode("", help="S3 region", config=True) - all_ports = Bool( False, help=""" @@ -376,6 +370,14 @@ def _user_name_default(self): config=True, ) + logstore = Type(LogStore, help="Log store for build logs", config=True) + + _logstore = Instance(LogStore) + + @default("_logstore") + def _default_logstore(self): + return self.logstore(parent=self) + def fetch(self, url, ref, checkout_path): """Fetch the contents of `url` and place it in `checkout_path`. @@ -632,7 +634,7 @@ def find_image(self): return True return False - def build(self, logfile=None): + def build(self): """ Build docker image """ @@ -721,8 +723,7 @@ def build(self, logfile=None): bp.__class__.__name__, extra=dict(phase="building"), ) - if logfile: - logfile.write("Using %s builder\n" % bp.__class__.__name__) + self._logstore.write("Using %s builder\n" % bp.__class__.__name__) for l in picked_buildpack.build( docker_client, @@ -734,12 +735,10 @@ def build(self, logfile=None): ): if "stream" in l: self.log.info(l["stream"], extra=dict(phase="building")) - if logfile: - logfile.write(remove_ansi_escape(l["stream"])) + self._logstore.write(l["stream"]) elif "error" in l: self.log.info(l["error"], extra=dict(phase="failure")) - if logfile: - logfile.write(l["error"]) + self._logstore.write(l["error"]) raise docker.errors.BuildError(l["error"], build_log="") elif "status" in l: self.log.info( @@ -754,35 +753,16 @@ def build(self, logfile=None): shutil.rmtree(checkout_path, ignore_errors=True) def start(self): - logfile = None - if ( - self.s3_logs_endpoint - and self.s3_logs_access_key - and self.s3_logs_secret_key - and self.s3_logs_bucket - ): - logfile = tempfile.NamedTemporaryFile("w", delete=False) + self._logstore.logname = f"{self.output_image_spec}/repodocker.log" try: - self.build(logfile=logfile) + self.build() finally: - if logfile: - logfile.close() - if os.stat(logfile.name).st_size: - dest = f"buildlogs/{self.output_image_spec}/repo2docker.log" - self.log.info(f"Uploading log to {self.s3_logs_bucket}/{dest}") - s3 = boto3.resource( - "s3", - endpoint_url=self.s3_logs_endpoint, - aws_access_key_id=self.s3_logs_access_key, - aws_secret_access_key=self.s3_logs_secret_key, - config=boto3.session.Config(signature_version="s3v4"), - region_name=self.s3_logs_region, - ) - s3.Bucket(self.s3_logs_bucket).upload_file( - logfile.name, - dest, - ExtraArgs={"ContentType": "text/plain"}, - ) + try: + r = self._logstore.close() + if r: + self.log.info(f"Log path: {r}") + except Exception as e: + self.log.error("Failed to save log: {}".format(e)) if self.push: self.push_image() diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py new file mode 100644 index 000000000..9ae8ed658 --- /dev/null +++ b/repo2docker/logstore.py @@ -0,0 +1,122 @@ +import logging +import os +from tempfile import NamedTemporaryFile +import re +from traitlets import Any, Unicode, default +from traitlets.config import LoggingConfigurable + +try: + import boto3 + from botocore import UNSIGNED + from botocore.config import Config + + S3_ENABLED = True +except ImportError: + S3_ENABLED = False + pass + + +"""Match all ANSI escape codes https://superuser.com/a/380778""" +ansi_escape_regex = re.compile(r"\x1b\[[0-9;]*[a-zA-Z]") + + +class LogStore(LoggingConfigurable): + """Abstract interface for a class that stores a build log. + This default implementation does nothing.""" + + logname = Unicode("", help="The name and/or path of the log", config=True) + + def write(self, s): + """Write to the log""" + pass + + def close(self): + """Finish logging. Implementations may save or copy the log. + May return a path to the saved log""" + pass + + +class S3LogStore(LogStore): + """Store a build log and upload to a S3 bucket on close""" + + # Connection details + endpoint = Unicode(help="S3 endpoint", config=True) + access_key = Unicode(help="S3 access key ", config=True) + secret_key = Unicode(help="S3 secret key", config=True) + session_token = Unicode("", help="S3 session token (optional)", config=True) + region = Unicode("", help="S3 region (optional)", config=True) + + # Where to store the log + bucket = Unicode(help="S3 bucket", config=True) + keyprefix = Unicode("", help="Prefix log path with this", config=True) + acl = Unicode( + "public-read", help="ACL for the object, default public-read", config=True + ) + + _logfile = Any(allow_none=True) + + @default("_logfile") + def _default_logfile(self): + return NamedTemporaryFile("w", delete=False) + + def __init__(self, **kwargs): + if not S3_ENABLED: + raise RuntimeError("S3LogStore requires the boto3 library") + super().__init__(**kwargs) + self.log = logging.getLogger("repo2docker") + + def _s3_credentials(self): + creds = dict( + endpoint_url=self.endpoint, + aws_access_key_id=self.access_key, + aws_secret_access_key=self.secret_key, + region_name=self.region, + ) + if self.session_token: + creds["aws_session_token"] = self.session_token + return creds + + def write(self, s): + """Write a log, newlines are not automatically added, + removes ANSI terminal escape codes""" + cleaned = ansi_escape_regex.sub("", str(s)) + self._logfile.write(cleaned) + + def close(self): + """Upload the logfile to S3""" + self._logfile.close() + if not os.stat(self._logfile.name).st_size: + # Empty log means image already exists so nothing was built + return + dest = f"{self.keyprefix}{self.logname}" + self.log.debug(f"Uploading log to {self.endpoint} key:{self.bucket}/{dest}") + s3 = boto3.resource( + "s3", + config=boto3.session.Config(signature_version="s3v4"), + **self._s3_credentials(), + ) + s3.Bucket(self.bucket).upload_file( + self._logfile.name, + dest, + ExtraArgs={"ContentType": "text/plain", "ACL": self.acl}, + ) + os.remove(self._logfile.name) + return self._get_url() + + def _get_url(self): + # For simple S3 servers you can easily build the canonical URL. + # For AWS S3 this can be more complicated as it depends on your region. + # boto3 doesn't have a way to just get the public URL, so instead we create a pre-signed + # URL but discard the parameters since the object is public. + s3unsigned = boto3.client( + "s3", config=Config(UNSIGNED), **self._s3_credentials() + ) + link = s3unsigned.generate_presigned_url( + "get_object", + ExpiresIn=0, + Params={ + "Bucket": self.bucket, + "Key": f"{self.keyprefix}{self.logname}", + }, + ) + return link.split("?", 1)[0] diff --git a/repo2docker/utils.py b/repo2docker/utils.py index c2e4c7632..d6b4c97ac 100644 --- a/repo2docker/utils.py +++ b/repo2docker/utils.py @@ -508,10 +508,3 @@ def is_local_pip_requirement(line): return True return False - - -def remove_ansi_escape(s): - """Remove all ANSI escape codes from a string - https://superuser.com/a/380778 - """ - return re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", s) diff --git a/setup.py b/setup.py index 9de162d56..08e9f2476 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,6 @@ def get_identifier(json): name="jupyter-repo2docker", version=versioneer.get_version(), install_requires=[ - "boto3", "docker", "traitlets", "python-json-logger", @@ -57,6 +56,9 @@ def get_identifier(json): "toml", "semver", ], + extras_require={ + "s3log": ["boto3"], + }, python_requires=">=3.6", author="Project Jupyter Contributors", author_email="jupyter@googlegroups.com", From 52c732b37031dab165393be327bdbc0fb5397a00 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 11:20:59 +0000 Subject: [PATCH 04/11] Remove S3LogStore.close return URL This adds unnecessary complexity since it depends on the S3 server, so rely on the user to know how to get the URL --- repo2docker/app.py | 2 -- repo2docker/logstore.py | 28 ++++------------------------ 2 files changed, 4 insertions(+), 26 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 9196321ca..6c32d4f93 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -759,8 +759,6 @@ def start(self): finally: try: r = self._logstore.close() - if r: - self.log.info(f"Log path: {r}") except Exception as e: self.log.error("Failed to save log: {}".format(e)) diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index 9ae8ed658..a8387f137 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -7,8 +7,6 @@ try: import boto3 - from botocore import UNSIGNED - from botocore.config import Config S3_ENABLED = True except ImportError: @@ -31,8 +29,7 @@ def write(self, s): pass def close(self): - """Finish logging. Implementations may save or copy the log. - May return a path to the saved log""" + """Finish logging. Implementations may save or copy the log.""" pass @@ -89,7 +86,9 @@ def close(self): # Empty log means image already exists so nothing was built return dest = f"{self.keyprefix}{self.logname}" - self.log.debug(f"Uploading log to {self.endpoint} key:{self.bucket}/{dest}") + self.log.info( + f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{dest}" + ) s3 = boto3.resource( "s3", config=boto3.session.Config(signature_version="s3v4"), @@ -101,22 +100,3 @@ def close(self): ExtraArgs={"ContentType": "text/plain", "ACL": self.acl}, ) os.remove(self._logfile.name) - return self._get_url() - - def _get_url(self): - # For simple S3 servers you can easily build the canonical URL. - # For AWS S3 this can be more complicated as it depends on your region. - # boto3 doesn't have a way to just get the public URL, so instead we create a pre-signed - # URL but discard the parameters since the object is public. - s3unsigned = boto3.client( - "s3", config=Config(UNSIGNED), **self._s3_credentials() - ) - link = s3unsigned.generate_presigned_url( - "get_object", - ExpiresIn=0, - Params={ - "Bucket": self.bucket, - "Key": f"{self.keyprefix}{self.logname}", - }, - ) - return link.split("?", 1)[0] From d69ca94493949e9b5c67ca9bfbe2efb7e742d97b Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 11:09:18 +0000 Subject: [PATCH 05/11] Add mock unittest for S3LogStore Add boto3 to test dependencies --- Pipfile | 1 + dev-requirements.txt | 1 + tests/unit/test_logstore.py | 57 +++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 tests/unit/test_logstore.py diff --git a/Pipfile b/Pipfile index b4d56fdf2..169957fbc 100644 --- a/Pipfile +++ b/Pipfile @@ -3,6 +3,7 @@ # update these accordingly [dev-packages] +boto3 = "*" pytest=">=4.6" wheel="*" pytest-cov="*" diff --git a/dev-requirements.txt b/dev-requirements.txt index bcecb7ba7..9cb4d15bf 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,6 @@ # Note that there is also a Pipfile for this project if you are updating this # file do not forget to update the Pipfile accordingly +boto3 pyyaml pytest>=4.6 wheel diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py new file mode 100644 index 000000000..fa1d23816 --- /dev/null +++ b/tests/unit/test_logstore.py @@ -0,0 +1,57 @@ +""" +Test S3LogStore +""" +# botocore includes a stub +# https://botocore.amazonaws.com/v1/documentation/api/latest/reference/stubber.html +# but it doesn't work with upload_file so use mock instead +# https://sgillies.net/2017/10/19/mock-is-magic.html +from unittest.mock import patch +from repo2docker import logstore + + +@patch("repo2docker.logstore.boto3") +def test_s3logstore_upload(boto3): + store = logstore.S3LogStore( + endpoint="http://localhost:9000", + access_key="access", + secret_key="secret", + bucket="bucket", + keyprefix="prefix/", + logname="test/build.log", + ) + + store.write("hello\n") + r = store.close() + + boto3.resource.assert_called_with( + "s3", + config=boto3.session.Config(signature_version="s3v4"), + endpoint_url="http://localhost:9000", + aws_access_key_id="access", + aws_secret_access_key="secret", + region_name="", + ) + boto3.resource().Bucket.assert_called_with("bucket") + boto3.resource().Bucket().upload_file.assert_called_with( + store._logfile.name, + "prefix/test/build.log", + ExtraArgs={"ContentType": "text/plain", "ACL": "public-read"}, + ) + + +@patch("repo2docker.logstore.boto3") +def test_s3logstore_empty(boto3): + store = logstore.S3LogStore( + endpoint="http://localhost:9000", + access_key="access", + secret_key="secret", + bucket="bucket", + keyprefix="prefix/", + logname="test/build.log", + ) + + r = store.close() + + assert not boto3.resource.called + assert not boto3.resource().Bucket.called + assert not boto3.resource().Bucket().upload_file.called From c3991e8c71f3f5504187cac8f070cb9fc0bd200c Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 11:42:37 +0000 Subject: [PATCH 06/11] S3LogStore set charset=utf-8 --- repo2docker/logstore.py | 2 +- tests/unit/test_logstore.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index a8387f137..69ef54922 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -97,6 +97,6 @@ def close(self): s3.Bucket(self.bucket).upload_file( self._logfile.name, dest, - ExtraArgs={"ContentType": "text/plain", "ACL": self.acl}, + ExtraArgs={"ContentType": "text/plain; charset=utf-8", "ACL": self.acl}, ) os.remove(self._logfile.name) diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py index fa1d23816..0ed37136b 100644 --- a/tests/unit/test_logstore.py +++ b/tests/unit/test_logstore.py @@ -35,7 +35,7 @@ def test_s3logstore_upload(boto3): boto3.resource().Bucket().upload_file.assert_called_with( store._logfile.name, "prefix/test/build.log", - ExtraArgs={"ContentType": "text/plain", "ACL": "public-read"}, + ExtraArgs={"ContentType": "text/plain; charset=utf-8", "ACL": "public-read"}, ) From a028dc2807fb6d298be566070f01dbd3f9eed3f8 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 12:01:59 +0000 Subject: [PATCH 07/11] S3LogStore add metadata --- repo2docker/logstore.py | 19 ++++++++++++++++--- tests/unit/test_logstore.py | 7 ++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index 69ef54922..e26812811 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -2,7 +2,7 @@ import os from tempfile import NamedTemporaryFile import re -from traitlets import Any, Unicode, default +from traitlets import Any, Dict, Unicode, default from traitlets.config import LoggingConfigurable try: @@ -23,6 +23,11 @@ class LogStore(LoggingConfigurable): This default implementation does nothing.""" logname = Unicode("", help="The name and/or path of the log", config=True) + metadata = Dict( + {}, + help="Metadata to be associated with the log file", + config=True, + ) def write(self, s): """Write to the log""" @@ -34,7 +39,11 @@ def close(self): class S3LogStore(LogStore): - """Store a build log and upload to a S3 bucket on close""" + """Store a build log and upload to a S3 bucket on close + + If metadata is provided keys must be valid HTML headers, and values must be strings + https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#object-metadata + """ # Connection details endpoint = Unicode(help="S3 endpoint", config=True) @@ -97,6 +106,10 @@ def close(self): s3.Bucket(self.bucket).upload_file( self._logfile.name, dest, - ExtraArgs={"ContentType": "text/plain; charset=utf-8", "ACL": self.acl}, + ExtraArgs={ + "ContentType": "text/plain; charset=utf-8", + "ACL": self.acl, + "Metadata": self.metadata, + }, ) os.remove(self._logfile.name) diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py index 0ed37136b..d6f3ee0d0 100644 --- a/tests/unit/test_logstore.py +++ b/tests/unit/test_logstore.py @@ -18,6 +18,7 @@ def test_s3logstore_upload(boto3): bucket="bucket", keyprefix="prefix/", logname="test/build.log", + metadata={"test-key": "test value"}, ) store.write("hello\n") @@ -35,7 +36,11 @@ def test_s3logstore_upload(boto3): boto3.resource().Bucket().upload_file.assert_called_with( store._logfile.name, "prefix/test/build.log", - ExtraArgs={"ContentType": "text/plain; charset=utf-8", "ACL": "public-read"}, + ExtraArgs={ + "ContentType": "text/plain; charset=utf-8", + "ACL": "public-read", + "Metadata": {"test-key": "test value"}, + }, ) From 6d54148b1fe542b484897dd6bc04ce8637c4b408 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 12:02:11 +0000 Subject: [PATCH 08/11] S3LogStore fix logname --- repo2docker/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 6c32d4f93..80ced73a3 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -753,7 +753,7 @@ def build(self): shutil.rmtree(checkout_path, ignore_errors=True) def start(self): - self._logstore.logname = f"{self.output_image_spec}/repodocker.log" + self._logstore.logname = f"{self.output_image_spec}/repo2docker.log" try: self.build() finally: From 792bbe036d26ae5bb77720830601f1a08f0517d3 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sat, 10 Oct 2020 20:06:55 +0000 Subject: [PATCH 09/11] S3LogStore only log if non-empty, default key `repo2docker.log` (no image name). --- repo2docker/app.py | 2 +- repo2docker/logstore.py | 49 +++++++++++++++++++------------------ tests/unit/test_logstore.py | 10 ++++++-- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index 80ced73a3..cde6f727d 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -753,7 +753,7 @@ def build(self): shutil.rmtree(checkout_path, ignore_errors=True) def start(self): - self._logstore.logname = f"{self.output_image_spec}/repo2docker.log" + self._logstore.logname = "repo2docker.log" try: self.build() finally: diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index e26812811..dc57d8da5 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -2,7 +2,7 @@ import os from tempfile import NamedTemporaryFile import re -from traitlets import Any, Dict, Unicode, default +from traitlets import Any, Dict, Unicode from traitlets.config import LoggingConfigurable try: @@ -11,7 +11,6 @@ S3_ENABLED = True except ImportError: S3_ENABLED = False - pass """Match all ANSI escape codes https://superuser.com/a/380778""" @@ -61,10 +60,6 @@ class S3LogStore(LogStore): _logfile = Any(allow_none=True) - @default("_logfile") - def _default_logfile(self): - return NamedTemporaryFile("w", delete=False) - def __init__(self, **kwargs): if not S3_ENABLED: raise RuntimeError("S3LogStore requires the boto3 library") @@ -85,31 +80,37 @@ def _s3_credentials(self): def write(self, s): """Write a log, newlines are not automatically added, removes ANSI terminal escape codes""" + if s and not self._logfile: + self._logfile = NamedTemporaryFile("w", delete=False) cleaned = ansi_escape_regex.sub("", str(s)) self._logfile.write(cleaned) def close(self): """Upload the logfile to S3""" - self._logfile.close() - if not os.stat(self._logfile.name).st_size: - # Empty log means image already exists so nothing was built + if not self._logfile: + # No log means image already exists so nothing was built + self.log.debug("No log file") return + self._logfile.close() dest = f"{self.keyprefix}{self.logname}" self.log.info( f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{dest}" ) - s3 = boto3.resource( - "s3", - config=boto3.session.Config(signature_version="s3v4"), - **self._s3_credentials(), - ) - s3.Bucket(self.bucket).upload_file( - self._logfile.name, - dest, - ExtraArgs={ - "ContentType": "text/plain; charset=utf-8", - "ACL": self.acl, - "Metadata": self.metadata, - }, - ) - os.remove(self._logfile.name) + try: + s3 = boto3.resource( + "s3", + config=boto3.session.Config(signature_version="s3v4"), + **self._s3_credentials(), + ) + s3.Bucket(self.bucket).upload_file( + self._logfile.name, + dest, + ExtraArgs={ + "ContentType": "text/plain; charset=utf-8", + "ACL": self.acl, + "Metadata": self.metadata, + }, + ) + os.remove(self._logfile.name) + finally: + self._logfile = None diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py index d6f3ee0d0..0e405277f 100644 --- a/tests/unit/test_logstore.py +++ b/tests/unit/test_logstore.py @@ -5,12 +5,15 @@ # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/stubber.html # but it doesn't work with upload_file so use mock instead # https://sgillies.net/2017/10/19/mock-is-magic.html +import os +from tempfile import NamedTemporaryFile from unittest.mock import patch from repo2docker import logstore @patch("repo2docker.logstore.boto3") def test_s3logstore_upload(boto3): + tmp_logfile = NamedTemporaryFile("w", delete=False) store = logstore.S3LogStore( endpoint="http://localhost:9000", access_key="access", @@ -19,10 +22,12 @@ def test_s3logstore_upload(boto3): keyprefix="prefix/", logname="test/build.log", metadata={"test-key": "test value"}, + # Override for testing so we know the name of the tempfile + _logfile=tmp_logfile, ) store.write("hello\n") - r = store.close() + store.close() boto3.resource.assert_called_with( "s3", @@ -34,7 +39,7 @@ def test_s3logstore_upload(boto3): ) boto3.resource().Bucket.assert_called_with("bucket") boto3.resource().Bucket().upload_file.assert_called_with( - store._logfile.name, + tmp_logfile.name, "prefix/test/build.log", ExtraArgs={ "ContentType": "text/plain; charset=utf-8", @@ -42,6 +47,7 @@ def test_s3logstore_upload(boto3): "Metadata": {"test-key": "test value"}, }, ) + assert not os.path.exists(tmp_logfile.name) @patch("repo2docker.logstore.boto3") From cd9086324282f3b6e9d0a30465629c9cec9b88d1 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 16 Oct 2020 16:43:09 +0000 Subject: [PATCH 10/11] logstore: remove ACL AWS S3 ACLs are legacy: https://aws.amazon.com/blogs/security/iam-policies-and-bucket-policies-and-acls-oh-my-controlling-access-to-s3-resources/ --- repo2docker/logstore.py | 4 ---- tests/unit/test_logstore.py | 1 - 2 files changed, 5 deletions(-) diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index dc57d8da5..3f61aefcf 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -54,9 +54,6 @@ class S3LogStore(LogStore): # Where to store the log bucket = Unicode(help="S3 bucket", config=True) keyprefix = Unicode("", help="Prefix log path with this", config=True) - acl = Unicode( - "public-read", help="ACL for the object, default public-read", config=True - ) _logfile = Any(allow_none=True) @@ -107,7 +104,6 @@ def close(self): dest, ExtraArgs={ "ContentType": "text/plain; charset=utf-8", - "ACL": self.acl, "Metadata": self.metadata, }, ) diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py index 0e405277f..71ca2e3a4 100644 --- a/tests/unit/test_logstore.py +++ b/tests/unit/test_logstore.py @@ -43,7 +43,6 @@ def test_s3logstore_upload(boto3): "prefix/test/build.log", ExtraArgs={ "ContentType": "text/plain; charset=utf-8", - "ACL": "public-read", "Metadata": {"test-key": "test value"}, }, ) From 0d283570cc2bcfba658e76c8f108c6a374d3ccd5 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Fri, 16 Oct 2020 18:18:18 +0000 Subject: [PATCH 11/11] Move all log config into S3LogStore repo2docker knows nothing about the logstore other than it should copy the Docker build logs to it --- repo2docker/app.py | 1 - repo2docker/logstore.py | 48 ++++++++++++++++++++++++++++--------- tests/unit/test_logstore.py | 4 +--- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/repo2docker/app.py b/repo2docker/app.py index cde6f727d..b87d3ad17 100755 --- a/repo2docker/app.py +++ b/repo2docker/app.py @@ -753,7 +753,6 @@ def build(self): shutil.rmtree(checkout_path, ignore_errors=True) def start(self): - self._logstore.logname = "repo2docker.log" try: self.build() finally: diff --git a/repo2docker/logstore.py b/repo2docker/logstore.py index 3f61aefcf..6ee522c68 100644 --- a/repo2docker/logstore.py +++ b/repo2docker/logstore.py @@ -21,13 +21,6 @@ class LogStore(LoggingConfigurable): """Abstract interface for a class that stores a build log. This default implementation does nothing.""" - logname = Unicode("", help="The name and/or path of the log", config=True) - metadata = Dict( - {}, - help="Metadata to be associated with the log file", - config=True, - ) - def write(self, s): """Write to the log""" pass @@ -42,6 +35,32 @@ class S3LogStore(LogStore): If metadata is provided keys must be valid HTML headers, and values must be strings https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html#object-metadata + + Example bucket policy to allow public read of objects, but prevent listing + + { + "Version": "2012-10-17", + "Statement": [ + { + "Action": [ + "s3:GetObject" + ], + "Effect": "Allow", + "Principal": { + "AWS": [ + "*" + ] + }, + "Resource": [ + "arn:aws:s3:::mybinder/*" + ], + "Sid": "" + } + ] + } + + Source: https://gist.github.com/harshavardhana/400558963e4dfe3709623203222ed30c#granting-read-only-permission-to-an-anonymous-user + """ # Connection details @@ -53,7 +72,15 @@ class S3LogStore(LogStore): # Where to store the log bucket = Unicode(help="S3 bucket", config=True) - keyprefix = Unicode("", help="Prefix log path with this", config=True) + logname = Unicode( + "repo2docker.log", help="The name and/or path of the log", config=True + ) + + metadata = Dict( + {}, + help="Metadata to be associated with the log file", + config=True, + ) _logfile = Any(allow_none=True) @@ -89,9 +116,8 @@ def close(self): self.log.debug("No log file") return self._logfile.close() - dest = f"{self.keyprefix}{self.logname}" self.log.info( - f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{dest}" + f"Uploading log to {self.endpoint} bucket:{self.bucket} key:{self.logname}" ) try: s3 = boto3.resource( @@ -101,7 +127,7 @@ def close(self): ) s3.Bucket(self.bucket).upload_file( self._logfile.name, - dest, + self.logname, ExtraArgs={ "ContentType": "text/plain; charset=utf-8", "Metadata": self.metadata, diff --git a/tests/unit/test_logstore.py b/tests/unit/test_logstore.py index 71ca2e3a4..8c832c037 100644 --- a/tests/unit/test_logstore.py +++ b/tests/unit/test_logstore.py @@ -19,7 +19,6 @@ def test_s3logstore_upload(boto3): access_key="access", secret_key="secret", bucket="bucket", - keyprefix="prefix/", logname="test/build.log", metadata={"test-key": "test value"}, # Override for testing so we know the name of the tempfile @@ -40,7 +39,7 @@ def test_s3logstore_upload(boto3): boto3.resource().Bucket.assert_called_with("bucket") boto3.resource().Bucket().upload_file.assert_called_with( tmp_logfile.name, - "prefix/test/build.log", + "test/build.log", ExtraArgs={ "ContentType": "text/plain; charset=utf-8", "Metadata": {"test-key": "test value"}, @@ -56,7 +55,6 @@ def test_s3logstore_empty(boto3): access_key="access", secret_key="secret", bucket="bucket", - keyprefix="prefix/", logname="test/build.log", )