diff --git a/binderhub/app.py b/binderhub/app.py index 4c382c75d..910b628ed 100755 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -307,6 +307,16 @@ def _valid_badge_base_url(self, proposal): config=True ) + @validate('image_prefix') + def _valid_image_prefix(self, proposal): + image_regex = re.compile(r'^([^\/:]+(?::[0-9]+)?\/)?' + + r'([_a-zA-Z0-9\-\.]+(?:\/[_a-zA-Z0-9\-\.]+)*)$') + if image_regex.match(proposal.value) is None: + raise AttributeError("'image_prefix' does not match the expected pattern "+ + "[[:]/]") + + return proposal.value + build_memory_request = ByteSpecification( 0, help=""" diff --git a/binderhub/builder.py b/binderhub/builder.py index 452725172..d7df69fa6 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -306,7 +306,7 @@ async def get(self, provider_prefix, _unescaped_spec): if self.settings['use_registry']: for _ in range(3): try: - image_manifest = await self.registry.get_image_manifest(*'/'.join(image_name.split('/')[-2:]).split(':', 1)) + image_manifest = await self.registry.get_image_manifest(image_name) image_found = bool(image_manifest) break except HTTPClientError: diff --git a/binderhub/health.py b/binderhub/health.py index ed1208866..ea568e077 100644 --- a/binderhub/health.py +++ b/binderhub/health.py @@ -146,9 +146,7 @@ async def check_docker_registry(self): # we are only interested in getting a response from the registry, we # don't care if the image actually exists or not image_name = self.settings["image_prefix"] + "some-image-name:12345" - await registry.get_image_manifest( - *'/'.join(image_name.split('/')[-2:]).split(':', 1) - ) + await registry.get_image_manifest(image_name) return True async def check_pod_quota(self): diff --git a/binderhub/registry.py b/binderhub/registry.py index a36f636d0..81999f7e0 100644 --- a/binderhub/registry.py +++ b/binderhub/registry.py @@ -185,8 +185,28 @@ def _default_password(self): base64.b64decode(b64_auth.encode("utf-8")).decode("utf-8").split(":", 1)[1] ) - async def get_image_manifest(self, image, tag): + @staticmethod + def parse_image_name(name): + """Parse a docker image string into (registry, image, tag)""" + # name could be "localhost:5000/foo:tag" + # or "gcr.io/project/subrepo/sub/sub/sub:tag" + # or "org/repo:tag" for implicit docker.io + registry, _, tagged_image = name.partition("/") + if tagged_image and (registry == "localhost" or any(c in registry for c in (".", ":"))): + # registry host is specified + pass + else: + # registry not given, use implicit default docker.io registry + registry = "docker.io" + tagged_image = name + image, _, tag = tagged_image.partition(":") + + return registry, image, tag + + async def get_image_manifest(self, name): client = httpclient.AsyncHTTPClient() + _, image, tag = DockerRegistry.parse_image_name(name) + url = "{}/v2/{}/manifests/{}".format(self.url, image, tag) # first, get a token to perform the manifest request if self.token_url: diff --git a/binderhub/tests/test_app.py b/binderhub/tests/test_app.py index cc5ad75e2..24d11236a 100644 --- a/binderhub/tests/test_app.py +++ b/binderhub/tests/test_app.py @@ -2,6 +2,10 @@ from subprocess import check_output import sys +import pytest + +from binderhub.app import BinderHub + def test_help(): check_output([sys.executable, '-m', 'binderhub', '-h']) @@ -9,3 +13,18 @@ def test_help(): def test_help_all(): check_output([sys.executable, '-m', 'binderhub', '--help-all']) +def test_image_prefix(): + b = BinderHub() + + prefixes = ["foo/bar", "foo-bar/baz", "foo/bar-", "localhost/foo", "localhost/foo/bar/baz", + "localhost:8080/foo/bar/baz", "127.0.0.1/foo", "127.0.0.1:5000/foo/b", + "f/o/o/b/a/r/b/a/z", "gcr.io/foo", "my.gcr.io:5000/foo", "foo_bar/baz-", + "foo_ba.r/baz-", "localhost:32000/someprefix-"] + for name in prefixes: + b.image_prefix = name + + wrong_prefixes = ["foo/bar-baz:", "foo/bar-baz:", "foo/bar-baz:10", "foo/bar/", "/foo/bar", + "http://localhost/foo/bar"] + for name in wrong_prefixes: + with pytest.raises(AttributeError): + b.image_prefix = name diff --git a/binderhub/tests/test_registry.py b/binderhub/tests/test_registry.py index 32b835ec2..e7f041b4f 100644 --- a/binderhub/tests/test_registry.py +++ b/binderhub/tests/test_registry.py @@ -113,7 +113,7 @@ async def test_get_image_manifest(tmpdir, request): [ (r"/token", MockTokenHandler, {"test_handle": test_handle}), ( - r"/v2/([^/]+)/manifests/([^/]+)", + r"/v2/([a-zA-Z0-9]+(?:/[a-zA-Z0-9]+)*)/manifests/([a-zA-Z0-9]+)", MockManifestHandler, {"test_handle": test_handle}, ), @@ -145,5 +145,9 @@ async def test_get_image_manifest(tmpdir, request): assert registry.token_url == url + "/token" assert registry.username == username assert registry.password == password - manifest = await registry.get_image_manifest("myimage", "abc123") - assert manifest == {"image": "myimage", "tag": "abc123"} + + names = ["myimage:abc123", "localhost/myimage:abc1234", "localhost:8080/myimage:abc3", + "192.168.1.1:8080/myimage:abc321", "192.168.1.1:8080/some/repo/myimage:abc3210"] + for name in names: + manifest = await registry.get_image_manifest(name) + assert manifest == {"image": name.split("/", 1)[-1].rsplit(":", 1)[0], "tag": name.rsplit(":", 1)[-1]} diff --git a/doc/zero-to-binderhub/setup-binderhub.rst b/doc/zero-to-binderhub/setup-binderhub.rst index de79f4453..ff62a7c5e 100644 --- a/doc/zero-to-binderhub/setup-binderhub.rst +++ b/doc/zero-to-binderhub/setup-binderhub.rst @@ -291,13 +291,18 @@ If you setup your own local registry using .. important:: - BinderHub assumes that `image_prefix` contains an extra level for the project name such that: `gcr.io//-name:tag`. - Hence, your `image_prefix` field should be set to: `your-registry.io//-`. - See `this issue `_ for more details. - - `` can be completely arbitrary and/or made-up. - For example, it could be the name you give your BinderHub. - Without this extra level, you may find that your BinderHub always rebuilds images instead of pulling them from the registry. + BinderHub validates that `image_prefix` matches the following pattern: `[/]`. + + is optional and defaults to `docker.io`. It may include the server port but does not + include the protocol (http[s]://). + + supports any repository depth. + + Examples: + - `binder-` + - `foo/binder-` + - `docker.io/foo/bar/baz/binder-` + - `127.0.0.1:5000/binder-` Install BinderHub -----------------