diff --git a/binderhub/app.py b/binderhub/app.py index 0aba78196..8b5efe4be 100755 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -282,6 +282,30 @@ def _valid_badge_base_url(self, proposal): config=True, ) + extra_volume_build = Dict( + {}, + config=True, + help=""" + Additionnal volume on build pods. + """ + ) + + extra_volume_mount_build = Dict( + {}, + config=True, + help=""" + Additionnal volume mounting configuration on build pods. + """ + ) + + init_container_build = Dict( + {}, + config=True, + help=""" + Additionnal container on build pods. + """ + ) + push_secret = Unicode( 'binder-push-secret', allow_none=True, @@ -573,6 +597,9 @@ def initialize(self, *args, **kwargs): self.event_log.register_schema(json.load(f)) self.tornado_settings.update({ + "extra_volume_build": self.extra_volume_build, + "extra_volume_mount_build": self.extra_volume_mount_build, + "init_container_build": self.init_container_build, "push_secret": self.push_secret, "image_prefix": self.image_prefix, "debug": self.debug, diff --git a/binderhub/build.py b/binderhub/build.py index f26cc4ba2..04a1bb696 100644 --- a/binderhub/build.py +++ b/binderhub/build.py @@ -38,6 +38,7 @@ class Build: """ def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, build_image, image_name, push_secret, memory_limit, docker_host, node_selector, + extra_volume_build={}, extra_volume_mount_build={}, init_container_build={}, appendix='', log_tail_lines=100, sticky_builds=False): self.q = q self.api = api @@ -46,6 +47,9 @@ def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, buil self.name = name self.namespace = namespace self.image_name = image_name + self.extra_volume_build = extra_volume_build + self.extra_volume_mount_build = extra_volume_mount_build + self.init_container_build = init_container_build self.push_secret = push_secret self.build_image = build_image self.main_loop = IOLoop.current() @@ -230,10 +234,32 @@ def submit(self): name='docker-push-secret', 
secret=client.V1SecretVolumeSource(secret_name=self.push_secret) )) + if self.extra_volume_build: + volume_mounts.append(client.V1VolumeMount( + mount_path=self.extra_volume_mount_build['mountPath'], name=self.extra_volume_mount_build['name'])) + volumes.append(client.V1Volume( + name=self.extra_volume_build['name'], + host_path=client.V1HostPathVolumeSource(path=self.extra_volume_build['path'], type='Directory') + )) env = [] if self.git_credentials: env.append(client.V1EnvVar(name='GIT_CREDENTIAL_ENV', value=self.git_credentials)) + env.append(client.V1EnvVar(name='REPO_URL', value=self.repo_url)) + + init_containers=[] + if self.init_container_build: + init_containers.append(client.V1Container( + image=self.init_container_build['image'], + name="build-init", + args=self.init_container_build['args'], + volume_mounts=volume_mounts, + resources=client.V1ResourceRequirements( + limits={'memory': self.memory_limit}, + requests={'memory': self.memory_limit} + ), + env=env + )) self.pod = client.V1Pod( metadata=client.V1ObjectMeta( @@ -260,6 +286,7 @@ def submit(self): env=env ) ], + init_containers=init_containers, tolerations=[ client.V1Toleration( key='hub.jupyter.org/dedicated', diff --git a/binderhub/builder.py b/binderhub/builder.py index b0e9acf4f..0c4bec529 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -355,6 +355,9 @@ async def get(self, provider_prefix, _unescaped_spec): repo_url=repo_url, ref=ref, image_name=image_name, + extra_volume_build=self.settings['extra_volume_build'], + extra_volume_mount_build=self.settings['extra_volume_mount_build'], + init_container_build=self.settings['init_container_build'], push_secret=push_secret, build_image=self.settings['build_image'], memory_limit=self.settings['build_memory_limit'], diff --git a/doc/customizing.rst b/doc/customizing.rst index 8c0a88f35..ccc6b72b3 100644 --- a/doc/customizing.rst +++ b/doc/customizing.rst @@ -67,7 +67,7 @@ Firstly assume that you have a Git repo 
``binderhub_custom_files`` which holds y binderhub_custom_files/ ├── static - │   └── custom_logo.svg + │ └── custom_logo.svg └── templates └── page.html @@ -198,3 +198,70 @@ of 1337 to any repository in the JupyterHub organization. - pattern: ^jupyterhub.* config: quota: 1337 + +Pre-build steps +---------------- + +A `Binder build +`_ +refers to the process of creating a virtual environment for a git repository. This operation takes +place in a `Kubernetes pod `_, +where a `repo2docker `_ container does the heavy lifting +to create the requested environment. + +If you want the environment to access some additional resources without baking them +into the built Docker image, you may need to execute some configurations **before** the ``repo2docker`` +container is started using `Init Containers +`_. +Some example use cases could include: + +* Building HTML content with sizable interactive visualization objects instead of relying on static hosting +* Creating pre-calculated data at build time in a user-defined environment instead of retrieving + them from elsewhere +* Pulling a dataset into a server and linking it to a user pod, so that the data volume is made easily + available to the runtime environment without inflating the built Docker image + +Using the ``init_container_build`` field in the BinderHub configuration, you can specify ``Init Containers`` +to be run in the build pod before ``repo2docker``. Mounting additional `volumes +`_ to this init container is also possible with +the ``extra_volume_build`` field. + +.. note:: + + Multiple init containers and volumes can be specified under the ``init_container_build`` and + ``extra_volume_build`` fields. + +The `repo2data `_ Python package provides a good showcase for +the use of ``init_container_build``: + +.. 
code-block:: yaml + + config: + BinderHub: + extra_volume_build: + - name: repo2data-volume + hostPath: + path: /DATA + type: Directory + init_container_build: + - name: init-builder + image: conpdev/repo2data + args: + - -r + - $(REPO_URL) + volumeMounts: + - name: repo2data-volume + mountPath: /data + +In the configuration above: + +1. An additional volume ``repo2data-volume`` is associated with the init container +2. The ``conpdev/repo2data`` init container pulls the dataset described by a `data_requirements.json + `_ into ``repo2data-volume`` + +Because the dataset is available before the user pod starts, this approach does not prolong the time +for spawning a user session and keeps the Docker images lean. + +.. note:: + + Commits pushed to the user's git repository will trigger ``init_container_build`` commands. \ No newline at end of file