From 2d8e5287de29bb9f5a5829e0ae6c9700291d5500 Mon Sep 17 00:00:00 2001 From: ltetrel Date: Thu, 26 Mar 2020 16:38:24 -0400 Subject: [PATCH 1/4] Adding optionnal build volumes and build init_containers to binderhub config; REPO_URL env accesible to pod linting adding optionnal volumes and init_containers to binderhub config --- binderhub/app.py | 27 +++++++++++++++++++++++++++ binderhub/build.py | 27 +++++++++++++++++++++++++++ binderhub/builder.py | 3 +++ 3 files changed, 57 insertions(+) diff --git a/binderhub/app.py b/binderhub/app.py index 0aba78196..8b5efe4be 100755 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -282,6 +282,30 @@ def _valid_badge_base_url(self, proposal): config=True, ) + extra_volume_build = Dict( + {}, + config=True, + help=""" + Additionnal volume on build pods. + """ + ) + + extra_volume_mount_build = Dict( + {}, + config=True, + help=""" + Additionnal volume mounting configuration on build pods. + """ + ) + + init_container_build = Dict( + {}, + config=True, + help=""" + Additionnal container on build pods. 
+ """ + ) + push_secret = Unicode( 'binder-push-secret', allow_none=True, @@ -573,6 +597,9 @@ def initialize(self, *args, **kwargs): self.event_log.register_schema(json.load(f)) self.tornado_settings.update({ + "extra_volume_build": self.extra_volume_build, + "extra_volume_mount_build": self.extra_volume_mount_build, + "init_container_build": self.init_container_build, "push_secret": self.push_secret, "image_prefix": self.image_prefix, "debug": self.debug, diff --git a/binderhub/build.py b/binderhub/build.py index f26cc4ba2..04a1bb696 100644 --- a/binderhub/build.py +++ b/binderhub/build.py @@ -38,6 +38,7 @@ class Build: """ def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, build_image, image_name, push_secret, memory_limit, docker_host, node_selector, + extra_volume_build={}, extra_volume_mount_build={}, init_container_build={}, appendix='', log_tail_lines=100, sticky_builds=False): self.q = q self.api = api @@ -46,6 +47,9 @@ def __init__(self, q, api, name, namespace, repo_url, ref, git_credentials, buil self.name = name self.namespace = namespace self.image_name = image_name + self.extra_volume_build = extra_volume_build + self.extra_volume_mount_build = extra_volume_mount_build + self.init_container_build = init_container_build self.push_secret = push_secret self.build_image = build_image self.main_loop = IOLoop.current() @@ -230,10 +234,32 @@ def submit(self): name='docker-push-secret', secret=client.V1SecretVolumeSource(secret_name=self.push_secret) )) + if self.extra_volume_build: + volume_mounts.append(client.V1VolumeMount( + mount_path=self.extra_volume_mount_build['mountPath'], name=self.extra_volume_mount_build['name'])) + volumes.append(client.V1Volume( + name=self.extra_volume_build['name'], + host_path=client.V1HostPathVolumeSource(path=self.extra_volume_build['path'], type='Directory') + )) env = [] if self.git_credentials: env.append(client.V1EnvVar(name='GIT_CREDENTIAL_ENV', value=self.git_credentials)) + 
env.append(client.V1EnvVar(name='REPO_URL', value=self.repo_url)) + + init_containers=[] + if self.init_container_build: + init_containers.append(client.V1Container( + image=self.init_container_build['image'], + name="build-init", + args=self.init_container_build['args'], + volume_mounts=volume_mounts, + resources=client.V1ResourceRequirements( + limits={'memory': self.memory_limit}, + requests={'memory': self.memory_limit} + ), + env=env + )) self.pod = client.V1Pod( metadata=client.V1ObjectMeta( @@ -260,6 +286,7 @@ def submit(self): env=env ) ], + init_containers=init_containers, tolerations=[ client.V1Toleration( key='hub.jupyter.org/dedicated', diff --git a/binderhub/builder.py b/binderhub/builder.py index b0e9acf4f..0c4bec529 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -355,6 +355,9 @@ async def get(self, provider_prefix, _unescaped_spec): repo_url=repo_url, ref=ref, image_name=image_name, + extra_volume_build=self.settings['extra_volume_build'], + extra_volume_mount_build=self.settings['extra_volume_mount_build'], + init_container_build=self.settings['init_container_build'], push_secret=push_secret, build_image=self.settings['build_image'], memory_limit=self.settings['build_memory_limit'], From 0e4fbaf40d14012f7777b734f3c15686305be20b Mon Sep 17 00:00:00 2001 From: ltetrel Date: Mon, 30 Mar 2020 17:55:59 -0400 Subject: [PATCH 2/4] adding doc for pre-built events --- doc/customizing.rst | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/doc/customizing.rst b/doc/customizing.rst index 8c0a88f35..2df693185 100644 --- a/doc/customizing.rst +++ b/doc/customizing.rst @@ -198,3 +198,53 @@ of 1337 to any repository in the JupyterHub organization. - pattern: ^jupyterhub.* config: quota: 1337 + +Pre-built events +---------------- +Everytime a user launch a repository, Binderhub will create a k8s pod that uses +`repo2docker `_ to prepare the user's +environment. 
You can find more information on the building process `here +`_. + +It is now possible to specify additionnal events just before ``repo2docker`` using one or multiple +`Init Containers `_. +These could be used if you want to run long and intensive jobs directly on the server, +without impacting user's notebook runtime. + +Examples of such events are : + +* running costly pipelines using the full ressources from the server +* building dynamic html objects to be rendered inside the user's notebook +* pre-pulling data into the server + +.. note:: + + As for a standard build, the ``Init Containers`` commands are triggered again before + ``repo2docker`` when the user make new commits into his repository. + +There is also the possibility to provide additionnal `volume +`_ that can be mounted +inside the build pod. + +The following configuration file showcase the use of `repo2data `_ to pull data into the +server before ``repo2docker`` using a +`hostPath volume `_ + +.. code-block:: yaml + + config: + BinderHub: + extra_volume_build: + - name: extra-volume + hostPath: + path: /DATA + type: Directory + init_container_build: + - name: init-builder + image: conpdev/repo2data + args: + - -r + - $(REPO_URL) + volumeMounts: + - name: extra-volume + mountPath: /data \ No newline at end of file From ec1e7c3080c333313e5656f1ee468323a5b8094e Mon Sep 17 00:00:00 2001 From: Agah Date: Tue, 31 Mar 2020 11:11:44 -0400 Subject: [PATCH 3/4] Documentation for pre-build steps (#1) * add pre-build steps doc * Update customizing.rst * Update customizing.rst * Update customizing.rst --- doc/customizing.rst | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/doc/customizing.rst b/doc/customizing.rst index 2df693185..e545f7ed0 100644 --- a/doc/customizing.rst +++ b/doc/customizing.rst @@ -67,7 +67,7 @@ Firstly assume that you have a Git repo ``binderhub_custom_files`` which holds y binderhub_custom_files/ ├── static - │   └── 
custom_logo.svg + │ └── custom_logo.svg └── templates └── page.html @@ -199,37 +199,17 @@ of 1337 to any repository in the JupyterHub organization. config: quota: 1337 -Pre-built events +Pre-build steps ---------------- -Everytime a user launch a repository, Binderhub will create a k8s pod that uses -`repo2docker `_ to prepare the user's -environment. You can find more information on the building process `here -`_. -It is now possible to specify additionnal events just before ``repo2docker`` using one or multiple -`Init Containers `_. -These could be used if you want to run long and intensive jobs directly on the server, -without impacting user's notebook runtime. +A Binder ``build`` refers to the process of creating a virtual environment for a git repository. This operation takes place in a `Kubernetes pod `_, where a `repo2docker `_ container does the heavy lifting to create the requested environment. -Examples of such events are : +If you want the eventual environment to access some additinoal resources without baking them into the built Docker image, you may need to execute some configurations **before** the ``repo2docker`` container is started. In Kubernetes, such priori steps are typically achieved using `init containers `_. -* running costly pipelines using the full ressources from the server -* building dynamic html objects to be rendered inside the user's notebook -* pre-pulling data into the server +In the BinderHub configuration, you can specify init containers to run in the build pod before the ``repo2docker`` call via the ``init_container_build`` key. -.. note:: - - As for a standard build, the ``Init Containers`` commands are triggered again before - ``repo2docker`` when the user make new commits into his repository. - -There is also the possibility to provide additionnal `volume -`_ that can be mounted -inside the build pod. 
+The `repo2data `_ python package provides a good showcase for the use of ``init_container_build``: -The following configuration file showcase the use of `repo2data `_ to pull data into the -server before ``repo2docker`` using a -`hostPath volume `_ - .. code-block:: yaml config: @@ -247,4 +227,15 @@ server before ``repo2docker`` using a - $(REPO_URL) volumeMounts: - name: extra-volume - mountPath: /data \ No newline at end of file + mountPath: /data + +In the configuration above, a ``conpdev/repo2data`` init container is run to: + +1. Pull the dataset described by a `data_requirements.json `_ to the server +2. Set necessary configurations to associate the downloaded data with the corresponding user pod. + +Having the dataset available pripor to the user pod running, this approach does not prolong the time for spawning a user session and keeps the Docker images lean. Note that the use of ``init_container_build`` is not exclusive to the data management purposes. Any process that can be defined as a ``init container`` job can be specified before the ``repo2docker`` container is started in the build pod. + +.. note:: + + Commits pushed to the user's git repository will trigger ``init_container_build`` runs. From e5dd33a848205d2b6a9ed6786d639a9946e04624 Mon Sep 17 00:00:00 2001 From: ltetrel Date: Mon, 6 Apr 2020 12:10:59 -0400 Subject: [PATCH 4/4] updating docs with Agah's comments + typos --- doc/customizing.rst | 60 ++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 17 deletions(-) diff --git a/doc/customizing.rst b/doc/customizing.rst index e545f7ed0..ccc6b72b3 100644 --- a/doc/customizing.rst +++ b/doc/customizing.rst @@ -202,40 +202,66 @@ of 1337 to any repository in the JupyterHub organization. Pre-build steps ---------------- -A Binder ``build`` refers to the process of creating a virtual environment for a git repository. 
This operation takes place in a `Kubernetes pod `_, where a `repo2docker `_ container does the heavy lifting to create the requested environment. +A `Binder build +`_ +refers to the process of creating a virtual environment for a git repository. This operation takes +place in a `Kubernetes pod `_, +where a `repo2docker `_ container does the heavy lifting +to create the requested environment. + +If you want the environment to access some additional resources without baking them +into the built Docker image, you may need to execute some configurations **before** the ``repo2docker`` +container is started using `Init Containers +`_. +Some example use cases could include: + +* Building HTML content with sizable interactive visualization objects instead of relying on static hosting +* Creation of pre-calculated data during the build time in a user-defined environment instead of retrieving + them from elsewhere +* Pulling a dataset into a server and linking it to a user pod, so that the data volume is made easily + available to the runtime environment without inflating the built Docker image + +Using the ``init_container_build`` field in the BinderHub configuration, you can specify ``Init Containers`` +to be run in the build pod before ``repo2docker``. Mounting additional `volumes +`_ to this init container is also possible with +the ``extra_volume_build`` field. -If you want the eventual environment to access some additinoal resources without baking them into the built Docker image, you may need to execute some configurations **before** the ``repo2docker`` container is started. In Kubernetes, such priori steps are typically achieved using `init containers `_. +.. note:: -In the BinderHub configuration, you can specify init containers to run in the build pod before the ``repo2docker`` call via the ``init_container_build`` key. + + Multiple init containers and volumes can be specified under the ``init_container_build`` and + ``extra_volume_build`` fields. 
-The `repo2data `_ python package provides a good showcase for the use of ``init_container_build``: +The `repo2data `_ Python package provides a good showcase for +the use of ``init_container_build``: .. code-block:: yaml config: BinderHub: extra_volume_build: - - name: extra-volume + - name: repo2data-volume hostPath: path: /DATA type: Directory init_container_build: - name: init-builder - image: conpdev/repo2data - args: - - -r - - $(REPO_URL) - volumeMounts: - - name: extra-volume - mountPath: /data + image: conpdev/repo2data + args: + - -r + - $(REPO_URL) + volumeMounts: + - name: repo2data-volume + mountPath: /data -In the configuration above, a ``conpdev/repo2data`` init container is run to: +In the configuration above: -1. Pull the dataset described by a `data_requirements.json `_ to the server -2. Set necessary configurations to associate the downloaded data with the corresponding user pod. +1. An additional volume ``repo2data-volume`` is associated with the init container +2. The ``conpdev/repo2data`` init container pulls the dataset described by a `data_requirements.json + `_ into ``repo2data-volume`` -Having the dataset available pripor to the user pod running, this approach does not prolong the time for spawning a user session and keeps the Docker images lean. Note that the use of ``init_container_build`` is not exclusive to the data management purposes. Any process that can be defined as a ``init container`` job can be specified before the ``repo2docker`` container is started in the build pod. +Because the dataset is available before the user pod starts, this approach does not prolong the time +for spawning a user session and keeps the Docker images lean. .. note:: - Commits pushed to the user's git repository will trigger ``init_container_build`` runs. + Commits pushed to the user's git repository will trigger ``init_container_build`` commands. \ No newline at end of file