diff --git a/binderhub/app.py b/binderhub/app.py
index 5eb97aa9c..dba23c1d4 100644
--- a/binderhub/app.py
+++ b/binderhub/app.py
@@ -63,6 +63,7 @@
     GitLabRepoProvider,
     GitRepoProvider,
     HydroshareProvider,
+    MecaRepoProvider,
     RepoProvider,
     ZenodoProvider,
 )
@@ -586,6 +587,7 @@ def _default_build_namespace(self):
             "figshare": FigshareProvider,
             "hydroshare": HydroshareProvider,
             "dataverse": DataverseProvider,
+            "meca": MecaRepoProvider,
         },
         config=True,
         help="""
diff --git a/binderhub/event-schemas/launch.json b/binderhub/event-schemas/launch.json
index 16e277cf4..09c8b3f39 100644
--- a/binderhub/event-schemas/launch.json
+++ b/binderhub/event-schemas/launch.json
@@ -14,7 +14,8 @@
         "Zenodo",
         "Figshare",
         "Hydroshare",
-        "Dataverse"
+        "Dataverse",
+        "MECA"
       ],
       "description": "Provider for the repository being launched"
     },
diff --git a/binderhub/main.py b/binderhub/main.py
index 2a2027598..ba2b4f4b5 100644
--- a/binderhub/main.py
+++ b/binderhub/main.py
@@ -22,6 +22,7 @@
     "figshare": "Figshare",
     "hydroshare": "Hydroshare",
     "dataverse": "Dataverse",
+    "meca": "MECA",
 }
 
 
diff --git a/binderhub/repoproviders.py b/binderhub/repoproviders.py
index be9dd75f4..d3df12b91 100644
--- a/binderhub/repoproviders.py
+++ b/binderhub/repoproviders.py
@@ -15,9 +15,11 @@
 import time
 import urllib.parse
 from datetime import datetime, timedelta, timezone
-from urllib.parse import urlparse
+from hashlib import md5
+from urllib.parse import unquote, urlparse, urlunparse
 
 import escapism
+import validators as val
 from prometheus_client import Gauge
 from tornado.httpclient import AsyncHTTPClient, HTTPError, HTTPRequest
 from tornado.httputil import url_concat
@@ -263,6 +265,149 @@ def get_build_slug(self):
         return f"zenodo-{self.record_id}"
 
 
+class MecaRepoProvider(RepoProvider):
+    """BinderHub Provider that can handle the contents of a MECA bundle
+
+    Users must provide a spec consisting of a public URL to the bundle.
+    The spec may be percent-encoded; it is decoded before validation.
+
+    When the `allowed_origins` trait is set, the hostname of the URL must be
+    one of its entries.
+    """
+
+    name = Unicode("MECA Bundle")
+
+    display_name = "MECA Bundle"
+
+    labels = {
+        "text": "MECA Bundle URL (https://journals.curvenote.com/journal/submissions/12345/meca.zip)",
+        "tag_text": "",
+        "ref_prop_disabled": True,
+        "label_prop_disabled": True,
+    }
+
+    # The default is passed positionally: `.tag(default=True)` would only
+    # attach metadata and leave the trait defaulting to False.
+    validate_bundle = Bool(True, config=True, help="Validate the file as MECA Bundle")
+
+    # No @default hook is needed: a traitlets List already defaults to [].
+    allowed_origins = List(
+        config=True,
+        help="""List of allowed origins for the URL
+
+        If set, the hostname of the URL must be one of these origins.
+
+        If not set, the URL can be on any origin.
+        """,
+    )
+
+    def __init__(self, *args, **kwargs):
+        """Decode and validate the spec and store it as `self.url`
+
+        Raises ValueError if the decoded spec is not a valid URL, or if
+        `allowed_origins` is set and does not contain the URL's hostname.
+        """
+        super().__init__(*args, **kwargs)
+
+        url = unquote(self.spec)
+
+        if not val.url(url):
+            raise ValueError(f"[MecaRepoProvider] Invalid URL {url}")
+
+        # Check the decoded URL rather than the raw spec: a percent-encoded
+        # spec has no parseable hostname and would always be rejected.
+        hostname = urlparse(url).hostname
+        if self.allowed_origins and hostname not in self.allowed_origins:
+            raise ValueError("URL is not on an allowed origin")
+
+        self.url = url
+        # Set by get_resolved_ref(); None until the bundle has been resolved
+        self.hashed_slug = None
+
+        self.log.info(f"MECA Bundle URL: {self.url}")
+        self.log.info(f"MECA Bundle raw spec: {self.spec}")
+
+    def get_hashed_slug(self, url, changes_with_content):
+        """Return a unique slug that is invariant to query parameters in the url
+
+        `changes_with_content` is hashed together with the stripped URL, so
+        the slug changes whenever that value changes.
+        """
+        parsed_url = urlparse(url)
+        stripped_url = urlunparse(
+            (parsed_url.scheme, parsed_url.netloc, parsed_url.path, "", "", "")
+        )
+        # md5 only derives a short, stable identifier here; it is not a
+        # security measure.
+        return (
+            "meca-" + md5(f"{stripped_url}-{changes_with_content}".encode()).hexdigest()
+        )
+
+    async def get_resolved_ref(self):
+        """Check that the URL is reachable and return the hashed slug
+
+        Raises RuntimeError if the HEAD request to the URL fails.
+        """
+        if self.hashed_slug is not None:
+            # Already resolved, no need for a second HEAD request
+            return self.hashed_slug
+
+        client = AsyncHTTPClient()
+        req = HTTPRequest(self.url, method="HEAD", user_agent="BinderHub")
+        self.log.info(f"get_resolved_ref() HEAD: {self.url}")
+        try:
+            r = await client.fetch(req)
+        except Exception as e:
+            # Any fetch failure is reported the same way; keep the cause
+            raise RuntimeError(f"URL is unreachable ({e})") from e
+        self.log.info(f"URL is reachable: {self.url}")
+
+        # Use a header that changes with the content of the bundle; without
+        # one the slug would stay the same after the bundle is updated.
+        changes_with_content = (
+            r.headers.get("ETag")
+            or r.headers.get("Content-Length")
+            or r.headers.get("Last-Modified")
+        )
+        self.hashed_slug = self.get_hashed_slug(self.url, changes_with_content)
+        self.log.info(f"hashed_slug: {self.hashed_slug}")
+        return self.hashed_slug
+
+    async def get_resolved_spec(self):
+        """Return the spec unchanged, resolving the bundle first if needed"""
+        if self.hashed_slug is None:
+            await self.get_resolved_ref()
+        self.log.info(f"get_resolved_spec(): {self.hashed_slug}")
+        return self.spec
+
+    async def get_resolved_ref_url(self):
+        """Return the decoded bundle URL"""
+        self.log.info(f"get_resolved_ref_url(): {self.url}")
+        return self.url
+
+    def get_repo_url(self):
+        """This is passed to repo2docker and is the URL that is to be fetched
+        with a `http[s]+meca` protocol string. We do this by convention to enable
+        detection of meca urls by the MecaContentProvider.
+        """
+        parsed = urlparse(self.url)
+        parsed = parsed._replace(scheme=f"{parsed.scheme}+meca")
+        url = urlunparse(parsed)
+        self.log.info(f"get_repo_url(): {url}")
+        return url
+
+    def get_build_slug(self):
+        """Return the unique build slug computed by get_resolved_ref()
+
+        Raises RuntimeError if get_resolved_ref() has not been awaited yet.
+        """
+        if self.hashed_slug is None:
+            raise RuntimeError(
+                "get_resolved_ref() must be awaited before get_build_slug()"
+            )
+        return self.hashed_slug
+
+
 class FigshareProvider(RepoProvider):
     """Provide contents of a Figshare article
 
diff --git a/binderhub/static/js/src/form.js b/binderhub/static/js/src/form.js
index cc00d7b45..f63cc1d19 100644
--- a/binderhub/static/js/src/form.js
+++ b/binderhub/static/js/src/form.js
@@ -18,7 +18,7 @@ export function getBuildFormValues() {
     repo = repo.replace(/^(https?:\/\/)?github.com\//, "");
     repo = repo.replace(/^(https?:\/\/)?gitlab.com\//, "");
   }
-  // trim trailing or leading '/' on repo
+  // trim trailing or leading "/" on repo
   repo = repo.replace(/(^\/)|(\/?$)/g, "");
   // git providers encode the URL of the git repository as the repo
   // argument.
@@ -31,7 +31,8 @@ export function getBuildFormValues() {
     providerPrefix === "zenodo" ||
     providerPrefix === "figshare" ||
     providerPrefix === "dataverse" ||
-    providerPrefix === "hydroshare"
+    providerPrefix === "hydroshare" ||
+    providerPrefix === "meca"
   ) {
     ref = "";
   }
diff --git a/docs/source/reference/repoproviders.rst b/docs/source/reference/repoproviders.rst
index d0f5ca37c..5bcdda538 100644
--- a/docs/source/reference/repoproviders.rst
+++ b/docs/source/reference/repoproviders.rst
@@ -65,6 +65,12 @@ Module: :mod:`binderhub.repoproviders`
 .. autoconfigurable:: DataverseProvider
     :members:
 
+:class:`MecaRepoProvider`
+---------------------------
+
+.. autoconfigurable:: MecaRepoProvider
+    :members:
+
 :class:`GitRepoProvider`
 ---------------------------
 
diff --git a/requirements.txt b/requirements.txt
index a416e1527..103536a80 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,3 +9,4 @@ pyjwt>=2
 python-json-logger
 tornado>=5.1
 traitlets
+validators