From 94cf52096381f0f2770790f1bafd09fa9b950eb6 Mon Sep 17 00:00:00 2001 From: David Douard Date: Wed, 27 Jan 2021 15:20:03 +0100 Subject: [PATCH] Add a Software Heritage provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Software Heritage¹ is a non-profit multi-stakeholder initiative which collects, preserves and shares all software that is publicly available in source code form. It provides a public API that makes it possible to retrieve any piece of source code that has been ingested and is identified via its SWHID², including a growing number of Jupyter notebooks. Thanks to the recent support for SWHID added in repo2docker, this change now brings SWHID support to BinderHub as a provider. ¹ https://www.softwareheritage.org ² https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers --- binderhub/app.py | 3 ++- binderhub/event-schemas/launch.json | 3 ++- binderhub/main.py | 1 + binderhub/repoproviders.py | 38 +++++++++++++++++++++++++++ binderhub/static/js/index.js | 2 +- binderhub/tests/test_repoproviders.py | 25 ++++++++++++++++++ doc/developer/repoproviders.rst | 6 +++++ doc/reference/repoproviders.rst | 7 +++++ 8 files changed, 82 insertions(+), 3 deletions(-) diff --git a/binderhub/app.py b/binderhub/app.py index 4f6d44a15..0b1f04e85 100755 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -49,7 +49,7 @@ from .repoproviders import (GitHubRepoProvider, GitRepoProvider, GitLabRepoProvider, GistRepoProvider, ZenodoProvider, FigshareProvider, HydroshareProvider, - DataverseProvider) + DataverseProvider, SWHIDProvider) from .metrics import MetricsHandler from .utils import ByteSpecification, url_path_join @@ -465,6 +465,7 @@ def _add_slash(self, proposal): 'figshare': FigshareProvider, 'hydroshare': HydroshareProvider, 'dataverse': DataverseProvider, + 'swh': SWHIDProvider, }, config=True, help=""" diff --git a/binderhub/event-schemas/launch.json b/binderhub/event-schemas/launch.json index 
7e8a22a1f..70a2e9e6e 100755 --- a/binderhub/event-schemas/launch.json +++ b/binderhub/event-schemas/launch.json @@ -14,7 +14,8 @@ "Zenodo", "Figshare", "Hydroshare", - "Dataverse" + "Dataverse", + "SWHID" ], "description": "Provider for the repository being launched" }, diff --git a/binderhub/main.py b/binderhub/main.py index ff8b45980..f340fc84d 100755 --- a/binderhub/main.py +++ b/binderhub/main.py @@ -19,6 +19,7 @@ "figshare": "Figshare", "hydroshare": "Hydroshare", "dataverse": "Dataverse", + "swh": "Software Heritage", } diff --git a/binderhub/repoproviders.py b/binderhub/repoproviders.py index 83a894ab7..88ba36a2e 100755 --- a/binderhub/repoproviders.py +++ b/binderhub/repoproviders.py @@ -961,3 +961,41 @@ async def get_resolved_spec(self): def get_build_slug(self): return self.gist_id + + +class SWHIDProvider(RepoProvider): + """Provide contents of a SWHID record + + Users must provide a spec consisting of the SWHID of a directory or revision. + """ + name = Unicode("SWHID") + + async def get_resolved_ref(self): + client = AsyncHTTPClient() + req = HTTPRequest("https://archive.softwareheritage.org/api/1/known/", + method="POST", + headers={'content-type': 'application/json'}, + body=json.dumps([self.spec]), + user_agent="BinderHub") + r = await client.fetch(req) + r.rethrow() + + response = json.loads(r.body) + if response[self.spec]["known"]: + return self.spec + raise RuntimeError(f"Unknown SWHID {self.spec}") + + + async def get_resolved_spec(self): + return self.spec + + def get_repo_url(self): + # While called repo URL, the return value of this function is passed + # as argument to repo2docker, hence we return the spec as is. 
+ return self.spec + + async def get_resolved_ref_url(self): + return self.spec + + def get_build_slug(self): + return "swh-{}".format(self.spec) diff --git a/binderhub/static/js/index.js b/binderhub/static/js/index.js index 870b4bd0e..84847389a 100755 --- a/binderhub/static/js/index.js +++ b/binderhub/static/js/index.js @@ -113,7 +113,7 @@ function getBuildFormValues() { var ref = $('#ref').val().trim() || $("#ref").attr("placeholder"); if (providerPrefix === 'zenodo' || providerPrefix === 'figshare' || providerPrefix === 'dataverse' || - providerPrefix === 'hydroshare') { + providerPrefix === 'hydroshare' || providerPrefix === 'swh') { ref = ""; } var path = $('#filepath').val().trim(); diff --git a/binderhub/tests/test_repoproviders.py b/binderhub/tests/test_repoproviders.py index 1a22b4252..17eb30f9f 100755 --- a/binderhub/tests/test_repoproviders.py +++ b/binderhub/tests/test_repoproviders.py @@ -12,6 +12,7 @@ GitLabRepoProvider, GitRepoProvider, HydroshareProvider, + SWHIDProvider, ZenodoProvider, strip_suffix, tokenize_spec, @@ -490,3 +491,27 @@ def test_gist_secret(): provider = GistRepoProvider(spec=spec, allow_secret_gist=True) assert IOLoop().run_sync(provider.get_resolved_ref) is not None + + +@pytest.mark.parametrize('spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug', [ + ['swh:1:rev:c30614ec4587418fb264efb466cba58991029f16', + 'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16', + 'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16', + 'swh:1:rev:c30614ec4587418fb264efb466cba58991029f16', + 'swh-swh:1:rev:c30614ec4587418fb264efb466cba58991029f16'], +]) +async def test_swh(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug): + provider = SWHIDProvider(spec=spec) + + # have to resolve the ref first + ref = await provider.get_resolved_ref() + assert ref == resolved_ref + + slug = provider.get_build_slug() + assert slug == build_slug + repo_url = provider.get_repo_url() + assert repo_url == spec + ref_url = await 
provider.get_resolved_ref_url() + assert ref_url == resolved_ref_url + spec = await provider.get_resolved_spec() + assert spec == resolved_spec diff --git a/doc/developer/repoproviders.rst b/doc/developer/repoproviders.rst index 36c521aea..34ac9a53e 100755 --- a/doc/developer/repoproviders.rst +++ b/doc/developer/repoproviders.rst @@ -38,6 +38,12 @@ Currently supported providers, their prefixes and specs are: +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ | Git | ``git`` | ``/`` | A generic repository provider for URLs that point directly to a git repository. | +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ + | SWHID | ``swh`` | ```` | A `Software Heritage persistent identifier`_. | + +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ + + +.. _`Software Heritage persistent identifier`: https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html#persistent-identifiers + Adding a new repository provider ================================ diff --git a/doc/reference/repoproviders.rst b/doc/reference/repoproviders.rst index d0f5ca37c..a538a79b3 100755 --- a/doc/reference/repoproviders.rst +++ b/doc/reference/repoproviders.rst @@ -71,3 +71,10 @@ Module: :mod:`binderhub.repoproviders` .. autoconfigurable:: GitRepoProvider :members: + + +:class:`SWHIDProvider` +--------------------------- + +.. autoconfigurable:: SWHIDProvider + :members: