Skip to content

Commit

Permalink
Add CKAN provider
Browse files Browse the repository at this point in the history
  • Loading branch information
u10313335 committed Mar 7, 2024
1 parent 7f8b6c3 commit b3f9e7c
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 2 deletions.
2 changes: 2 additions & 0 deletions binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from .ratelimit import RateLimiter
from .registry import DockerRegistry
from .repoproviders import (
CKANProvider,
DataverseProvider,
FigshareProvider,
GistRepoProvider,
Expand Down Expand Up @@ -586,6 +587,7 @@ def _default_build_namespace(self):
"figshare": FigshareProvider,
"hydroshare": HydroshareProvider,
"dataverse": DataverseProvider,
"ckan": CKANProvider,
},
config=True,
help="""
Expand Down
3 changes: 2 additions & 1 deletion binderhub/event-schemas/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"Zenodo",
"Figshare",
"Hydroshare",
"Dataverse"
"Dataverse",
"CKAN"
],
"description": "Provider for the repository being launched"
},
Expand Down
1 change: 1 addition & 0 deletions binderhub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"figshare": "Figshare",
"hydroshare": "Hydroshare",
"dataverse": "Dataverse",
"ckan": "CKAN",
}


Expand Down
67 changes: 67 additions & 0 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,73 @@ def get_build_slug(self):
return f"hydroshare-{self.record_id}"


class CKANProvider(RepoProvider):
    """Provide contents of a CKAN dataset.

    Users must provide a spec consisting of the (URL-escaped) CKAN dataset
    URL, e.g. ``https://demo.ckan.org/dataset/sample-dataset-1``.

    The resolved ref has the form ``<dataset-id>.v<epoch>`` where ``<epoch>``
    is the dataset's ``metadata_modified`` time as integer seconds since the
    Unix epoch, so the ref changes whenever the dataset metadata changes.
    """

    name = Unicode("CKAN")

    display_name = "CKAN dataset"

    # Matches the trailing "/dataset/<id>" part of a dataset URL path; it is
    # swapped for the action API prefix when building the API URL.
    url_regex = r"/dataset/[a-z0-9_\\-]*$"

    labels = {
        "text": "CKAN dataset URL (https://demo.ckan.org/dataset/sample-dataset-1)",
        "tag_text": "Git ref (branch, tag, or commit)",
        "ref_prop_disabled": True,
        "label_prop_disabled": True,
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The spec is the URL-escaped dataset URL; keep the decoded form.
        self.repo = urllib.parse.unquote(self.spec)

    def _parse_repo(self):
        """Return ``(parsed_url, dataset_id)`` derived from ``self.repo``.

        A trailing slash on the path is ignored so that
        ``.../dataset/foo/`` and ``.../dataset/foo`` name the same dataset.
        """
        parsed = urlparse(self.repo)
        parsed = parsed._replace(path=parsed.path.rstrip("/"))
        # [-1] rather than [1]: a path without any "/" must not raise.
        dataset_id = parsed.path.rsplit("/", maxsplit=1)[-1]
        return parsed, dataset_id

    @staticmethod
    def _modified_epoch(json_body):
        """Return ``metadata_modified`` from a ``package_show`` body as a string
        of integer epoch seconds.

        Raises ``ValueError`` if the timestamp matches none of the accepted
        formats.
        """
        raw = json.loads(json_body)["result"]["metadata_modified"]
        # Accept timestamps both with and without a fractional-seconds part.
        for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
            try:
                parsed = datetime.strptime(raw, fmt)
            except ValueError:
                continue
            break
        else:
            raise ValueError(f"Unrecognised CKAN metadata_modified value: {raw!r}")
        # The timestamp carries no zone information; interpret it as UTC.
        epoch = parsed.replace(tzinfo=timezone.utc).timestamp()
        # truncate the timestamp
        return str(int(epoch))

    async def get_resolved_ref(self):
        """Resolve the dataset to ``<dataset-id>.v<epoch>``.

        Returns ``None`` when the URL is not a ``/dataset/<id>`` URL or when
        the CKAN API request fails with an ``HTTPError``.
        """
        parsed_repo, self.dataset_id = self._parse_repo()

        if not re.search(self.url_regex, parsed_repo.path):
            # Without a "/dataset/<id>" suffix there is no API base to derive.
            return None

        client = AsyncHTTPClient()

        # Drop any query/fragment of the dataset page so they do not leak
        # into the API URL.
        api = parsed_repo._replace(
            path=re.sub(self.url_regex, "/api/3/action/", parsed_repo.path),
            query="",
            fragment="",
        ).geturl()

        package_show_url = f"{api}package_show?id={self.dataset_id}"

        try:
            r = await client.fetch(package_show_url, user_agent="BinderHub")
        except HTTPError:
            return None

        self.record_id = f"{self.dataset_id}.v{self._modified_epoch(r.body)}"

        return self.record_id

    async def get_resolved_spec(self):
        """Return the dataset URL, resolving the ref first if not yet done."""
        if not hasattr(self, "record_id"):
            await self.get_resolved_ref()
        return self.repo

    def get_repo_url(self):
        """Return the decoded dataset URL."""
        return self.repo

    async def get_resolved_ref_url(self):
        """Return the URL of the resolved dataset (same as the resolved spec)."""
        resolved_spec = await self.get_resolved_spec()
        return resolved_spec

    def get_build_slug(self):
        """Return ``ckan-<dataset-id>``.

        Works even if ``get_resolved_ref`` has not been called yet.
        """
        if not hasattr(self, "dataset_id"):
            _, self.dataset_id = self._parse_repo()
        return f"ckan-{self.dataset_id}"


class GitRepoProvider(RepoProvider):
"""Bare bones git repo provider.
Expand Down
3 changes: 2 additions & 1 deletion binderhub/static/js/src/form.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ export function getBuildFormValues() {
providerPrefix === "zenodo" ||
providerPrefix === "figshare" ||
providerPrefix === "dataverse" ||
providerPrefix === "hydroshare"
providerPrefix === "hydroshare" ||
providerPrefix === "ckan"
) {
ref = "";
}
Expand Down
29 changes: 29 additions & 0 deletions binderhub/tests/test_repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tornado.ioloop import IOLoop

from binderhub.repoproviders import (
CKANProvider,
DataverseProvider,
FigshareProvider,
GistRepoProvider,
Expand Down Expand Up @@ -209,6 +210,34 @@ async def test_dataverse(
assert spec == resolved_spec


@pytest.mark.parametrize(
    "spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug",
    [
        [
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "sample-dataset-1.v",
            "https://demo.ckan.org/dataset/sample-dataset-1",
            "ckan-sample-dataset-1",
        ],
    ],
)
async def test_ckan(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug):
    """Check ref, slug and URL resolution of CKANProvider for a dataset URL."""
    ckan = CKANProvider(spec=spec)

    # Only a substring of the resolved ref is compared: the parametrized
    # value is the "<dataset-id>.v" part, not the full ref.
    actual_ref = await ckan.get_resolved_ref()
    assert resolved_ref in actual_ref

    assert ckan.get_build_slug() == build_slug
    assert ckan.get_repo_url() == spec
    assert await ckan.get_resolved_ref_url() == resolved_ref_url
    assert await ckan.get_resolved_spec() == resolved_spec


@pytest.mark.github_api
@pytest.mark.parametrize(
"repo,unresolved_ref,resolved_ref",
Expand Down
2 changes: 2 additions & 0 deletions docs/source/developer/repoproviders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ Currently supported providers, their prefixes and specs are:
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
| Dataverse | ``dataverse`` | ``<dataverse-DOI>`` | `Dataverse <https://dataverse.org/>`_ is open source research data repository software installed all over the world. |
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
| CKAN | ``ckan`` | ``<url-escaped-url>/<dataset-id>`` | `CKAN <https://ckan.org/>`_ is an open source data management system. |
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+
| Git | ``git`` | ``<url-escaped-url>/<commit-sha>`` | A generic repository provider for URLs that point directly to a git repository. |
+------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+

Expand Down
5 changes: 5 additions & 0 deletions docs/source/reference/repoproviders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ Module: :mod:`binderhub.repoproviders`
.. autoconfigurable:: DataverseProvider
:members:

:class:`CKANProvider`
---------------------------

.. autoconfigurable:: CKANProvider
:members:

:class:`GitRepoProvider`
---------------------------
Expand Down

0 comments on commit b3f9e7c

Please sign in to comment.