diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ddc56f083..1e30f7072 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: # Autoformat: Python code, syntax patterns are modernized - repo: https://github.com/asottile/pyupgrade - rev: v3.15.2 + rev: v3.16.0 hooks: - id: pyupgrade args: @@ -68,10 +68,12 @@ repos: - id: check-case-conflict - id: check-executables-have-shebangs - id: requirements-txt-fixer + # exclude ci/refreeze generated requirements.txt + exclude: ^.*images\/.*\/requirements\.txt$ # Lint: Python code - repo: https://github.com/PyCQA/flake8 - rev: "7.0.0" + rev: "7.1.0" hooks: - id: flake8 diff --git a/binderhub/app.py b/binderhub/app.py index 279f6fb09..50816a59f 100644 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -56,6 +56,7 @@ from .ratelimit import RateLimiter from .registry import DockerRegistry from .repoproviders import ( + CKANProvider, DataverseProvider, FigshareProvider, GistRepoProvider, @@ -574,7 +575,7 @@ def _default_build_namespace(self): return os.environ.get("BUILD_NAMESPACE", "default") build_image = Unicode( - "quay.io/jupyterhub/repo2docker:2023.06.0", + "quay.io/jupyterhub/repo2docker:2024.07.0", help=""" DEPRECATED: Use c.KubernetesBuildExecutor.build_image @@ -603,6 +604,7 @@ def _default_build_namespace(self): "figshare": FigshareProvider, "hydroshare": HydroshareProvider, "dataverse": DataverseProvider, + "ckan": CKANProvider, }, config=True, help=""" diff --git a/binderhub/build.py b/binderhub/build.py index 5e46f7b58..a5be78d3d 100644 --- a/binderhub/build.py +++ b/binderhub/build.py @@ -288,7 +288,7 @@ def _default_namespace(self): return os.getenv("BUILD_NAMESPACE", "default") build_image = Unicode( - "quay.io/jupyterhub/repo2docker:2023.06.0", + "quay.io/jupyterhub/repo2docker:2024.07.0", help="Docker image containing repo2docker that is used to spawn the build pods.", config=True, ) diff --git a/binderhub/builder.py b/binderhub/builder.py index 
c97704726..f420febe7 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -300,7 +300,7 @@ async def get(self, provider_prefix, _unescaped_spec): await self.emit( { "phase": "failed", - "message": f"Sorry, {spec} has been temporarily disabled from launching. Please contact admins for more info!", + "message": f"Sorry, {spec} is not allowed to launch. Please contact admins for more info!", } ) return diff --git a/binderhub/event-schemas/launch.json b/binderhub/event-schemas/launch.json index 16e277cf4..446182926 100644 --- a/binderhub/event-schemas/launch.json +++ b/binderhub/event-schemas/launch.json @@ -14,7 +14,8 @@ "Zenodo", "Figshare", "Hydroshare", - "Dataverse" + "Dataverse", + "CKAN" ], "description": "Provider for the repository being launched" }, diff --git a/binderhub/repoproviders.py b/binderhub/repoproviders.py index 3e219d906..b91fde2e3 100644 --- a/binderhub/repoproviders.py +++ b/binderhub/repoproviders.py @@ -15,7 +15,7 @@ import time import urllib.parse from datetime import datetime, timedelta, timezone -from urllib.parse import urlparse +from urllib.parse import parse_qs, urlencode, urlparse import escapism from prometheus_client import Gauge @@ -489,6 +489,95 @@ def get_build_slug(self): return f"hydroshare-{self.record_id}" +class CKANProvider(RepoProvider): + """Provide contents of a CKAN dataset + Users must provide a spec consisting of the CKAN dataset URL. 
+ """ + + name = Unicode("CKAN") + + display_name = "CKAN dataset" + + display_config = { + "displayName": "CKAN dataset", + "id": "ckan", + "repo": { + "label": "CKAN dataset URL", + "placeholder": "https://demo.ckan.org/dataset/sample-dataset-1", + }, + "ref": {"enabled": False}, + } + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.repo = urllib.parse.unquote(self.spec) + + async def get_resolved_ref(self): + parsed_repo = urlparse(self.repo) + + if "/dataset/" not in parsed_repo.path: + # Not actually a dataset + return None + + # CKAN may be under a URL prefix, and we should accommodate that + url_prefix, dataset_url = parsed_repo.path.split("/dataset/") + + dataset_url_parts = dataset_url.split("/") + self.dataset_id = dataset_url_parts[0] + + api = parsed_repo._replace( + path=f"{url_prefix}/api/3/action/", query="" + ).geturl() + + # Activity ID may be present either as a query parameter, activity_id + # or as part of the URL, under `/history/`. If `/history/` + # is present, that takes precedence over `activity_id` + activity_id = None + if "history" in dataset_url_parts: + activity_id = dataset_url_parts[dataset_url_parts.index("history") + 1] + elif parse_qs(parsed_repo.query).get("activity_id") is not None: + activity_id = parse_qs(parsed_repo.query).get("activity_id")[0] + + if activity_id: + fetch_url = f"{api}activity_data_show?" + urlencode( + {"id": activity_id, "object_type": "package"} + ) + else: + fetch_url = f"{api}package_show?"
+ urlencode({"id": self.dataset_id}) + + client = AsyncHTTPClient() + try: + r = await client.fetch(fetch_url, user_agent="BinderHub") + except HTTPError: + return None + + json_response = json.loads(r.body) + date = json_response["result"]["metadata_modified"] + parsed_date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S.%f") + epoch = parsed_date.replace(tzinfo=timezone(timedelta(0))).timestamp() + # truncate the timestamp + dataset_version = str(int(epoch)) + + self.record_id = f"{self.dataset_id}.v{dataset_version}" + + return self.record_id + + async def get_resolved_spec(self): + if not hasattr(self, "record_id"): + await self.get_resolved_ref() + return self.repo + + def get_repo_url(self): + return self.repo + + async def get_resolved_ref_url(self): + resolved_spec = await self.get_resolved_spec() + return resolved_spec + + def get_build_slug(self): + return f"ckan-{self.dataset_id}" + + class GitRepoProvider(RepoProvider): """Bare bones git repo provider. diff --git a/binderhub/tests/test_repoproviders.py b/binderhub/tests/test_repoproviders.py index 87c6a3727..13715083b 100644 --- a/binderhub/tests/test_repoproviders.py +++ b/binderhub/tests/test_repoproviders.py @@ -6,6 +6,7 @@ from tornado.ioloop import IOLoop from binderhub.repoproviders import ( + CKANProvider, DataverseProvider, FigshareProvider, GistRepoProvider, @@ -209,6 +210,53 @@ async def test_dataverse( assert spec == resolved_spec +@pytest.mark.parametrize( + "spec,resolved_spec,resolved_ref,resolved_ref_url,build_slug", + [ + [ + "https://demo.ckan.org/dataset/sample-dataset-1", + "https://demo.ckan.org/dataset/sample-dataset-1", + "sample-dataset-1.v", + "https://demo.ckan.org/dataset/sample-dataset-1", + "ckan-sample-dataset-1", + ], + [ + "https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f", + "https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f", + "chart-test.v1717501747", + 
"https://demo.datashades.com/dataset/chart-test?activity_id=061888e9-e3c2-4769-b097-9c195a841e2f", + "ckan-chart-test", + ], + [ + "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f", + "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f", + "chart-test.v1717501747", + "https://demo.datashades.com/dataset/chart-test/history/061888e9-e3c2-4769-b097-9c195a841e2f", + "ckan-chart-test", + ], + ["https://demo.ckan.org/group/roger", None, None, None, None], + ["https://demo.ckan.org/dataset/nosuchdataset", None, None, None, None], + ], +) +async def test_ckan(spec, resolved_spec, resolved_ref, resolved_ref_url, build_slug): + provider = CKANProvider(spec=spec) + + ref = await provider.get_resolved_ref() + if not resolved_ref: + # We are done here if we don't expect to resolve + return + assert resolved_ref in ref + + slug = provider.get_build_slug() + assert slug == build_slug + repo_url = provider.get_repo_url() + assert repo_url == spec + ref_url = await provider.get_resolved_ref_url() + assert ref_url == resolved_ref_url + spec = await provider.get_resolved_spec() + assert spec == resolved_spec + + @pytest.mark.github_api @pytest.mark.parametrize( "repo,unresolved_ref,resolved_ref", diff --git a/ci/refreeze b/ci/refreeze index 561a789fe..903646f52 100755 --- a/ci/refreeze +++ b/ci/refreeze @@ -11,4 +11,4 @@ docker run --rm \ --workdir=/io \ --user=root \ python:3.11-bullseye \ - sh -c 'pip install pip-tools==6.* && pip-compile --upgrade helm-chart/images/binderhub/requirements.in' + sh -c 'pip install pip-tools==7.* && pip-compile --allow-unsafe --strip-extras --upgrade helm-chart/images/binderhub/requirements.in' diff --git a/docs/source/developer/repoproviders.rst b/docs/source/developer/repoproviders.rst index 083db3e27..ab648f1c4 100644 --- a/docs/source/developer/repoproviders.rst +++ b/docs/source/developer/repoproviders.rst @@ -36,6 +36,8 @@ Currently supported providers, their 
prefixes and specs are: +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ | Dataverse | ``dataverse`` | ```` | `Dataverse `_ is open source research data repository software installed all over the world. | +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ + | CKAN | ``ckan`` | ``/`` | `CKAN `_ is an open source data management system. | + +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ | Git | ``git`` | ``/`` | A generic repository provider for URLs that point directly to a git repository. | +------------+--------------------+-------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/source/reference/repoproviders.rst b/docs/source/reference/repoproviders.rst index d0f5ca37c..40b230d70 100644 --- a/docs/source/reference/repoproviders.rst +++ b/docs/source/reference/repoproviders.rst @@ -65,6 +65,11 @@ Module: :mod:`binderhub.repoproviders` .. autoconfigurable:: DataverseProvider :members: +:class:`CKANProvider` +--------------------------- + +.. 
autoconfigurable:: CKANProvider + :members: :class:`GitRepoProvider` --------------------------- diff --git a/helm-chart/images/binderhub/requirements.txt b/helm-chart/images/binderhub/requirements.txt index 450e70409..8e51e7279 100644 --- a/helm-chart/images/binderhub/requirements.txt +++ b/helm-chart/images/binderhub/requirements.txt @@ -4,7 +4,7 @@ # # Use the "Run workflow" button at https://github.com/jupyterhub/binderhub/actions/workflows/watch-dependencies.yaml # -alembic==1.13.1 +alembic==1.13.2 # via jupyterhub async-generator==1.10 # via jupyterhub @@ -14,7 +14,7 @@ attrs==23.2.0 # referencing cachetools==5.3.3 # via google-auth -certifi==2024.2.2 +certifi==2024.7.4 # via # kubernetes # requests @@ -24,33 +24,33 @@ cffi==1.16.0 # via cryptography charset-normalizer==3.3.2 # via requests -cryptography==42.0.5 +cryptography==42.0.8 # via pyopenssl -docker==7.0.0 - # via -r ../../../requirements.txt +docker==7.1.0 + # via -r helm-chart/images/binderhub/../../../requirements.txt escapism==1.0.1 - # via -r ../../../requirements.txt -google-api-core[grpc]==2.18.0 + # via -r helm-chart/images/binderhub/../../../requirements.txt +google-api-core==2.19.1 # via # google-cloud-appengine-logging # google-cloud-core # google-cloud-logging -google-auth==2.29.0 +google-auth==2.32.0 # via # google-api-core # google-cloud-appengine-logging # google-cloud-core # google-cloud-logging # kubernetes -google-cloud-appengine-logging==1.4.3 +google-cloud-appengine-logging==1.4.4 # via google-cloud-logging google-cloud-audit-log==0.2.5 # via google-cloud-logging google-cloud-core==2.4.1 # via google-cloud-logging google-cloud-logging==3.10.0 - # via -r requirements.in -googleapis-common-protos[grpc]==1.63.0 + # via -r helm-chart/images/binderhub/requirements.in +googleapis-common-protos==1.63.2 # via # google-api-core # google-cloud-audit-log @@ -58,39 +58,39 @@ googleapis-common-protos[grpc]==1.63.0 # grpcio-status greenlet==3.0.3 # via sqlalchemy -grpc-google-iam-v1==0.13.0 
+grpc-google-iam-v1==0.13.1 # via google-cloud-logging -grpcio==1.62.1 +grpcio==1.65.0 # via # google-api-core # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status -grpcio-status==1.62.1 +grpcio-status==1.62.2 # via google-api-core idna==3.7 # via requests jinja2==3.1.4 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyterhub -jsonschema==4.21.1 +jsonschema==4.23.0 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyter-telemetry jsonschema-specifications==2023.12.1 # via jsonschema jupyter-telemetry==0.1.0 # via jupyterhub -jupyterhub==4.1.3 +jupyterhub==4.1.5 # via - # -r ../../../requirements.txt - # -r requirements.in + # -r helm-chart/images/binderhub/../../../requirements.txt + # -r helm-chart/images/binderhub/requirements.in kubernetes==9.0.1 # via - # -r ../../../requirements.txt - # -r requirements.in -mako==1.3.2 + # -r helm-chart/images/binderhub/../../../requirements.txt + # -r helm-chart/images/binderhub/requirements.in +mako==1.3.5 # via alembic markupsafe==2.1.5 # via @@ -100,17 +100,15 @@ oauthlib==3.2.2 # via # jupyterhub # requests-oauthlib -packaging==24.0 - # via - # docker - # jupyterhub +packaging==24.1 + # via jupyterhub pamela==1.1.0 # via jupyterhub prometheus-client==0.20.0 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyterhub -proto-plus==1.23.0 +proto-plus==1.24.0 # via # google-api-core # google-cloud-appengine-logging @@ -131,12 +129,12 @@ pyasn1==0.6.0 # rsa pyasn1-modules==0.4.0 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi pycurl==7.45.3 - # via -r requirements.in + # via -r helm-chart/images/binderhub/requirements.in pyjwt==2.8.0 - # via -r ../../../requirements.txt + # via -r helm-chart/images/binderhub/../../../requirements.txt pyopenssl==24.1.0 # via certipy python-dateutil==2.9.0.post0 @@ -145,15 +143,15 @@ 
python-dateutil==2.9.0.post0 # kubernetes python-json-logger==2.0.7 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyter-telemetry pyyaml==6.0.1 # via kubernetes -referencing==0.34.0 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -requests==2.31.0 +requests==2.32.3 # via # docker # google-api-core @@ -162,7 +160,7 @@ requests==2.31.0 # requests-oauthlib requests-oauthlib==2.0.0 # via kubernetes -rpds-py==0.18.0 +rpds-py==0.19.0 # via # jsonschema # referencing @@ -176,20 +174,20 @@ six==1.16.0 # via # kubernetes # python-dateutil -sqlalchemy==2.0.29 +sqlalchemy==2.0.31 # via # alembic # jupyterhub tornado==6.4.1 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyterhub -traitlets==5.14.2 +traitlets==5.14.3 # via - # -r ../../../requirements.txt + # -r helm-chart/images/binderhub/../../../requirements.txt # jupyter-telemetry # jupyterhub -typing-extensions==4.10.0 +typing-extensions==4.12.2 # via # alembic # sqlalchemy @@ -198,8 +196,9 @@ urllib3==2.2.2 # docker # kubernetes # requests -websocket-client==1.7.0 +websocket-client==1.8.0 # via kubernetes # The following packages are considered to be unsafe in a requirements file: -# setuptools +setuptools==70.3.0 + # via kubernetes