Skip to content

Commit

Permalink
Switch back to using DOI as persistent_id
Browse files Browse the repository at this point in the history
  • Loading branch information
yuvipanda committed Dec 19, 2024
1 parent 60c0d70 commit e48f5b7
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions repo2docker/contentproviders/dataverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,6 @@ def detect(self, spec, ref=None, extra_args=None):
if host is None:
return

# Used only for content_id
self.url = url

# At this point, we *know* this is a dataverse URL, because:
# 1. The DOI resolved to a particular host (if using DOI)
# 2. The host is in the list of known dataverse installations
Expand Down Expand Up @@ -171,6 +168,10 @@ def get_datafiles(self, url: str) -> List[dict]:
# We already handled 404, raise error for everything else
resp.raise_for_status()

# We know the exact persistent_id of the dataset we fetched now
# Save it for use as content_id
self.persistent_id = persistent_id

data = resp.json()["data"]

return data["latestVersion"]["files"]
Expand Down Expand Up @@ -212,5 +213,9 @@ def fetch(self, spec, output_dir, yield_output=False):

@property
def content_id(self):
"""The Dataverse persistent identifier."""
return hashlib.sha256(self.url.encode()).hexdigest()
"""
The Dataverse persistent identifier.
Only valid if called after a successful fetch
"""
return self.persistent_id

0 comments on commit e48f5b7

Please sign in to comment.