diff --git a/mardi_importer/mardi_importer/publications/ZenodoResource.py b/mardi_importer/mardi_importer/publications/ZenodoResource.py index 14af717..4d8a0a9 100644 --- a/mardi_importer/mardi_importer/publications/ZenodoResource.py +++ b/mardi_importer/mardi_importer/publications/ZenodoResource.py @@ -9,12 +9,14 @@ from typing import Dict, List log = logging.getLogger('CRANlogger') +CLEANR = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});') # used to parse out html tags @dataclass class ZenodoResource(): api: MardiIntegrator zenodo_id: str title: str = None + _description: str = None _publication_date: str = None _authors: List[Author] = field(default_factory=list) _resource_type: str = None @@ -58,6 +60,19 @@ def __post_init__(self): self._authors.append(author) return self.QID + @property + def description(self): + desc_long = "" + if "description" in self.metadata.keys(): + desc_long = self.metadata["description"] + desc_long = re.sub(CLEANR, '', desc_long) # parse out html tags from the description + desc_long = re.sub(r'\n|\\N|\t|\\T', ' ', desc_long) # parse out tabs and new lines + desc_long = re.sub(r'^\s+|\s+$', '', desc_long) # parse out leading and trailing white space + if re.match("\w+", desc_long): + self._description = desc_long + return self._description + + @property def publication_date(self): if not self._publication_date: @@ -162,20 +177,7 @@ def update(self): new_item.add_claim("wdt:P275", "wd:Q334661") return new_item.write() - - def update2(self): - - self.item = self.api.item.get(entity_id=self.QID) - - self.insert_claims() - self.item.write() - - if self.QID: - print(f"zenodo item with ID {self.QID} has been updated.") - return self.QID - else: - print(f"zenodo item could not be updated.") - return None + def create(self, update = False): @@ -190,18 +192,19 @@ def create(self, update = False): if self.title: item.labels.set(language="en", value=self.title) - # Add description and instance information + if self.resource_type and self.resource_type != "wd:Q37866906": - item.descriptions.set( - language="en", - value=f"{self.metadata['resource_type']['title']} published at Zenodo repository" - ) + desc = f"{self.metadata['resource_type']['title']} published at Zenodo repository. " item.add_claim('wdt:P31',self.resource_type) else: - item.descriptions.set( - language="en", - value="Resource published at Zenodo repository" - ) + desc = "Resource published at Zenodo repository. " + + item.descriptions.set(language="en", value=desc) + + if self.description: + prop_nr = self.api.get_local_id_by_label("description", "property") + item.add_claim(prop_nr, self.description) + # Publication date if self.publication_date: diff --git a/mardi_importer/mardi_importer/zenodo/new_entities.json b/mardi_importer/mardi_importer/zenodo/new_entities.json index 6038f27..f4613a3 100644 --- a/mardi_importer/mardi_importer/zenodo/new_entities.json +++ b/mardi_importer/mardi_importer/zenodo/new_entities.json @@ -14,6 +14,11 @@ "label": "Internal Project ID", "description": "An identifier for a project, e.g. from a community such as MATH+ or SFB1234", "datatype": "wikibase-item" + }, + { + "label": "description", + "description": "long description of an item", + "datatype": "string" } ], "items": [