Skip to content

Commit 8602eff

Browse files
committed
metadata export for drafts #11305
Drafts are exported on-the-fly rather than being cached.
1 parent e3bc7cf commit 8602eff

File tree

9 files changed

+299
-43
lines changed

9 files changed

+299
-43
lines changed

src/main/java/edu/harvard/iq/dataverse/DatasetPage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5984,7 +5984,7 @@ public String getCroissant() {
59845984
if (isThisLatestReleasedVersion()) {
59855985
final String CROISSANT_SCHEMA_NAME = "croissant";
59865986
ExportService instance = ExportService.getInstance();
5987-
String croissant = instance.getExportAsString(dataset, CROISSANT_SCHEMA_NAME);
5987+
String croissant = instance.getLatestPublishedAsString(dataset, CROISSANT_SCHEMA_NAME);
59885988
if (croissant != null && !croissant.isEmpty()) {
59895989
logger.fine("Returning cached CROISSANT.");
59905990
return croissant;
@@ -5996,7 +5996,7 @@ public String getCroissant() {
59965996
public String getJsonLd() {
59975997
if (isThisLatestReleasedVersion()) {
59985998
ExportService instance = ExportService.getInstance();
5999-
String jsonLd = instance.getExportAsString(dataset, SchemaDotOrgExporter.NAME);
5999+
String jsonLd = instance.getLatestPublishedAsString(dataset, SchemaDotOrgExporter.NAME);
60006000
if (jsonLd != null) {
60016001
logger.fine("Returning cached schema.org JSON-LD.");
60026002
return jsonLd;

src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1876,17 +1876,14 @@ public String getPublicationDateAsString() {
18761876
// one metadata export in a given format per dataset (it uses the current
18771877
// released (published) version). This JSON fragment is generated for a
18781878
// specific released version - and we can have multiple released versions.
1879+
// (A JSON fragment is generated for drafts as well. -- P.D.)
18791880
// So something will need to be modified to accommodate this. -- L.A.
18801881
/**
18811882
* We call the export format "Schema.org JSON-LD" and extensive Javadoc can
18821883
* be found in {@link edu.harvard.iq.dataverse.export.SchemaDotOrgExporter}.
18831884
*/
18841885
public String getJsonLd() {
18851886
// We show published datasets only for "datePublished" field below.
1886-
if (!this.isPublished()) {
1887-
return "";
1888-
}
1889-
18901887
if (jsonLd != null) {
18911888
return jsonLd;
18921889
}
@@ -1975,7 +1972,12 @@ public String getJsonLd() {
19751972
* was modified within a DataFeed."
19761973
*/
19771974
job.add("dateModified", this.getPublicationDateAsString());
1978-
job.add("version", this.getVersionNumber().toString());
1975+
if (this.isPublished()) {
1976+
job.add("version", this.getVersionNumber().toString());
1977+
} else {
1978+
// This will show "DRAFT" for drafts.
1979+
job.add("version", this.getFriendlyVersionNumber());
1980+
}
19791981

19801982
String description = this.getDescriptionsPlainTextTruncated();
19811983
job.add("description", description);

src/main/java/edu/harvard/iq/dataverse/api/Datasets.java

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -230,31 +230,39 @@ public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id"
230230
return ok(jsonbuilder.add("latestVersion", (latest != null) ? json(latest, true) : null));
231231
}, getRequestUser(crc));
232232
}
233-
234-
// This API call should, ideally, call findUserOrDie() and the GetDatasetCommand
235-
// to obtain the dataset that we are trying to export - which would handle
236-
// Auth in the process... For now, Auth isn't necessary - since export ONLY
237-
// WORKS on published datasets, which are open to the world. -- L.A. 4.5
233+
238234
@GET
235+
@AuthRequired
239236
@Path("/export")
240237
@Produces({"application/xml", "application/json", "application/html", "application/ld+json", "*/*" })
241-
public Response exportDataset(@QueryParam("persistentId") String persistentId, @QueryParam("exporter") String exporter, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
238+
public Response exportDataset(@Context ContainerRequestContext crc, @QueryParam("persistentId") String persistentId,
239+
@QueryParam("version") String versionId, @QueryParam("exporter") String exporter,
240+
@Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
242241

243242
try {
244243
Dataset dataset = datasetService.findByGlobalId(persistentId);
245244
if (dataset == null) {
246245
return error(Response.Status.NOT_FOUND, "A dataset with the persistentId " + persistentId + " could not be found.");
247246
}
248-
247+
248+
DataverseRequest req = createDataverseRequest(getRequestUser(crc));
249+
String versionToLookUp = DS_VERSION_LATEST_PUBLISHED;
250+
if (versionId != null) {
251+
versionToLookUp = versionId;
252+
}
253+
DatasetVersion datasetVersion = getDatasetVersionOrDie(req, versionToLookUp, dataset, uriInfo, headers);
254+
249255
ExportService instance = ExportService.getInstance();
250-
251-
InputStream is = instance.getExport(dataset, exporter);
252-
256+
257+
InputStream is = instance.getExport(datasetVersion, exporter);
258+
253259
String mediaType = instance.getMediaType(exporter);
254-
//Export is only possible for released (non-draft) dataset versions so we can log without checking to see if this is a request for a draft
255-
MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset);
256-
mdcLogService.logEntry(entry);
257-
260+
261+
if (datasetVersion.isReleased()) {
262+
MakeDataCountLoggingServiceBean.MakeDataCountEntry entry = new MakeDataCountEntry(uriInfo, headers, dvRequestService, dataset);
263+
mdcLogService.logEntry(entry);
264+
}
265+
258266
return Response.ok()
259267
.entity(is)
260268
.type(mediaType).

src/main/java/edu/harvard/iq/dataverse/export/ExportService.java

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.util.logging.Level;
4848
import java.util.logging.Logger;
4949
import jakarta.ws.rs.core.MediaType;
50+
import java.io.FileInputStream;
5051

5152
import org.apache.commons.io.IOUtils;
5253

@@ -127,11 +128,39 @@ public List<String[]> getExportersLabels() {
127128
return retList;
128129
}
129130

130-
public InputStream getExport(Dataset dataset, String formatName) throws ExportException, IOException {
131-
// first we will try to locate an already existing, cached export
132-
// for this format:
133-
134-
InputStream exportInputStream = getCachedExportFormat(dataset, formatName);
131+
public InputStream getExport(DatasetVersion datasetVersion, String formatName) throws ExportException, IOException {
132+
133+
Dataset dataset = datasetVersion.getDataset();
134+
InputStream exportInputStream = null;
135+
136+
if (datasetVersion.isDraft()) {
137+
// For drafts we create the export on the fly rather than caching.
138+
Exporter exporter = exporterMap.get(formatName);
139+
if (exporter != null) {
140+
File tempFile = File.createTempFile("tempFileToExport", ".tmp");
141+
try (OutputStream outputStream = new FileOutputStream(tempFile)) {
142+
// getPrerequisiteFormatName logic copied from exportFormat()
143+
if (exporter.getPrerequisiteFormatName().isPresent()) {
144+
String prereqFormatName = exporter.getPrerequisiteFormatName().get();
145+
try (InputStream preReqStream = getExport(datasetVersion, prereqFormatName)) {
146+
InternalExportDataProvider dataProvider = new InternalExportDataProvider(datasetVersion, preReqStream);
147+
exporter.exportDataset(dataProvider, outputStream);
148+
} catch (IOException ioe) {
149+
throw new ExportException("Could not get prerequisite " + prereqFormatName + " to create " + formatName + " export for dataset " + dataset.getId(), ioe);
150+
}
151+
} else {
152+
InternalExportDataProvider dataProvider = new InternalExportDataProvider(datasetVersion);
153+
exporter.exportDataset(dataProvider, outputStream);
154+
}
155+
return new FileInputStream(tempFile);
156+
} finally {
157+
boolean tempFileDeleted = tempFile.delete();
158+
}
159+
}
160+
} else {
161+
// for non-drafts (published versions) we try to locate an already existing, cached export
162+
exportInputStream = getCachedExportFormat(dataset, formatName);
163+
}
135164

136165
// The DDI export is limited for restricted and actively embargoed files (no
137166
// data/file description sections), and when an embargo ends, we need to refresh
@@ -207,11 +236,18 @@ public InputStream getExport(Dataset dataset, String formatName) throws ExportEx
207236

208237
}
209238

210-
public String getExportAsString(Dataset dataset, String formatName) {
239+
public String getLatestPublishedAsString(Dataset dataset, String formatName) {
240+
if (dataset == null) {
241+
return null;
242+
}
243+
DatasetVersion releasedVersion = dataset.getReleasedVersion();
244+
if (releasedVersion == null) {
245+
return null;
246+
}
211247
InputStream inputStream = null;
212248
InputStreamReader inp = null;
213249
try {
214-
inputStream = getExport(dataset, formatName);
250+
inputStream = getExport(releasedVersion, formatName);
215251
if (inputStream != null) {
216252
inp = new InputStreamReader(inputStream, "UTF8");
217253
BufferedReader br = new BufferedReader(inp);
@@ -238,8 +274,9 @@ public String getExportAsString(Dataset dataset, String formatName) {
238274
}
239275

240276
// This method goes through all the Exporters and calls
241-
// the "chacheExport()" method that will save the produced output
277+
// the "cacheExport()" method that will save the produced output
242278
// in a file in the dataset directory, on each Exporter available.
279+
// This is only for the latest published version.
243280
public void exportAllFormats(Dataset dataset) throws ExportException {
244281
try {
245282
clearAllCachedFormats(dataset);
@@ -258,7 +295,7 @@ public void exportAllFormats(Dataset dataset) throws ExportException {
258295
String formatName = e.getFormatName();
259296
if(e.getPrerequisiteFormatName().isPresent()) {
260297
String prereqFormatName = e.getPrerequisiteFormatName().get();
261-
try (InputStream preReqStream = getExport(dataset, prereqFormatName)) {
298+
try (InputStream preReqStream = getExport(dataset.getReleasedVersion(), prereqFormatName)) {
262299
dataProvider.setPrerequisiteInputStream(preReqStream);
263300
cacheExport(dataset, dataProvider, formatName, e);
264301
dataProvider.setPrerequisiteInputStream(null);
@@ -313,7 +350,7 @@ public void exportFormat(Dataset dataset, String formatName) throws ExportExcept
313350
}
314351
if(e.getPrerequisiteFormatName().isPresent()) {
315352
String prereqFormatName = e.getPrerequisiteFormatName().get();
316-
try (InputStream preReqStream = getExport(dataset, prereqFormatName)) {
353+
try (InputStream preReqStream = getExport(releasedVersion, prereqFormatName)) {
317354
InternalExportDataProvider dataProvider = new InternalExportDataProvider(releasedVersion, preReqStream);
318355
cacheExport(dataset, dataProvider, formatName, e);
319356
} catch (IOException ioe) {

src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,13 @@ private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDT
413413
xmlw.writeStartElement("verStmt");
414414
xmlw.writeAttribute("source","archive");
415415
xmlw.writeStartElement("version");
416-
XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10));
417-
XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString());
418-
xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString());
416+
if (datasetVersionDTO.getReleaseTime() != null) {
417+
XmlWriterUtil.writeAttribute(xmlw, "date", datasetVersionDTO.getReleaseTime().substring(0, 10));
418+
}
419+
XmlWriterUtil.writeAttribute(xmlw, "type", datasetVersionDTO.getVersionState().toString());
420+
if (datasetVersionDTO.getVersionNumber() != null) {
421+
xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString());
422+
}
419423
xmlw.writeEndElement(); // version
420424
if (!StringUtils.isBlank(datasetVersionDTO.getVersionNote())) {
421425
xmlw.writeStartElement("notes");

src/main/java/edu/harvard/iq/dataverse/harvest/server/xoai/DataverseXoaiItemRepository.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ private Metadata getDatasetMetadata(Dataset dataset, String metadataPrefix) thro
253253

254254
} else {
255255
InputStream pregeneratedMetadataStream;
256-
pregeneratedMetadataStream = ExportService.getInstance().getExport(dataset, metadataPrefix);
256+
pregeneratedMetadataStream = ExportService.getInstance().getExport(dataset.getReleasedVersion(), metadataPrefix);
257257

258258
metadata = Metadata.copyFromStream(pregeneratedMetadataStream);
259259
}

src/test/java/edu/harvard/iq/dataverse/DatasetVersionTest.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,12 @@ public void testGetJsonLd() throws ParseException {
101101
datasetVersion.setDataset(dataset);
102102
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
103103
assertEquals("", datasetVersion.getPublicationDateAsString());
104-
// Only published datasets return any JSON.
105-
assertEquals("", datasetVersion.getJsonLd());
104+
// TODO make some assertions on drafts
105+
// String jsonLdDraft = datasetVersion.getJsonLd();
106+
// logger.fine("jsonLdDraft: " + JsonUtil.prettyPrint(jsonLdDraft));
107+
// JsonReader jsonReaderDraft = Json.createReader(new StringReader(jsonLdDraft));
108+
// JsonObject objDraft = jsonReaderDraft.readObject();
109+
// assertEquals("http://schema.org", objDraft.getString("@context"));
106110
datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
107111
datasetVersion.setVersionNumber(1L);
108112
SimpleDateFormat dateFmt = new SimpleDateFormat("yyyyMMdd");
@@ -153,8 +157,6 @@ public void testGetJsonLdNonCC0License() throws ParseException {
153157
datasetVersion.setDataset(dataset);
154158
datasetVersion.setVersionState(DatasetVersion.VersionState.DRAFT);
155159
assertEquals("", datasetVersion.getPublicationDateAsString());
156-
// Only published datasets return any JSON.
157-
assertEquals("", datasetVersion.getJsonLd());
158160
datasetVersion.setVersionState(DatasetVersion.VersionState.RELEASED);
159161
datasetVersion.setVersionNumber(1L);
160162
datasetVersion.setMinorVersionNumber(0L);

0 commit comments

Comments
 (0)