diff --git a/docs/api.md b/docs/api.md index 028eeb5cf..2d932cd3f 100644 --- a/docs/api.md +++ b/docs/api.md @@ -32,6 +32,11 @@ Finally, **system information and configuration** is available via a set of spec Here major and breaking changes to the API are listed by version. +### ODK Central v2023.3 + +**Changed**: +- ETag support has been added for [Download Dataset](#reference/datasets/download-dataset/download-dataset) and [Download Form Attachment](#reference/forms/individual-form/downloading-a-form-attachment). + ### ODK Central v2023.2 **Added**: @@ -1368,6 +1373,8 @@ This endpoint allows you to fetch the list of expected attachment files, and wil To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. +This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. + + Parameters + xmlFormId: `simple` (string, required) - The `xmlFormId` of the Form being referenced. @@ -1376,11 +1383,14 @@ To download a single file, use this endpoint. The appropriate `Content-Dispositi Content-Type: {the MIME type of the attachment file itself} Content-Disposition: attachment; filename={the file's name} + ETag: content version identifier + Body (binary data) ++ Response 304 + + Response 403 (application/json) + Attributes (Error 403) @@ -1619,6 +1629,8 @@ To upload a binary to an expected file slot, `POST` the binary to its endpoint. As of version 2022.3, if there is already a Dataset linked to this attachment, it will be unlinked and replaced with the uploaded file. +This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. + + Parameters + xmlFormId: `simple` (string, required) - The `xmlFormId` of the Form being referenced. + filename: `people.csv` (string, required) - The name of that attachment. @@ -1631,6 +1643,8 @@ As of version 2022.3, if there is already a Dataset linked to this attachment, i + Response 200 (application/json) + Attributes (Success) ++ Response 304 + + Response 403 (application/json) + Attributes (Error 403) @@ -1861,6 +1875,8 @@ Attachments are specific to each version of a Form. You can retrieve the attachm To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time) will be given. +This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. + + Parameters + version: `one` (string, required) - The `version` of the Form version being referenced. Pass `___` to indicate a blank `version`. @@ -1869,11 +1885,14 @@ To download a single file, use this endpoint. The appropriate `Content-Dispositi Content-Type: {the MIME type of the attachment file itself} Content-Disposition: attachment; filename={the file's name} + ETag: content version identifier + Body (binary data) ++ Response 304 + + Response 403 (application/json) + Attributes (Error 403) @@ -3013,6 +3032,8 @@ Returns the metadata of a Dataset including properties and forms that create and Datasets (collections of Entities) can be used as Attachments in other Forms, but they can also be downloaded directly as a CSV file. The CSV format matches what is expected for a [select question](https://docs.getodk.org/form-datasets/#building-selects-from-csv-files) with columns for `name`, `label,` and properties. In the case of Datasets, the `name` column is the Entity's UUID, the `label` column is the human-readable Entity label populated in the Submission, and the properties are the full set of Dataset Properties for that Dataset. If any Property for an given Entity is blank (e.g. it was not captured by that Form or was left blank), that field of the CSV is blank. +This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in the ETag header. If you pass that value in the If-None-Match header of a subsequent request, then if the Dataset has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the new data. + Note that as of Version 2022.3 we do not guarantee the order of the Dataset Property columns. ``` @@ -3032,11 +3053,14 @@ name,label,first_name,last_name,age,favorite_color Content-Type: text/csv Content-Disposition: attachment; filename={the dataset name}.csv + ETag: content version identifier + Body (binary data) ++ Response 304 + + Response 403 (application/json) + Attributes (Error 403) diff --git a/lib/model/query/datasets.js b/lib/model/query/datasets.js index de5766814..279bb4556 100644 --- a/lib/model/query/datasets.js +++ b/lib/model/query/datasets.js @@ -233,6 +233,12 @@ const get = (projectId, name, publishedOnly = false, extended = false) => ({ may return _get(maybeOne, options.withCondition({ projectId, name }), publishedOnly); }; +// Get by dataset ID - return only published dataset +const getById = (id, extended = false) => ({ maybeOne }) => { + const options = extended ? QueryOptions.extended : QueryOptions.none; + return _get(maybeOne, options.withCondition({ id }), true); +}; + //////////////////////////////////////////////////////////////////////////////// @@ -384,7 +390,7 @@ const getDiff = (projectId, xmlFormId, forDraft) => ({ all }) => all(sql` module.exports = { createOrMerge, publishIfExists, - getList, get, + getList, get, getById, getMetadata, getProperties, getFieldsByFormDefId, getDiff diff --git a/lib/resources/datasets.js b/lib/resources/datasets.js index a543f546e..e017cafec 100644 --- a/lib/resources/datasets.js +++ b/lib/resources/datasets.js @@ -10,7 +10,7 @@ const sanitize = require('sanitize-filename'); const { getOrNotFound } = require('../util/promise'); const { streamEntityCsv } = require('../data/entity'); -const { contentDisposition } = require('../util/http'); +const { contentDisposition, withEtag } = require('../util/http'); const { md5sum } = require('../util/crypto'); module.exports = (service, endpoint) => { @@ -34,20 +34,15 @@ module.exports = (service, endpoint) => { const properties = await Datasets.getProperties(dataset.id); const { lastEntity } = dataset.forApi(); - // Etag logic inspired from https://stackoverflow.com/questions/72334843/custom-computed-etag-for-express-js/72335674#72335674 - const serverEtag = `"${md5sum(lastEntity?.toISOString() ?? '1970-01-01')}"`; - const clientEtag = request.get('If-None-Match'); - if (clientEtag?.includes(serverEtag)) { // nginx weakens Etag when gzip is used, so clientEtag is like W/"4e9f0c7e9a8240..." - response.status(304); - return; - } - const entities = await Entities.streamForExport(dataset.id); - const filename = sanitize(dataset.name); - const extension = 'csv'; - response.append('Content-Disposition', contentDisposition(`${filename}.${extension}`)); - response.append('Content-Type', 'text/csv'); - response.set('ETag', serverEtag); - return streamEntityCsv(entities, properties); + const serverEtag = md5sum(lastEntity?.toISOString() ?? '1970-01-01'); + return withEtag(serverEtag, async () => { + const entities = await Entities.streamForExport(dataset.id); + const filename = sanitize(dataset.name); + const extension = 'csv'; + response.append('Content-Disposition', contentDisposition(`${filename}.${extension}`)); + response.append('Content-Type', 'text/csv'); + return streamEntityCsv(entities, properties); + }); })); }; diff --git a/lib/resources/forms.js b/lib/resources/forms.js index c7e66bb7c..f15f67f57 100644 --- a/lib/resources/forms.js +++ b/lib/resources/forms.js @@ -12,7 +12,7 @@ const { identity } = require('ramda'); const { Blob, Form } = require('../model/frames'); const { ensureDef } = require('../model/frame'); const { QueryOptions } = require('../util/db'); -const { isTrue, xml, binary, contentDisposition } = require('../util/http'); +const { isTrue, xml, binary, contentDisposition, withEtag } = require('../util/http'); const Problem = require('../util/problem'); const { sanitizeFieldsForOdata, setVersion } = require('../data/schema'); const { getOrNotFound, reject, resolve, rejectIf } = require('../util/promise'); @@ -20,6 +20,7 @@ const { success } = require('../util/http'); const { formList, formManifest } = require('../formats/openrosa'); const { noargs, isPresent, isBlank } = require('../util/util'); const { streamEntityCsv } = require('../data/entity'); +const { md5sum } = require('../util/crypto'); // excel-related util funcs/data used below: const isExcel = (contentType) => (contentType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') || (contentType === 'application/vnd.ms-excel'); @@ -272,24 +273,33 @@ module.exports = (service, endpoint) => { .then((form) => auth.canOrReject('form.read', form)) .then((form) => FormAttachments.getAllByFormDefId(form.def.id)))); - service.get(`${base}/attachments/:name`, endpoint(({ Blobs, FormAttachments, Forms, Datasets, Entities }, { params, auth }, _, response) => + service.get(`${base}/attachments/:name`, endpoint(({ Blobs, FormAttachments, Forms, Datasets, Entities }, { params, auth }, request, response) => getInstance(Forms, params) .then((form) => auth.canOrReject('form.read', form)) .then((form) => FormAttachments.getByFormDefIdAndName(form.def.id, params.name) .then(getOrNotFound) - .then((attachment) => ((attachment.blobId == null && attachment.datasetId == null) - ? reject(Problem.user.notFound()) - : (attachment.blobId != null - ? Blobs.getById(attachment.blobId) - .then(getOrNotFound) - .then((blob) => binary(blob.contentType, attachment.name, blob.content)) - : Datasets.getProperties(attachment.datasetId) - .then((properties) => Entities.streamForExport(attachment.datasetId) - .then((entities) => { - response.append('Content-Disposition', contentDisposition(`${attachment.name}`)); - response.append('Content-Type', 'text/csv'); - return streamEntityCsv(entities, properties); - })))))))); + .then(async (attachment) => { + if (attachment.blobId == null && attachment.datasetId == null) { + return reject(Problem.user.notFound()); + } else if (attachment.blobId != null) { + const blob = await Blobs.getById(attachment.blobId).then(getOrNotFound); + return withEtag(`"${blob.md5}"`, () => binary(blob.contentType, attachment.name, blob.content)); + } else { + const dataset = await Datasets.getById(attachment.datasetId, true).then(getOrNotFound); + const properties = await Datasets.getProperties(attachment.datasetId); + const { lastEntity } = dataset.forApi(); + + const serverEtag = md5sum(lastEntity?.toISOString() ?? '1970-01-01'); + + return withEtag(serverEtag, async () => { + const entities = await Entities.streamForExport(attachment.datasetId); + response.append('Content-Disposition', contentDisposition(`${attachment.name}`)); + response.append('Content-Type', 'text/csv'); + return streamEntityCsv(entities, properties); + }); + } + + })))); }; // the linter literally won't let me break this apart.. diff --git a/lib/util/http.js b/lib/util/http.js index 89c246ab9..fca2b0244 100644 --- a/lib/util/http.js +++ b/lib/util/http.js @@ -142,11 +142,26 @@ const url = (strings, ...parts) => { return result.join(''); }; +// Checks Etag in the request if it matches serverEtag then returns 304 +// Otherwise executes given function 'fn' +const withEtag = (serverEtag, fn) => (request, response) => { + + response.set('ETag', `"${serverEtag}"`); + + // Etag logic inspired from https://stackoverflow.com/questions/72334843/custom-computed-etag-for-express-js/72335674#72335674 + const clientEtag = request.get('If-None-Match'); + if (clientEtag?.includes(serverEtag)) { // nginx weakens Etag when gzip is used, so clientEtag is like W/"4e9f0c7e9a8240..." + response.status(304); + return; + } + return fn(); +}; module.exports = { isTrue, isFalse, urlPathname, serialize, success, contentType, xml, atom, json, contentDisposition, binary, redirect, - urlWithQueryParams, url + urlWithQueryParams, url, + withEtag }; diff --git a/test/integration/api/datasets.js b/test/integration/api/datasets.js index 25498a411..c6d85b0b2 100644 --- a/test/integration/api/datasets.js +++ b/test/integration/api/datasets.js @@ -897,6 +897,46 @@ describe('datasets and entities', () => { })); + it('should return 304 content not changed if ETag matches', testService(async (service, container) => { + const asAlice = await service.login('alice'); + + await asAlice.post('/v1/projects/1/forms?publish=true') + .send(testData.forms.simpleEntity) + .set('Content-Type', 'application/xml') + .expect(200); + + await asAlice.post('/v1/projects/1/forms/simpleEntity/submissions') + .send(testData.instances.simpleEntity.one) + .set('Content-Type', 'application/xml') + .expect(200); + + await asAlice.patch('/v1/projects/1/forms/simpleEntity/submissions/one') + .send({ reviewState: 'approved' }) + .expect(200); + + await exhaust(container); + + await asAlice.post('/v1/projects/1/forms?publish=true') + .send(testData.forms.withAttachments.replace(/goodone/g, 'people')) + .set('Content-Type', 'application/xml') + .expect(200); + + const result = await asAlice.get('/v1/projects/1/forms/withAttachments/attachments/people.csv') + .expect(200); + + result.text.should.be.eql( + 'name,label,first_name,age\n' + + '12345678-1234-4123-8234-123456789abc,Alice (88),Alice,88\n' + ); + + const etag = result.get('ETag'); + + await asAlice.get('/v1/projects/1/forms/withAttachments/attachments/people.csv') + .set('If-None-Match', etag) + .expect(304); + + })); + it('should override blob and link dataset', testService((service, { Forms, FormAttachments, Audits, Datasets }) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms') diff --git a/test/integration/api/forms/forms.js b/test/integration/api/forms/forms.js index d3e0e0adf..a867d448c 100644 --- a/test/integration/api/forms/forms.js +++ b/test/integration/api/forms/forms.js @@ -1085,6 +1085,70 @@ describe('api: /projects/:id/forms (create, read, update)', () => { headers['content-type'].should.equal('text/csv; charset=utf-8'); text.should.equal('test,csv\n1,2'); }))))); + + it('should return 304 content not changed if ETag matches', testService(async (service) => { + const asAlice = await service.login('alice'); + + await asAlice.post('/v1/projects/1/forms') + .send(testData.forms.withAttachments) + .set('Content-Type', 'application/xml') + .expect(200); + + await asAlice.post('/v1/projects/1/forms/withAttachments/draft/attachments/goodone.csv') + .send('test,csv\n1,2') + .set('Content-Type', 'text/csv') + .expect(200); + + await asAlice.post('/v1/projects/1/forms/withAttachments/draft/publish') + .expect(200); + + const result = await asAlice.get('/v1/projects/1/forms/withAttachments/attachments/goodone.csv') + .expect(200); + + result.text.should.be.eql( + 'test,csv\n' + + '1,2' + ); + + const etag = result.get('ETag'); + + await asAlice.get('/v1/projects/1/forms/withAttachments/attachments/goodone.csv') + .set('If-None-Match', etag) + .expect(304); + + })); + + it('should return latest content and correct ETag', testService(async (service) => { + const asAlice = await service.login('alice'); + + await asAlice.post('/v1/projects/1/forms') + .send(testData.forms.withAttachments) + .set('Content-Type', 'application/xml') + .expect(200); + + await asAlice.post('/v1/projects/1/forms/withAttachments/draft/attachments/goodone.csv') + .send('test,csv\n1,2') + .set('Content-Type', 'text/csv') + .expect(200); + + const attachmentV1 = await asAlice.get('/v1/projects/1/forms/withAttachments/draft/attachments/goodone.csv') + .expect(200); + + const etagV1 = attachmentV1.get('ETag'); + + await asAlice.post('/v1/projects/1/forms/withAttachments/draft/attachments/goodone.csv') + .send('test,csv\n1,2\n3,4') + .set('Content-Type', 'text/csv') + .expect(200); + + const attachmentV2 = await asAlice.get('/v1/projects/1/forms/withAttachments/draft/attachments/goodone.csv') + .set('If-None-Match', etagV1) + .expect(200); + + const etagV2 = attachmentV2.get('ETag'); + + etagV1.should.not.be.eql(etagV2); + })); }); }); });