From ec9fc17b4a99c96d4008a79d8bdafa6f61afc80b Mon Sep 17 00:00:00 2001 From: Liam Raven Date: Tue, 14 Apr 2020 18:05:02 +1000 Subject: [PATCH] feat: support sourcing from buckets with over 1000 objects (#20) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adds ContinuationToken and extra error handling * updates list objects error handling Co-Authored-By: Robin Métral * updates empty bucket error handling Co-Authored-By: Robin Métral * adds types for getS3ListObjects params Co-Authored-By: Robin Métral * adds types for listAllS3Items params Co-Authored-By: Robin Métral * use optional chaining operator for contents check Co-Authored-By: Robin Métral * uses ObjectType for allS3Items Co-Authored-By: Robin Métral * feat: adds suggested changes from PR to use ContinuationToken * fix: runs prettier to correct indent * fix: adds type back to allBucketsObjects * corrects type on allBucketsObjects Co-Authored-By: Robin Métral * Corrects check for data.Contents Co-Authored-By: Robin Métral Co-authored-by: Robin Métral --- src/gatsby-node.ts | 72 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/src/gatsby-node.ts b/src/gatsby-node.ts index 7e00000d..6ba5dd2b 100644 --- a/src/gatsby-node.ts +++ b/src/gatsby-node.ts @@ -28,34 +28,62 @@ export async function sourceNodes( // get objects const s3 = new AWS.S3(); - const listObjects = async bucket => { - // todo improve this call - // see https://stackoverflow.com/a/49888947 - const response = await s3 - .listObjectsV2({ - Bucket: bucket - // todo handle continuation token - // ContinuationToken: token, - }) - .promise(); - - // add bucket key - const objects = response.Contents?.reduce((acc: ObjectType[], cur) => { - const object: ObjectType = { ...cur, Bucket: bucket }; - acc.push(object); - return acc; - }, []); - - return objects; + const getS3ListObjects = async (params: { + Bucket: string; + ContinuationToken?: string; + }) => { + return await s3 + .listObjectsV2(params) + .promise() + .catch(error => { + reporter.error( + `Error listing S3 objects on bucket "${params.Bucket}": ${error}` + ); + }); + }; + + const listAllS3Objects = async (bucket: string) => { + const allS3Objects: ObjectType[] = []; + + const data = await getS3ListObjects({ Bucket: bucket }); + + if (data && data.Contents) { + data.Contents.forEach(object => { + allS3Objects.push({ ...object, Bucket: bucket }); + }); + } else { + reporter.error( + `Error processing objects from bucket "${bucket}". Is it empty?` + ); + } + + let nextToken = data && data.IsTruncated && data.NextContinuationToken; + + while (nextToken) { + const data = await getS3ListObjects({ + Bucket: bucket, + ContinuationToken: nextToken + }); + + if (data && data.Contents) { + data.Contents.forEach(object => { + allS3Objects.push({ ...object, Bucket: bucket }); + }); + } + nextToken = data && data.IsTruncated && data.NextContinuationToken; + } + + return allS3Objects; }; try { - let objects: Array = await Promise.all( - buckets.map(bucket => listObjects(bucket)) + const allBucketsObjects: ObjectType[][] = await Promise.all( + buckets.map(bucket => listAllS3Objects(bucket)) ); + // flatten objects // flat() is not supported in node 10 - objects = [].concat(...objects); + const objects = allBucketsObjects.reduce((acc, val) => acc.concat(val), []); // create file nodes // todo touch nodes if they exist already