Skip to content

Commit

Permalink
CMR-10163: Implement Free text search for collection search (#352)
Browse files Browse the repository at this point in the history
* CMR-10163: Adds support for Free Text Search

* CMR-10163: Fixes lint warnings

* CMR-10163: Adds errors for invalid keyword search

* CMR-10163: Adds missing codecov test

* CMR-10163: Fixes lint error

* CMR-10163: Addresses PR comment
  • Loading branch information
dmistry1 authored Oct 2, 2024
1 parent 70f1ca3 commit 55915b0
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 8 deletions.
131 changes: 131 additions & 0 deletions src/__tests__/providerCollection.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,137 @@ describe("GET /:provider/collections", () => {
expect(body.collections[1].id).to.equal(mockCollections[1].id);
});
});

describe("Free text parameter", () => {
  // Error text expected in the response body whenever the `q` value is rejected.
  const invalidFreeTextMessage =
    "Search query must be either a single keyword or a single phrase enclosed in double quotes.";

  // Stub the provider lookup so that the TEST provider is known to the app.
  const stubTestProvider = () =>
    sandbox
      .stub(Providers, "getProviders")
      .resolves([null, [{ "provider-id": "TEST", "short-name": "TEST" }]]);

  // GET the TEST provider's collections, sending `freeText` as the `q` query parameter.
  const searchCollections = (freeText: string) =>
    request(app).get("/stac/TEST/collections").query({ q: freeText });

  // Registers a case where the stubbed collection lookup yields only the Landsat collection.
  const itReturnsOnlyLandsat = (title: string, freeText: string) => {
    it(title, async () => {
      const generated = generateSTACCollections(3);
      generated[0].title = "Landsat 8 Collection";
      generated[1].title = "Sentinel-2 Collection";
      generated[2].title = "MODIS Collection";

      stubTestProvider();

      sandbox.stub(Collections, "getCollections").resolves({
        count: 1,
        cursor: null,
        items: [generated[0]], // Only return the Landsat collection
      });

      const response = await searchCollections(freeText);

      expect(response.statusCode).to.equal(200);
      expect(response.body.collections).to.have.lengthOf(1);
      expect(response.body.collections[0].title).to.equal("Landsat 8 Collection");
    });
  };

  // Registers a case where the `q` value must be rejected with a 400 and the validation message.
  const itRejectsFreeText = (freeText: string) => {
    it("should return 400 for invalid free text query", async () => {
      stubTestProvider();

      const response = await searchCollections(freeText);

      expect(response.statusCode).to.equal(400);
      expect(response.body).to.have.property("errors");
      expect(response.body.errors).to.include(invalidFreeTextMessage);
    });
  };

  describe("given a matching free text query", () => {
    itReturnsOnlyLandsat("should return collections matching the free text search", "Landsat");
  });

  describe("given a free text query without matching collection", () => {
    it("should return an empty result for non-matching free text search", async () => {
      stubTestProvider();

      sandbox.stub(Collections, "getCollections").resolves({
        count: 0,
        cursor: null,
        items: [],
      });

      const response = await searchCollections("NonExistentCollection");

      expect(response.statusCode).to.equal(200);
      expect(response.body.collections).to.have.lengthOf(0);
    });
  });

  describe("given a matching keyword phrase", () => {
    itReturnsOnlyLandsat(
      "should return collections matching the keyword phrase",
      '"Landsat 8 Collection"'
    );
  });

  describe("given a free text query with a keyword and keyword phrase", () => {
    itRejectsFreeText('"Earth Science" Climate');
  });

  describe("given a free text query with unmatched quotes", () => {
    itRejectsFreeText('"Earth Science');
  });

  describe("given a free text query with multiple keyword phrases", () => {
    itRejectsFreeText('"Earth Science" "Climate Change"');
  });
});
});

describe("POST /:provider/collections", () => {
Expand Down
4 changes: 4 additions & 0 deletions src/domains/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ export const getCollections = async (
count,
items: collections,
} = await paginateQuery(collectionsQuery, params, opts, collectionHandler);

return { cursor, count, items: collections as STACCollection[] };
};

Expand All @@ -257,6 +258,7 @@ export const getCollections = async (
*/
export const collectionToId = (collection: { shortName: string; version?: string | null }) => {
  // Without a usable version (missing, null or empty) the short name alone is the id.
  if (!collection.version) {
    return collection.shortName;
  }

  // Otherwise the id is the short name and version joined by an underscore.
  return `${collection.shortName}_${collection.version}`;
};

Expand Down Expand Up @@ -298,6 +300,7 @@ export const getCollectionIds = async (
count,
items: collectionIds,
} = await paginateQuery(collectionIdsQuery, params, opts, collectionIdsHandler);

return { cursor, count, items: collectionIds as { id: string; title: string }[] };
};

Expand All @@ -316,5 +319,6 @@ export const getAllCollectionIds = async (
items: { id: string; title: string }[];
}> => {
params.limit = MAX_SIGNED_INTEGER;

return await getCollectionIds(params, opts);
};
1 change: 1 addition & 0 deletions src/domains/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export const conformance = [
"http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/oas30",
"http://www.opengis.net/spec/ogcapi-features-1/1.0/conf/geojson",
"https://api.stacspec.org/v1.0.0-rc.2/collection-search",
"https://api.stacspec.org/v1.0.0-rc.2/collection-search#free-text",
"http://www.opengis.net/spec/ogcapi-common-1/1.0/conf/simple-query",
];

Expand Down
12 changes: 7 additions & 5 deletions src/domains/stac.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ const idsQuery = (req: Request, query: StacQuery) => {

const cursorQuery = (_req: Request, query: StacQuery) => {
  // The cursor is forwarded untouched; the request object is not consulted.
  const { cursor } = query;

  return { cursor };
};

/**
 * Convert free text STAC query term (`q`) to GraphQL query term (`keyword`).
 */
const freeTextQuery = (_req: Request, query: StacQuery) => {
  // The value is forwarded untouched; the request object is not consulted.
  const { q: keyword } = query;

  return { keyword };
};

/**
* Convert bbox STAC query term to GraphQL query term.
*/
Expand Down Expand Up @@ -455,15 +456,16 @@ export const buildQuery = async (req: Request) => {
const query = mergeMaybe(req.query, req.body);

const queryBuilders = [
idsQuery,
collectionsQuery,
bboxQuery,
intersectsQuery,
cloudCoverQuery,
collectionsQuery,
cursorQuery,
freeTextQuery,
idsQuery,
intersectsQuery,
limitQuery,
temporalQuery,
sortKeyQuery,
cursorQuery,
temporalQuery,
];

return await queryBuilders.reduce(
Expand Down
24 changes: 23 additions & 1 deletion src/middleware/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,24 @@ const validBbox = (bbox: string | number[]) => {
);
};

/**
 * Validate the free text (`q`) search parameter.
 *
 * Accepted forms:
 * - one or more unquoted keywords separated by whitespace, e.g. `alpha` or `alpha beta`
 * - exactly one non-empty phrase enclosed in double quotes, e.g. `"alpha beta"`
 *
 * Everything else is rejected: keywords mixed with a phrase, several phrases,
 * unbalanced quotes, an empty phrase (`""`), an empty string, leading/trailing
 * whitespace, or a value that is not a string at runtime.
 *
 * @param freeText - value of the `q` query parameter
 * @returns `true` when the value is a supported free text query, otherwise `false`
 */
const validFreeText = (freeText: string): boolean => {
  // RegExp.prototype.test stringifies its argument, so a non-string value
  // (e.g. an array ["a", "b"] -> "a,b", as may happen when `q` is repeated in the
  // query string) would otherwise be reported as valid.
  if (typeof freeText !== "string") {
    return false;
  }

  // Whitespace-separated run of tokens, none of which contains a double quote.
  const keywordsPattern = /^[^\s"]+(\s+[^\s"]+)*$/;

  // A single phrase: opening quote, at least one non-quote character, closing quote.
  const phrasePattern = /^"[^"]+"$/;

  return keywordsPattern.test(freeText) || phrasePattern.test(freeText);
};

const validateQueryTerms = (query: StacQuery) => {
const { bbox, intersects, datetime, limit: strLimit } = query;
const { bbox, intersects, datetime, limit: strLimit, q: freeText } = query;

const limit = Number.isNaN(Number(strLimit)) ? null : Number(strLimit);

Expand All @@ -238,6 +254,12 @@ const validateQueryTerms = (query: StacQuery) => {
"Query param datetime does not match a valid date format. Please use RFC3339 or ISO8601 formatted datetime strings."
);
}

if (freeText && !validFreeText(freeText)) {
return new InvalidParameterError(
"Search query must be either a single keyword or a single phrase enclosed in double quotes."
);
}
};

/**
Expand Down
5 changes: 3 additions & 2 deletions src/models/GraphQLModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ export type GranulesInput = GraphQLInput & {

export type CollectionsInput = GraphQLInput & {
  // filtering (members kept in alphabetical order, each declared exactly once)
  cloudHosted?: boolean;
  conceptIds?: string[];
  entryId?: string[];
  hasGranules?: boolean;
  includeFacets?: string;
  // free text search term
  keyword?: string;
  providers?: string[];
};

export type FacetFilter = {
Expand Down
1 change: 1 addition & 0 deletions src/models/StacModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export type StacQuery = {
query?: {
[key: string]: PropertyQuery;
};
q?: string; //query for free text search
};

export type StacExtension = {
Expand Down

0 comments on commit 55915b0

Please sign in to comment.