Skip to content

Commit 3ba8dc7

Browse files
ciaranschutteCiaran Schutte
andauthored
multiple node with data thresholding (#912)
* fix build error * single endpoint * fix GQL type name collisions * move data mask threshold env. ensure data masking applies even if hits.total is not queried * change belowThreshold to relation object * surface api errors to axios responses * add relation logic and agg merging for network search * read data mask threshold min from config * check for valid env val * remove unused func * fix TS build errors * fix schema merge error --------- Co-authored-by: Ciaran Schutte <[email protected]>
1 parent c8c7be5 commit 3ba8dc7

24 files changed

+133
-103
lines changed

modules/server/.env.schema

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS=false
22
CONFIG_PATH=./configs
3-
DATA_MASK_THRESHOLD=
3+
DATA_MASK_MIN_THRESHOLD=
44
DEBUG=false
55
DOCUMENT_TYPE=''
66
DOWNLOAD_STREAM_BUFFER_SIZE=2000

modules/server/src/app.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ export default async function (rootPath = '') {
1414
* @param {boolean} enableAdmin
1515
* @param {boolean} enabledDocumentHits - enables including "hits" property in the GQL response
1616
*/
17-
return Arranger({
17+
return arranger({
1818
enableAdmin: ENV_CONFIG.ENABLE_ADMIN,
1919
enableDocumentHits: ENV_CONFIG.ENABLE_DOCUMENT_HITS,
2020
}).then((router) => {

modules/server/src/config/constants.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ export const ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS = stringToBool(
44
process.env.ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS,
55
);
66
export const CONFIG_FILES_PATH = process.env.CONFIG_PATH || './configs';
7-
export const DATA_MASK_THRESHOLD = process.env.DATA_MASK_THRESHOLD || Number.MAX_SAFE_INTEGER;
7+
export const DATA_MASK_MIN_THRESHOLD =
8+
process.env.DATA_MASK_MIN_THRESHOLD || Number.MAX_SAFE_INTEGER;
89
export const DEBUG_MODE = stringToBool(process.env.DEBUG);
910
export const DOCUMENT_TYPE = process.env.DOCUMENT_TYPE || '';
1011
export const DOWNLOAD_STREAM_BUFFER_SIZE =

modules/server/src/graphqlRoutes.js

Lines changed: 19 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@ import { ApolloServer } from 'apollo-server-express';
33
import { Router } from 'express';
44
import expressPlayground from 'graphql-playground-middleware-express';
55

6-
import getConfigObject, { initializeSets } from './config';
6+
import { mergeSchemas } from '@graphql-tools/schema';
7+
import getConfigObject, { ENV_CONFIG, initializeSets } from './config';
78
import { DEBUG_MODE, ES_PASS, ES_USER } from './config/constants';
89
import { ConfigProperties } from './config/types';
910
import { addMappingsToTypes, extendFields, fetchMapping } from './mapping';
1011
import { extendColumns, extendFacets, flattenMappingToFields } from './mapping/extendMapping';
11-
import { createSchemaFromNetworkConfig, mergeSchemas } from './network';
12+
import { createSchemaFromNetworkConfig } from './network';
1213
import makeSchema from './schema';
1314

1415
const getESMapping = async (esClient, index) => {
@@ -156,32 +157,13 @@ const noSchemaHandler =
156157
});
157158
};
158159

159-
const createEndpoint = async ({
160-
esClient,
161-
graphqlOptions = {},
162-
mockSchema,
163-
schema,
164-
networkSchema,
165-
}) => {
160+
const createEndpoint = async ({ esClient, graphqlOptions = {}, mockSchema, schema }) => {
166161
const mainPath = '/graphql';
167162
const mockPath = '/mock/graphql';
168163
const router = Router();
169164

170165
console.log('Starting GraphQL server:');
171166

172-
if (ENABLE_NETWORK_AGGREGATION) {
173-
/**
174-
* TODO: make available on one route
175-
*/
176-
const networkPath = '/network';
177-
const apolloNetworkServer = new ApolloServer({
178-
cache: 'bounded',
179-
schema: networkSchema,
180-
});
181-
await apolloNetworkServer.start();
182-
apolloNetworkServer.applyMiddleware({ app: router, path: networkPath });
183-
}
184-
185167
try {
186168
await router.get(
187169
mainPath,
@@ -261,10 +243,11 @@ const createEndpoint = async ({
261243
return router;
262244
};
263245

264-
export const createSchemasFromConfigs = async ({
246+
const createSchemasFromConfigs = async ({
265247
configsSource = '',
266248
enableAdmin,
267249
enableDocumentHits,
250+
enableNetworkAggregation,
268251
esClient,
269252
getServerSideFilter,
270253
graphqlOptions = {},
@@ -287,28 +270,30 @@ export const createSchemasFromConfigs = async ({
287270
types: typesWithMappings,
288271
});
289272

290-
/**
273+
const schemasToMerge = [schema];
274+
275+
/*
291276
* Federated Network Search
292277
*/
293-
if (ENABLE_NETWORK_AGGREGATION) {
294-
const { networkSchema } = await createSchemaFromNetworkConfig({
278+
if (enableNetworkAggregation) {
279+
const networkSchema = await createSchemaFromNetworkConfig({
295280
networkConfigs: configsFromFiles[ConfigProperties.NETWORK_AGGREGATION].map((config) => ({
296281
...config,
297-
/**
282+
/*
298283
* part of the gql schema is generated dynamically
299-
* in the case of the "file" field, the field name and type name are the same
300-
* it's more flexible to define it here as an additional property than to confuse functions further down the pipeline
284+
* in the case of the "file" field, the field name and gql type name are the same
301285
*/
302286
documentName: config.documentType,
303287
})),
304288
});
289+
schemasToMerge.push(networkSchema);
305290
}
306291

292+
const fullSchema = mergeSchemas({ schemas: schemasToMerge });
307293
return {
308294
...commonFields,
309295
mockSchema,
310-
schema,
311-
networkSchema,
296+
schema: fullSchema,
312297
};
313298
} catch (error) {
314299
const message = error?.message || error;
@@ -323,16 +308,18 @@ export default async ({
323308
configsSource = '',
324309
enableAdmin,
325310
enableDocumentHits,
311+
enableNetworkAggregation,
326312
esClient,
327313
getServerSideFilter,
328314
graphqlOptions = {},
329315
}) => {
330316
try {
331-
const { fieldsFromMapping, mockSchema, schema, typesWithMappings, networkSchema } =
317+
const { fieldsFromMapping, mockSchema, schema, typesWithMappings } =
332318
await createSchemasFromConfigs({
333319
configsSource,
334320
enableAdmin,
335321
enableDocumentHits,
322+
enableNetworkAggregation,
336323
esClient,
337324
getServerSideFilter,
338325
graphqlOptions,
@@ -343,7 +330,6 @@ export default async ({
343330
graphqlOptions,
344331
mockSchema,
345332
schema,
346-
networkSchema,
347333
});
348334

349335
await initializeSets({ esClient });

modules/server/src/index.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
export { default as getGraphQLRoutes, createSchemasFromConfigs } from './graphqlRoutes';
2-
export { default } from './server';
31
export { default as App } from './app';
2+
export { default as getGraphQLRoutes } from './graphqlRoutes';
3+
export { default } from './server';

modules/server/src/mapping/createConnectionResolvers.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ export type CreateConnectionResolversArgs = {
99
createStateResolvers?: boolean;
1010
enableAdmin: boolean;
1111
enableDocumentHits: boolean;
12-
dataMaskThreshold: number;
1312
getServerSideFilter?: GetServerSideFilterFn;
1413
Parallel: any;
1514
type: Record<string, any>;
@@ -20,7 +19,6 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({
2019
createStateResolvers = true,
2120
enableAdmin,
2221
enableDocumentHits,
23-
dataMaskThreshold,
2422
getServerSideFilter,
2523
Parallel,
2624
type,
@@ -30,7 +28,6 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({
3028
type,
3129
Parallel,
3230
getServerSideFilter,
33-
dataMaskThreshold,
3431
enableDocumentHits,
3532
});
3633

modules/server/src/mapping/createConnectionTypeDefs.js

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ export default ({ type, fields = '', createStateTypeDefs = true, showRecords })
1212
aggregations_filter_themselves: Boolean
1313
): ${type.name}Aggregations
1414
15-
${!showRecords ? 'dataMasking: DataMasking' : ''}
16-
1715
configs: ${createStateTypeDefs ? 'ConfigsWithState' : 'ConfigsWithoutState'}
1816
1917
hits(

modules/server/src/mapping/masking.ts

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
1+
import { ENV_CONFIG } from '@/config';
12
import { Aggregation } from './types';
23

4+
export const Relation = {
5+
eq: 'eq',
6+
gte: 'gte',
7+
} as const;
8+
export type Relation = keyof typeof Relation;
9+
310
/**
411
* This returns a total count that is less than or equal to the actual total hits in the query.
5-
* It is calculated by adding +1 for values under threshold and bucket.doc_count
12+
* It is calculated by adding +1 for values under threshold or adding bucket.doc_count amount
613
* for values greater than or equal to
714
*
815
* @param aggregation an aggregation with the most buckets which has data masking applied
@@ -18,7 +25,8 @@ const calculateHitsFromAggregation = ({
1825
return 0;
1926
}
2027
return aggregation.buckets.reduce(
21-
(totalAcc, bucket) => (bucket.belowThreshold ? totalAcc + 1 : totalAcc + bucket.doc_count),
28+
(totalAcc, bucket) =>
29+
bucket.relation === Relation.gte ? totalAcc + 1 : totalAcc + bucket.doc_count,
2230
0,
2331
);
2432
};
@@ -29,12 +37,10 @@ const calculateHitsFromAggregation = ({
2937
* 2) Find the agg with the most bucket count and data masking applied to be used in calculating hits.total
3038
*
3139
* @param aggregations - aggregations from query
32-
* @param thresholdMin - threshold value
3340
* @returns aggregations with data masking applied and hits total
3441
*/
3542
export const applyAggregationMasking = ({
3643
aggregations,
37-
thresholdMin,
3844
}: {
3945
aggregations: Record<
4046
string,
@@ -43,13 +49,16 @@ export const applyAggregationMasking = ({
4349
buckets: Array<{
4450
doc_count: number;
4551
key: string;
52+
relation: Relation;
4653
}>;
4754
}
4855
>;
49-
thresholdMin: number;
5056
}) => {
51-
// set data masked properties to one less than the configured threshold value (under threshold)
52-
const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1;
57+
const thresholdMin = ENV_CONFIG.DATA_MASK_MIN_THRESHOLD;
58+
if (thresholdMin < 1) {
59+
throw Error('DATA_MASK_MIN_THRESHOLD environment variable has to be a positive integer.');
60+
}
61+
const THRESHOLD_REPLACEMENT_VALUE = 1;
5362

5463
const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce<{
5564
aggsTotal: Record<string, Aggregation>;
@@ -59,12 +68,13 @@ export const applyAggregationMasking = ({
5968
// mask buckets if under threshold
6069
const dataMaskedBuckets = aggregation.buckets.map((bucket) =>
6170
bucket.doc_count < thresholdMin
62-
? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, belowThreshold: true }
63-
: { ...bucket, belowThreshold: false },
71+
? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, relation: Relation.gte }
72+
: { ...bucket, relation: Relation.eq },
6473
);
6574

6675
// update total hits selected agg if needed
67-
const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.belowThreshold);
76+
const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.relation === Relation.gte);
77+
// take aggregation with the most buckets that has masked data
6878
const hitsAgg =
6979
totalHitsAgg.bucketCount < aggregation.bucket_count && bucketIsMasked
7080
? { key: type, bucketCount: aggregation.bucket_count }

modules/server/src/mapping/resolvers.ts

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
import { ConfigProperties, ExtendedConfigsInterface } from '@/config/types';
22
import { GraphQLResolveInfo } from 'graphql';
33
import { get } from 'lodash';
4-
import { Context } from 'vm';
54
import { CreateConnectionResolversArgs } from './createConnectionResolvers';
65
import { applyAggregationMasking } from './masking';
76
import resolveAggregations, { aggregationsToGraphql } from './resolveAggregations';
87
import resolveHits from './resolveHits';
9-
import { Aggregation, Hits, Root } from './types';
8+
import { Aggregation, Context, Hits, Root } from './types';
109

1110
/**
1211
* Resolve hits from aggregations
@@ -27,7 +26,6 @@ const resolveHitsFromAggs =
2726
context: Context,
2827
info: GraphQLResolveInfo,
2928
) => Record<string, Aggregation>,
30-
dataMaskThreshold: number,
3129
) =>
3230
async (obj: Root, args: Hits, context: Context, info: GraphQLResolveInfo) => {
3331
/*
@@ -36,7 +34,8 @@ const resolveHitsFromAggs =
3634
*/
3735
const aggregationsPath = 'operation.selectionSet.selections[0].selectionSet.selections';
3836
const aggregationsSelectionSet = get(info, aggregationsPath, []).find(
39-
(selection) => selection.kind === 'Field' && selection.name.value === 'aggregations',
37+
(selection: { kind: string; name: { value: string } }) =>
38+
selection.kind === 'Field' && selection.name.value === 'aggregations',
4039
);
4140

4241
/*
@@ -49,9 +48,8 @@ const resolveHitsFromAggs =
4948
// modifying the query info field inline so it can query aggregations correctly
5049
// not idiomatic so doesn't line up with typings from graphql
5150
const aggregations = await aggregationsQuery(obj, info.variableValues, context, modifiedInfo);
52-
const { hitsTotal: total } = applyAggregationMasking({
51+
const { hitsTotal: total, dataMaskedAggregations } = applyAggregationMasking({
5352
aggregations,
54-
thresholdMin: dataMaskThreshold,
5553
});
5654
return { total };
5755
} else {
@@ -64,7 +62,6 @@ export const createResolvers = ({
6462
type,
6563
Parallel,
6664
getServerSideFilter,
67-
dataMaskThreshold,
6865
enableDocumentHits,
6966
}: Omit<CreateConnectionResolversArgs, 'enableAdmin'>) => {
7067
// configs
@@ -97,8 +94,15 @@ export const createResolvers = ({
9794
context: Context,
9895
info: GraphQLResolveInfo,
9996
) => {
100-
const aggs = await aggregationsQuery(obj, args, context, info);
101-
return aggregationsToGraphql(aggs);
97+
const aggregations = await aggregationsQuery(obj, args, context, info);
98+
if (enableDocumentHits) {
99+
return aggregationsToGraphql(aggregations);
100+
} else {
101+
const { dataMaskedAggregations } = applyAggregationMasking({
102+
aggregations,
103+
});
104+
return aggregationsToGraphql(dataMaskedAggregations);
105+
}
102106
};
103107

104108
// hits
@@ -108,7 +112,7 @@ export const createResolvers = ({
108112
: // @ts-ignore
109113
// typing resolveAggregations requires typing a lot of code down the chain
110114
// TODO: improve typing
111-
resolveHitsFromAggs(aggregationsQuery, dataMaskThreshold);
115+
resolveHitsFromAggs(aggregationsQuery);
112116

113117
return { hits, aggregations, configs };
114118
};

modules/server/src/mapping/types.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import { Client } from '@elastic/elasticsearch';
2+
import { Relation } from './masking';
23

34
export type Bucket = {
45
doc_count: number;
56
key: string;
6-
belowThreshold: boolean;
7+
relation: Relation;
78
};
89

910
export type Aggregation = {

0 commit comments

Comments
 (0)