Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] feat: optimize data subject lookup #123

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 104 additions & 10 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
// Feature toggles, read once when the module is loaded. Environment values are strings,
// so any non-empty value (including "0" or "false") switches the respective behavior on.
// - AL_WITH_PLACEHOLDERS: where-clauses for key lookups carry `$$$_<key>_$$$` placeholders instead of row values
// - AL_OLD_SUBJECT_SEARCH: selects the greedy data subject search and skips the 'exhausted' checks
const { AL_WITH_PLACEHOLDERS, AL_OLD_SUBJECT_SEARCH } = process.env

const cds = require('@sap/cds')

// lookup table of write events (NOTE(review): its usage is outside this excerpt - confirm)
const WRITE = { CREATE: 1, UPDATE: 1, DELETE: 1 }

// symbols used as property keys for data attached to model definitions
const $hasPersonalData = Symbol('@cap-js/audit-logging:hasPersonalData')
// cached result of the data subject search for an entity
const $dataSubject = Symbol('@cap-js/audit-logging:dataSubject')
// cached data subject id query that belongs to the cached search result
const $dataSubjectQuery = Symbol('@cap-js/audit-logging:dataSubjectQuery')
const $parents = Symbol('@cap-js/audit-logging:parents')
// per-element sets of root entity names, used for cycle detection while searching up and down
const $visitedUp = Symbol('@cap-js/audit-logging:visitedUp')
const $visitedDown = Symbol('@cap-js/audit-logging:visitedDown')
Expand Down Expand Up @@ -90,7 +93,7 @@ const addDataSubject = (log, row, key, entity) => {
}
}

const _addKeysToWhere = (keys, row, alias) => {
const _addKeysToWhereWithValues = (keys, row, alias) => {
return keys
.filter(key => !key.isAssociation && key.name !== 'IsActiveEntity')
.reduce((keys, key) => {
Expand All @@ -100,6 +103,18 @@ const _addKeysToWhere = (keys, row, alias) => {
}, [])
}

/**
 * Builds a where-clause token list that compares every plain key of an entity
 * with a `$$$_<key name>_$$$` placeholder value, joined by 'and'.
 *
 * Association keys and the key named 'IsActiveEntity' are left out.
 *
 * @param {Array<{ name: string, isAssociation?: boolean }>} keys - key elements of the entity
 * @param {object} row - not used; present so that the signature matches _addKeysToWhereWithValues
 * @param {string} alias - table alias that prefixes each key reference
 * @returns {Array} where-clause tokens, empty if no key qualifies
 */
const _addKeysToWhereWithPlaceholders = (keys, row, alias) => {
  const where = []

  for (const key of keys) {
    if (key.isAssociation || key.name === 'IsActiveEntity') continue
    if (where.length > 0) where.push('and')
    where.push({ ref: [alias, key.name] }, '=', { val: `$$$_${key.name}_$$$` })
  }

  return where
}

// The where-clause builder is chosen once at load time: with AL_WITH_PLACEHOLDERS set, key values
// are represented by placeholders; otherwise the variant that takes the values from the row is used.
const _addKeysToWhere = AL_WITH_PLACEHOLDERS ? _addKeysToWhereWithPlaceholders : _addKeysToWhereWithValues

const _keyColumns = (keys, alias) => {
return keys
.filter(key => !key.isAssociation && key.name !== 'IsActiveEntity')
Expand All @@ -121,11 +136,24 @@ const _buildSubSelect = (model, { entity, relative, element, next }, row, previo
const targetAlias = _alias(element._target)
const relativeAlias = _alias(relative)

childCqn.where(relative._relations[element.name].join(targetAlias, relativeAlias))
let w = relative._relations[element.name].join(targetAlias, relativeAlias)

// REVISIT: rewrite to path expression, if alias for relative is already used in subselect to avoid sql error
if (previousCqn?._aliases.has(relativeAlias)) {
let t
for (const a in entity.associations) if (entity.associations[a].target === relative.name) t = entity.associations[a]
if (t && w[0]?.xpr) for (const ele of w[0].xpr) if (ele.ref?.[0] === relativeAlias) ele.ref.splice(0, 1, as, t.name)
}
childCqn._aliases = new Set(previousCqn ? [...previousCqn._aliases.values(), as] : [as])

childCqn.where(w)

if (previousCqn) childCqn.where('exists', previousCqn)
else childCqn.where(_addKeysToWhere(keys, row, as))

// measure distance between root and data subject entity
childCqn._depth = previousCqn ? previousCqn._depth + 1 : 1

if (next) return _buildSubSelect(model, next, {}, childCqn)

return childCqn
Expand All @@ -140,6 +168,9 @@ const _getDataSubjectIdQuery = ({ dataSubjectEntity, subs }, row, model) => {
.columns(_keyColumns(keys, as))
.where(['exists', _buildSubSelect(model, subs[0], row)])

// measure distance between root and data subject entity
cqn._depth = cqn.SELECT.where[1]._depth + 1

// entity reused in different branches => must check all
for (let i = 1; i < subs.length; i++) cqn.or(['exists', _buildSubSelect(model, subs[i], row)])

Expand All @@ -162,7 +193,11 @@ const _getUps = (entity, model) => {
}

const _getDataSubjectUp = (root, model, entity, prev, next, result) => {
for (const element of _getUps(entity, model)) {
const _ups = _getUps(entity, model)

if (!AL_OLD_SUBJECT_SEARCH && _ups.every(e => e.own($visitedUp)?.has(root))) return 'exhausted'

for (const element of _ups) {
// cycle detection
if (element.own($visitedUp) == null) element.set($visitedUp, new Set())
if (element.own($visitedUp).has(root)) continue
Expand All @@ -171,19 +206,24 @@ const _getDataSubjectUp = (root, model, entity, prev, next, result) => {
const me = { entity, relative: element.parent, element }
if (prev) prev.next = me
if (element.parent['@PersonalData.EntitySemantics'] === 'DataSubject') {
if (!result) result = { dataSubjectEntity: element.parent, subs: [] }
if (!result || typeof result !== 'object') result = { dataSubjectEntity: element.parent, subs: [] }
result.subs.push(next || me)
return result
} else {
// REVISIT: why MUST it be dfs? with dfs alone, we don't find the shortest path
// dfs is a must here
result = _getDataSubjectUp(root, model, element.parent, me, next || me, result)
}
}

return result
}

const _getDataSubjectDown = (root, entity, prev, next) => {
const associations = Object.values(entity.associations || {}).filter(e => !e._isBacklink)

if (!AL_OLD_SUBJECT_SEARCH && associations.every(e => e.own($visitedDown)?.has(root))) return 'exhausted'

// bfs makes more sense here -> check all own assocs first before going deeper
for (const element of associations) {
const me = { entity, relative: entity, element }
Expand All @@ -192,6 +232,7 @@ const _getDataSubjectDown = (root, entity, prev, next) => {
return { dataSubjectEntity: element._target, subs: [next || me] }
}
}

for (const element of associations) {
// cycle detection
if (element.own($visitedDown) == null) element.set($visitedDown, new Set())
Expand All @@ -205,17 +246,55 @@ const _getDataSubjectDown = (root, entity, prev, next) => {
}
}

const getDataSubject = (entity, model) => {
/**
 * Looks up the data subject for `entity` by taking the first hit: the upward search
 * is tried first, and the downward search only if the upward one found nothing.
 * The result is cached on the entity under $dataSubject.
 *
 * @param {object} entity - entity definition to start from (needs `name`, `own()` and `set()`)
 * @param {object} model - model handed to the upward search
 * @returns {{ dataSubjectInfo: object|undefined }} the cached search result, wrapped in the
 *   same shape that _getDataSubjectExhaustive returns (without a prepared query)
 */
const _getDataSubjectGreedy = (entity, model) => {
  if (entity.own($dataSubject) == null) {
    // entities with EntitySemantics 'DataSubjectDetails' or 'Other' must not necessarily
    // be always below or always above 'DataSubject' entity in CSN tree
    const dataSubjectInfo =
      _getDataSubjectUp(entity.name, model, entity) || _getDataSubjectDown(entity.name, entity)
    entity.set($dataSubject, dataSubjectInfo)
  }

  // always return the wrapper object: callers destructure `dataSubjectInfo` from the result
  return { dataSubjectInfo: entity.own($dataSubject) }
}

/**
 * Looks up the data subject for `entity` by comparing candidates instead of taking the first hit.
 *
 * The upward search and then the downward search are called repeatedly. For every candidate
 * a data subject id query is built (with an empty row, i.e. without concrete key values) and the
 * candidate whose query has the smallest `_depth` wins. A direction is abandoned once its search
 * reports 'exhausted' or once a query with a depth of 2 or less was found.
 * The winning candidate and its query are cached on the entity.
 *
 * If nothing is found, `dataSubjectInfo` stays undefined and the query stays the
 * `{ _depth: Infinity }` sentinel; the cache check then fails again on the next call.
 *
 * @param {object} entity - entity definition to start from (needs `name`, `own()` and `set()`)
 * @param {object} model - model handed to the search helpers and to _getDataSubjectIdQuery
 * @returns {{ dataSubjectInfo: object|undefined, dataSubjectQuery: object }} the cached search result
 */
const _getDataSubjectExhaustive = (entity, model) => {
  if (entity.own($dataSubject) == null) {
    let dataSubjectInfo
    let dataSubjectQuery = { _depth: Infinity }

    // drains one search direction and keeps the candidate with the shallowest query
    const keepShortest = nextCandidate => {
      while (dataSubjectQuery._depth > 2) {
        const candidate = nextCandidate()
        if (candidate === 'exhausted') break
        if (!candidate) continue

        const query = _getDataSubjectIdQuery(candidate, {}, model)
        if (query._depth < dataSubjectQuery._depth) {
          dataSubjectInfo = candidate
          dataSubjectQuery = query
        }
      }
    }

    keepShortest(() => _getDataSubjectUp(entity.name, model, entity))
    keepShortest(() => _getDataSubjectDown(entity.name, entity))

    entity.set($dataSubject, dataSubjectInfo)
    entity.set($dataSubjectQuery, dataSubjectQuery)
  }

  // read the cache the same way it is checked above (own properties only)
  return { dataSubjectInfo: entity.own($dataSubject), dataSubjectQuery: entity.own($dataSubjectQuery) }
}

// The search strategy is chosen once at load time: AL_OLD_SUBJECT_SEARCH keeps the greedy
// first-hit lookup, otherwise the exhaustive lookup that compares query depths is used.
const _getDataSubject = AL_OLD_SUBJECT_SEARCH ? _getDataSubjectGreedy : _getDataSubjectExhaustive

const _getDataSubjectsMap = req => {
const mapKey = getMapKeyForCurrentRequest(req)
const _audit = (req.context._audit ??= {})
Expand All @@ -225,18 +304,33 @@ const _getDataSubjectsMap = req => {
}

/**
 * Fills in the data subject of an audit log entry for a row of an entity that is not the
 * data subject itself: role and type come from the data subject entity found for `entity`,
 * the id is set to a query that looks the data subject up.
 *
 * @param {object} row - affected row; key values are read from it, falling back to `row._old`
 * @param {object} log - audit log entry; its `data_subject` is modified in place
 * @param {object} req - current request, used to find the per-request lookup map
 * @param {object} entity - entity definition the row belongs to
 * @param {object} model - model handed to the data subject search and the query builder
 */
const addDataSubjectForDetailsEntity = (row, log, req, entity, model) => {
  const { dataSubjectInfo, dataSubjectQuery } = _getDataSubject(entity, model)
  const role = dataSubjectInfo.dataSubjectEntity['@PersonalData.DataSubjectRole']
  log.data_subject.role ??= role
  log.data_subject.type = dataSubjectInfo.dataSubjectEntity.name

  /*
   * for each req (cf. $batch with atomicity) and data subject role (e.g., customer vs supplier),
   * store (in audit data structure at context) and reuse a single promise to look up the respective data subject
   */
  const map = _getDataSubjectsMap(req)

  if (!map.has(role)) {
    if (AL_WITH_PLACEHOLDERS) {
      // the greedy search delivers no prepared query, so build the placeholder template on demand
      const template = dataSubjectQuery ?? _getDataSubjectIdQuery(dataSubjectInfo, {}, model)

      // Replace every `$$$_<key>_$$$` placeholder in the serialized query with the row's key value.
      // A replacer function is used so that `$` sequences in a value are not treated as replacement
      // patterns, and the value is JSON-escaped so that the result can still be parsed.
      // NOTE(review): placeholders sit inside JSON strings, so substituted values end up as strings.
      const resolved = JSON.stringify(template).replace(/\$\$\$_(\w+)_\$\$\$/g, (_, keyName) => {
        // `??` instead of `||`: falsy key values such as 0 are valid and must not fall back to _old
        const value = row[keyName] ?? row._old?.[keyName]
        return JSON.stringify(String(value)).slice(1, -1)
      })

      const query = new SELECT()
      query.SELECT = JSON.parse(resolved).SELECT
      map.set(role, query)
    } else {
      // REVISIT by downward lookups row might already contain ID - some potential to optimize
      map.set(role, _getDataSubjectIdQuery(dataSubjectInfo, row, model))
    }
  }

  log.data_subject.id = map.get(role)
}

const resolveDataSubjects = (logs, req) => {
Expand Down
Loading