Skip to content

Commit

Permalink
Merge pull request #86 from privacytoolsproject/MF_RefactoringUtility…
Browse files Browse the repository at this point in the history
…Functions

Refactoring utility functions
  • Loading branch information
MeganFantes authored Aug 16, 2019
2 parents e906e4b + 11344d5 commit 1cb65f3
Show file tree
Hide file tree
Showing 142 changed files with 3,341 additions and 3,112 deletions.
11 changes: 9 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ Suggests:
testthat
Collate:
'CompositionTheorems.R'
'createJSON.R'
'datasets.R'
'mechanism.R'
'mechanism-bootstrap.R'
Expand All @@ -38,5 +37,13 @@ Collate:
'statistic-tree.R'
'statistic-variance.R'
'update_parameters.R'
'utilities.R'
'utilities-categorical.R'
'utilities-covariance.R'
'utilities-data-validation.R'
'utilities-histogram.R'
'utilities-mean.R'
'utilities-noise-generation.R'
'utilities-objective.R'
'utilities-postprocessing.R'
'utilities-tree.R'
VignetteBuilder: knitr
39 changes: 18 additions & 21 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# Generated by roxygen2: do not edit by hand

export(bootstrap.replication)
export(censordata)
export(check_histogram_mechanism)
export(check_variable_type)
export(checkepsilon)
export(checkrange)
export(coefficient.release)
export(dlap)
export(bootstrapReplication)
export(censorData)
export(checkEpsilon)
export(checkRange)
export(checkVariableType)
export(coefficientRelease)
export(dLap)
export(dpCovariance)
export(dpGLM)
export(dpHeavyHitters)
Expand All @@ -18,28 +17,26 @@ export(dpTree)
export(dpUnif)
export(dpVariance)
export(fillMissing)
export(glm.getAccuracy)
export(glm.getParameters)
export(histogram.getAccuracy)
export(histogram.getEpsilon)
export(make_logical)
export(mean.getAccuracy)
export(mean.getCI)
export(mean.getParameters)
export(glmGetAccuracy)
export(glmGetParameters)
export(histogramGetAccuracy)
export(histogramGetEpsilon)
export(makeLogical)
export(meanGetCI)
export(mechanism)
export(mechanismBootstrap)
export(mechanismExponential)
export(mechanismGaussian)
export(mechanismLaplace)
export(mechanismObjective)
export(mechanismStability)
export(plap)
export(qlap)
export(pLap)
export(qLap)
export(rLap)
export(release2json)
export(rlap)
export(sgn)
export(tree.getAccuracy)
export(tree.getParameters)
export(treeGetAccuracy)
export(treeGetParameters)
exportClasses(dpCovariance)
exportClasses(dpGLM)
exportClasses(dpHeavyHitters)
Expand Down
20 changes: 0 additions & 20 deletions R/createJSON.R

This file was deleted.

48 changes: 24 additions & 24 deletions R/mechanism-bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
#' @import stats
#' @export

bootstrap.replication <- function(x, n, sensitivity, epsilon, fun) {
bootstrapReplication <- function(x, n, sensitivity, epsilon, fun) {
partition <- rmultinom(n=1, size=n, prob=rep(1 / n, n))
max.appearances <- max(partition)
probs <- sapply(1:max.appearances, dbinom, size=n, prob=(1 / n))
stat.partitions <- vector('list', max.appearances)
for (i in 1:max.appearances) {
variance.i <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon)
stat.i <- fun(x[partition == i])
noise.i <- dpNoise(n=length(stat.i), scale=sqrt(variance.i), dist='gaussian')
stat.partitions[[i]] <- i * stat.i + noise.i
maxAppearances <- max(partition)
probs <- sapply(1:maxAppearances, dbinom, size=n, prob=(1 / n))
statPartitions <- vector('list', maxAppearances)
for (i in 1:maxAppearances) {
iVariance <- (i * probs[i] * (sensitivity^2)) / (2 * epsilon)
iStat <- fun(x[partition == i])
iNoise <- dpNoise(n=length(iStat), scale=sqrt(iVariance), dist='gaussian')
statPartitions[[i]] <- i * iStat + iNoise
}
stat.out <- do.call(rbind, stat.partitions)
return(apply(stat.out, 2, sum))
statOut <- do.call(rbind, statPartitions)
return(apply(statOut, 2, sum))
}


Expand All @@ -40,31 +40,31 @@ mechanismBootstrap <- setRefClass(

mechanismBootstrap$methods(
bootStatEval = function(xi,...) {
fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
input.vals = c(list(x=x), fun.args)
stat <- do.call(boot.fun, input.vals)
funArgs <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
inputVals = c(list(x=x), funArgs)
stat <- do.call(bootFun, inputVals)
return(stat)
})

mechanismBootstrap$methods(
bootSE = function(release, n.boot, sens) {
bootSE = function(release, nBoot, sens) {
se <- sd(release)
c.alpha <- qchisq(0.01, df=(n.boot - 1))
conservative <- sqrt(max(c(se^2 - (c.alpha * sens^2 * n.boot) / (2 * epsilon * (n.boot - 1)), 0)))
naive <- sqrt(max(c(se^2 - (sens^2 * n.boot) / (2 * epsilon), 0)))
cAlpha <- qchisq(0.01, df=(nBoot - 1))
conservative <- sqrt(max(c(se^2 - (cAlpha * sens^2 * nBoot) / (2 * epsilon * (nBoot - 1)), 0)))
naive <- sqrt(max(c(se^2 - (sens^2 * nBoot) / (2 * epsilon), 0)))
return(list('sd' = se,
'conservative' = conservative,
'naive' = naive))
})

mechanismBootstrap$methods(
evaluate = function(fun, x, sens, postFun) {
x <- censordata(x, .self$var.type, .self$rng)
x <- fillMissing(x, .self$var.type, .self$impute.rng[0], .self$impute.rng[1])
epsilon.part <- epsilon / .self$n.boot
release <- replicate(.self$n.boot, bootstrap.replication(x, n, sens, epsilon.part, fun=.self$bootStatEval))
std.error <- .self$bootSE(release, .self$n.boot, sens)
out <- list('release' = release, 'std.error' = std.error)
x <- censorData(x, .self$varType, .self$rng)
x <- fillMissing(x, .self$varType, .self$imputeRng[0], .self$imputeRng[1])
epsilonPart <- epsilon / .self$nBoot
release <- replicate(.self$nBoot, bootstrapReplication(x, n, sens, epsilonPart, fun=.self$bootStatEval))
stdError <- .self$bootSE(release, .self$nBoot, sens)
out <- list('release' = release, 'stdError' = stdError)
out <- postFun(out)
return(out)
})
16 changes: 8 additions & 8 deletions R/mechanism-exponential.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ mechanismExponential <- setRefClass(

mechanismExponential$methods(
evaluate = function(fun, x, sens, postFun, ...) {
x <- censordata(x, .self$var.type, rng=.self$rng, levels=.self$bins)
x <- fillMissing(x, .self$var.type, rng=.self$rng, categories=.self$bins)
x <- censorData(x, .self$varType, rng=.self$rng, levels=.self$bins)
x <- fillMissing(x, .self$varType, rng=.self$rng, categories=.self$bins)
fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
input.vals = c(list(x=x), fun.args)
true.val <- do.call(fun, input.vals) # Concern: are we confident that the environment this is happening in is getting erased.
quality <- true.val - max(true.val)
probs <- ifelse(true.val == 0, 0, exp((.self$epsilon * quality) / (2 * sens)))
gap <- as.numeric(true.val[.self$k] - true.val[.self$k + 1])
inputVals = c(list(x=x), fun.args)
trueVal <- do.call(fun, inputVals) # Concern: are we confident that the environment this is happening in is getting erased.
quality <- trueVal - max(trueVal)
probs <- ifelse(trueVal == 0, 0, exp((.self$epsilon * quality) / (2 * sens)))
gap <- as.numeric(trueVal[.self$k] - trueVal[.self$k + 1])
if (gap < (-2 / epsilon * log(delta))) {
out <- list('release' = NULL)
} else {
release <- sample(names(true.val), size=.self$k, prob=probs)
release <- sample(names(trueVal), size=.self$k, prob=probs)
out <- list('release' = release)
out <- postFun(out, gap)
}
Expand Down
12 changes: 6 additions & 6 deletions R/mechanism-gaussian.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ mechanismGaussian <- setRefClass(

mechanismGaussian$methods(
evaluate = function(fun, x, sens, postFun, ...) {
x <- censordata(x, .self$var.type, .self$rng)
x <- fillMissing(x, .self$var.type, impute.rng=.self$rng, categories=.self$bins)
fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
input.vals = c(list(x=x), fun.args)
true.val <- do.call(fun, input.vals)
x <- censorData(x, .self$varType, .self$rng)
x <- fillMissing(x, .self$varType, imputeRng=.self$rng, categories=.self$bins)
funArgs <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
inputVals = c(list(x=x), funArgs)
trueVal <- do.call(fun, inputVals)
scale <- sens * sqrt(2 * log(1.25 / .self$delta)) / .self$epsilon
release <- true.val + dpNoise(n=length(true.val), scale=scale, dist='gaussian')
release <- trueVal + dpNoise(n=length(trueVal), scale=scale, dist='gaussian')
out <- list('release' = release)
out <- postFun(out, ...)
return(out)
Expand Down
10 changes: 5 additions & 5 deletions R/mechanism-laplace.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ mechanismLaplace$methods(
#' laplace_mean <- mechanismLaplace$evaluate(mean_function, data[, variable], sens, post_processing_function)
#'
evaluate = function(fun, x, sens, postFun, ...) {
x <- censordata(x, .self$var.type, .self$rng, .self$bins)
x <- fillMissing(x, .self$var.type, impute.rng=.self$rng, categories=.self$impute.bins)
x <- censorData(x, .self$varType, .self$rng, .self$bins)
x <- fillMissing(x, .self$varType, imputeRng=.self$rng, categories=.self$imputeBins)
fun.args <- getFuncArgs(fun, inputList=list(...), inputObject=.self)
input.vals = c(list(x=x), fun.args)
true.val <- do.call(fun, input.vals) # Concern: are we confident that the environment this is happening in is getting erased.
inputVals = c(list(x=x), fun.args)
trueVal <- do.call(fun, inputVals) # Concern: are we confident that the environment this is happening in is getting erased.
scale <- sens / .self$epsilon
release <- true.val + dpNoise(n=length(true.val), scale=scale, dist='laplace')
release <- trueVal + dpNoise(n=length(trueVal), scale=scale, dist='laplace')
out <- list('release' = release)
out <- postFun(out, ...)
return(out)
Expand Down
62 changes: 31 additions & 31 deletions R/mechanism-objective.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ mechanismObjective$methods(
x <- x[, cols]

# censor & impute missing values
x <- censordata(x, .self$var.type, .self$rng, .self$bins)
x <- fillMissing(x, .self$var.type, impute.rng=.self$impute.rng)
x <- censorData(x, .self$varType, .self$rng, .self$bins)
x <- fillMissing(x, .self$varType, imputeRng=.self$imputeRng)

# extract X and y
y <- x[, cols[1]]
X <- x[, cols[2:length(cols)], drop=FALSE]
X.names <- names(X)
xNames <- names(X)

# scale inputs s.t. max Euclidean norm <= 1
scaler <- mapMatrixUnit(X, p=2)
Expand All @@ -34,19 +34,19 @@ mechanismObjective$methods(
# add intercept
if (.self$intercept) {
X <- cbind(1, X)
X.names <- c('intercept', X.names)
xNames <- c('intercept', xNames)
}

# set start params, adjust for ols
if (.self$name == 'ols') {
start.params <- rep(0, ncol(X) + 1)
X.names <- c(X.names, 'variance')
y.scaler <- mapMatrixUnit(y, p=2)
y <- y.scaler$matrix
y.max.norm <- y.scaler$max.norm
startParams <- rep(0, ncol(X) + 1)
xNames <- c(xNames, 'variance')
yScaler <- mapMatrixUnit(y, p=2)
y <- yScaler$matrix
yMaxNorm <- yScaler$maxNorm
} else {
start.params <- rep(0, ncol(X))
y.max.norm <- NULL
startParams <- rep(0, ncol(X))
yMaxNorm <- NULL
}

# Set scalar c from [CMS11]
Expand All @@ -68,35 +68,35 @@ mechanismObjective$methods(

beta <- ep/2
# fit
if (is.null(.self$n.boot)) {
if (is.null(.self$nBoot)) {
# old noise draw based on gamma and laplace
# b.norm <- dpNoise(n=1, scale=(2 / .self$epsilon), dist='gamma', shape=length(start.params))
# b <- dpNoise(n=length(start.params), scale=(-.self$epsilon * b.norm), dist='laplace')
# bNorm <- dpNoise(n=1, scale=(2 / .self$epsilon), dist='gamma', shape=length(startParams))
# b <- dpNoise(n=length(startParams), scale=(-.self$epsilon * bNorm), dist='laplace')

# new noise draw based on exponential and uniform
b.norm <- dpNoise(n=1, scale=(1/beta), dist='gamma', shape=1)
random_vec <- dpNoise(n=length(start.params), scale=1, dist='gaussian')
random_vec_norm <- sqrt(sum(random_vec^2))
b <- random_vec*(b.norm/random_vec_norm)
bNorm <- dpNoise(n=1, scale=(1/beta), dist='gamma', shape=1)
randomVec <- dpNoise(n=length(startParams), scale=1, dist='gaussian')
randomVecNorm <- sqrt(sum(randomVec^2))
b <- randomVec*(bNorm/randomVecNorm)

estimates <- optim(par=start.params, fn=.self$objective, X=X, y=y, b=b, n=n, lambda=lambda)$par
release <- data.frame(scaleRelease(estimates, scaler$max.norm, y.max.norm))
estimates <- optim(par=startParams, fn=.self$objective, X=X, y=y, b=b, n=n, lambda=lambda)$par
release <- data.frame(scaleRelease(estimates, scaler$maxNorm, yMaxNorm))
names(release) <- 'estimate'
rownames(release) <- X.names
rownames(release) <- xNames
} else {
local.epsilon <- .self$epsilon / .self$n.boot
release <- vector('list', .self$n.boot)
for (i in 1:.self$n.boot) {
localEpsilon <- .self$epsilon / .self$nBoot
release <- vector('list', .self$nBoot)
for (i in 1:.self$nBoot) {
index <- sample(1:.self$n, .self$n, replace=TRUE)
X.star <- X[index, ]
y.star <- y[index]
b.norm <- dpNoise(n=1, scale=(2 / local.epsilon), dist='gamma', shape=length(start.params))
b <- dpNoise(n=length(start.params), scale=(-local.epsilon * b.norm), dist='laplace')
estimates <- optim(par=start.params, fn=.self$objective, X=X.star, y=y.star, b=b, n=n)$par
release[[i]] <- scaleRelease(estimates, scaler$max.norm, y.max.norm)
xStar <- X[index, ]
yStar <- y[index]
bNorm <- dpNoise(n=1, scale=(2 / localEpsilon), dist='gamma', shape=length(startParams))
b <- dpNoise(n=length(startParams), scale=(-localEpsilon * bNorm), dist='laplace')
estimates <- optim(par=startParams, fn=.self$objective, X=xStar, y=yStar, b=b, n=n)$par
release[[i]] <- scaleRelease(estimates, scaler$maxNorm, yMaxNorm)
}
release <- data.frame(do.call(rbind, release))
names(release) <- X.names
names(release) <- xNames
}

# format output
Expand Down
16 changes: 8 additions & 8 deletions R/mechanism-stability.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,25 @@ mechanismStability$methods(
numHistogramBins <- NULL
imputationRange <- NULL
histogramBins <- NULL
if (.self$var.type %in% c('numeric', 'integer')) {
if (.self$varType %in% c('numeric', 'integer')) {
dataRange <- range(x)
numHistogramBins <- ifelse(is.null(.self$n.bins), .self$n / .self$granularity, .self$n.bins)
numHistogramBins <- ifelse(is.null(.self$nBins), .self$n / .self$granularity, .self$nBins)
histogramBins <- seq(dataRange[1], dataRange[2], length.out=(numHistogramBins + 1))
# set the imputation range to the detected data range to maintain privacy
imputationRange <- dataRange
}

x <- censordata(x, .self$var.type, dataRange, histogramBins)
x <- fillMissing(x, .self$var.type, impute.rng=imputationRange, categories=levels(x)) # levels(x) will be NULL for numeric variables, a vector of bins for character variables
x <- censorData(x, .self$varType, dataRange, histogramBins)
x <- fillMissing(x, .self$varType, imputeRng=imputationRange, categories=levels(x)) # levels(x) will be NULL for numeric variables, a vector of bins for character variables
fun.args <- getFuncArgs(fun, inputList=list(bins=histogramBins), inputObject=.self)
input.vals <- c(list(x=x), fun.args)
true.val <- do.call(fun, input.vals) # Concern: are we confident that the environment this is happening in is getting erased.
inputVals <- c(list(x=x), fun.args)
trueVal <- do.call(fun, inputVals) # Concern: are we confident that the environment this is happening in is getting erased.

# remove empty bins before noise is added (per definition of stability mechanism)
true.val <- true.val[true.val > 0]
trueVal <- trueVal[trueVal > 0]

scale <- sens / .self$epsilon
release <- true.val + dpNoise(n=length(true.val), scale=scale, dist='laplace')
release <- trueVal + dpNoise(n=length(trueVal), scale=scale, dist='laplace')

# calculate the accuracy threshold, below which histogram buckets should be removed
accuracyThreshold <- 1+2*log(2/delta)/epsilon
Expand Down
Loading

0 comments on commit 1cb65f3

Please sign in to comment.