ck_simpleparams {cellKey} | R Documentation |
Set parameters required to perturb numeric variables using a simple approach
Description
ck_simpleparams() defines the parameters for a simple perturbation approach based on a single magnitude parameter (m, supplied through argument p). The values of epsilon are used to "weight" parameter m when type == "top_contr" is set in ck_params_nums().
Usage
ck_simpleparams(p, epsilon = 1)
Arguments
p |
a percentage value used as magnitude for perturbation |
epsilon |
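a numeric vector in descending order with all values >= 0 and <= 1, the first element being 1; its length must match top_k in ck_params_nums() when type == "top_contr" |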
Details
Details about the flex function can be found in Deliverable D4.2, Part I, of the SGA "Open Source tools for perturbative confidentiality methods".
Value
An object suitable as input for ck_params_nums().
See Also
ck_flexparams(), ck_params_nums()
Examples
# Build the test data set and derive the helper variables used in the
# remainder of this example.
# NOTE(review): `x[, col := ...]` is data.table syntax, so `x` is presumably
# a data.table - confirm against ck_create_testdata().
x <- ck_create_testdata()
# create some 0/1 indicator variables that should be perturbed later
x[, cnt_females := ifelse(sex == "male", 0, 1)]
x[, cnt_males := ifelse(sex == "male", 1, 0)]
x[, cnt_highincome := ifelse(income >= 9000, 1, 0)]
# a variable with positive and negative contributions: integers drawn with
# replacement from -10..10 (no seed is set, so the values differ between runs)
x[, mixed := sample(-10:10, nrow(x), replace = TRUE)]
# create record keys and store them in column `rkey`
x$rkey <- ck_generate_rkeys(dat = x)
# define required inputs
# one hierarchy per table dimension, each with some bogus codes added
# NOTE(review): hier_create()/hier_add() are not defined in this example -
# presumably they come from the sdcHierarchies package; confirm it is attached.
# sex: root "Total" with nodes "male" and "female"; the extra codes "f" and
# "m" are added with root = "female" and root = "male" respectively
d_sex <- hier_create(root = "Total", nodes = c("male", "female"))
d_sex <- hier_add(d_sex, root = "female", "f")
d_sex <- hier_add(d_sex, root = "male", "m")
# age: root "Total" with nodes "age_group1" ... "age_group6"; the extra codes
# "ag1a" and "ag2a" are added with root = "age_group1" / "age_group2"
d_age <- hier_create(root = "Total", nodes = paste0("age_group", 1:6))
d_age <- hier_add(d_age, root = "age_group1", "ag1a")
d_age <- hier_add(d_age, root = "age_group2", "ag2a")
# define the cell key object
# names of the count variables and of the numeric variables to tabulate
countvars <- c("cnt_females", "cnt_males", "cnt_highincome")
numvars <- c("expend", "income", "savings", "mixed")
# set up the table instance from the data `x`: record keys are taken from
# column "rkey", the dimensions `sex` and `age` use the hierarchies defined
# above and the weights are taken from column "sampling_weight"
tab <- ck_setup(
x = x,
rkey = "rkey",
dims = list(sex = d_sex, age = d_age),
w = "sampling_weight",
countvars = countvars,
numvars = numvars)
# show some information about this table instance
tab$print() # identical with print(tab)
# information about the hierarchies
tab$hierarchy_info()
# which variables have been defined?
tab$allvars()
# count variables
tab$cntvars()
# continuous variables
tab$numvars()
# create perturbation parameters for the "total" variable and
# write them to a yaml-file;
# the ptable is created using functionality from the ptable-pkg
f_yaml <- tempfile(fileext = ".yaml")
p_cnts1 <- ck_params_cnts(
ptab = ptable::pt_ex_cnts(),
path = f_yaml)
# read the parameters back from the yaml-file (this overwrites `p_cnts1`)
# and set them for variable `"total"`
p_cnts1 <- ck_read_yaml(path = f_yaml)
tab$params_cnts_set(val = p_cnts1, v = "total")
# create alternative perturbation parameters by specifying the ptable
# parameters explicitly
para2 <- ptable::create_cnt_ptable(
D = 8, V = 3, js = 2, create = FALSE)
p_cnts2 <- ck_params_cnts(ptab = para2)
# use this ptable for the count variables listed in `countvars`
tab$params_cnts_set(val = p_cnts2, v = countvars)
# perturb a variable
tab$perturb(v = "total")
# multiple variables can be perturbed as well
tab$perturb(v = c("cnt_males", "cnt_highincome"))
# return weighted and unweighted results
tab$freqtab(v = c("total", "cnt_males"))
# numerical variables (positive variables using flex-function)
# we also write the config to a yaml file
f_yaml <- tempfile(fileext = ".yaml")
# create ptables using functionality from the ptable-pkg
# a single ptable for all cells
ptab1 <- ptable::pt_ex_nums(parity = TRUE, separation = FALSE)
# a single ptab for all cells except for very small ones
ptab2 <- ptable::pt_ex_nums(parity = TRUE, separation = TRUE)
# different ptables for cells with even/odd number of contributors
# and very small cells
ptab3 <- ptable::pt_ex_nums(parity = FALSE, separation = TRUE)
# flex-function parameters based on the top 3 contributors; `epsilon` has
# three elements, one per contributor counted by `top_k = 3`.
# The parameters are also written to `f_yaml` via `path`.
p_nums1 <- ck_params_nums(
ptab = ptab1,
type = "top_contr",
top_k = 3,
mult_params = ck_flexparams(
fp = 1000,
p = c(0.30, 0.03),
epsilon = c(1, 0.5, 0.2),
q = 3),
mu_c = 2,
same_key = FALSE,
use_zero_rkeys = FALSE,
path = f_yaml)
# we read the parameters from the yaml-file (this overwrites `p_nums1`)
p_nums1 <- ck_read_yaml(path = f_yaml)
# for variables with positive and negative values
# (not written to a yaml-file: no `path` is given)
p_nums2 <- ck_params_nums(
ptab = ptab2,
type = "top_contr",
top_k = 3,
mult_params = ck_flexparams(
fp = 1000,
p = c(0.15, 0.02),
epsilon = c(1, 0.4, 0.15),
q = 3),
mu_c = 2,
same_key = FALSE)
# simple perturbation parameters (not using the flex-function approach):
# ck_simpleparams() is called with `p = 0.25` and the default `epsilon = 1`.
# NOTE(review): `p_nums3` is only created for illustration; it is not
# assigned to any variable in the rest of this example.
p_nums3 <- ck_params_nums(
ptab = ptab3,
type = "mean",
mult_params = ck_simpleparams(p = 0.25),
mu_c = 2,
same_key = FALSE)
# use `p_nums1` for the variables `savings`, `income` and `expend`
tab$params_nums_set(p_nums1, c("savings", "income", "expend"))
# use different parameters for variable `mixed`
tab$params_nums_set(p_nums2, v = "mixed")
# identify sensitive cells to which extra protection (`mu_c`) is added.
# Several alternative rules are shown for variable `income`.
# NOTE(review): presumably supp_p/supp_pq/supp_nk are the p%-, pq- and
# nk-dominance rules and supp_freq/supp_val are thresholds on cell counts and
# cell values - confirm against the method documentation.
tab$supp_p(v = "income", p = 85)
tab$supp_pq(v = "income", p = 85, q = 90)
tab$supp_nk(v = "income", n = 2, k = 90)
tab$supp_freq(v = "income", n = 14, weighted = FALSE)
tab$supp_val(v = "income", n = 10000, weighted = TRUE)
# cells can also be listed explicitly: one row of `inp` per cell, with one
# column per table dimension (`sex`, `age`)
tab$supp_cells(
v = "income",
inp = data.frame(
sex = c("female", "female"),
"age" = c("age_group1", "age_group3")
)
)
# perturb variables
tab$perturb(v = c("income", "savings"))
# extract results
tab$numtab("income", mean_before_sum = TRUE)
tab$numtab("income", mean_before_sum = FALSE)
tab$numtab("savings")
# results can be reset, too
tab$reset_cntvars(v = "cnt_males")
# we can then set other parameters and perturb again
tab$params_cnts_set(val = p_cnts1, v = "cnt_males")
tab$perturb(v = "cnt_males")
# write results to a .csv file in the session's temporary directory
tab$freqtab(
v = c("total", "cnt_males"),
path = file.path(tempdir(), "outtab.csv")
)
# show results containing weighted and unweighted results
tab$freqtab(v = c("total", "cnt_males"))
# utility measures for a count variable
tab$measures_cnts(v = "total", exclude_zeros = TRUE)
# modifications for perturbed count variables
tab$mod_cnts()
# display a summary about utility measures
tab$summary()