reduce_cat {DQAstats} | R Documentation |
reduce_cat helper function
Description
Internal function to reduce categorical variables to a maximum number of values to be displayed.
Usage
reduce_cat(data, levellimit = 25)
Arguments
data |
A list object. The object `rv$results_descriptive`. |
levellimit |
An integer value. The maximum number of values to be displayed (default: 25). |
Value
A data.table with the data quality assessment results for categorical
data elements that are reduced to the maximum number of levels specified
with `levellimit`.
Examples
# End-to-end example: load the packaged demo data, compute the descriptive
# results and reduce the categorical results to at most 25 levels.
# runtime ~ 5 sec.

# locate the demo utilities folder shipped with the DQAstats package
utils_path <- system.file(
  "demo_data/utilities/",
  package = "DQAstats"
)
mdr_filename <- "mdr_example_data.csv"

# `rv` collects all intermediate objects that are passed between the steps
rv <- list()
# pass the file name as a named argument (`=`); `<-` inside a call would
# re-assign the variable and hand the value over positionally
rv$mdr <- read_mdr(
  utils_path = utils_path,
  mdr_filename = mdr_filename
)

source_system_name <- "exampleCSV_source"
target_system_name <- "exampleCSV_target"

# append the elements returned by create_helper_vars() to `rv`
rv <- c(rv, create_helper_vars(
  mdr = rv$mdr,
  source_db = source_system_name,
  target_db = target_system_name
))

# save source/target vars
rv$source$system_name <- source_system_name
rv$target$system_name <- target_system_name
rv$source$system_type <- "csv"
rv$target$system_type <- "csv"

rv$log$logfile_dir <- tempdir()

# set headless (without GUI, progressbars, etc.)
rv$headless <- TRUE

# set configs: both systems point to the demo data folder of the package
demo_files <- system.file("demo_data", package = "DQAstats")
Sys.setenv("EXAMPLECSV_SOURCE_PATH" = demo_files)
Sys.setenv("EXAMPLECSV_TARGET_PATH" = demo_files)

# get configs
# NOTE(review): the target system name is lower-cased below while the source
# system name is passed unchanged -- confirm that this difference is intended
rv$source$settings <- DIZutils::get_config_env(
  system_name = rv$source$system_name,
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)
rv$target$settings <- DIZutils::get_config_env(
  system_name = tolower(rv$target$system_name),
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)

# set start_time (e.g. when clicking the 'Load Data'-button in shiny)
rv$start_time <- format(Sys.time(), usetz = TRUE, tz = "CET")

# define restricting date
rv$restricting_date$use_it <- FALSE

# load source data
tempdat <- data_loading(
  rv = rv,
  system = rv$source,
  keys_to_test = rv$keys_source
)
rv$data_source <- tempdat$outdata

# load target data
tempdat <- data_loading(
  rv = rv,
  system = rv$target,
  keys_to_test = rv$keys_target
)
rv$data_target <- tempdat$outdata

rv$data_plausibility$atemporal <- get_atemp_plausis(
  rv = rv,
  atemp_vars = rv$pl$atemp_vars,
  mdr = rv$mdr,
  headless = rv$headless
)

# add the plausibility raw data to data_target and data_source
for (i in names(rv$data_plausibility$atemporal)) {
  for (k in c("source_data", "target_data")) {
    # "source_data" -> "source" -> "data_source" (same for target)
    w <- gsub("_data", "", k)
    raw_data <- paste0("data_", w)
    # move the raw data into rv$data_source / rv$data_target ...
    rv[[raw_data]][[i]] <-
      rv$data_plausibility$atemporal[[i]][[k]][[raw_data]]
    # ... and drop it from the plausibility results afterwards
    rv$data_plausibility$atemporal[[i]][[k]][[raw_data]] <- NULL
  }
  gc()
}

# calculate descriptive results
rv$results_descriptive <- descriptive_results(
  rv = rv,
  headless = rv$headless
)

# reduce the categorical results to at most `levellimit` values
reduce_cat(
  data = rv$results_descriptive,
  levellimit = 25
)
[Package DQAstats version 0.3.5 Index]