reduce_cat {DQAstats}R Documentation

reduce_cat helper function

Description

Internal function to reduce categorical variables to a maximum of values to be displayed.

Usage

reduce_cat(data, levellimit = 25)

Arguments

data

A list object. The object rv$results_descriptive.

levellimit

An integer value. The number of maximum values to be displayed (default: 25).

Value

A data.table with the data quality assessment results for categorical dataelements that are reduced to the maximum number of levels specified with levellimit.

Examples

# runtime ~ 5 sec.
utils_path <- system.file(
  "demo_data/utilities/",
  package = "DQAstats"
)
mdr_filename <- "mdr_example_data.csv"
rv <- list()
rv$mdr <- read_mdr(
  utils_path = utils_path,
  mdr_filename <- mdr_filename
)

source_system_name <- "exampleCSV_source"
target_system_name <- "exampleCSV_target"

rv <- c(rv, create_helper_vars(
  mdr = rv$mdr,
  source_db = source_system_name,
  target_db = target_system_name
))
# save source/target vars
rv$source$system_name <- source_system_name
rv$target$system_name <- target_system_name
rv$source$system_type <- "csv"
rv$target$system_type <- "csv"

rv$log$logfile_dir <- tempdir()

# set headless (without GUI, progressbars, etc.)
rv$headless <- TRUE

# set configs
demo_files <- system.file("demo_data", package = "DQAstats")
Sys.setenv("EXAMPLECSV_SOURCE_PATH" = demo_files)
Sys.setenv("EXAMPLECSV_TARGET_PATH" = demo_files)

# get configs
rv$source$settings <- DIZutils::get_config_env(
  system_name = rv$source$system_name,
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)
rv$target$settings <- DIZutils::get_config_env(
  system_name = tolower(rv$target$system_name),
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)

# set start_time (e.g. when clicking the 'Load Data'-button in shiny
rv$start_time <- format(Sys.time(), usetz = TRUE, tz = "CET")

# define restricting date
rv$restricting_date$use_it <- FALSE

# load source data
tempdat <- data_loading(
  rv = rv,
  system = rv$source,
  keys_to_test = rv$keys_source
)
rv$data_source <- tempdat$outdata

# load target data
tempdat <- data_loading(
  rv = rv,
  system = rv$target,
  keys_to_test = rv$keys_target
)
rv$data_target <- tempdat$outdata

rv$data_plausibility$atemporal <- get_atemp_plausis(
  rv = rv,
  atemp_vars = rv$pl$atemp_vars,
  mdr = rv$mdr,
  headless = rv$headless
)

# add the plausibility raw data to data_target and data_source
for (i in names(rv$data_plausibility$atemporal)) {
  for (k in c("source_data", "target_data")) {
    w <- gsub("_data", "", k)
    raw_data <- paste0("data_", w)
    rv[[raw_data]][[i]] <-
      rv$data_plausibility$atemporal[[i]][[k]][[raw_data]]
    rv$data_plausibility$atemporal[[i]][[k]][[raw_data]] <- NULL
  }
  gc()
}

# calculate descriptive results
rv$results_descriptive <- descriptive_results(
  rv = rv,
  headless = rv$headless
)

reduce_cat(
  data = rv$results_descriptive,
  levellimit = 25
)



[Package DQAstats version 0.3.5 Index]