completeness {DQAstats} | R Documentation |
completeness helper function
Description
Internal function to perform the missing-value (completeness) analysis.
Usage
completeness(results, headless = FALSE, logfile_dir)
Arguments
results |
A list object. The list should contain the results of either 'rv$results_descriptive' or 'rv$results_plausibility_atemporal'. |
headless |
A boolean (default: FALSE). Indicates whether the function is run only in the console (headless = TRUE) or on a GUI frontend (headless = FALSE). |
logfile_dir |
The absolute path to the folder where the logfile
will be stored. |
Value
A data.table with the absolute and relative counts of missing values (the results of the completeness checks) for each data element in the source data system and the target data system.
Examples
# runtime ~ 5 sec.

# Locate the demo utilities folder shipped with the package and read the
# example MDR file from it.
utils_path <- system.file(
  "demo_data/utilities/",
  package = "DQAstats"
)
mdr_filename <- "mdr_example_data.csv"
rv <- list()
rv$mdr <- read_mdr(
  utils_path = utils_path,
  # pass by name (`=`); `<-` here would be a positional assignment expression
  mdr_filename = mdr_filename
)

source_system_name <- "exampleCSV_source"
target_system_name <- "exampleCSV_target"

# append the helper variables derived from the MDR to `rv`
rv <- c(rv, create_helper_vars(
  mdr = rv$mdr,
  source_db = source_system_name,
  target_db = target_system_name
))

# save source/target vars
rv$source$system_name <- source_system_name
rv$target$system_name <- target_system_name
rv$source$system_type <- "csv"
rv$target$system_type <- "csv"

# write the logfile to the session's temporary directory
rv$log$logfile_dir <- tempdir()

# set headless (without GUI, progressbars, etc.)
rv$headless <- TRUE

# set configs: both example systems read from the package's demo data folder
demo_files <- system.file("demo_data", package = "DQAstats")
Sys.setenv("EXAMPLECSV_SOURCE_PATH" = demo_files)
Sys.setenv("EXAMPLECSV_TARGET_PATH" = demo_files)

# get configs
rv$source$settings <- DIZutils::get_config_env(
  system_name = rv$source$system_name,
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)
rv$target$settings <- DIZutils::get_config_env(
  system_name = tolower(rv$target$system_name),
  logfile_dir = rv$log$logfile_dir,
  headless = rv$headless
)

# set start_time (e.g. when clicking the 'Load Data'-button in shiny)
rv$start_time <- format(Sys.time(), usetz = TRUE, tz = "CET")

# define restricting date
rv$restricting_date$use_it <- FALSE

# load source data
tempdat <- data_loading(
  rv = rv,
  system = rv$source,
  keys_to_test = rv$keys_source
)
rv$data_source <- tempdat$outdata

# load target data
tempdat <- data_loading(
  rv = rv,
  system = rv$target,
  keys_to_test = rv$keys_target
)
rv$data_target <- tempdat$outdata

# compute the atemporal plausibility data
rv$data_plausibility$atemporal <- get_atemp_plausis(
  rv = rv,
  atemp_vars = rv$pl$atemp_vars,
  mdr = rv$mdr,
  headless = rv$headless
)

# add the plausibility raw data to data_target and data_source
for (i in names(rv$data_plausibility$atemporal)) {
  for (k in c("source_data", "target_data")) {
    # "source_data" -> "source" -> "data_source" (same for target)
    w <- gsub("_data", "", k)
    raw_data <- paste0("data_", w)
    # copy the raw data into rv$data_source / rv$data_target ...
    rv[[raw_data]][[i]] <-
      rv$data_plausibility$atemporal[[i]][[k]][[raw_data]]
    # ... and drop it from the plausibility results
    rv$data_plausibility$atemporal[[i]][[k]][[raw_data]] <- NULL
  }
  gc()
}

# calculate descriptive results
rv$results_descriptive <- descriptive_results(
  rv = rv,
  headless = rv$headless
)

# run the completeness checks on the descriptive results
completeness(
  results = rv$results_descriptive,
  headless = rv$headless,
  logfile_dir = rv$log$logfile_dir
)
[Package DQAstats version 0.3.5 Index]