exampleData {dataReporter} | R Documentation |
Example data with zero-inflated variables
Description
An artificial dataset, intended for presenting the extended features of dataReporter, which is a toolset for identifying potential errors in a dataset.
Usage
exampleData
Format
A data.frame with 300 observations on the following 6 variables.
addresses
a factor with fictitious US addresses
binomial
a numeric vector with a binomial distributed variable
poisson
a numeric vector with a Poisson distributed variable
gauss
a numeric vector with a Gaussian distributed variable
zigauss
a numeric vector with a zero-inflated Gaussian distributed variable
bpinteraction
a factor with interactions between binomial and poisson values
Source
Artificial data
Examples
# Check whether a variable looks like it contains ID codes.
#
# A variable is flagged when it is a character, factor, labelled, numeric or
# integer vector whose values, converted to character, are all more than 10
# characters long and all have the same length.
#
# Arguments:
#   v     The variable (vector) to check.
#   nMax  Not used in the body.
#   ...   Not used in the body.
#
# Returns a list with two elements: `problem` (TRUE/FALSE) and `message`
# (a character string, empty when no problem was found).
isID <- function(v, nMax = NULL, ...) {
  out <- list(problem = FALSE, message = "")
  # inherits() copes with multi-element class vectors such as
  # c("ordered", "factor"); `class(v) %in% ...` produces a condition of
  # length > 1 there, which if() rejects.
  if (inherits(v, c("character", "factor", "labelled", "numeric", "integer"))) {
    lengths <- nchar(as.character(v))
    # Missing values make all() return NA; isTRUE() keeps that NA out of the
    # if() condition, so such variables are simply not flagged.
    if (isTRUE(all(lengths > 10)) && length(unique(lengths)) == 1) {
      out$problem <- TRUE
      out$message <- "Warning: This variable seems to contain ID codes!"
    }
  }
  out
}
# Count how many entries of a variable are equal to zero.
#
# Missing values are not counted. The count is wrapped with summaryResult()
# under the feature label "No. zeros", and is supplied both as `result` and
# as `value`.
#
# Arguments:
#   v    The variable (vector) to summarize.
#   ...  Not used in the body.
countZeros <- function(v, ...) {
  nZero <- sum(v == 0, na.rm = TRUE)
  summaryResult(list(feature = "No. zeros", result = nZero, value = nZero))
}
# Wrap countZeros with summaryFunction(), attaching a description and
# declaring it for every class returned by allClasses(). The wrapped function
# replaces the plain one under the same name.
countZeros <- summaryFunction(countZeros, description = "Count number of zeros",
classes = allClasses())
# Summarize toyData using the default numeric summaries.
# NOTE(review): "countZeros" is not passed to this call, and toyData is not
# defined in this example -- presumably a dataset shipped with the package;
# confirm both are intended.
summarize(toyData, numericSummaries = c(defaultNumericSummaries()))
# Build a mosaic plot of a variable, either drawing it or returning its code.
#
# Arguments:
#   v       The variable to tabulate and plot.
#   vnam    Text used as the plot title (passed as `main`).
#   doEval  If TRUE, the plot call is evaluated and its result returned;
#           otherwise the call is returned as deparsed text.
mosaicVisual <- function(v, vnam, doEval) {
  # call() evaluates its arguments, so the table of `v` itself (not the
  # expression `table(v)`) is embedded in the stored call.
  plotCall <- call("mosaicplot", table(v), main = vnam, xlab = "")
  if (doEval) {
    eval(plotCall)
  } else {
    deparse(plotCall)
  }
}
# Wrap mosaicVisual with visualFunction(), attaching a description and
# declaring it for every class returned by allClasses(). The wrapped function
# replaces the plain one under the same name.
mosaicVisual <- visualFunction(mosaicVisual,
description = "Mosaic plots using graphics",
classes = allClasses())
# Check function that reports values containing a colon placed directly
# between two hexadecimal-digit characters (e.g. "1:2" or "a:b").
#
# Missing values are dropped and duplicates removed before matching, so each
# offending value is reported once.
#
# Arguments:
#   v     The variable (vector) to check.
#   nMax  Passed on to messageGenerator(); defaults to Inf.
#   ...   Not used in the body.
#
# Returns the result of checkResult() applied to a list with `problem`
# (TRUE/FALSE), `message` (built by messageGenerator()) and `problemValues`
# (the matching values).
identifyColons <- function(v, nMax = Inf, ...) {
  v <- unique(na.omit(v))
  problemMessage <- "Note: The following values include colons:"
  colonPattern <- "[[:xdigit:]]:[[:xdigit:]]"

  # grepl() always returns a logical vector, also for zero-length input.
  # sapply() over an empty gregexpr() result returns list(), which is not a
  # valid subscript and made the check fail on empty or all-missing variables.
  problemValues <- v[grepl(colonPattern, v)]
  problem <- length(problemValues) > 0

  problemStatus <- list(problem = problem,
                        problemValues = problemValues)
  outMessage <- messageGenerator(problemStatus, problemMessage, nMax)
  checkResult(list(problem = problem,
                   message = outMessage,
                   problemValues = problemValues))
}
# Wrap identifyColons with checkFunction(), attaching a description and
# declaring it for character, factor and labelled variables. The wrapped
# function replaces the plain one under the same name.
# NOTE(review): the description mentions "non-suffixed nor -prefixed" colons,
# while the pattern in identifyColons matches a colon with a hex-digit
# character on both sides -- confirm the wording matches the intent.
identifyColons <- checkFunction(identifyColons,
description = "Identify non-suffixed nor -prefixed colons",
classes = c("character", "factor", "labelled"))
# Produce a data report for exampleData that uses the user-defined functions
# from this example alongside the package defaults:
#   - preChecks:   "isKey" and "isEmpty" plus the custom "isID"
#   - allVisuals:  the custom "mosaicVisual" for every variable class
#   - *Summaries:  the defaults for each class plus "countZeros"
#   - *Checks:     the defaults for character, factor and labelled variables
#                  plus "identifyColons"
# replace = TRUE is passed so the call can be re-run (presumably it allows an
# existing report file to be overwritten -- confirm in the package docs).
makeDataReport(exampleData, replace = TRUE,
               preChecks = c("isKey", "isEmpty", "isID"),
               allVisuals = "mosaicVisual",
               characterSummaries = c(defaultCharacterSummaries(), "countZeros"),
               factorSummaries = c(defaultFactorSummaries(), "countZeros"),
               labelledSummaries = c(defaultLabelledSummaries(), "countZeros"),
               numericSummaries = c(defaultNumericSummaries(), "countZeros"),
               integerSummaries = c(defaultIntegerSummaries(), "countZeros"),
               characterChecks = c(defaultCharacterChecks(), "identifyColons"),
               factorChecks = c(defaultFactorChecks(), "identifyColons"),
               # Spelled "labelledChecks" to match characterChecks/factorChecks;
               # the former "labelledCheck" did not match that naming.
               labelledChecks = c(defaultLabelledChecks(), "identifyColons"))
[Package dataReporter version 1.0.2 Index]