Resample {sharp} | R Documentation |
Resampling observations
Description
Generates a vector of resampled observation IDs.
Usage
Resample(data, family = NULL, tau = 0.5, resampling = "subsampling", ...)
Arguments
data |
vector or matrix of data. In regression, this should be the outcome data. |
family |
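type of regression model. This argument is defined as in glmnet. Possible values include "gaussian" (linear regression), "binomial" (logistic regression), "multinomial" (multinomial regression), and "cox" (survival analysis).
|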
tau |
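subsample size, expressed as a proportion of the observations. Only used if resampling = "subsampling" or if resampling is a user-defined function (which receives it as its tau argument). |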
resampling |
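resampling approach. Possible values are: "subsampling" for sampling without replacement of a proportion tau of the observations, or "bootstrap" for sampling with replacement. Alternatively, this argument can be a function to use for resampling. This function must use arguments named data and tau and return the IDs of the observations to be included in the resampled dataset.
|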
... |
additional parameters passed to the function provided in resampling.
|
Details
With categorical outcomes (i.e. "family" argument is set to "binomial", "multinomial" or "cox"), the resampling is done such that the proportion of observations from each of the categories is representative of that of the full sample.
Value
A vector of resampled IDs.
Examples
## Linear regression framework
# Data simulation
simul <- SimulateRegression()
# Subsampling (default resampling approach, with the default tau = 0.5)
ids <- Resample(data = simul$ydata, family = "gaussian")
# Number of repeated IDs; presumably 0 here, as subsampling should not draw
# the same observation twice
sum(duplicated(ids))
# Bootstrapping
ids <- Resample(data = simul$ydata, family = "gaussian", resampling = "bootstrap")
# Number of repeated IDs; presumably > 0 here, as bootstrapping draws with
# replacement
sum(duplicated(ids))
## Logistic regression framework
# Data simulation
simul <- SimulateRegression(family = "binomial")
# Subsampling
ids <- Resample(data = simul$ydata, family = "binomial")
sum(duplicated(ids))
# Proportion of each outcome category in the full sample ...
prop.table(table(simul$ydata))
# ... and among the resampled observations, for comparison (see Details:
# with categorical outcomes the category proportions should be preserved)
prop.table(table(simul$ydata[ids]))
# Data simulation for a binary confounder
# One value per observation: the length is taken from the simulated outcome
# rather than hard-coded, so it stays aligned with simul$ydata whatever sample
# size was simulated (NROW() handles both vector and matrix outcomes)
conf <- ifelse(runif(n = NROW(simul$ydata)) > 0.5, yes = 1, no = 0)
# User-defined resampling function
BalancedResampling <- function(data, tau, Z, ...) {
  # Stratified subsampling: within every combination of outcome category
  # ("0" or "1") and level of Z, draws floor(tau * n) observation IDs without
  # replacement, where n is the number of observations in that stratum.
  #
  # Arguments:
  #   data: binary outcome, with categories comparable to "0" and "1".
  #   tau:  proportion of observations to draw from each stratum.
  #   Z:    stratification variable, aligned with the observations in data.
  #   ...:  ignored; accepted so that extra arguments can be passed through.
  #
  # Value: vector of selected observation IDs (NULL if there are no strata).

  # Draws the IDs for one stratum. Indexing with sample.int() instead of
  # calling sample() on the IDs avoids the sample() pitfall where a single
  # ID k is interpreted as the population 1:k. Using length(ids) for the
  # stratum size also keeps the size well defined when the mask contains NA.
  sample_stratum <- function(in_stratum) {
    ids <- which(in_stratum)
    ids[sample.int(length(ids), size = floor(tau * length(ids)))]
  }

  s <- NULL
  for (z in unique(Z)) {
    # Same draw order as before: category "0" first, then category "1"
    for (y in c("0", "1")) {
      s <- c(s, sample_stratum((data == y) & (Z == z)))
    }
  }
  return(s)
}
# Resampling keeping proportions by Y and Z
# (Z = conf is an additional argument, passed on to BalancedResampling)
ids <- Resample(data = simul$ydata, family = "binomial", resampling = BalancedResampling, Z = conf)
# Joint distribution of outcome and confounder in the full sample ...
prop.table(table(simul$ydata, conf))
# ... and among the resampled observations, for comparison
prop.table(table(simul$ydata[ids], conf[ids]))
# User-defined resampling for stability selection
# (presumably resampling and Z are forwarded to the resampling step in the
# same way as in Resample() above -- confirm in the VariableSelection docs)
stab <- VariableSelection(
xdata = simul$xdata, ydata = simul$ydata, family = "binomial",
resampling = BalancedResampling, Z = conf
)