Refit {sharp}
Regression model refitting
Description
Refits the regression model with stably selected variables as predictors (without penalisation). Variables in xdata not evaluated in the stability selection model will automatically be included as predictors.
Usage
Refit(
  xdata,
  ydata,
  stability = NULL,
  family = NULL,
  implementation = NULL,
  Lambda = NULL,
  seed = 1,
  verbose = TRUE,
  ...
)

Recalibrate(
  xdata,
  ydata,
  stability = NULL,
  family = NULL,
  implementation = NULL,
  Lambda = NULL,
  seed = 1,
  verbose = TRUE,
  ...
)
Arguments
xdata
    matrix of predictors with observations as rows and variables as columns.

ydata
    optional vector or matrix of outcome(s). If family is set to "binomial" or
    "multinomial", ydata can be a vector with character/numeric values or a factor.

stability
    output of VariableSelection or BiSelection. If stability = NULL (the default),
    a model including all variables in xdata as predictors is fitted. Argument
    family must be provided in this case.

family
    type of regression model. Possible values include "gaussian" (linear
    regression), "binomial" (logistic regression), "multinomial" (multinomial
    regression) and "cox" (Cox regression). This argument is only used if
    stability is not provided.

implementation
    optional function to refit the model. If implementation = NULL (the default),
    the refitting function is chosen according to the regression family: lm
    (linear regression), coxph (Cox regression), glm (logistic regression) or
    multinom (multinomial regression), as listed under Value.

Lambda
    optional vector of penalty parameters.

seed
    value of the seed to initialise the random number generator and ensure
    reproducibility of the results (see set.seed).

verbose
    logical indicating if a loading bar and messages should be printed.

...
    additional arguments to be passed to the function provided in implementation.
Value
The output as obtained from:

lm (package stats)
    for linear regression ("gaussian" family).

coxph (package survival)
    for Cox regression ("cox" family).

glm (package stats)
    for logistic regression ("binomial" family).

multinom (package nnet)
    for multinomial regression ("multinomial" family).
See Also

VariableSelection, BiSelection
Examples
## Linear regression
# Data simulation
set.seed(1)
simul <- SimulateRegression(n = 100, pk = 50, family = "gaussian")
# Data split
ids_train <- Resample(
  data = simul$ydata,
  tau = 0.5, family = "gaussian"
)
xtrain <- simul$xdata[ids_train, , drop = FALSE]
ytrain <- simul$ydata[ids_train, , drop = FALSE]
xrefit <- simul$xdata[-ids_train, , drop = FALSE]
yrefit <- simul$ydata[-ids_train, , drop = FALSE]
# Stability selection
stab <- VariableSelection(xdata = xtrain, ydata = ytrain, family = "gaussian")
print(SelectedVariables(stab))
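# Illustration (an assumption about the output format, not documented behaviour):
# SelectedVariables() appears to return a binary vector named after the
# predictors, as printed above, so the stably selected variables can be listed as
names(which(SelectedVariables(stab) == 1))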
# Refitting the model
refitted <- Refit(
  xdata = xrefit, ydata = yrefit,
  stability = stab
)
refitted$coefficients # refitted coefficients
head(refitted$fitted.values) # refitted predicted values
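# As described in Value, the refitted object is obtained from stats::lm here,
# so the usual lm methods apply (shown as an illustration only)
summary(refitted) # coefficients with standard errors and p-values
head(residuals(refitted)) # residuals of the refitted model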
# Fitting the full model (including all possible predictors)
refitted <- Refit(
  xdata = simul$xdata, ydata = simul$ydata,
  family = "gaussian"
)
refitted$coefficients # refitted coefficients
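# Sketch of a quick check (assuming an intercept is fitted by default): with
# stability = NULL all predictors are included, so there should be one
# coefficient per column of xdata plus the intercept
length(refitted$coefficients) # compare with ncol(simul$xdata) + 1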
## Logistic regression
# Data simulation
set.seed(1)
simul <- SimulateRegression(n = 200, pk = 20, family = "binomial")
# Data split
ids_train <- Resample(
  data = simul$ydata,
  tau = 0.5, family = "binomial"
)
xtrain <- simul$xdata[ids_train, , drop = FALSE]
ytrain <- simul$ydata[ids_train, , drop = FALSE]
xrefit <- simul$xdata[-ids_train, , drop = FALSE]
yrefit <- simul$ydata[-ids_train, , drop = FALSE]
# Stability selection
stab <- VariableSelection(xdata = xtrain, ydata = ytrain, family = "binomial")
# Refitting the model
refitted <- Refit(
  xdata = xrefit, ydata = yrefit,
  stability = stab
)
refitted$coefficients # refitted coefficients
head(refitted$fitted.values) # refitted predicted probabilities
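# As described in Value, the refitted object is obtained from stats::glm here,
# so fitted values are predicted probabilities; a 0.5 cutoff classification is
# shown below (an illustration, assuming yrefit holds binary 0/1 outcomes)
predicted_classes <- ifelse(refitted$fitted.values > 0.5, 1, 0)
table(observed = yrefit, predicted = predicted_classes)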
## Partial Least Squares (multiple components)
if (requireNamespace("sgPLS", quietly = TRUE)) {
  # Data simulation
  set.seed(1)
  simul <- SimulateRegression(n = 500, pk = 15, q = 3, family = "gaussian")
  # Data split
  ids_train <- Resample(
    data = simul$ydata,
    tau = 0.5, family = "gaussian"
  )
  xtrain <- simul$xdata[ids_train, , drop = FALSE]
  ytrain <- simul$ydata[ids_train, , drop = FALSE]
  xrefit <- simul$xdata[-ids_train, , drop = FALSE]
  yrefit <- simul$ydata[-ids_train, , drop = FALSE]
  # Stability selection
  stab <- BiSelection(
    xdata = xtrain, ydata = ytrain,
    family = "gaussian", ncomp = 3,
    LambdaX = seq_len(ncol(xtrain) - 1),
    LambdaY = seq_len(ncol(ytrain) - 1),
    implementation = SparsePLS
  )
  plot(stab)
  # Refitting the model
  refitted <- Refit(
    xdata = xrefit, ydata = yrefit,
    stability = stab
  )
  refitted$Wmat # refitted X-weights
  refitted$Cmat # refitted Y-weights
}
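# Additional illustration (a sketch relying on assumptions about the objects
# above, not on documented structure): Wmat and Cmat are assumed to be matrices
# with one column per component, so the number of non-zero X-weights per
# component can be inspected as follows
if (requireNamespace("sgPLS", quietly = TRUE)) {
  apply(refitted$Wmat, 2, function(w) sum(w != 0)) # non-zero X-weights per component
  dim(refitted$Cmat) # Y-weights: rows = outcomes, columns = components (assumed)
}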