Refit {sharp}R Documentation

Regression model refitting


Refits the regression model with stably selected variables as predictors (without penalisation). Variables in xdata not evaluated in the stability selection model will automatically be included as predictors.


Refit(
  xdata,
  ydata,
  stability = NULL,
  family = NULL,
  implementation = NULL,
  Lambda = NULL,
  seed = 1,
  verbose = TRUE,
  ...
)

Recalibrate(
  xdata,
  ydata,
  stability = NULL,
  family = NULL,
  implementation = NULL,
  Lambda = NULL,
  seed = 1,
  verbose = TRUE,
  ...
)



xdata: matrix of predictors with observations as rows and variables as columns.


ydata: optional vector or matrix of outcome(s). If family is set to "binomial" or "multinomial", ydata can be a vector with character/numeric values or a factor.


stability: output of VariableSelection or BiSelection. If stability=NULL (the default), a model including all variables in xdata as predictors is fitted. Argument family must be provided in this case.


family: type of regression model. Possible values include "gaussian" (linear regression), "binomial" (logistic regression), "multinomial" (multinomial regression), and "cox" (survival analysis). If provided, this argument must be consistent with input stability.


implementation: optional function to refit the model. If stability is the output of VariableSelection, a regression model is refitted. If implementation=NULL and Lambda=0, this is done using lm (for linear regression), coxph (Cox regression), glm (logistic regression), or multinom (multinomial regression). If Lambda=NULL, a Ridge regression is fitted and calibrated by cross validation using cv.glmnet. The function PLS is used if stability is the output of BiSelection.


Lambda: optional vector of penalty parameters.


seed: value of the seed to initialise the random number generator and ensure reproducibility of the results (see set.seed).


verbose: logical indicating if a loading bar and messages should be printed.


...: additional arguments to be passed to the function provided in implementation.


The output as obtained from:


lm for linear regression ("gaussian" family).


coxph for Cox regression ("cox" family).


glm for logistic regression ("binomial" family).


multinom for multinomial regression ("multinomial" family).

See Also



## Linear regression

# Data simulation
simul <- SimulateRegression(n = 100, pk = 50, family = "gaussian")

# Data split: rows indexed by ids_train are used for stability selection,
# the remaining rows are kept aside for refitting
ids_train <- Resample(
  data = simul$ydata,
  tau = 0.5, family = "gaussian"
)
xtrain <- simul$xdata[ids_train, , drop = FALSE]
ytrain <- simul$ydata[ids_train, , drop = FALSE]
xrefit <- simul$xdata[-ids_train, , drop = FALSE]
yrefit <- simul$ydata[-ids_train, , drop = FALSE]

# Stability selection
stab <- VariableSelection(xdata = xtrain, ydata = ytrain, family = "gaussian")

# Refitting the model (family is not given here, only the stability output)
refitted <- Refit(
  xdata = xrefit, ydata = yrefit,
  stability = stab
)
refitted$coefficients # refitted coefficients
head(refitted$fitted.values) # refitted predicted values

# Fitting the full model (including all possible predictors)
# No stability output is supplied, so family has to be given explicitly
refitted <- Refit(
  xdata = simul$xdata, ydata = simul$ydata,
  family = "gaussian"
)
refitted$coefficients # refitted coefficients

## Logistic regression

# Data simulation
simul <- SimulateRegression(n = 200, pk = 20, family = "binomial")

# Data split: rows indexed by ids_train are used for stability selection,
# the remaining rows are kept aside for refitting
ids_train <- Resample(
  data = simul$ydata,
  tau = 0.5, family = "binomial"
)
xtrain <- simul$xdata[ids_train, , drop = FALSE]
ytrain <- simul$ydata[ids_train, , drop = FALSE]
xrefit <- simul$xdata[-ids_train, , drop = FALSE]
yrefit <- simul$ydata[-ids_train, , drop = FALSE]

# Stability selection
stab <- VariableSelection(xdata = xtrain, ydata = ytrain, family = "binomial")

# Refitting the model (family is not given here, only the stability output)
refitted <- Refit(
  xdata = xrefit, ydata = yrefit,
  stability = stab
)
refitted$coefficients # refitted coefficients
head(refitted$fitted.values) # refitted predicted probabilities

## Partial Least Squares (multiple components)
# Only run when the optional sgPLS package is installed
if (requireNamespace("sgPLS", quietly = TRUE)) {
  # Data simulation
  simul <- SimulateRegression(n = 500, pk = 15, q = 3, family = "gaussian")

  # Data split: rows indexed by ids_train are used for stability selection,
  # the remaining rows are kept aside for refitting
  ids_train <- Resample(
    data = simul$ydata,
    tau = 0.5, family = "gaussian"
  )
  xtrain <- simul$xdata[ids_train, , drop = FALSE]
  ytrain <- simul$ydata[ids_train, , drop = FALSE]
  xrefit <- simul$xdata[-ids_train, , drop = FALSE]
  yrefit <- simul$ydata[-ids_train, , drop = FALSE]

  # Stability selection
  stab <- BiSelection(
    xdata = xtrain, ydata = ytrain,
    family = "gaussian", ncomp = 3,
    LambdaX = seq_len(ncol(xtrain) - 1),
    LambdaY = seq_len(ncol(ytrain) - 1),
    implementation = SparsePLS
  )

  # Refitting the model
  refitted <- Refit(
    xdata = xrefit, ydata = yrefit,
    stability = stab
  )
  refitted$Wmat # refitted X-weights
  refitted$Cmat # refitted Y-weights
}

[Package sharp version 1.4.6 Index]