predict.variable_selection {sharp}R Documentation

Predict method for stability selection

Description

Computes predicted values from the output of VariableSelection.

Usage

## S3 method for class 'variable_selection'
predict(
  object,
  xdata,
  ydata,
  newdata = NULL,
  method = c("ensemble", "refit"),
  ...
)

Arguments

object

output of VariableSelection.

xdata

predictor data (training set).

ydata

outcome data (training set).

newdata

optional predictor data (test set).

method

character string indicating if predictions should be obtained from an Ensemble model (if method="ensemble") or a Refitted model (if method="refit").

...

additional arguments passed to predict.

Value

Predicted values.

See Also

Refit, Ensemble, EnsemblePredictions

Examples


## Linear regression

# Data simulation
set.seed(1)
simul <- SimulateRegression(n = 500, pk = 50, family = "gaussian")

# Training/test split
ids <- Split(data = simul$ydata, tau = c(0.8, 0.2))

# Stability selection
stab <- VariableSelection(
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ]
)

# Predictions from post stability selection estimation
yhat <- predict(stab,
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ],
  newdata = simul$xdata[ids[[2]], ],
  method = "refit"
)
cor(simul$ydata[ids[[2]], ], yhat)^2 # Q-squared

# Predictions from ensemble model
yhat <- predict(stab,
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ],
  newdata = simul$xdata[ids[[2]], ],
  method = "ensemble"
)
cor(simul$ydata[ids[[2]], ], yhat)^2 # Q-squared


## Logistic regression

# Data simulation
set.seed(1)
simul <- SimulateRegression(n = 500, pk = 20, family = "binomial", ev_xy = 0.9)

# Training/test split
ids <- Split(data = simul$ydata, family = "binomial", tau = c(0.8, 0.2))

# Stability selection
stab <- VariableSelection(
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ],
  family = "binomial"
)

# Predictions from post stability selection estimation
yhat <- predict(stab,
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ],
  newdata = simul$xdata[ids[[2]], ],
  method = "refit", type = "response"
)
plot(ROC(predicted = yhat, observed = simul$ydata[ids[[2]], ]))

# Predictions from ensemble model
yhat <- predict(stab,
  xdata = simul$xdata[ids[[1]], ],
  ydata = simul$ydata[ids[[1]], ],
  newdata = simul$xdata[ids[[2]], ],
  method = "ensemble", type = "response"
)
plot(ROC(predicted = yhat, observed = simul$ydata[ids[[2]], ]),
  add = TRUE,
  col = "blue"
)


[Package sharp version 1.4.6 Index]