pmforest {model4you} | R Documentation |
Compute model-based forest from model.
Description
Input a parametric model and get a forest.
Usage
pmforest(
model,
data = NULL,
zformula = ~.,
ntree = 500L,
perturb = list(replace = FALSE, fraction = 0.632),
mtry = NULL,
applyfun = NULL,
cores = NULL,
control = ctree_control(teststat = "quad", testtype = "Univ", mincriterion = 0,
saveinfo = FALSE, lookahead = TRUE, ...),
trace = FALSE,
...
)
## S3 method for class 'pmforest'
gettree(object, tree = 1L, saveinfo = TRUE, coeffun = coef, ...)
Arguments
model |
a model object. The model can be a parametric model with a single binary covariate. |
data |
data. If |
zformula |
formula describing which variable should be used for partitioning.
Default is to use all variables in data that are not in the model (i.e. |
ntree |
number of trees. |
perturb |
a list with arguments replace and fraction determining which type of
resampling with |
mtry |
number of input variables randomly sampled as candidates at each
node (Default |
applyfun |
see |
cores |
see |
control |
control parameters, see |
trace |
a logical indicating if a progress bar shall be printed while the forest grows. |
... |
additional parameters passed on to model fit such as weights. |
object |
an object returned by pmforest. |
tree |
an integer, the number of the tree to extract from the forest. |
saveinfo |
logical. Should the model info be stored in terminal nodes? |
coeffun |
function that takes the model object and returns the coefficients. Useful when coef() does not return all coefficients (e.g. survreg). |
Value
cforest object
See Also
Examples
library("model4you")
if(require("mvtnorm") & require("survival")) {
## function to simulate the data
sim_data <- function(n = 500, p = 10, beta = 3, sd = 1){
## treatment
lev <- c("C", "A")
a <- rep(factor(lev, labels = lev, levels = lev), length = n)
## correlated z variables
sigma <- diag(p)
sigma[sigma == 0] <- 0.2
ztemp <- rmvnorm(n, sigma = sigma)
z <- (pnorm(ztemp) * 2 * pi) - pi
colnames(z) <- paste0("z", 1:ncol(z))
z1 <- z[,1]
## outcome
y <- 7 + 0.2 * (a %in% "A") + beta * cos(z1) * (a %in% "A") + rnorm(n, 0, sd)
data.frame(y = y, a = a, z)
}
## simulate data
set.seed(123)
beta <- 3
ntrain <- 500
ntest <- 50
simdata <- simdata_s <- sim_data(p = 5, beta = beta, n = ntrain)
tsimdata <- tsimdata_s <- sim_data(p = 5, beta = beta, n = ntest)
simdata_s$cens <- rep(1, ntrain)
tsimdata_s$cens <- rep(1, ntest)
## base model
basemodel_lm <- lm(y ~ a, data = simdata)
## forest
frst_lm <- pmforest(basemodel_lm, ntree = 20,
perturb = list(replace = FALSE, fraction = 0.632),
control = ctree_control(mincriterion = 0))
## personalised models
# (1) return the model objects
pmodels_lm <- pmodel(x = frst_lm, newdata = tsimdata, fun = identity)
class(pmodels_lm)
# (2) return coefficients only (default)
coefs_lm <- pmodel(x = frst_lm, newdata = tsimdata)
# compare predictive objective functions of personalised models versus
# base model
sum(objfun(pmodels_lm)) # -RSS personalised models
sum(objfun(basemodel_lm, newdata = tsimdata)) # -RSS base model
if(require("ggplot2")) {
## dependence plot
dp_lm <- cbind(coefs_lm, tsimdata)
ggplot(tsimdata) +
stat_function(fun = function(z1) 0.2 + beta * cos(z1),
aes(color = "true treatment\neffect")) +
geom_point(data = dp_lm,
aes(y = aA, x = z1, color = "estimates lm"),
alpha = 0.5) +
ylab("treatment effect") +
xlab("patient characteristic z1")
}
}