flexrsurvclt {flexrsurv} | R Documentation |
Fit Relative Survival Model and Correct Life Tables
Description
flexrsurvclt
is used to fit relative survival regression model.
transition package.
Usage
flexrsurvclt(formula=formula(data),
formula.table=NULL,
data=parent.frame(),
Id,
baselinehazard=TRUE,
firstWCEIadditive=FALSE,
knots.Bh,
degree.Bh=3,
intercept.Bh=TRUE,
Spline=c("b-spline", "tp-spline", "tpi-spline"),
log.Bh=FALSE,
bhlink=c("log", "identity"),
Min_T=0,
Max_T=NULL,
model=c("additive","multiplicative"),
rate,
logit_start,
logit_end,
logit_start_byperiod = NULL,
logit_end_byperiod = NULL,
weights_byperiod = NULL,
Id_byperiod = NULL,
contrasts.table = NULL,
knots.table=c(-2.5,0,2.5),
degree.table=3,
Spline.table=c("restricted B-splines"),
Spline.CLT=R2bBSplineBasis(knots=c(-2.5,0,2.5), degree=3),
model_correction = c("cohort", "period"),
weights=NULL,
na.action=NULL,
datacontrol=NULL,
Idcontrol,
ratecontrol,
logit_startcontrol,
logit_endcontrol,
logit_start_byperiodcontrol = NULL,
logit_end_byperiodcontrol = NULL,
weights_byperiodcontrol = NULL,
Id_byperiodcontrol = NULL,
weightscontrol=NULL,
int_meth=c("GL", "CAV_SIM", "SIM_3_8", "BOOLE", "GLM", "BANDS"),
bands=NULL,
npoints=20,
stept=NULL,
init=NULL,
initbyglm=TRUE,
initbands=bands,
optim.control=list(trace=100, REPORT=1, fnscale=-1, maxit=25),
optim_meth=c("BFGS", "CG", "Nelder-Mead", "L-BFGS-B", "SANN", "Brent"),
Coptim.control=list(),
lower = -Inf,
upper = Inf,
control.glm=list(epsilon=1e-8, maxit=100, trace=FALSE,
epsilon.glm=.1, maxit.glm=25),
vartype = c("oim", "opg", "none"),
varmethod = c("optim", "numDeriv.hessian", "numDeriv.jacobian"),
numDeriv.method.args=list(eps=5e-7, d=0.001,
zero.tol=sqrt(.Machine$double.eps/7e-4), r=4, v=2),
debug=FALSE
)
flexrsurvclt.ll(formula=formula(data),
formula.table=NULL,
data=parent.frame(),
Id,
baselinehazard=TRUE,
firstWCEIadditive=FALSE,
knots.Bh,
degree.Bh=3,
Spline=c("b-spline", "tp-spline", "tpi-spline"),
log.Bh=FALSE,
bhlink=c("log", "identity"),
intercept.Bh=TRUE,
Min_T=0,
Max_T=NULL,
model=c("additive","multiplicative"),
rate,
logit_start,
logit_end,
logit_start_byperiod = NULL,
logit_end_byperiod = NULL,
weights_byperiod = NULL,
Id_byperiod = NULL,
contrasts.table = NULL,
knots.table=c(-2.5,0,2.5),
degree.table=3,
Spline.table=c("restricted B-splines"),
Spline.CLT=R2bBSplineBasis(knots=c(-2.5,0,2.5), degree=3),
model_correction = c("cohort", "period"),
weights=NULL,
na.action=NULL,
datacontrol=NULL,
Idcontrol,
ratecontrol,
logit_startcontrol,
logit_endcontrol,
logit_start_byperiodcontrol = NULL,
logit_end_byperiodcontrol = NULL,
weights_byperiodcontrol = NULL,
Id_byperiodcontrol = NULL,
weightscontrol=NULL,
int_meth=c("GL", "CAV_SIM", "SIM_3_8", "BOOLE", "GLM", "BANDS"),
bands=NULL,
npoints=20,
stept=NULL,
init=NULL,
optim.control=list(trace=100, REPORT=1, fnscale=-1, maxit=25),
Coptim.control= list(),
optim_meth=c("BFGS", "CG", "Nelder-Mead", "L-BFGS-B", "SANN", "Brent"),
lower = -Inf,
upper = Inf,
vartype = c("oim", "opg", "none"),
varmethod = c("optim", "numDeriv.hessian", "numDeriv.jacobian"),
numDeriv.method.args=list(eps=5e-7, d=0.001,
zero.tol=sqrt(.Machine$double.eps/7e-4), r=4, v=2),
debug=FALSE
)
Arguments
formula |
a formula object, with the response on the left of a ~ operator, and the terms on the
right. The response must be a survival object as returned by the |
formula.table |
a formula object, with empty left hand side, and the terms on the right. This is the formula of the proportional part of the correction model for the table table |
data |
a data.frame in which to interpret the variables named in the formulas. |
Id |
vector whose unique values defines the Ids of the subjects. |
baselinehazard |
if FALSE, no baseline hazard in the model |
firstWCEIadditive |
if TRUE, the first WCEI term in the formula is considered as the baseline |
knots.Bh |
the internal breakpoints that define the spline used to estimate the baseline hazard. Typical values are the mean or median for one knot, quantiles for more knots. |
degree.Bh |
degree of the piecewise polynomial of the baseline hazard. Default is 3 for cubic splines. |
intercept.Bh |
TRUE if the first bases is included in the baseline hazard. Default is TRUE. |
Spline |
a character string specifying the type of spline basis. "b-spline" for B-spline basis, "tp-spline" for truncated power basis and "tpi-spline" for monotone (increasing) truncated power basis. |
log.Bh |
logical value: if TRUE, an additional basis equal to log(time) is added to the spline bases of time. |
bhlink |
character string specifying the link function of the baseline hazard:
Default is |
Min_T |
minimum of time period which is analysed. Default is |
Max_T |
maximum of time period which is analysed. Default is |
model |
character string specifying the type of model for both non-proportional and non linear effects.
The model |
rate |
a vector of the background rate for a relevant comparative population to be used in the fitting process.
Should be a numeric vector (for relative survival model).
|
logit_start |
a vector of the logit of the cumulative hazard at the start of the
interval in the life table.
|
logit_end |
a vector of the logit of the cumulative hazard at the end of the
interval in the life table.
|
logit_start_byperiod , logit_end_byperiod , weights_byperiod , Id_byperiod |
A REMPLIR |
knots.table |
the internal breakpoints on the logit scale that define the knots of the spline used to estimate the correction model of the life table. |
degree.table |
degree of the piecewise polynomial of the spline used to estimate the correction model of the life table. Default is 3 for cubic splines. |
contrasts.table |
an optional list. See the |
Spline.table |
a character string specifying the type of spline basis of the the correction model of the life table. In this version, only "restricted B-splines" is available. "restricted B-splines" are B-spline basis with linear extrapolation + 2nd derivative at boundaries == 0. |
Spline.CLT |
a S4 object with method deriv() and evaluate().
The spline basis of the correction of the life table can be specified either by the parameters ( |
model_correction |
character string specifying A COMPLETER.
|
weights |
an optional vector of weights to be used in the fitting process. Should be NULL or a numeric vector. If not null, the total likelihood is the weighted sum of individual likelihood. |
na.action |
a missing-data filter function, applied to the model.frame. If |
datacontrol |
a data.frame in which to interpret the variables named in the formula for the control group. |
Idcontrol , ratecontrol , logit_startcontrol , logit_endcontrol , weightscontrol |
Id, rate, logit of the cumulative hazard at the start and the end of the intervalle in the life table, and weights for the control group |
logit_start_byperiodcontrol , logit_end_byperiodcontrol , weights_byperiodcontrol , Id_byperiodcontrol |
A REMPLIR |
int_meth |
character string specifying the the numerical integration method. Possible values are "GL" for Gauss-Legendre method, "CAV_SIM" for Cavalieri-Simpson's rule, "SIM_3_8" for the Simpson's 3/8 rule, "BOOLE" for the Boole's rule, or "BANDS" for the midpoint rule with specified bands. |
bands |
bands used to split data in the numerical integration when |
npoints |
number of points used in the numerical integration when |
stept |
scalar value of the time-step in numerical integration. It is required only when |
init |
starting values of the parameters. |
initbyglm |
a logical value indicating indicating how are found or refined init values. If TRUE, the fitting method described in Remontet et al.(2007)
is ued to find or refine starting values. This may speedup the fit. If FALSE, the maximisation of the likelihood starts at values
given in |
initbands |
bands used to split data when |
optim.control |
a list of control parameters passed to the |
optim_meth |
method to be used to optimize the likelihood.
See |
Coptim.control |
a list of control parameters passed to the |
lower , upper |
Bounds on the variables for the "L-BFGS-B" method, or bounds in which to search for method "Brent".
See |
control.glm |
a list of control parameters passed to the |
vartype |
character string specifying the type of variance matrix computed by |
varmethod |
character string specifying the method to compute the hessian matrix when |
numDeriv.method.args |
arguments passed to |
debug |
control the volum of intermediate output |
Details
A full description of the additive and the multiplicative both non-linear and non-proportional models is given respectively in Remontet (2007) and Mahboubi (2011).
flexrsurv.ll
is the workhorse function: it is not normally called
directly.
Value
flexrsurv
returns an object of class "flexrsurv"
.
An object of class "flexrsurv"
is a list containing at least the following components:
coefficients |
a named vector of coefficients |
loglik |
the log-likelihood |
var |
estimated covariance matrix for the estimated coefficients |
informationMatrix |
estimated information matrix |
bhlink |
the linkk of baseline hazard:
if |
init |
vector of the starting values supplied |
converged |
logical, Was the optimlizer algorithm judged to have converged? |
linear.predictors |
the linear fit on link scale (not including the baseline hazard term if |
fitted.values |
the estimated value of the hazard rate at each event time, obtained by transforming the linear predictors by the inverse of the link function |
cumulative.hazard |
the estimated value of the cumulative hazard in the time interval |
call |
the matched call |
formula |
the formula supplied |
terms |
the |
data |
the |
rate |
the rate vector used |
time |
the time vector used |
workingformula |
the formula used by the fitter |
optim.control |
the value of the |
control.glm |
the value of the |
method |
the name of the fitter function used |
References
Mahboubi, A., M. Abrahamowicz, et al. (2011). "Flexible modeling of the effects of continuous prognostic factors in relative survival." Stat Med 30(12): 1351-1365. doi:10.1002/sim.4208
Remontet, L., N. Bossard, et al. (2007). "An overall strategy based on regression models to estimate relative survival and model the effects of prognostic factors in cancer survival studies." Stat Med 26(10): 2214-2228. doi:10.1002/sim.2656
See Also
print.flexrsurv
,
summary.flexrsurv
,
logLik.flexrsurv
,
predict.flexrsurv
,
NPH
,
NLL
, and
NPHNLL
.
Examples
if (requireNamespace("relsurv", quietly = TRUE) & requireNamespace("date", quietly = TRUE)) {
library(date)
# data from package relsurv
data(rdata, package="relsurv")
# rate table from package relsurv
data(slopop, package="relsurv")
# get the death rate at event (or end of followup) from slopop for rdata
rdata$iage <- findInterval(rdata$age*365.24+rdata$time, attr(slopop, "cutpoints")[[1]])
rdata$iyear <- findInterval(rdata$year+rdata$time, attr(slopop, "cutpoints")[[2]])
therate <- rep(-1, dim(rdata)[1])
for( i in 1:dim(rdata)[1]){
therate[i] <- slopop[rdata$iage[i], rdata$iyear[i], rdata$sex[i]]
}
rdata$slorate <- therate
# get the logit_start and logit_end
# logit start at age 18
tmpsurv <- Surv(rep(0, length(rdata$time)), rdata$time, rdata$cens)
HH <- getHazardFromTable(tmpsurv, startdate=rdata$year,
startage=rdata$age*365.25 , matchdata=rdata, ratetable=slopop,
age="age", year="year",
rmap=list(sex=sex),
agemin=18,
ratename = "poprate", cumrateendname ="cumrateend", cumrateentername ="cumrateenter"
)
rdata$slorate <- HH$poprate
rdata$logit_start <- log(exp(HH$cumrateenter)-1)
rdata$logit_end <- log(exp(HH$cumrateend)-1)
rdata$Id <- 1:dim(rdata)[1]
# change sex coding
rdata$sex01 <- rdata$sex -1
# fit a relative survival model with a non linear effect of age
# without correction of life table
# partial likelihood
fit00 <- flexrsurvclt(Surv(time,cens)~sex01+NLL(age, Knots=60, Degree=3,
Boundary.knots = c(24, 95)),
rate=slorate,
data=rdata,
knots.Bh=1850, # one interior knot at 5 years
degree.Bh=3,
Max_T=5400,
Spline = "b-spline",
initbyglm=TRUE,
initbands=seq(0, 5400, 100),
int_meth= "BANDS",
bands=seq(0, 5400, 50)
)
summary(fit00)
# fit a relative survival model with a non linear effect of age
# without correction of life table
# full likelihood
fit0 <- flexrsurvclt(Surv(time,cens)~sex01+NLL(age, Knots=60, Degree=3,
Boundary.knots = c(24, 95)),
rate=slorate,
logit_start=logit_start,
logit_end=logit_end,
data=rdata,
Id=Id,
knots.Bh=1850, # one interior knot at 5 years
degree.Bh=3,
Max_T=5400,
Spline = "b-spline",
initbyglm=TRUE,
initbands=seq(0, 5400, 100),
int_meth= "BANDS",
bands=seq(0, 5400, 50)
)
summary(fit0)
# fit a relative survival model with a non linear effect of age
# with correction of life table
# full likelihood
fit1 <- flexrsurvclt(Surv(time,cens)~sex01+NLL(age, Knots=60, Degree=3,
Boundary.knots = c(24, 95)),
rate=slorate,
logit_start=logit_start,
logit_end=logit_end,
data=rdata,
Id=Id,
knots.Bh=1850, # one interior knot at 5 years
degree.Bh=3,
Max_T=5400,
Spline = "b-spline",
Spline.CLT=flexrsurv:::R2bBSplineBasis(knots=c(-2.5,0,2.5), degree=3),
initbyglm=TRUE,
initbands=seq(0, 5400, 100),
int_meth= "BANDS",
bands=seq(0, 5400, 50)
)
summary(fit1)
print(coef(fit1))
# fit a relative survival model with a non linear effect of age
# with correction of life table, strabified by sex
# full likelihood
fit2 <- flexrsurvclt(Surv(time,cens)~sex01+NLL(age, Knots=60, Degree=3,
Boundary.knots = c(24, 95)),
formula.table= ~sex,
rate=slorate,
logit_start=logit_start,
logit_end=logit_end,
data=rdata,
Id=Id,
knots.Bh=1850, # one interior knot at 5 years
degree.Bh=3,
Max_T=5400,
Spline = "b-spline",
Spline.CLT=flexrsurv:::R2bBSplineBasis(knots=c(-2.5,0,2.5), degree=3),
initbyglm=TRUE,
initbands=seq(0, 5400, 100),
int_meth= "BANDS",
bands=seq(0, 5400, 50)
)
summary(fit2)
AIC(fit0, fit1, fit2)
}