synthesize {riskRegression} | R Documentation |
Cooking and synthesizing survival data
Description
Fit parametric regression models to the outcome distribution and optionally
also parametric regression models for the joint distribution of the predictors
structural equation models.
Then the function sim.synth
can be called on the resulting object to
to simulate from the parametric model based on the machinery of the lava
package
Usage
synthesize(object, data, ...)
## S3 method for class 'formula'
synthesize(
object,
data,
recursive = FALSE,
max.levels = 10,
verbose = FALSE,
...
)
## S3 method for class 'lvm'
synthesize(
object,
data,
max.levels = 10,
logtrans = NULL,
verbose = FALSE,
fix.names = FALSE,
...
)
Arguments
object |
Specification of the synthesizing model structures. Either a |
data |
Data to be synthesized. |
... |
Not used yet. |
recursive |
Let covariates recursively depend on each other. |
max.levels |
Integer used to guess which variables are categorical. When set to |
verbose |
Logical. If |
logtrans |
Vector of covariate names that should be log-transformed. This is primarily for internal use. |
fix.names |
Fix possible problematic covariate names. |
Details
Synthesizes survival data (also works for linear models and generalized linear models).
The idea is to be able to simulate new data sets that mimic the original data.
See the vignette vignette("synthesize",package = "riskRegression")
for more details.
The simulation engine is: lava.
Value
lava object
Author(s)
Thomas A. Gerds <tag@biostat.ku.dk>
See Also
lvm
Examples
# pbc data
library(survival)
library(lava)
data(pbc)
pbc <- na.omit(pbc[,c("time","status","sex","age","bili")])
pbc$logbili <- log(pbc$bili)
v_synt <- synthesize(object=Surv(time,status)~sex+age+logbili,data=pbc)
d <- simsynth(v_synt,1000)
fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d)
fit_real <- coxph(Surv(time,status==1)~age+sex+logbili,data=pbc)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))
u <- lvm()
distribution(u,~sex) <- binomial.lvm()
distribution(u,~age) <- normal.lvm()
distribution(u,~trt) <- binomial.lvm()
distribution(u,~logbili) <- normal.lvm()
u <-eventTime(u,time~min(time.cens=0,time.transplant=1,time.death=2), "status")
lava::regression(u,logbili~age+sex) <- 1
lava::regression(u,time.transplant~sex+age+logbili) <- 1
lava::regression(u,time.death~sex+age+logbili) <- 1
lava::regression(u,time.cens~1) <- 1
transform(u,logbili~bili) <- function(x){log(x)}
u_synt <- synthesize(object=u, data=na.omit(pbc))
set.seed(8)
d <- simsynth(u_synt,n=1000)
# note: synthesize may relabel status variable
fit_sim <- coxph(Surv(time,status==1)~age+sex+logbili,data=d)
fit_real <- coxph(Surv(time,status==1)~age+sex+log(bili),data=pbc)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))
#
# Cancer data
#
data(cancer)
b <- lvm()
distribution(b,~rx) <- binomial.lvm()
distribution(b,~age) <- normal.lvm()
distribution(b,~resid.ds) <- binomial.lvm()
distribution(b,~ecog.ps) <- binomial.lvm()
lava::regression(b,time.death~age+rx+resid.ds) <- 1
b<-eventTime(b,futime~min(time.cens=0,time.death=1), "fustat")
b_synt <- synthesize(object = b, data = ovarian)
D <- simsynth(b_synt,1000)
fit_real <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=ovarian)
fit_sim <- coxph(Surv(futime,fustat)~age+rx+resid.ds, data=D)
cbind(coef(fit_sim),coef(fit_real))
w_synt <- synthesize(object=Surv(futime,fustat)~age+rx+resid.ds, data=ovarian)
D <- simsynth(w_synt,1000)
fit_sim <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=D)
fit_real <- coxph(Surv(futime,fustat==1)~age+rx+resid.ds,data=ovarian)
# compare estimated log-hazard ratios between simulated and real data
cbind(coef(fit_sim),coef(fit_real))