RobustAFT-package {RobustAFT}R Documentation

Robust Accelerated Failure Time Model Fitting

Description

This package computes the truncated maximum likelihood regression estimates described in Marazzi and Yohai (2004) and Locatelli et al. (2010). The error distribution is assumed to follow approximately a Gaussian or a log-Weibull distribution. The cut-off values for outlier rejection are fixed or adaptive. The main functions of this package are TML.noncensored and TML.censored.

Details

Package: RobustAFT
Type: Package
#Version: 1.4-6
#Date: 2023-06-19
License: GPL-2 or later
LazyLoad: yes

Author(s)

Alfio Marazzi <Alfio.Marazzi@chuv.ch>, Jean-Luc Muralti

Maintainer: A. Randriamiharisoa <Alex.Randriamiharisoa@chuv.ch>

References

Marazzi A., Yohai V. (2004). Adaptively truncated maximum likelihood regression with asymmetric errors. Journal of Statistical Planning and Inference, 122, 271-291.

Locatelli I., Marazzi A., Yohai V. (2010). Robust accelerated failure time regression. Computational Statistics and Data Analysis, 55, 874-887.

Marazzi A. (1993) Algorithm, Routines, and S functions for Robust Statistics. Wadsworth & Brooks/cole, Pacific Grove, California.

Examples

# Example 1. This is the example described in Marazzi and Yohai (2004).
# ---------
# The two following auxiliary functions, not included in the library, 
#must be loaded.
# ------------- Auxiliary functions -------------------------------------

SDmux.lw <- function(x,theta,sigma,COV){
# Standard deviation of the conditional mean estimate: log-Weibull case
np <- length(theta); nc <- ncol(COV); nr <- nrow(COV)
if (np!=length(x)) cat("length(x) must be the same as length(theta)")
if (nc!=nr)        cat("COV is not a square matrix")
if (nc!=(np+1))    cat("ncol(COV) must be the same as length(theta)+1")
log.mu.x <- t(x)%*%theta+lgamma(1+sigma) # log of conditional mean estimate
mu.x     <- exp(log.mu.x)                # conditional mean estimate
dg       <- digamma(1+sigma)
COV.TT   <- COV[1:np,1:np]
Var.S    <- COV[(np+1),(np+1)]
COV.TS   <- COV[1:np,(np+1)]
V.mu.x   <- t(x)%*%COV.TT%*%x + dg^2*Var.S + 2*dg*(t(x)%*%COV.TS)
SD.mu.x  <- as.numeric((sqrt(V.mu.x))*mu.x)
SD.mu.x}

plt <- function(LOS,Cost,Adm,theta.fr,sigma.fr,sd0.fr,sd1.fr,theta.ml,
                sigma.ml,sd0.ml,sd1.ml){
# Plot of the conditional mean and confidence intervals: log-Weibull case
par(mfrow=c(1,2),oma=c(0,0,2,0))
plot(LOS,Cost,type="n")
points(LOS[Adm==0],Cost[Adm==0],pch=1)
points(LOS[Adm==1],Cost[Adm==1],pch=16,col=2)
x0t   <- x0%*%theta.fr; x1t <- x1t <- x1%*%theta.fr
lines(l0,exp(x0t)*gamma(1+sigma.fr))
lines(l0,exp(x1t)*gamma(1+sigma.fr),col=2)
z0min <- exp(x0t)*gamma(1+sigma.fr)-2.576*sd0.fr
z0max <- exp(x0t)*gamma(1+sigma.fr)+2.576*sd0.fr
z1min <- exp(x1t)*gamma(1+sigma.fr)-2.576*sd1.fr
z1max <- exp(x1t)*gamma(1+sigma.fr)+2.576*sd1.fr
lines(l0,z0min,lty=2,col=1)
lines(l0,z0max,lty=2,col=1)
lines(l0,z1min,lty=2,col=1)
lines(l0,z1max,lty=2,col=1)
polygon(c(l0,rev(l0)), c(z0min,rev(z0max)), border=FALSE, density=10, angle=90)
polygon(c(l0,rev(l0)), c(z1min,rev(z1max)), border=FALSE, density=12, angle=90,col=2)
plot(LOS,Cost,type="n")
points(LOS[Adm==0],Cost[Adm==0],pch=1)
points(LOS[Adm==1],Cost[Adm==1],pch=16,col=2)
x0t   <- x0%*%theta.ml; x1t <- x1t <- x1%*%theta.ml
lines(l0,exp(x0t)*gamma(1+sigma.ml))
lines(l0,exp(x1t)*gamma(1+sigma.ml),col=2)
z0min <- exp(x0t)*gamma(1+sigma.ml)-2.576*sd0.ml
z0max <- exp(x0t)*gamma(1+sigma.ml)+2.576*sd0.ml
z1min <- exp(x1t)*gamma(1+sigma.ml)-2.576*sd1.ml
z1max <- exp(x1t)*gamma(1+sigma.ml)+2.576*sd1.ml
lines(l0,z0min,lty=2,col=1)
lines(l0,z0max,lty=2,col=1)
lines(l0,z1min,lty=2,col=1)
lines(l0,z1max,lty=2,col=1)
polygon(c(l0,rev(l0)), c(z0min,rev(z0max)), border=FALSE, density=10, angle=90)
polygon(c(l0,rev(l0)), c(z1min,rev(z1max)), border=FALSE, density=12, angle=90,col=2)}

#------ End of auxiliary functions --------------------------------------------------

library(RobustAFT)

data(D243)
Cost <- D243$Cost                            # Cost (Swiss francs)
LOS <- D243$LOS                              # Length of stay (days)
Adm <- D243$Typadm; Adm <- (Adm==" Urg")*1   # Type of admission 
                                             # (0=on notification, 1=Emergency)
Ass <- D243$Typass; Ass <- (Ass=="P"   )*1   # Type of insurance (0=usual, 1=private)
Age <- D243$age                              # Age (years)
Dst <- D243$dest;   Dst <- (Dst=="DOMI")*1   # Destination (1=Home, 0=another hospital)
Sex <- D243$Sexe;   Sex <- (Sex=="M"   )*1   # Sex (1=Male, 0=Female)

## Not run: 
# Plot data
par(mfrow=c(1,2))
plot(LOS,Cost); plot(log(LOS),log(Cost))

# log-Weibull fits
# ----------------
# Full robust model
zwff     <- TML.noncensored(log(Cost)~log(LOS)+Adm+Ass+Age+Sex+Dst,
            errors="logWeibull")
summary(zwff)

# Reduced model
zwfr     <- update(zwff,log(Cost)~log(LOS)+Adm)
summary(zwfr)

# Residual plots
par(mfrow=c(1,2))
plot(zwfr,which=c(1,3))

# Plot robust predictions on log-log scale
par(mfrow=c(1,1))
l0       <- seq(from=2,to=60,by=0.5)
x0       <- as.matrix(cbind(1,log(l0),0))
x1       <- as.matrix(cbind(1,log(l0),1))
plot(log(LOS),log(Cost),type="n")
points(log(LOS[Adm==1]),log(Cost[Adm==1]),pch=16,col=2)
points(log(LOS[Adm==0]),log(Cost[Adm==0]),pch=1)
lines(log(l0),predict(zwfr,x0))
lines(log(l0),predict(zwfr,x1),col=2)

# Maximum likelihood : full model
zmlf     <- TML.noncensored(log(Cost)~log(LOS)+Adm+Ass+Age+Sex+Dst,
            errors="logWeibull",cu=100)
summary(zmlf)

# Maximum likelihood : reduced model
zmlr     <- update(zmlf,log(Cost)~log(LOS)+Adm)
summary(zmlr)

# Plot conditional means and confidence intervals
l0       <- seq(from=2,to=62,by=0.5)
x0       <- as.matrix(cbind(1,log(l0),0))
x1       <- as.matrix(cbind(1,log(l0),1))
theta.fr <- coef(zwfr)
sigma.fr <- zwfr$v1
COV.fr   <- vcov(zwfr)
sd0.fr   <- apply(x0,1,SDmux.lw,theta.fr,sigma.fr,COV.fr)
sd1.fr   <- apply(x1,1,SDmux.lw,theta.fr,sigma.fr,COV.fr)
theta.ml <- coef(zmlr)
sigma.ml <- zmlr$v1
COV.ml   <- zmlr$COV
sd0.ml   <- apply(x0,1,SDmux.lw,theta.ml,sigma.ml,COV.ml)
sd1.ml   <- apply(x1,1,SDmux.lw,theta.ml,sigma.ml,COV.ml)
plt(LOS,Cost,Adm,theta.fr,sigma.fr,sd0.fr,sd1.fr,theta.ml,sigma.ml,sd0.ml,sd1.ml)

# Gaussian fits (for comparison)
# ------------------------------
# Reduced model
zgfr <- TML.noncensored(log(Cost)~log(LOS)+Adm,errors="Gaussian")
summary(zgfr)

# Residual plots
par(mfrow=c(1,2))
plot(zgfr,which=c(1,3))

# Classical Gaussian fit
lr <- lm(log(Cost)~log(LOS)+Adm)
summary(lr)

# Compare several fits
# --------------------
comp <- fits.compare(TML.logWeibull=zwfr,ML.logWeibull=zmlr,least.squares=lr)
comp
plot(comp,leg.position=c("topleft","topleft","bottomleft")) # click on graphics

## End(Not run)
#
#------------------------------------------------------------------------------
#
# Example 2. This is the example described in Locatelli Marazzi and Yohai (2010).
# ---------
# This is the example described in Locatelli et al. (2010). 
# The estimates are slighty different due to changes in the algorithm for the 
# final estimate.
## Not run: 
# Remove data of Example 1
rm(Cost,LOS,Adm,Ass,Age,Dst,Sex)

data(MCI)
attach(MCI)
     
# Exploratory Analysis

par(mfrow=c(1,1)) 

plot(Age,log(LOS),type= "n",cex=0.7)

# (1) filled square   : regular,   complete   
# (2) empty  square   : regular,   censored
# (3) filled triangle : emergency, complete
# (4) empty  triangle : emergency, censored

points(Age[Dest==1 & TypAdm==0], log(LOS)[Dest==1 & TypAdm==0], pch=15,cex=0.7)  # (1)
points(Age[Dest==0 & TypAdm==0], log(LOS)[Dest==0 & TypAdm==0], pch=0, cex=0.7)  # (2)
points(Age[Dest==1 & TypAdm==1], log(LOS)[Dest==1 & TypAdm==1], pch=17,cex=0.7)  # (3)
points(Age[Dest==0 & TypAdm==1], log(LOS)[Dest==0 & TypAdm==1], pch=2, cex=0.7)  # (4)

# Maximum Likelihood
ML   <- survreg(Surv(log(LOS), Dest) ~ TypAdm*Age, dist="gaussian")
summary(ML)
B.ML <- ML$coef; S.ML <- ML$scale
abline(c(B.ML[1]        ,B.ML[3]        ),lwd=1,col="grey",lty=1)
abline(c(B.ML[1]+B.ML[2],B.ML[3]+B.ML[4]),lwd=1,col="grey",lty=1)
     
# Robust Accelerated Failure Time Regression with Gaussian errors
ctrol.S   <- list(N=150, q=5, sigma0=1, MAXIT=100, TOL=0.001,seed=123)

ctrol.ref <- list(maxit.sigma=2,tol.sigma=0.0001,maxit.Beta=2,tol.Beta=0.0001,
          Maxit.S=50, tol.S.sigma=0.001, tol.S.Beta=0.001,alg.sigma=1,nitmon=FALSE)

ctrol.tml <- list(maxit.sigma=50,tol.sigma=0.0001,maxit.Beta=50,tol.Beta=0.0001,
    Maxit.TML=50, tol.TML.sigma=0.001, tol.TML.Beta=0.001, alg.sigma=1,nitmon=FALSE)

WML       <- TML.censored(log(LOS)~TypAdm*Age,data=MCI,delta=Dest,otp="adaptive",
             control.S=ctrol.S,control.ref=ctrol.ref,control.tml=ctrol.tml)

summary(WML)

B.WML     <-coef(WML)
abline(c(B.WML[1]         ,B.WML[3]         ),lty=1, col="red")
abline(c(B.WML[1]+B.WML[2],B.WML[3]+B.WML[4]),lty=1, col="red")
#
detach(MCI)

## End(Not run)

[Package RobustAFT version 1.4-7 Index]