R: Compute Model-averaged Predictions

modavgPred {AICcmodavg}

R Documentation

Compute Model-averaged Predictions

Description

This function computes the model-averaged predictions, unconditional standard errors, and confidence intervals based on the entire candidate model set. The function is currently implemented for glm, gls, lm, lme, mer, merMod, lmerModLmerTest, negbin, rlm, survreg object classes that are stored in a list as well as various models of unmarkedFit classes.

Usage

modavgPred(cand.set, modnames = NULL, newdata, second.ord = TRUE,
           nobs = NULL, uncond.se = "revised", conf.level = 0.95, ...)

## S3 method for class 'AICaov.lm'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...) 

## S3 method for class 'AICglm.lm'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        gamdisp = NULL, ...)

## S3 method for class 'AIClm'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...) 

## S3 method for class 'AICgls'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...) 

## S3 method for class 'AIClme'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...)

## S3 method for class 'AICmer'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1, ...)

## S3 method for class 'AICglmerMod'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1, ...)

## S3 method for class 'AIClmerMod'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...)

## S3 method for class 'AIClmerModLmerTest'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...)

## S3 method for class 'AICnegbin.glm.lm'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", ...)

## S3 method for class 'AICrlm.lm'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, ...) 

## S3 method for class 'AICsurvreg'
modavgPred(cand.set, modnames = NULL, newdata,
        second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", ...) 

## S3 method for class 'AICunmarkedFitOccu'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised", 
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitColExt'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)  

## S3 method for class 'AICunmarkedFitOccuRN'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitPCount'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...) 

## S3 method for class 'AICunmarkedFitPCO'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitDS'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...) 

## S3 method for class 'AICunmarkedFitGDS'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitOccuFP'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitMPois'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitGMM'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitGPC'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitOccuTTD'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitMMO'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitDSO'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitOccuMS'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

## S3 method for class 'AICunmarkedFitOccuMulti'
modavgPred(cand.set, modnames = NULL,
        newdata, second.ord = TRUE, nobs = NULL, uncond.se = "revised",
        conf.level = 0.95, type = "response", c.hat = 1,
        parm.type = NULL, ...)

Arguments

`cand.set`	a list storing each of the models in the candidate model set.
`modnames`	a character vector of model names to facilitate the identification of each model in the model selection table. If `NULL`, the function uses the names in the cand.set list of candidate models. If no names appear in the list, generic names (e.g., `Mod1`, `Mod2`) are supplied in the table in the same order as in the list of candidate models.
`newdata`	a data frame with the same structure as that of the original data frame for which we want to make predictions.
`second.ord`	logical. If `TRUE`, the function returns the second-order Akaike information criterion (i.e., AICc).
`nobs`	this argument allows to specify a numeric value other than total sample size to compute the AICc (i.e., `nobs` defaults to total number of observations). This is relevant only for mixed models or various models of `unmarkedFit` classes where sample size is not straightforward. In such cases, one might use total number of observations or number of independent clusters (e.g., sites) as the value of `nobs`.
`uncond.se`	either, `old`, or `revised`, specifying the equation used to compute the unconditional standard error of a model-averaged estimate. With `uncond.se = "old"`, computations are based on equation 4.9 of Burnham and Anderson (2002), which was the former way to compute unconditional standard errors. With `uncond.se = "revised"`, equation 6.12 of Burnham and Anderson (2002) is used. Anderson (2008, p. 111) recommends use of the revised version for the computation of unconditional standard errors and it is now the default. Note that versions of package AICcmodavg < 1.04 used the old method to compute unconditional standard errors.
`conf.level`	the confidence level (`1 - \alpha`) requested for the computation of unconditional confidence intervals.
`type`	the scale of prediction requested, one of `response` or `link`. The latter is only relevant for `glm`, `mer`, and `unmarkedFit` classes. Note that the value `terms` is not defined for `modavgPred`.
`c.hat`	value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from `c_hat`. Note that values of `c.hat` different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, failure) syntax), with Poisson GLM's, single-season and dynamic occupancy models (MacKenzie et al. 2002, 2003), or N-mixture models (Royle 2004, Dail and Madsen 2011). If `c.hat > 1`, `modavgPred` will return the quasi-likelihood analogue of the information criteria requested and multiply the variance-covariance matrix of the estimates by this value (i.e., SE's are multiplied by `sqrt(c.hat)`). This option is not supported for generalized linear mixed models of the `mer` class.
`gamdisp`	the value of the gamma dispersion parameter.
`parm.type`	this argument specifies the parameter type on which the predictions will be computed and is only relevant for models of `unmarkedFit` classes. The character strings supported vary with the type of model fitted. For `unmarkedFitOccu` objects, either `psi` or `detect` can be supplied to indicate whether the parameter is on occupancy or detectability, respectively. For `unmarkedFitColExt` objects, possible values are `psi`, `gamma`, `epsilon`, and `detect`, for parameters on occupancy in the inital year, colonization, extinction, and detectability, respectively. For `unmarkedFitOccuTTD` objects, possible values are `psi`, `gamma`, `epsilon`, and `detect`, for parameters on occupancy in the inital year, colonization, extinction, and time-to-dection (lambda rate parameter), respectively. For `unmarkedFitOccuFP` objects, one can specify `psi`, `detect`, `falsepos`, and `certain`, for occupancy, detectability, probability of assigning false-positives, and probability detections are certain, respectively. For `unmarkedFitOccuRN` objects, either `lambda` or `detect` can be entered for abundance and detectability parameters, respectively. For `unmarkedFitPCount` and `unmarkedFitMPois` objects, `lambda` or `detect` denote parameters on abundance and detectability, respectively. For `unmarkedFitPCO`, `unmarkedFitMMO`, and `unmarkedFitDSO` objects, one can enter `lambda`, `gamma`, `omega`, `iota`, or `detect`, to specify parameters on abundance, recruitment, apparent survival, immigration, and detectability, respectively. For `unmarkedFitDS` objects, `lambda` and `detect` are supported. For `unmarkedFitGDS`, `lambda`, `phi`, and `detect` denote abundance, availability, and detection probability, respectively. For `unmarkedFitGMM` and `unmarkedFitGPC` objects, `lambda`, `phi`, and `detect` denote abundance, availability, and detectability, respectively. For `unmarkedFitOccuMS` objects, `psi`, `phi`, and `detect` denote occupancy, transition, and detection probability, respectively. For `unmarkedFitOccuMulti` objects, `psi` and `detect` denote occupancy and detection probability, respectively.
`...`	additional arguments passed to the function.

Details

The candidate models must be stored in a list. Note that a data frame from which to make predictions must be supplied with the newdata argument and that all variables appearing in the model set must appear in this data frame. Variables must be of the same type as in the original analysis (e.g., factor, numeric).

One can compute unconditional confidence intervals around the predictions from the elements returned by modavgPred. The classic computation based on asymptotic normality of the estimator is appropriate to estimate confidence intervals on the linear predictor (i.e., link scale). For predictions of some types of response variables such as counts or binary variables, the normal approximation may be inappropriate. In such cases, it is often better to compute the confidence intervals on the linear predictor scale and then back-transform the limits to the scale of the response variable. These are the confidence intervals returned by modavgPred. Burnham et al. (1987), Burnham and Anderson (2002, p. 164), and Williams et al. (2002) suggest alternative methods of computing confidence intervals for small degrees of freedom with profile likelihood intervals or bootstrapping, but these approaches are not yet implemented in modavgPred.

Value

modavgPred returns an object of class modavgPred with the following components:

`type`	the scale of predicted values (response or link) for `glm`, `mer`, `merMod`, or `unmarkedFit` classes.
`mod.avg.pred`	the model-averaged prediction over the entire candidate model set.
`uncond.se`	the unconditional standard error of each model-averaged prediction.
`conf.level`	the confidence level used to compute the confidence interval.
`lower.CL`	the lower confidence limit.
`upper.CL`	the upper confidence limit.
`matrix.output`	a matrix with rows consisting of the model-averaged predictions, the unconditional standard errors, and the confidence limits.

Author(s)

Marc J. Mazerolle

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Burnham, K. P., Anderson, D. R., White, G. C., Brownie, C., Pollock, K. H. (1987) Design and analysis methods for fish survival experiments based on release-recapture. American Fisheries Society Monographs 5, 1–437.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577–587.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248–2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200–2207.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108–115.

Williams, B. K., Nichols, J. D., Conroy, M. J. (2002) Analysis and Management of Animal Populations. Academic Press: New York.

Examples

##example from subset of models in Table 1 in Mazerolle (2006)
data(dry.frog)

Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2,
                       data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2 +
                       Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)

##setup model names
Modnames <- paste("mod", 1:length(Cand.models), sep = "")

##compute model-averaged value and unconditional SE of predicted log of
##mass lost for frogs of average mass in shade for each substrate type

##first create data set to use for predictions
new.dat <- data.frame(Shade = c(1, 1, 1),
                      cent_Initial_mass = c(0, 0, 0),
                      Initial_mass2 = c(0, 0, 0),
                      Substrate = c("SOIL", "SPHAGNUM", "PEAT")) 

##compare unconditional SE's using both methods
modavgPred(cand.set = Cand.models, modnames = Modnames,
           newdata = new.dat, type = "response", uncond.se = "old")
modavgPred(cand.set = Cand.models, modnames = Modnames,
           newdata = new.dat, type = "response", uncond.se = "revised")
##round to 4 digits after decimal point
print(modavgPred(cand.set = Cand.models, modnames = Modnames,
                 newdata = new.dat, type = "response",
                 uncond.se = "revised"), digits = 4)



##Gamma glm
## Not run: 
##clotting data example from 'gamma.shape' in MASS package of
##Venables and Ripley (2002, Modern applied statistics with
##S. Springer-Verlag: New York.)
clotting <- data.frame(u = c(5, 10, 15, 20, 30, 40, 60, 80, 100),
                       lot1 = c(118, 58, 42, 35, 27, 25, 21, 19, 18),
                       lot2 = c(69, 35, 26, 21, 18, 16, 13, 12, 12))
clot1 <- glm(lot1 ~ log(u), data = clotting, family = Gamma)

require(MASS)
gamma.dispersion(clot1) #dispersion parameter
gamma.shape(clot1) #reciprocal of dispersion parameter ==
##shape parameter 
summary(clot1, dispersion = gamma.dispersion(clot1))  #better

##create list with models
Cand <- list( )
Cand[[1]] <- glm(lot1 ~ log(u), data = clotting, family = Gamma)
Cand[[2]] <- glm(lot1 ~ 1, data = clotting, family = Gamma)

##create vector of model names
Modnames <- paste("mod", 1:length(Cand), sep = "")

##compute model-averaged predictions on scale of response variable for
##all observations
modavgPred(cand.set = Cand, modnames = Modnames, newdata = clotting,
           gamdisp = gamma.dispersion(clot1), type = "response") 

##compute model-averaged predictions on scale of linear predictor
modavgPred(cand.set = Cand, modnames = Modnames, newdata = clotting,
           gamdisp = gamma.dispersion(clot1), type = "link")

##compute model-averaged predictions on scale of linear predictor
modavgPred(cand.set = Cand, modnames = Modnames, newdata = clotting,
           gamdisp = gamma.dispersion(clot1), type = "terms") #returns an error
##because type = "terms" is not defined for 'modavgPred'

modavgPred(cand.set = Cand, modnames = Modnames, newdata = clotting,
           type = "terms") #returns an error because
##no gamma dispersion parameter was specified (i.e., 'gamdisp' missing) 

## End(Not run)


##example of model-averaged predictions from N-mixture model
##each variable appears twice in the models - this is a bit longer
## Not run: 
require(unmarked)
data(mallard)
mallardUMF <- unmarkedFramePCount(mallard.y, siteCovs = mallard.site,
                                  obsCovs = mallard.obs)
##set up models so that each variable on abundance appears twice
fm.mall.one <- pcount(~ ivel + date  ~ length + forest, mallardUMF,
                      K = 30)
fm.mall.two <- pcount(~ ivel + date  ~ elev + forest, mallardUMF,
                      K = 30)
fm.mall.three <- pcount(~ ivel + date  ~ length + elev, mallardUMF,
                        K = 30)
fm.mall.four <- pcount(~ ivel + date  ~ 1, mallardUMF, K = 30)

##model list
Cands <- list(fm.mall.one, fm.mall.two, fm.mall.three, fm.mall.four)
Modnames <- c("length + forest", "elev + forest", "length + elev",
              "null")

##compute model-averaged predictions of abundance for values of elev
modavgPred(cand.set = Cands, modnames = Modnames, newdata =
           data.frame(elev = seq(from = -1.4, to = 2.4, by = 0.1),
                      length = 0, forest = 0), parm.type = "lambda",
           type = "response")

##compute model-averaged predictions of detection for values of ivel
modavgPred(cand.set = Cands, modnames = Modnames, newdata =
           data.frame(ivel = seq(from = -1.75, to = 5.9, by = 0.5),
                      date = 0), parm.type = "detect",
           type = "response")
detach(package:unmarked)

## End(Not run)


##example of model-averaged abundance from distance model
## Not run: 
##this is a bit longer
data(linetran) #example from ?distsamp
     
ltUMF <- with(linetran, {
  unmarkedFrameDS(y = cbind(dc1, dc2, dc3, dc4),
                  siteCovs = data.frame(Length, area, habitat),
                  dist.breaks = c(0, 5, 10, 15, 20),
                  tlength = linetran$Length * 1000, survey = "line",
                  unitsIn = "m")
})
     
## Half-normal detection function. Density output (log scale). No covariates.
fm1 <- distsamp(~ 1 ~ 1, ltUMF)
     
## Halfnormal. Covariates affecting both density and and detection.
fm2 <- distsamp(~area + habitat ~ habitat, ltUMF)

## Hazard function. Covariates affecting both density and and detection.
fm3 <- distsamp(~area + habitat ~ habitat, ltUMF, keyfun="hazard")

##assemble model list
Cands <- list(fm1, fm2, fm3)
Modnames <- paste("mod", 1:length(Cands), sep = "")

##model-average predictions on abundance
modavgPred(cand.set = Cands, modnames = Modnames, parm.type = "lambda", type = "link",
           newdata = data.frame(area = mean(linetran$area), habitat = c("A", "B")))
detach(package:unmarked)

## End(Not run)



##example using Orthodont data set from Pinheiro and Bates (2000)
## Not run: 
require(nlme)

##set up candidate models
m1 <- gls(distance ~ age, correlation = corCompSymm(value = 0.5, form = ~ 1 | Subject),
          data = Orthodont, method = "ML")

m2 <- gls(distance ~ 1, correlation = corCompSymm(value = 0.5, form = ~ 1 | Subject),
          data = Orthodont, method = "ML")

##assemble in list
Cand.models <- list(m1, m2)
##model names
Modnames <- c("age effect", "null model")

##model selection table
aictab(cand.set = Cand.models, modnames = Modnames)

##model-averaged predictions
modavgPred(cand.set = Cand.models, modnames = Modnames, newdata =
data.frame(age = c(8, 10, 12, 14)))
detach(package:nlme)

## End(Not run)

[Package AICcmodavg version 2.3-3 Index]