R: Create Model Selection Tables Based on BIC

bictab {AICcmodavg}

R Documentation

Create Model Selection Tables Based on BIC

Description

This function creates a model selection table based on the Bayesian information criterion (Schwarz 1978, Burnham and Anderson 2002). The table ranks the models based on the BIC and also provides delta BIC and BIC model weights. The function adjusts for overdispersion in model selection by using the QBIC when c.hat > 1. bictab selects the appropriate function to create the model selection table based on the object class. The current version works with lists containing objects of aov, betareg, clm, clmm, clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various models of unmarkedFit classes but does not yet allow mixing of different classes.

Usage

bictab(cand.set, modnames = NULL, nobs = NULL,
       sort = TRUE, ...)

## S3 method for class 'AICaov.lm'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICbetareg'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICsclm.clm'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICclmm'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICclm'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICcoxme'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICcoxph'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICfitdist'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICfitdistr'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICglm.lm'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, c.hat = 1, ...)  

## S3 method for class 'AICglmmTMB'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, c.hat = 1, ...)  

## S3 method for class 'AICgls'
bictab(cand.set, modnames = NULL,
        nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICgnls.gls'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIChurdle'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClavaan'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClm'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClme'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClmekin'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICmaxlikeFit.list'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...) 

## S3 method for class 'AICmer'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClmerMod'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AIClmerModLmerTest'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICglmerMod'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...) 

## S3 method for class 'AICnlmerMod'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICmultinom.nnet'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICnlme.lme'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...) 

## S3 method for class 'AICnls'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICpolr'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICrlm.lm'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICsurvreg'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

## S3 method for class 'AICunmarkedFitOccu'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitColExt'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuRN'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitPCount'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitPCO'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitDS'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGDS'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuFP'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitMPois'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGMM'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitGPC'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuMulti'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuMS'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitOccuTTD'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitMMO'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICunmarkedFitDSO'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICvglm'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, c.hat = 1, ...)

## S3 method for class 'AICzeroinfl'
bictab(cand.set, modnames = NULL,
         nobs = NULL, sort = TRUE, ...)

Arguments

`cand.set`	a list storing each of the models in the candidate model set.
`modnames`	a character vector of model names to facilitate the identification of each model in the model selection table. If `NULL`, the function uses the names in the cand.set list of candidate models (i.e., a named list). If no names appear in the list and no character vector is provided, generic names (e.g., `Mod1`, `Mod2`) are supplied in the table in the same order as in the list of candidate models.
`nobs`	this argument allows to specify a numeric value other than total sample size to compute the BIC (i.e., `nobs` defaults to total number of observations). This is relevant only for mixed models or various models of `unmarkedFit` classes where sample size is not straightforward. In such cases, one might use total number of observations or number of independent clusters (e.g., sites) as the value of `nobs`.
`sort`	logical. If `TRUE`, the model selection table is ranked according to the BIC values.
`c.hat`	value of overdispersion parameter (i.e., variance inflation factor) such as that obtained from `c_hat`. Note that values of c.hat different from 1 are only appropriate for binomial GLM's with trials > 1 (i.e., success/trial or cbind(success, failure) syntax), with Poisson GLM's, single-season occupancy models (MacKenzie et al. 2002), dynamic occupancy models (MacKenzie et al. 2003), or N-mixture models (Royle 2004, Dail and Madsen 2011). If `c.hat` > 1, `bictab` will return the quasi-likelihood analogue of the BIC (QBIC) and multiply the variance-covariance matrix of the estimates by this value (i.e., SE's are multiplied by `sqrt(c.hat)`). This option is not supported for generalized linear mixed models of the `mer` or `merMod` classes.
`...`	additional arguments passed to the function.

Details

BIC tends to favor simpler models than AIC whenever n > 8 (Schwarz 1978, Link and Barker 2006, Anderson 2008). BIC assigns uniform prior probabilities across all models (i.e., equal 1/R), whereas in AIC and AICc, prior probabilities increase with sample size (Burnham and Anderson 2004, Link and Barker 2010). Some authors argue that BIC requires the true model to be included in the model set, whereas AIC or AICc does not (Burnham and Anderson 2002). However, Link and Barker (2006, 2010) consider both as assuming that a model in the model set approximates truth.

bictab internally creates a new class for the cand.set list of candidate models, according to the contents of the list. The current function is implemented for clogit, coxme, coxph, fitdist, fitdistr, glm, glmmTMB, gls, gnls, hurdle, lavaan, lm, lme, lmekin, maxlikeFit, mer, merMod, lmerModLmerTest, multinom, nlme, nls, polr, rlm, survreg, vglm, and zeroinfl classes as well as various unmarkedFit classes. The function constructs a model selection table based on BIC.

Value

bictab creates an object of class bictab with the following components:

`Modname`	the name of each model of the candidate model set.
`K`	the number of estimated parameters for each model.
`(Q)BIC`	the Bayesian information criterion for each model.
`Delta_(Q)BIC`	the delta BIC component.
`ModelLik`	the relative likelihood of the model given the data (exp(-0.5*delta[i])). This is not to be confused with the likelihood of the parameters given the data. The relative likelihood can then be normalized across all models to get the model probabilities.
`(Q)BICWt`	the BIC model weights, also termed "model probabilities" (Burnham and Anderson 2002, Link and Barker 2006, Anderson 2008). These measures indicate the level of support (i.e., weight of evidence) in favor of any given model being the most parsimonious among the candidate model set.
`Cum.Wt`	the cumulative BIC weights. These are only meaningful if results in table are sorted in decreasing order of BIC weights (i.e., `sort = TRUE`).
`c.hat`	if c.hat was specified as an argument, it is included in the table.
`LL`	the log-likelihood of each model.
`Res.LL`	if parameters are estimated by restricted maximum-likelihood (REML), the restricted log-likelihood of each model.

Author(s)

Marc J. Mazerolle

References

Anderson, D. R. (2008) Model-based Inference in the Life Sciences: a primer on evidence. Springer: New York.

Burnham, K. P., Anderson, D. R. (2002) Model Selection and Multimodel Inference: a practical information-theoretic approach. Second edition. Springer: New York.

Burnham, K. P., Anderson, D. R. (2004) Multimodel inference: understanding AIC and BIC in model selection. Sociological Methods and Research 33, 261–304.

Dail, D., Madsen, L. (2011) Models for estimating abundance from repeated counts of an open population. Biometrics 67, 577–587.

Link, W. A., Barker, R. J. (2006) Model weights and the foundations of multimodel inference. Ecology 87, 2626–2635.

Link, W. A., Barker, R. J. (2010) Bayesian Inference with Ecological Applications. Academic Press: Boston.

MacKenzie, D. I., Nichols, J. D., Lachman, G. B., Droege, S., Royle, J. A., Langtimm, C. A. (2002) Estimating site occupancy rates when detection probabilities are less than one. Ecology 83, 2248–2255.

MacKenzie, D. I., Nichols, J. D., Hines, J. E., Knutson, M. G., Franklin, A. B. (2003) Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly. Ecology 84, 2200–2207.

Royle, J. A. (2004) N-mixture models for estimating population size from spatially replicated counts. Biometrics 60, 108–115.

Schwarz, G. (1978) Estimating the dimension of a model. Annals of Statistics 6, 461–464.

Examples

##Mazerolle (2006) frog water loss example
data(dry.frog)

##setup a subset of models of Table 1
Cand.models <- list( )
Cand.models[[1]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2,
                       data = dry.frog)
Cand.models[[2]] <- lm(log_Mass_lost ~ Shade + Substrate +
                       cent_Initial_mass + Initial_mass2 +
                       Shade:Substrate, data = dry.frog)
Cand.models[[3]] <- lm(log_Mass_lost ~ cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[4]] <- lm(log_Mass_lost ~ Shade + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)
Cand.models[[5]] <- lm(log_Mass_lost ~ Substrate + cent_Initial_mass +
                       Initial_mass2, data = dry.frog)

##create a vector of names to trace back models in set
Modnames <- paste("mod", 1:length(Cand.models), sep = " ")

##generate BIC table
bictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE)
##round to 4 digits after decimal point and give log-likelihood
print(bictab(cand.set = Cand.models, modnames = Modnames, sort = TRUE),
      digits = 4, LL = TRUE)



## Not run: 
##Burnham and Anderson (2002) flour beetle data
data(beetle)
##models as suggested by Burnham and Anderson p. 198          
Cand.set <- list( )
Cand.set[[1]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "logit"), weights = Number_tested,
                     data = beetle)
Cand.set[[2]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link = "probit"), weights = Number_tested,
                     data = beetle)
Cand.set[[3]] <- glm(Mortality_rate ~ Dose, family =
                     binomial(link ="cloglog"), weights = Number_tested,
                     data = beetle)

##set up named list
names(Cand.set) <- c("logit", "probit", "cloglog")

##compare models
##model names will be taken from the list if modnames is not specified
bictab(cand.set = Cand.set)

## End(Not run)


##two-way ANOVA with interaction
data(iron)
##full model
m1 <- lm(Iron ~ Pot + Food + Pot:Food, data = iron)
##additive model
m2 <- lm(Iron ~ Pot + Food, data = iron)
##null model
m3 <- lm(Iron ~ 1, data = iron)

##candidate models
Cand.aov <- list(m1, m2, m3)
Cand.names <- c("full", "additive", "null")
bictab(Cand.aov, Cand.names)



##single-season occupancy model example modified from ?occu
## Not run: 
require(unmarked)
##single season example modified from ?occu
data(frogs)
pferUMF <- unmarkedFrameOccu(pfer.bin)
##add fake covariates
siteCovs(pferUMF) <- data.frame(sitevar1 = rnorm(numSites(pferUMF)),
                                sitevar2 = runif(numSites(pferUMF))) 
     
##observation covariates
obsCovs(pferUMF) <- data.frame(obsvar1 = rnorm(numSites(pferUMF) *
                                 obsNum(pferUMF))) 

##set up candidate model set
fm1 <- occu(~ obsvar1 ~ sitevar1, pferUMF)
fm2 <- occu(~ 1 ~ sitevar1, pferUMF)
fm3 <- occu(~ obsvar1 ~ sitevar2, pferUMF)
fm4 <- occu(~ 1 ~ sitevar2, pferUMF)

##assemble models in named list (alternative to using 'modnames' argument)
Cand.mods <- list("fm1" = fm1, "fm2" = fm2, "fm3" = fm3, "fm4" = fm4)

##compute table based on QBIC that accounts for c.hat
bictab(cand.set = Cand.mods, c.hat = 3.9)

detach(package:unmarked)

## End(Not run)

[Package AICcmodavg version 2.3-3 Index]