chapter6 {MQMF}    R Documentation
chapter6 The 53 R-code chunks from On Uncertainty
Description
chapter6 is not an active function; rather, it acts as a repository for the example R-code chunks found in chapter 6, On Uncertainty. There are 53 R-code chunks in that chapter. You should, of course, feel free to use and modify any of these example chunks in your own work.
Usage
chapter6(verbose = TRUE)
Arguments
verbose
Should instructions be written to the console? Default = TRUE
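For example (a minimal sketch, not one of the book's code chunks; it assumes the MQMF package is installed and attached), the call below simply writes those instructions to the console and performs no calculations itself:

library(MQMF)            # attach the package
chapter6(verbose = TRUE) # print the guidance for using the 53 code chunks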
Examples
## Not run:
# All the example code from the chapter On Uncertainty
# On Uncertainty
## Introduction
### Types of Uncertainty
### The Example Model
# R-chunk 1 Page 189
#Fit a surplus production model to abdat fisheries data
data(abdat); logce <- log(abdat$cpue)
param <- log(c(0.42,9400,3400,0.05))
label=c("r","K","Binit","sigma") # simpspm returns
bestmod <- nlm(f=negLL,p=param,funk=simpspm,indat=abdat,logobs=logce)
outfit(bestmod,title="SP-Model",parnames=label) #backtransforms
# R-chunk 2 Page 190
#plot the abdat data and the optimum sp-model fit Fig 6.1
predce <- exp(simpspm(bestmod$estimate,abdat))
optresid <- abdat[,"cpue"]/predce #multiply by predce for obsce
ymax <- getmax(c(predce,abdat$cpue))
oldp <- plot1(abdat$year,(predce*optresid),type="l",maxy=ymax,cex=0.9,
ylab="CPUE",xlab="Year",lwd=3,col="grey",lty=1)
points(abdat$year,abdat$cpue,pch=1,col=1,cex=1.1)
lines(abdat$year,predce,lwd=2,col=1) # best fit line
par(oldp) # return par to old settings; this line not in book
## Bootstrapping
### Empirical Probability Density Distributions
## A Simple Bootstrap Example
# R-chunk 3 Page 193
#regression between catches of NPF prawn species Fig 6.2
data(npf)
model <- lm(endeavour ~ tiger,data=npf)
oldp <- plot1(npf$tiger,npf$endeavour,type="p",xlab="Tiger Prawn (t)",
ylab="Endeavour Prawn (t)",cex=0.9)
abline(model,col=1,lwd=2)
correl <- sqrt(summary(model)$r.squared)
pval <- summary(model)$coefficients[2,4]
label <- paste0("Correlation ",round(correl,5)," P = ",round(pval,8))
text(2700,180,label,cex=1.0,font=7,pos=4)
par(oldp) # return par to old settings; this line not in book
# R-chunk 4 Page 194
# 5000 bootstrap estimates of correlation coefficient Fig 6.3
set.seed(12321) # better to use a less obvious seed, if at all
N <- 5000 # number of bootstrap samples
result <- numeric(N) #a vector to store 5000 correlations
for (i in 1:N) { #sample index from 1:23 with replacement
pick <- sample(1:23,23,replace=TRUE) #sample is an R function
result[i] <- cor(npf$tiger[pick],npf$endeavour[pick])
}
rge <- range(result) # store the range of results
CI <- quants(result) # calculate quantiles; 90%CI = 5% and 95%
restrim <- result[result > 0] #remove possible -ve values for plot
oldp <- parset(cex=1.0) #set up a plot window and draw a histogram
bins <- seq(trunc(range(restrim)[1]*10)/10,1.0,0.01)
outh <- hist(restrim,breaks=bins,main="",col=0,xlab="Correlation")
abline(v=c(correl,mean(result)),col=c(4,3),lwd=c(3,2),lty=c(1,2))
abline(v=CI[c(2,4)],col=4,lwd=2) # and 90% confidence intervals
text(0.48,400,makelabel("Range ",rge,sep=" ",sigdig=4),font=7,pos=4)
label <- makelabel("90%CI ",CI[c(2,4)],sep=" ",sigdig=4)
text(0.48,300,label,cex=1.0,font=7,pos=4)
par(oldp) # return par to old settings; this line not in book
## Bootstrapping Time-Series Data
# R-chunk 5 Page 196
# fitting Schaefer model with log-normal residuals with 24 years
data(abdat); logce <- log(abdat$cpue) # of abalone fisheries data
param <- log(c(r= 0.42,K=9400,Binit=3400,sigma=0.05)) #log values
bestmod <- nlm(f=negLL,p=param,funk=simpspm,indat=abdat,logobs=logce)
optpar <- bestmod$estimate # these are still log-transformed
predce <- exp(simpspm(optpar,abdat)) #linear-scale pred cpue
optres <- abdat[,"cpue"]/predce # optimum log-normal residual
optmsy <- exp(optpar[1])*exp(optpar[2])/4
sampn <- length(optres) # number of residuals and of years
# R-chunk 6 Page 196 Table 6.1 code not included in the book
outtab <- halftable(cbind(abdat,predce,optres),subdiv=2)
kable(outtab, digits=c(0,0,3,3,3,0,0,3,3,3), caption='(ref:tab601)')
# R-chunk 7 Pages 196 - 197
# 1000 bootstrap Schaefer model fits; takes a few seconds
start <- Sys.time() # use of as.matrix faster than using data.frame
bootfish <- as.matrix(abdat) # and avoid altering original data
N <- 1000; years <- abdat[,"year"] # need N x years matrices
columns <- c("r","K","Binit","sigma")
results <- matrix(0,nrow=N,ncol=sampn,dimnames=list(1:N,years))
bootcpue <- matrix(0,nrow=N,ncol=sampn,dimnames=list(1:N,years))
parboot <- matrix(0,nrow=N,ncol=4,dimnames=list(1:N,columns))
for (i in 1:N) { # fit the models and save solutions
bootcpue[i,] <- predce * sample(optres, sampn, replace=TRUE)
bootfish[,"cpue"] <- bootcpue[i,] #calc and save bootcpue
bootmod <- nlm(f=negLL,p=optpar,funk=simpspm,indat=bootfish,
logobs=log(bootfish[,"cpue"]))
parboot[i,] <- exp(bootmod$estimate) #now save parameters
results[i,] <- exp(simpspm(bootmod$estimate,abdat)) #and predce
}
cat("total time = ",Sys.time()-start, "seconds \n")
# R-chunk 8 Page 197
# bootstrap replicates in grey behind main plot Fig 6.4
oldp <- plot1(abdat[,"year"],abdat[,"cpue"],type="n",xlab="Year",
ylab="CPUE") # type="n" just lays out an empty plot
for (i in 1:N) # ready to add the separate components
lines(abdat[,"year"],results[i,],lwd=1,col="grey")
points(abdat[,"year"],abdat[,"cpue"],pch=16,cex=1.0,col=1)
lines(abdat[,"year"],predce,lwd=2,col=1)
par(oldp) # return par to old settings; this line not in book
# R-chunk 9 Pages 198 - 199
#histograms of bootstrap parameters and model outputs Fig 6.5
dohist <- function(invect,nmvar,bins=30,bootres,avpar) { #adhoc
hist(invect[,nmvar],breaks=bins,main="",xlab=nmvar,col=0)
abline(v=c(exp(avpar),bootres[pick,nmvar]),lwd=c(3,2,3,2),
col=c(3,4,4,4))
}
msy <- parboot[,"r"]*parboot[,"K"]/4 #calculate bootstrap MSY
msyB <- quants(msy) #from optimum bootstrap parameters
oldp <- parset(plots=c(2,2),cex=0.9)
bootres <- apply(parboot,2,quants); pick <- c(2,3,4) #quantiles
dohist(parboot,nmvar="r",bootres=bootres,avpar=optpar[1])
dohist(parboot,nmvar="K",bootres=bootres,avpar=optpar[2])
dohist(parboot,nmvar="Binit",bootres=bootres,avpar=optpar[3])
hist(msy,breaks=30,main="",xlab="MSY",col=0)
abline(v=c(optmsy,msyB[pick]),lwd=c(3,2,3,2),col=c(3,4,4,4))
par(oldp) # return par to old settings; this line not in book
### Parameter Correlation
# R-chunk 10 Page 200
#relationships between parameters and MSY Fig 6.6
parboot1 <- cbind(parboot,msy)
# note rgb use, alpha allows for shading, try 1/15 or 1/10
pairs(parboot1,pch=16,col=rgb(red=1,green=0,blue=0,alpha = 1/20))
## Asymptotic Errors
# R-chunk 11 Page 203
#Fit Schaefer model and generate the Hessian
data(abdat)
param <- log(c(r= 0.42,K=9400,Binit=3400,sigma=0.05))
# Note inclusion of the option hessian=TRUE in nlm function
bestmod <- nlm(f=negLL,p=param,funk=simpspm,indat=abdat,
logobs=log(abdat[,"cpue"]),hessian=TRUE)
outfit(bestmod,backtran = TRUE) #try typing bestmod in console
# Now generate the confidence intervals
vcov <- solve(bestmod$hessian) # solve inverts matrices
sterr <- sqrt(diag(vcov)) #diag extracts diagonal from a matrix
optpar <- bestmod$estimate #use qt for t-distrib quantiles
U95 <- optpar + qt(0.975,20)*sterr # 4 parameters hence
L95 <- optpar - qt(0.975,20)*sterr # (24 - 4) df
cat("\n r K Binit sigma \n")
cat("Upper 95% ",round(exp(U95),5),"\n") # backtransform
cat("Optimum ",round(exp(optpar),5),"\n")#\n =linefeed in cat
cat("Lower 95% ",round(exp(L95),5),"\n")
### Uncertainty about the Model Outputs
### Sampling from a Multi-Variate Normal Distribution
# R-chunk 12 Page 204
# Use multi-variate normal to generate percentile CI Fig 6.7
library(mvtnorm) # use RStudio, or install.packages("mvtnorm")
N <- 1000 # number of multi-variate normal parameter vectors
years <- abdat[,"year"]; sampn <- length(years) # 24 years
mvncpue <- matrix(0,nrow=N,ncol=sampn,dimnames=list(1:N,years))
columns <- c("r","K","Binit","sigma")
# Fill parameter vectors with N vectors from rmvnorm
mvnpar <- matrix(exp(rmvnorm(N,mean=optpar,sigma=vcov)),
nrow=N,ncol=4,dimnames=list(1:N,columns))
# Calculate N cpue trajectories using simpspm
for (i in 1:N) mvncpue[i,] <- exp(simpspm(log(mvnpar[i,]),abdat))
msy <- mvnpar[,"r"]*mvnpar[,"K"]/4 #N MSY estimates
# plot data and trajectories from the N parameter vectors
oldp <- plot1(abdat[,"year"],abdat[,"cpue"],type="p",xlab="Year",
ylab="CPUE",cex=0.9)
for (i in 1:N) lines(abdat[,"year"],mvncpue[i,],col="grey",lwd=1)
points(abdat[,"year"],abdat[,"cpue"],pch=16,cex=1.0)#orig data
lines(abdat[,"year"],exp(simpspm(optpar,abdat)),lwd=2,col=1)
par(oldp) # return par to old settings; this line not in book
# R-chunk 13 Page 205
#correlations between parameters when using mvtnorm Fig 6.8
pairs(cbind(mvnpar,msy),pch=16,col=rgb(red=1,0,0,alpha = 1/10))
# R-chunk 14 Pages 206 - 207
#N parameter vectors from the multivariate normal Fig 6.9
mvnres <- apply(mvnpar,2,quants) # table of quantiles
pick <- c(2,3,4) # select rows for 5%, 50%, and 95%
meanmsy <- mean(msy) # mean MSY from the mvn parameter vectors
msymvn <- quants(msy) # msy quantiles from multi-variate normal estimates
plothist <- function(x,optp,label,resmvn) {
hist(x,breaks=30,main="",xlab=label,col=0)
abline(v=c(exp(optp),resmvn),lwd=c(3,2,3,2),col=c(3,4,4,4))
} # repeated 4 times, so worthwhile writing a short function
oldp <- par(no.readonly=TRUE)
par(mfrow=c(2,2),mai=c(0.45,0.45,0.05,0.05),oma=c(0.0,0,0.0,0.0))
par(cex=0.85, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
plothist(mvnpar[,"r"],optpar[1],"r",mvnres[pick,"r"])
plothist(mvnpar[,"K"],optpar[2],"K",mvnres[pick,"K"])
plothist(mvnpar[,"Binit"],optpar[3],"Binit",mvnres[pick,"Binit"])
plothist(msy,meanmsy,"MSY",msymvn[pick])
par(oldp) # return par to old settings; this line not in book
# R-chunk 15 Page 208 Table 6.2 code not included in the book
#Tabulate percentile CI from bootstrap (B) and multi-variate (mvn)
kable(cbind(bootres,msyB),digits=c(4,3,3,4,3), caption='(ref:tab602)')
kable(cbind(mvnres,msymvn),digits=c(4,3,3,4,3))
## Likelihood Profiles
# R-chunk 16 Page 209
#Fit the Schaefer surplus production model to abdat
data(abdat); logce <- log(abdat$cpue) # using negLL
param <- log(c(r= 0.42,K=9400,Binit=3400,sigma=0.05))
optmod <- nlm(f=negLL,p=param,funk=simpspm,indat=abdat,logobs=logce)
outfit(optmod,parnames=c("r","K","Binit","sigma"))
# R-chunk 17 Page 210
#the code for MQMF's negLLP function
negLLP <- function(pars, funk, indat, logobs, initpar=pars,
notfixed=c(1:length(pars)),...) {
usepar <- initpar #copy the original parameters into usepar
usepar[notfixed] <- pars[notfixed] #change 'notfixed' values
npar <- length(usepar)
logpred <- funk(usepar,indat,...) #funk uses the usepar values
pick <- which(is.na(logobs)) # proceed as in negLL
if (length(pick) > 0) {
LL <- -sum(dnorm(logobs[-pick],logpred[-pick],exp(pars[npar]),
log=T))
} else {
LL <- -sum(dnorm(logobs,logpred,exp(pars[npar]),log=T))
}
return(LL)
} # end of negLLP
# R-chunk 18 Page 211
#does negLLP give same answers as negLL when no parameters fixed?
param <- log(c(r= 0.42,K=9400,Binit=3400,sigma=0.05))
bestmod <- nlm(f=negLLP,p=param,funk=simpspm,indat=abdat,logobs=logce)
outfit(bestmod,parnames=c("r","K","Binit","sigma"))
# R-chunk 19 Page 211
#Likelihood profile for r values 0.325 to 0.45
rval <- seq(0.325,0.45,0.001) # set up the test sequence
ntrial <- length(rval) # create storage for the results
columns <- c("r","K","Binit","sigma","-veLL")
result <- matrix(0,nrow=ntrial,ncol=length(columns),
dimnames=list(rval,columns))# close to optimum
bestest <- c(r= 0.32,K=11000,Binit=4000,sigma=0.05)
for (i in 1:ntrial) { #i <- 1
param <- log(c(rval[i],bestest[2:4])) #recycle bestest values
parinit <- param #to improve the stability of nlm as r changes
bestmodP <- nlm(f=negLLP,p=param,funk=simpspm,initpar=parinit,
indat=abdat,logobs=log(abdat$cpue),notfixed=c(2:4),
typsize=magnitude(param),iterlim=1000)
bestest <- exp(bestmodP$estimate)
result[i,] <- c(bestest,bestmodP$minimum) # store each result
}
minLL <- min(result[,"-veLL"]) #minimum across r values used.
# R-chunk 20 Page 212 Table 6.3 code not included in the book
#tabulate first 12 records from likelihood profile
kable(head(result,12),digits=c(3,3,3,4,5), caption='(ref:tab603)')
### Likelihood Ratio Based Confidence Intervals
# R-chunk 21 Page 213
#likelihood profile on r from the Schaefer model Fig 6.10
plotprofile(result,var="r",lwd=2) # review the code
# R-chunk 22 Page 214
#Likelihood profile for K values 7200 to 12000
Kval <- seq(7200,12000,10)
ntrial <- length(Kval)
columns <- c("r","K","Binit","sigma","-veLL")
resultK <- matrix(0,nrow=ntrial,ncol=length(columns),
dimnames=list(Kval,columns))
bestest <- c(r= 0.45,K=7500,Binit=2800,sigma=0.05)
for (i in 1:ntrial) {
param <- log(c(bestest[1],Kval[i],bestest[c(3,4)]))
parinit <- param
bestmodP <- nlm(f=negLLP,p=param,funk=simpspm,initpar=parinit,
indat=abdat,logobs=log(abdat$cpue),
notfixed=c(1,3,4),iterlim=1000)
bestest <- exp(bestmodP$estimate)
resultK[i,] <- c(bestest,bestmodP$minimum)
}
minLLK <- min(resultK[,"-veLL"])
#kable(head(result,12),digits=c(4,3,3,4,5)) # if wanted.
# R-chunk 23 Page 214
#likelihood profile on K from the Schaefer model Fig 6.11
plotprofile(resultK,var="K",lwd=2)
### -ve Log-Likelihoods or Likelihoods
# R-chunk 24 Page 215
#translate -ve log-likelihoods into likelihoods
likes <- exp(-resultK[,"-veLL"])/sum(exp(-resultK[,"-veLL"]),na.rm=TRUE)
resK <- cbind(resultK,likes,cumlike=cumsum(likes))
# R-chunk 25 Page 216 Table 6.4 code not included in the book
#tabulate head of likelihood profile matrix for K
kable(head(resK,8),digits=c(4,0,3,4,5,9,7),caption='(ref:tab604)')
# R-chunk 26 Page 216 Figure 6.12 code not in the book
#K parameter likelihood profile Fig 6.12
oldp <- plot1(resK[,"K"],resK[,"likes"],xlab="K value",
ylab="Likelihood",lwd=2)
lower <- which.closest(0.025,resK[,"cumlike"])
mid <- which(resK[,"likes"] == max(resK[,"likes"]))
upper <- which.closest(0.975,resK[,"cumlike"])
abline(v=c(resK[c(lower,mid,upper),"K"]),col=1,lwd=c(1,2,1))
label <- makelabel("",resK[c(lower,mid,upper),"K"],sep=" ")
text(9500,0.005,label,cex=1.2,pos=4)
par(oldp) # return par to old settings; this line not in book
### Percentile Likelihood Profiles for Model Outputs
# R-chunk 27 Pages 217 - 218
#examine effect on -veLL of MSY values from 740 - 1050t
#need a different negLLP() function, negLLO(): O for output.
#now optvar=888.831 (rK/4), the optimum MSY, varval ranges 740-1050
#and wght is the weighting to give to the penalty
negLLO <- function(pars,funk,indat,logobs,wght,optvar,varval) {
logpred <- funk(pars,indat)
LL <- -sum(dnorm(logobs,logpred,exp(tail(pars,1)),log=T)) +
wght*((varval - optvar)/optvar)^2 #compare with negLL
return(LL)
} # end of negLLO
msyP <- seq(740,1020,2.5);
optmsy <- exp(optmod$estimate[1])*exp(optmod$estimate[2])/4
ntrial <- length(msyP)
wait <- 400 # the weighting (wght) given to the penalty term
columns <- c("r","K","Binit","sigma","-veLL","MSY","pen","TrialMSY")
resultO <- matrix(0,nrow=ntrial,ncol=length(columns),dimnames=list(msyP,columns))
bestest <- c(r= 0.47,K=7300,Binit=2700,sigma=0.05)
for (i in 1:ntrial) { # i <- 1
param <- log(bestest)
bestmodO <- nlm(f=negLLO,p=param,funk=simpspm,indat=abdat,
logobs=log(abdat$cpue),wght=wait,
optvar=optmsy,varval=msyP[i],iterlim=1000)
bestest <- exp(bestmodO$estimate)
ans <- c(bestest,bestmodO$minimum,bestest[1]*bestest[2]/4,
wait *((msyP[i] - optmsy)/optmsy)^2,msyP[i])
resultO[i,] <- ans
}
minLLO <- min(resultO[,"-veLL"])
# R-chunk 28 Page 218 Table 6.5 code not included in the book
#tabulate first and last few records of profile on MSY
kable(head(resultO[,1:7],4),digits=c(3,3,3,4,2,3,2),caption='(ref:tab605)')
kable(tail(resultO[,1:7],4),digits=c(3,3,3,4,2,3,2))
# R-chunk 29 Page 219 Figure 6.13 code not included in the book
#likelihood profile on MSY from the Schaefer model Fig 6.13
plotprofile(resultO,var="TrialMSY",lwd=2)
## Bayesian Posterior Distributions
### Generating the Markov Chain
### The Starting Point
### The Burn-in Period
### Convergence to the Stationary Distribution
### The Jumping Distribution
### Application of MCMC to the Example
# R-chunk 30 Page 225
#activate and plot the fisheries data in abdat Fig 6.14
data(abdat) # type abdat in the console to see contents
plotspmdat(abdat) #use helper function to plot fishery stats vs year
### Markov Chain Monte Carlo
### A First Example of an MCMC
# R-chunk 31 Pages 228 - 229
# Conduct MCMC analysis to illustrate burn-in. Fig 6.15
data(abdat); logce <- log(abdat$cpue)
fish <- as.matrix(abdat) # faster to use a matrix than a data.frame!
begin <- Sys.time() # enable time taken to be calculated
chains <- 1 # 1 chain per run; normally do more
burnin <- 0 # no burn-in for first three chains
N <- 100 # Number of MCMC steps to keep
step <- 4 # equals one step per parameter so no thinning
priorcalc <- calcprior # define the prior probability function
scales <- c(0.065,0.055,0.065,0.425) #found by trial and error
set.seed(128900) #gives repeatable results in book; usually omitted
inpar <- log(c(r= 0.4,K=11000,Binit=3600,sigma=0.05))
result1 <- do_MCMC(chains,burnin,N,step,inpar,negLL,calcpred=simpspm,
calcdat=fish,obsdat=logce,priorcalc,scales)
inpar <- log(c(r= 0.35,K=8500,Binit=3400,sigma=0.05))
result2 <- do_MCMC(chains,burnin,N,step,inpar,negLL,calcpred=simpspm,
calcdat=fish,obsdat=logce,priorcalc,scales)
inpar <- log(c(r= 0.45,K=9500,Binit=3200,sigma=0.05))
result3 <- do_MCMC(chains,burnin,N,step,inpar,negLL,calcpred=simpspm,
calcdat=fish,obsdat=logce,priorcalc,scales)
burnin <- 50 # strictly a low thinning rate of 4; not enough
step <- 16 # 16 thinstep rate = 4 parameters x 4 = 16
N <- 10000 # 16 x 10000 = 160,000 steps + 50 burnin
inpar <- log(c(r= 0.4,K=9400,Binit=3400,sigma=0.05))
result4 <- do_MCMC(chains,burnin,N,step,inpar,negLL,calcpred=simpspm,
calcdat=fish,obsdat=logce,priorcalc,scales)
post1 <- result1[[1]][[1]]
post2 <- result2[[1]][[1]]
post3 <- result3[[1]][[1]]
postY <- result4[[1]][[1]]
cat("time = ",Sys.time() - begin,"\n")
cat("Accept = ",result4[[2]],"\n")
# R-chunk 32 Pages 229 - 230
#first example and start of 3 initial chains for MCMC Fig 6.15
oldp <- parset(cex=0.85)
P <- 75 # the first 75 steps only start to explore parameter space
plot(postY[,"K"],postY[,"r"],type="p",cex=0.2,xlim=c(7000,13000),
ylim=c(0.28,0.47),col=8,xlab="K",ylab="r",panel.first=grid())
lines(post2[1:P,"K"],post2[1:P,"r"],lwd=1,col=1)
points(post2[1:P,"K"],post2[1:P,"r"],pch=15,cex=1.0)
lines(post1[1:P,"K"],post1[1:P,"r"],lwd=1,col=1)
points(post1[1:P,"K"],post1[1:P,"r"],pch=1,cex=1.2,col=1)
lines(post3[1:P,"K"],post3[1:P,"r"],lwd=1,col=1)
points(post3[1:P,"K"],post3[1:P,"r"],pch=2,cex=1.2,col=1)
par(oldp) # return par to old settings; this line not in book
# R-chunk 33 Pages 230 - 231
#pairs plot of parameters from the first MCMC Fig 6.16
posterior <- result4[[1]][[1]]
msy <- posterior[,1]*posterior[,2]/4
pairs(cbind(posterior[,1:4],msy),pch=16,col=rgb(1,0,0,1/50),font=7)
# R-chunk 34 Pages 231 - 232
#plot the traces from the first MCMC example Fig 6.17
posterior <- result4[[1]][[1]]
oldp <- par(no.readonly=TRUE) # this line not in book
par(mfrow=c(4,2),mai=c(0.4,0.4,0.05,0.05),oma=c(0.0,0,0.0,0.0))
par(cex=0.8, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
label <- colnames(posterior)
N <- dim(posterior)[1]
for (i in 1:4) {
ymax <- getmax(posterior[,i]); ymin <- getmin(posterior[,i])
plot(1:N,posterior[,i],type="l",lwd=1,ylim=c(ymin,ymax),
panel.first=grid(),ylab=label[i],xlab="Step")
plot(density(posterior[,i]),lwd=2,col=2,panel.first=grid(),main="")
}
par(oldp) # return par to old settings; this line not in book
# R-chunk 35 Page 233
#Use acf to examine auto-correlation with thinstep = 16 Fig 6.18
posterior <- result4[[1]][[1]]
label <- colnames(posterior)[1:4]
oldp <- parset(plots=c(2,2),cex=0.85)
for (i in 1:4) auto <- acf(posterior[,i],type="correlation",lwd=2,
plot=TRUE,ylab=label[i],lag.max=20)
par(oldp) # return par to old settings; this line not in book
# R-chunk 36 Pages 233 - 234
#setup MCMC with thinstep of 128 per parameter Fig 6.19
begin <- gettime()
scales <- c(0.06,0.05,0.06,0.4)
inpar <- log(c(r= 0.4,K=9400,Binit=3400,sigma=0.05))
result <- do_MCMC(chains=1,burnin=100,N=1000,thinstep=512,inpar,
negLL,calcpred=simpspm,calcdat=fish,
obsdat=logce,calcprior,scales,schaefer=TRUE)
posterior <- result[[1]][[1]]
label <- colnames(posterior)[1:4]
oldp <- parset(plots=c(2,2),cex=0.85)
for (i in 1:4) auto <- acf(posterior[,i],type="correlation",lwd=2,
plot=TRUE,ylab=label[i],lag.max=20)
par(oldp) # return par to old settings; this line not in book
cat(gettime() - begin)
### Marginal Distributions
# R-chunk 37 Pages 235 - 236
# plot marginal distributions from the MCMC Fig 6.20
dohist <- function(x,xlab) { # to save a little space
return(hist(x,main="",breaks=50,col=0,xlab=xlab,ylab="",
panel.first=grid()))
}
# ensure we have the optimum solution available
param <- log(c(r= 0.42,K=9400,Binit=3400,sigma=0.05))
bestmod <- nlm(f=negLL,p=param,funk=simpspm,indat=abdat,
logobs=log(abdat$cpue))
optval <- exp(bestmod$estimate)
posterior <- result[[1]][[1]] #example above N=1000, thin=512
oldp <- par(no.readonly=TRUE)
par(mfrow=c(5,1),mai=c(0.4,0.3,0.025,0.05),oma=c(0,1,0,0))
par(cex=0.85, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
np <- length(param)
for (i in 1:np) { #store invisible output from hist for later use
outH <- dohist(posterior[,i],xlab=colnames(posterior)[i])
abline(v=optval[i],lwd=3,col=4)
tmp <- density(posterior[,i])
scaler <- sum(outH$counts)*(outH$mids[2]-outH$mids[1])
tmp$y <- tmp$y * scaler
lines(tmp,lwd=2,col=2)
}
msy <- posterior[,"r"]*posterior[,"K"]/4
mout <- dohist(msy,xlab="MSY")
tmp <- density(msy)
tmp$y <- tmp$y * (sum(mout$counts)*(mout$mids[2]-mout$mids[1]))
lines(tmp,lwd=2,col=2)
abline(v=(optval[1]*optval[2]/4),lwd=3,col=4)
mtext("Frequency",side=2,outer=T,line=0.0,font=7,cex=1.0)
par(oldp) # return par to old settings; this line not in book
## The Use of Rcpp
# R-chunk 38 Pages 236 - 237
#profile the running of do_MCMC using the now well known abdat
data(abdat); logce <- log(abdat$cpue); fish <- as.matrix(abdat)
param <- log(c(r=0.39,K=9200,Binit=3400,sigma=0.05))
Rprof(append=TRUE) # note the use of negLL1()
result <- do_MCMC(chains=1,burnin=100,N=20000,thinstep=16,inpar=param,
infunk=negLL1,calcpred=simpspm,calcdat=fish,
obsdat=logce,priorcalc=calcprior,
scales=c(0.07,0.06,0.07,0.45))
Rprof(NULL)
outprof <- summaryRprof()
# R-chunk 39 Page 238 Table 6.6 code not included in the book
#tabulate output of Rprof on do_MCMC function
kable(head(outprof$by.self,12),caption='(ref:tab606)')
### Addressing Vectors and Matrices
### Replacement for simpspm()
# R-chunk 40 Page 240
library(Rcpp)
#Send a text string containing the C++ code to cppFunction; this will
#take a few seconds to compile, then the function simpspmC will
#continue to be available during the rest of your R session. The
#code in this chunk could be placed in its own R file, and then
#the R source() function used to include the C++ in a
#session. indat must have catch in col2 (col1 in C++), and cpue in
#col3 (col2 in C++). Note the use of ; at the end of each line.
#Like simpspm(), this returns only the log(predicted cpue).
cppFunction('NumericVector simpspmC(NumericVector pars,
NumericMatrix indat, LogicalVector schaefer) {
int nyrs = indat.nrow();
NumericVector predce(nyrs);
NumericVector biom(nyrs+1);
double Bt, qval;
double sumq = 0.0;
double p = 0.00000001;
if (schaefer(0) == TRUE) {
p = 1.0;
}
NumericVector ep = exp(pars);
biom[0] = ep[2];
for (int i = 0; i < nyrs; i++) {
Bt = biom[i];
biom[(i+1)]=Bt+(ep[0]/p)*Bt*(1-pow((Bt/ep[1]),p))-
indat(i,1);
if (biom[(i+1)] < 40.0) biom[(i+1)] = 40.0;
sumq += log(indat(i,2)/biom[i]);
}
qval = exp(sumq/nyrs);
for (int i = 0; i < nyrs; i++) {
predce[i] = log(biom[i] * qval);
}
return predce;
}')
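# Not in the book: as the comment above notes, the cppFunction() call could
# instead be saved in its own R file and compiled in each new session with
# source(); the file name below is only illustrative.
# source("simpspmC.R")  # hypothetical file containing the cppFunction() call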
# R-chunk 41 Page 241
#Ensure results obtained from simpspm and simpspmC are same
library(microbenchmark)
data(abdat)
fishC <- as.matrix(abdat) # Use a matrix rather than a data.frame
inpar <- log(c(r= 0.389,K=9200,Binit=3300,sigma=0.05))
spmR <- exp(simpspm(inpar,fishC)) # demonstrate equivalence
#need to declare all arguments in simpspmC, no default values
spmC <- exp(simpspmC(inpar,fishC,schaefer=TRUE))
out <- microbenchmark( # everything identical when calling each function
simpspm(inpar,fishC,schaefer=TRUE),
simpspmC(inpar,fishC,schaefer=TRUE),
times=1000
)
out2 <- summary(out)[,2:8]
out2 <- rbind(out2,out2[2,]/out2[1,])
rownames(out2) <- c("simpspm","simpspmC","TimeRatio")
# R-chunk 42 Page 241 Table 6.7 code not included in the book
#compare results from simpspm and simpspmC
kable(halftable(cbind(spmR,spmC)),row.names=TRUE,digits=c(4,4,4,4,4,4),caption='(ref:tab607)')
# R-chunk 43 Page 242 Table 6.8 code not included in the book
#output from microbenchmark comparison of simpspm and simpspmC
kable(out2,row.names=TRUE,digits=c(3,3,3,3,3,3,3,0),caption='(ref:tab608)')
# R-chunk 44 Pages 242 - 243
#How much does using simpspmC in do_MCMC speed the run time?
#Assumes Rcpp code has run, eg source("Rcpp_functions.R")
set.seed(167423) #Can use getseed() to generate a suitable seed
beginR <- gettime() #to enable estimate of time taken
setscale <- c(0.07,0.06,0.07,0.45)
reps <- 2000 #Not enough for a real analysis but sufficient for a demonstration
param <- log(c(r=0.39,K=9200,Binit=3400,sigma=0.05))
resultR <- do_MCMC(chains=1,burnin=100,N=reps,thinstep=128,
inpar=param,infunk=negLL1,calcpred=simpspm,
calcdat=fishC,obsdat=log(abdat$cpue),schaefer=TRUE,
priorcalc=calcprior,scales=setscale)
timeR <- gettime() - beginR
cat("time = ",timeR,"\n")
cat("acceptance rate = ",resultR$arate," \n")
postR <- resultR[[1]][[1]]
set.seed(167423) # Use the same pseudo-random numbers and the
beginC <- gettime() # same starting point to make the comparison
param <- log(c(r=0.39,K=9200,Binit=3400,sigma=0.05))
resultC <- do_MCMC(chains=1,burnin=100,N=reps,thinstep=128,
inpar=param,infunk=negLL1,calcpred=simpspmC,
calcdat=fishC,obsdat=log(abdat$cpue),schaefer=TRUE,
priorcalc=calcprior,scales=setscale)
timeC <- gettime() - beginC
cat("time = ",timeC,"\n") # note the same acceptance rates
cat("acceptance rate = ",resultC$arate," \n")
postC <- resultC[[1]][[1]]
cat("Time Ratio = ",timeC/timeR)
# R-chunk 45 Page 243
#compare marginal distributions of the 2 chains Fig 6.21
oldp <- par(no.readonly=TRUE) # this line not in the book
par(mfrow=c(1,1),mai=c(0.45,0.45,0.05,0.05),oma=c(0.0,0,0.0,0.0))
par(cex=0.85, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
maxy <- getmax(c(density(postR[,"K"])$y,density(postC[,"K"])$y))
plot(density(postR[,"K"]),lwd=2,col=1,xlab="K",ylab="Density",
main="",ylim=c(0,maxy),panel.first=grid())
lines(density(postC[,"K"]),lwd=3,col=5,lty=2)
par(oldp) # return par to old settings; this line not in book
### Multiple Independent Chains
# R-chunk 46 Page 244
#run multiple = 3 chains
setscale <- c(0.07,0.06,0.07,0.45) # I only use a seed for
set.seed(9393074) # reproducibility within this book
reps <- 10000 # reset the timer
beginC <- gettime() # remember a thinstep=256 is insufficient
resultC <- do_MCMC(chains=3,burnin=100,N=reps,thinstep=256,
inpar=param,infunk=negLL1,calcpred=simpspmC,
calcdat=fishC,obsdat=log(fishC[,"cpue"]),
priorcalc=calcprior,scales=setscale,schaefer=TRUE)
cat("time = ",gettime() - beginC," secs \n")
# R-chunk 47 Pages 244 - 245
#3 chain run using simpspmC, 10000 reps, thinstep=256 Fig 6.22
oldp <- par(no.readonly=TRUE)
par(mfrow=c(2,2),mai=c(0.4,0.45,0.05,0.05),oma=c(0.0,0,0.0,0.0))
par(cex=0.85, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
label <- c("r","K","Binit","sigma")
for (i in 1:4) {
plot(density(resultC$result[[2]][,i]),lwd=2,col=1,
xlab=label[i],ylab="Density",main="",panel.first=grid())
lines(density(resultC$result[[1]][,i]),lwd=2,col=2)
lines(density(resultC$result[[3]][,i]),lwd=2,col=3)
}
par(oldp) # return par to old settings; this line not in book
# R-chunk 48 Pages 245 - 246
#generate summary stats from the 3 MCMC chains
av <- matrix(0,nrow=3,ncol=4,dimnames=list(1:3,label))
sig2 <- av # do the variance
relsig <- av # relative to mean of all chains
for (i in 1:3) {
tmp <- resultC$result[[i]]
av[i,] <- apply(tmp[,1:4],2,mean)
sig2[i,] <- apply(tmp[,1:4],2,var)
}
cat("Average \n")
av
cat("\nVariance per chain \n")
sig2
cat("\n")
for (i in 1:4) relsig[,i] <- sig2[,i]/mean(sig2[,i])
cat("Variance Relative to Mean Variance of Chains \n")
relsig
# R-chunk 49 Pages 246 - 247
#compare quantiles from the 2 most widely separated MCMC chains
tmp <- resultC$result[[2]] # the 10000 values of each parameter
cat("Chain 2 \n")
msy1 <- tmp[,"r"]*tmp[,"K"]/4
ch1 <- apply(cbind(tmp[,1:4],msy1),2,quants)
round(ch1,4)
tmp <- resultC$result[[3]]
cat("Chain 3 \n")
msy2 <- tmp[,"r"]*tmp[,"K"]/4
ch2 <- apply(cbind(tmp[,1:4],msy2),2,quants)
round(ch2,4)
cat("Percent difference ")
cat("\n2.5% ",round(100*(ch1[1,] - ch2[1,])/ch1[1,],4),"\n")
cat("50% ",round(100*(ch1[3,] - ch2[3,])/ch1[3,],4),"\n")
cat("97.5% ",round(100*(ch1[5,] - ch2[5,])/ch1[5,],4),"\n")
### Replicates Required to Avoid Serial Correlation
# R-chunk 50 Page 248
#compare two higher thinning rates per parameter in MCMC
param <- log(c(r=0.39,K=9200,Binit=3400,sigma=0.05))
setscale <- c(0.07,0.06,0.07,0.45)
result1 <- do_MCMC(chains=1,burnin=100,N=2000,thinstep=1024,
inpar=param,infunk=negLL1,calcpred=simpspmC,
calcdat=fishC,obsdat=log(abdat$cpue),
priorcalc=calcprior,scales=setscale,schaefer=TRUE)
result2 <- do_MCMC(chains=1,burnin=50,N=1000,thinstep=2048,
inpar=param,infunk=negLL1,calcpred=simpspmC,
calcdat=fishC,obsdat=log(abdat$cpue),
priorcalc=calcprior,scales=setscale,schaefer=TRUE)
# R-chunk 51 Page 248
#autocorrelation of 2 chains with different thinning rates Fig 6.23
posterior1 <- result1$result[[1]]
posterior2 <- result2$result[[1]]
label <- colnames(posterior1)[1:4]
oldp <- par(no.readonly=TRUE)
par(mfrow=c(4,2),mai=c(0.25,0.45,0.05,0.05),oma=c(1.0,0,1.0,0.0))
par(cex=0.85, mgp=c(1.35,0.35,0), font.axis=7,font=7,font.lab=7)
for (i in 1:4) {
auto <- acf(posterior1[,i],type="correlation",plot=TRUE,
ylab=label[i],lag.max=20,xlab="",ylim=c(0,0.3),lwd=2)
if (i == 1) mtext(1024,side=3,line=-0.1,outer=FALSE,cex=1.2)
auto <- acf(posterior2[,i],type="correlation",plot=TRUE,
ylab=label[i],lag.max=20,xlab="",ylim=c(0,0.3),lwd=2)
if (i == 1) mtext(2048,side=3,line=-0.1,outer=FALSE,cex=1.2)
}
mtext("Lag",side=1,line=-0.1,outer=TRUE,cex=1.2)
par(oldp) # return par to old settings; this line not in book
# R-chunk 52 Page 249
#visual comparison of the 2 chains' marginal densities Fig 6.24
oldp <- parset(plots=c(2,2),cex=0.85)
label <- c("r","K","Binit","sigma")
for (i in 1:4) {
plot(density(result1$result[[1]][,i]),lwd=4,col=1,xlab=label[i],
ylab="Density",main="",panel.first=grid())
lines(density(result2$result[[1]][,i]),lwd=2,col=5,lty=2)
}
par(oldp) # return par to old settings; this line not in book
# R-chunk 53 Pages 250 - 251
#tabulate a summary of the two different thinning rates
cat("1024 thinning rate \n")
posterior <- result1$result[[1]]
msy <- posterior[,1]*posterior[,2]/4
tmp1 <- apply(cbind(posterior[,1:4],msy),2,quants)
rge <- apply(cbind(posterior[,1:4],msy),2,range)
tmp1 <- rbind(tmp1,rge[2,] - rge[1,])
rownames(tmp1)[6] <- "Range"
print(round(tmp1,4))
posterior2 <- result2$result[[1]]
msy2 <- posterior2[,1]*posterior2[,2]/4
cat("2048 thinning rate \n")
tmp2 <- apply(cbind(posterior2[,1:4],msy2),2,quants)
rge2 <- apply(cbind(posterior2[,1:4],msy2),2,range)
tmp2 <- rbind(tmp2,rge2[2,] - rge2[1,])
rownames(tmp2)[6] <- "Range"
print(round(tmp2,4))
cat("Inner 95% ranges and Differences between total ranges \n")
cat("95% 1 ",round((tmp1[5,] - tmp1[1,]),4),"\n")
cat("95% 2 ",round((tmp2[5,] - tmp2[1,]),4),"\n")
cat("Diff ",round((tmp2[6,] - tmp1[6,]),4),"\n")
## End(Not run)