Q.Amc {pbANOVA}R Documentation

PB multiple comparisons of the levels of factor A (output like TukeyHSD)

Description

Using Parametric Bootstrap to simulate a distribution for the multiple comparisons and calculate a test stat

Usage

Q.Amc(L=5000, ns, means, s2, alpha=0.05, a, b, c)

Arguments

L

Number of simulated values for the distribution

ns

sample size for each group

means

sample mean for each group

s2

sample variance for each group

alpha

significant level

a

Number of levels for factor A

b

Number of levels for factor B

c

Number of levels for factor C

Value

Q.crit: The (1-alpha) percentile of the simulated distribution.

Q.test: largest test statistic from all pairs

res.df: A dataframe containing the differences between each pair of factor levels, standard errors, confidence interval for the differences, test statistic for each pair, p-value, and indicator of whether the difference was statistically significant for each pair.

Examples

# We need elapsed time less than 5 seconds to publish
# the package with this example. Hence, there are # before
#a couple of PB algrithms. Remove # before you run the code.

#function to make everything but the response a factor

make.factor <- function(dataset, fact.cols){
for( i in fact.cols){
 dataset[,i] <- factor(dataset[,i])
}
return(dataset)
}

barley_ex <- make.factor(barleyh20, 1:5)
##this dataset has 4 factors, ignore year

library(Rmisc)

library(MASS)

#library(lmtest)

summarySE(barley_ex, "wt", c("genotype", "site", "time"))
#ignore year, note that the data are balanced

summary(barley_ex$wt)
mod1 <- lm(wt~genotype*site*time, data=barley_ex)
anova(mod1)
plot(mod1$fit, mod1$resid)
qqnorm(mod1$resid)
shapiro.test(mod1$resid)

boxcox(mod1, lambda=seq(-4, -2, by=0.1)) #lambda approx -3.5
mod2 <- lm(wt^(-3.5)~genotype*site*time, data=barley_ex)
plot(mod2$fit, mod2$resid) #worse?

#go with untransformed data? drop 3way term
mod3 <- lm(wt~genotype + site + time + genotype:site + genotype:time + site:time, data=barley_ex)
anova(mod3) #site:time ns

anova(lm(wt~genotype + site + time + genotype:site + genotype:time, data=barley_ex))
anova(lm(wt~genotype + site + time + genotype:site, data=barley_ex))

anova(lm(wt~genotype + site + time, data=barley_ex))
anova(lm(wt~site + time, data=barley_ex))
anova(lm(wt~time, data=barley_ex))
TukeyHSD(aov(wt  ~ time, data=barley_ex))  #all sig except 35-30 and 20-15 (0.0569)


###use PB methods
summarySE(barley_ex, "wt", c("genotype", "site", "time"))
#note that the data are balanced

#need to extract group sizes (ns), group var's (s2), means (ybars) for function
barley.ns <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$N
barley.means <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$wt
barley.s2 <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$sd^2

#alg.ABC(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000)
#p=0.9996, can drop 3way term

#can we drop the site:time int term?
#alg.BC(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000)
#p=0.9998, drop

#reorder data to make the different two-way terms
barleyTSG.ns <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$N
barleyTSG.means <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$wt
barleyTSG.s2 <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$sd^2

#alg.BC(ns=barleyTSG.ns, ybars=barleyTSG.means, s2=barleyTSG.s2, a=7, b=2, c=2, L=5000)
#p=0.9988, drop site:genotype

#reorder to SGT, can we drop genotype:time?
barleySGT.ns <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$N
barleySGT.means <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$wt
barleySGT.s2 <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$sd^2

#alg.BC(ns=barleySGT.ns, ybars=barleySGT.means,s2=barleySGT.s2, a=2, b=2, c=7, L=5000)
#p=0.9976, drop
#alg.C(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000) #GST
#p=0, time has signif efffect, same conclusion as F-test

#alg.C(ns=barleyTSG.ns, ybars=barleyTSG.means, s2=barleyTSG.s2, a=7, b=2, c=2, L=5000)
#p=0.9996 no signif effect of genotype


##site?
barleyGTS.ns <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$N
barleyGTS.means <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$wt
barleyGTS.s2 <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$sd^2

#alg.C(ns=barleyGTS.ns, ybars=barleyGTS.means, s2=barleyGTS.s2, a=2, b=7, c=2, L=5000)
#p=0.9998, site is NS

#multiple comparisons
#this function tests all pairwise comparisons of the levels of factor A,
# so we use the TSG order
Q.Amc(L=5000, ns=barleyTSG.ns, means=barleyTSG.means, s2=barleyTSG.s2,
alpha=0.05, a=7, b=2, c=2)

#Demonstrate the method on unbalanced data by collapsing time into L, M, H
#barley_ex$time2 <- "M"
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) <=2, "L", barley_ex$time2)
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) >=6, "H", barley_ex$time2)
#barley_ex$time2 <- as.factor(barley_ex$time2)

#still pretty balanced, separate the lowest level
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) <2, "LL", barley_ex$time2)
#barley_ex$time2 <- as.factor(barley_ex$time2)

#summarySE(barley_ex, "wt", c("genotype", "time2", "site"))
#two of the bigger groups N=12 have larger var
#anova(lm(wt ~genotype*time2*site, data=barley_ex))
#still looks like time2 the only sig factor

#library(lmtest)
#bptest(lm(wt ~genotype*time2*site, data=barley_ex)) #violates

#mod.un <- lm(wt ~genotype*time2*site, data=barley_ex)
#plot(mod.un$fit, mod.un$resid)
#qqnorm(mod.un$resid)

#boxcox(lm(wt ~genotype*time2*site, data=barley_ex), lambda=seq(-6, -4, by=0.1))
#lambda = -4.5


#the above transformations didn't work so just try the untransformed data
#the three way interaction term was not significant
#anova(lm(wt ~genotype+ time2 + site + genotype:time2 + genotype:site + time2:site, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site + genotype:time2 + genotype:site, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site + genotype:time2, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site, data=barley_ex))
#anova(lm(wt ~genotype+ time2, data=barley_ex))
#anova(lm(wt ~time2, data=barley_ex))

#TukeyHSD(aov(wt ~time2, data=barley_ex)) #all pairs significantly different


#PB methods
#barleyGST2.ns <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$N
#barleyGST2.means <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$wt
#barleyGST2.s2 <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$sd^2


#alg.ABC(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0.9734, can drop 3way term


#alg.BC(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0.94, can drop site:time2

#barleySGT2.ns <- summarySE(barley_ex, "wt", c("site","genotype",  "time2"))$N
#barleySGT2.means <- summarySE(barley_ex, "wt", c("site", "genotype", "time2"))$wt
#barleySGT2.s2 <- summarySE(barley_ex, "wt", c("site","genotype", "time2"))$sd^2

#alg.BC(ns=barleySGT2.ns, ybars=barleySGT2.means,s2=barleySGT2.s2, a=2, b=2, c=4, L=5000)
#p=0.9952, can drop genotype:time2


#barleyTSG2.ns <- summarySE(barley_ex, "wt", c("time2", "site","genotype"))$N
#barleyTSG2.means <- summarySE(barley_ex, "wt", c("time2","site", "genotype"))$wt
#barleyTSG2.s2 <- summarySE(barley_ex, "wt", c("time2","site","genotype"))$sd^2


#alg.BC(ns=barleyTSG2.ns, ybars=barleyTSG2.means,s2=barleyTSG2.s2, a=4, b=2, c=2, L=5000)
#p=0.9556, can drop site:genotype

#alg.C(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0, time still has significant effect

#alg.C(ns=barleyTSG2.ns, ybars=barleyTSG2.means,s2=barleyTSG2.s2, a=4, b=2, c=2, L=5000)
#p=0.9716, genotype is not significant


#barleyTGS2.ns <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$N
#barleyTGS2.means <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$wt
#barleyTGS2.s2 <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$sd^2


#alg.C(ns=barleyTGS2.ns, ybars=barleyTGS2.means,s2=barleyTGS2.s2, a=4, b=2, c=2, L=5000)
#p=0.9904, site is not significant


##mult comparisons of factor A so we put time first
#Q.Amc(L=5000, ns=barleyTSG2.ns, means=barleyTSG2.means, s2=barleyTSG2.s2,
# alpha=0.05, a=4, b=2, c=2)
#all sig, agrees with Tukey's test

#TukeyHSD(aov(wt ~time2, data=barley_ex))


[Package pbANOVA version 0.1.0 Index]