Q.Amc {pbANOVA} | R Documentation |
PB multiple comparisons of the levels of factor A (output like TukeyHSD)
Description
Using Parametric Bootstrap to simulate a distribution for the multiple comparisons and calculate a test stat
Usage
Q.Amc(L=5000, ns, means, s2, alpha=0.05, a, b, c)
Arguments
L |
Number of simulated values for the distribution |
ns |
sample size for each group |
means |
sample mean for each group |
s2 |
sample variance for each group |
alpha |
significant level |
a |
Number of levels for factor A |
b |
Number of levels for factor B |
c |
Number of levels for factor C |
Value
Q.crit: The (1-alpha) percentile of the simulated distribution.
Q.test: largest test statistic from all pairs
res.df: A dataframe containing the differences between each pair of factor levels, standard errors, confidence interval for the differences, test statistic for each pair, p-value, and indicator of whether the difference was statistically significant for each pair.
Examples
# We need elapsed time less than 5 seconds to publish
# the package with this example. Hence, there are # before
#a couple of PB algrithms. Remove # before you run the code.
#function to make everything but the response a factor
make.factor <- function(dataset, fact.cols){
for( i in fact.cols){
dataset[,i] <- factor(dataset[,i])
}
return(dataset)
}
barley_ex <- make.factor(barleyh20, 1:5)
##this dataset has 4 factors, ignore year
library(Rmisc)
library(MASS)
#library(lmtest)
summarySE(barley_ex, "wt", c("genotype", "site", "time"))
#ignore year, note that the data are balanced
summary(barley_ex$wt)
mod1 <- lm(wt~genotype*site*time, data=barley_ex)
anova(mod1)
plot(mod1$fit, mod1$resid)
qqnorm(mod1$resid)
shapiro.test(mod1$resid)
boxcox(mod1, lambda=seq(-4, -2, by=0.1)) #lambda approx -3.5
mod2 <- lm(wt^(-3.5)~genotype*site*time, data=barley_ex)
plot(mod2$fit, mod2$resid) #worse?
#go with untransformed data? drop 3way term
mod3 <- lm(wt~genotype + site + time + genotype:site + genotype:time + site:time, data=barley_ex)
anova(mod3) #site:time ns
anova(lm(wt~genotype + site + time + genotype:site + genotype:time, data=barley_ex))
anova(lm(wt~genotype + site + time + genotype:site, data=barley_ex))
anova(lm(wt~genotype + site + time, data=barley_ex))
anova(lm(wt~site + time, data=barley_ex))
anova(lm(wt~time, data=barley_ex))
TukeyHSD(aov(wt ~ time, data=barley_ex)) #all sig except 35-30 and 20-15 (0.0569)
###use PB methods
summarySE(barley_ex, "wt", c("genotype", "site", "time"))
#note that the data are balanced
#need to extract group sizes (ns), group var's (s2), means (ybars) for function
barley.ns <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$N
barley.means <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$wt
barley.s2 <- summarySE(barley_ex, "wt", c("genotype", "site", "time"))$sd^2
#alg.ABC(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000)
#p=0.9996, can drop 3way term
#can we drop the site:time int term?
#alg.BC(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000)
#p=0.9998, drop
#reorder data to make the different two-way terms
barleyTSG.ns <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$N
barleyTSG.means <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$wt
barleyTSG.s2 <- summarySE(barley_ex, "wt", c("time", "site", "genotype"))$sd^2
#alg.BC(ns=barleyTSG.ns, ybars=barleyTSG.means, s2=barleyTSG.s2, a=7, b=2, c=2, L=5000)
#p=0.9988, drop site:genotype
#reorder to SGT, can we drop genotype:time?
barleySGT.ns <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$N
barleySGT.means <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$wt
barleySGT.s2 <- summarySE(barley_ex, "wt", c("site", "genotype", "time"))$sd^2
#alg.BC(ns=barleySGT.ns, ybars=barleySGT.means,s2=barleySGT.s2, a=2, b=2, c=7, L=5000)
#p=0.9976, drop
#alg.C(ns=barley.ns, ybars=barley.means,s2=barley.s2, a=2, b=2, c=7, L=5000) #GST
#p=0, time has signif efffect, same conclusion as F-test
#alg.C(ns=barleyTSG.ns, ybars=barleyTSG.means, s2=barleyTSG.s2, a=7, b=2, c=2, L=5000)
#p=0.9996 no signif effect of genotype
##site?
barleyGTS.ns <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$N
barleyGTS.means <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$wt
barleyGTS.s2 <- summarySE(barley_ex, "wt", c("genotype", "time", "site"))$sd^2
#alg.C(ns=barleyGTS.ns, ybars=barleyGTS.means, s2=barleyGTS.s2, a=2, b=7, c=2, L=5000)
#p=0.9998, site is NS
#multiple comparisons
#this function tests all pairwise comparisons of the levels of factor A,
# so we use the TSG order
Q.Amc(L=5000, ns=barleyTSG.ns, means=barleyTSG.means, s2=barleyTSG.s2,
alpha=0.05, a=7, b=2, c=2)
#Demonstrate the method on unbalanced data by collapsing time into L, M, H
#barley_ex$time2 <- "M"
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) <=2, "L", barley_ex$time2)
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) >=6, "H", barley_ex$time2)
#barley_ex$time2 <- as.factor(barley_ex$time2)
#still pretty balanced, separate the lowest level
#barley_ex$time2 <- ifelse(as.numeric(barley_ex$time) <2, "LL", barley_ex$time2)
#barley_ex$time2 <- as.factor(barley_ex$time2)
#summarySE(barley_ex, "wt", c("genotype", "time2", "site"))
#two of the bigger groups N=12 have larger var
#anova(lm(wt ~genotype*time2*site, data=barley_ex))
#still looks like time2 the only sig factor
#library(lmtest)
#bptest(lm(wt ~genotype*time2*site, data=barley_ex)) #violates
#mod.un <- lm(wt ~genotype*time2*site, data=barley_ex)
#plot(mod.un$fit, mod.un$resid)
#qqnorm(mod.un$resid)
#boxcox(lm(wt ~genotype*time2*site, data=barley_ex), lambda=seq(-6, -4, by=0.1))
#lambda = -4.5
#the above transformations didn't work so just try the untransformed data
#the three way interaction term was not significant
#anova(lm(wt ~genotype+ time2 + site + genotype:time2 + genotype:site + time2:site, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site + genotype:time2 + genotype:site, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site + genotype:time2, data=barley_ex))
#anova(lm(wt ~genotype+ time2 + site, data=barley_ex))
#anova(lm(wt ~genotype+ time2, data=barley_ex))
#anova(lm(wt ~time2, data=barley_ex))
#TukeyHSD(aov(wt ~time2, data=barley_ex)) #all pairs significantly different
#PB methods
#barleyGST2.ns <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$N
#barleyGST2.means <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$wt
#barleyGST2.s2 <- summarySE(barley_ex, "wt", c("genotype", "site", "time2"))$sd^2
#alg.ABC(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0.9734, can drop 3way term
#alg.BC(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0.94, can drop site:time2
#barleySGT2.ns <- summarySE(barley_ex, "wt", c("site","genotype", "time2"))$N
#barleySGT2.means <- summarySE(barley_ex, "wt", c("site", "genotype", "time2"))$wt
#barleySGT2.s2 <- summarySE(barley_ex, "wt", c("site","genotype", "time2"))$sd^2
#alg.BC(ns=barleySGT2.ns, ybars=barleySGT2.means,s2=barleySGT2.s2, a=2, b=2, c=4, L=5000)
#p=0.9952, can drop genotype:time2
#barleyTSG2.ns <- summarySE(barley_ex, "wt", c("time2", "site","genotype"))$N
#barleyTSG2.means <- summarySE(barley_ex, "wt", c("time2","site", "genotype"))$wt
#barleyTSG2.s2 <- summarySE(barley_ex, "wt", c("time2","site","genotype"))$sd^2
#alg.BC(ns=barleyTSG2.ns, ybars=barleyTSG2.means,s2=barleyTSG2.s2, a=4, b=2, c=2, L=5000)
#p=0.9556, can drop site:genotype
#alg.C(ns=barleyGST2.ns, ybars=barleyGST2.means,s2=barleyGST2.s2, a=2, b=2, c=4, L=5000)
#p=0, time still has significant effect
#alg.C(ns=barleyTSG2.ns, ybars=barleyTSG2.means,s2=barleyTSG2.s2, a=4, b=2, c=2, L=5000)
#p=0.9716, genotype is not significant
#barleyTGS2.ns <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$N
#barleyTGS2.means <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$wt
#barleyTGS2.s2 <- summarySE(barley_ex, "wt", c("time2","genotype", "site"))$sd^2
#alg.C(ns=barleyTGS2.ns, ybars=barleyTGS2.means,s2=barleyTGS2.s2, a=4, b=2, c=2, L=5000)
#p=0.9904, site is not significant
##mult comparisons of factor A so we put time first
#Q.Amc(L=5000, ns=barleyTSG2.ns, means=barleyTSG2.means, s2=barleyTSG2.s2,
# alpha=0.05, a=4, b=2, c=2)
#all sig, agrees with Tukey's test
#TukeyHSD(aov(wt ~time2, data=barley_ex))
[Package pbANOVA version 0.1.0 Index]