SFA {SimDesign} | R Documentation |
Surrogate Function Approximation via the Generalized Linear Model
Description
Given a simulation that was executed with runSimulation
,
potentially with the argument store_results = TRUE
to store the
unsummarised analysis results, fit a surrogate function approximation (SFA)
model to the results and (optionally) perform a root-solving
step to solve a target quantity. See Schoemann et al. (2014) for details.
Usage
SFA(
results,
formula,
family = "binomial",
b = NULL,
design = NULL,
CI = 0.95,
interval = NULL,
...
)
## S3 method for class 'SFA'
print(x, ...)
Arguments
results |
data returned from |
formula |
formula to specify for the regression model |
family |
character vector indicating the family of GLMs to use
(see |
b |
(optional) Target quantity to use for root solving given the fitted surrogate function (e.g., find sample size associated with SFA implied power of .80) |
design |
(optional) |
CI |
advertised confidence interval of SFA prediction around solved target |
interval |
interval to be passed to |
... |
additional arguments to pass to |
x |
an object of class |
Author(s)
Phil Chalmers rphilip.chalmers@gmail.com
References
Chalmers, R. P., & Adkins, M. C. (2020). Writing Effective and Reliable Monte Carlo Simulations
with the SimDesign Package. The Quantitative Methods for Psychology, 16
(4), 248-280.
doi:10.20982/tqmp.16.4.p248
Schoemann, A. M., Miller, P., Pornprasertmanit, S., and Wu, W. (2014). Using Monte Carlo simulations to determine power and sample size for planned missing designs. International Journal of Behavioral Development, SAGE Publications, 38, 471-479.
See Also
Examples
## Not run:
# create long Design object to fit surrogate over
Design <- createDesign(N = 100:500,
d = .2)
Design
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions
Generate <- function(condition, fixed_objects) {
Attach(condition)
group1 <- rnorm(N)
group2 <- rnorm(N, mean=d)
dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
DV = c(group1, group2))
dat
}
Analyse <- function(condition, dat, fixed_objects) {
p <- c(p = t.test(DV ~ group, dat, var.equal=TRUE)$p.value)
p
}
Summarise <- function(condition, results, fixed_objects) {
ret <- EDR(results, alpha = .05)
ret
}
#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Estimate power over N
# Use small number of replications given range of sample sizes
## note that due to the lower replications disabling the
## RAM printing will help reduce overhead
sim <- runSimulation(design=Design, replications=10,
generate=Generate, analyse=Analyse,
summarise=Summarise, store_results=TRUE,
progress=FALSE, control=list(print_RAM=FALSE))
sim
# total of 4010 replication
sum(sim$REPLICATIONS)
# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim)
sim_results <- within(sim_results, sig <- p < .05)
sim_results
# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)
# plot the observed and SFA expected values
plot(p ~ N, sim, las=1, pch=16, main='Rejection rates with R=10')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)
# fitted model + root-solved solution given f(.) = b,
# where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
b=.8, design=design)
sfa.root
# true root
pwr::pwr.t.test(power=.8, d=.2)
################
# example with smaller range but higher precision
Design <- createDesign(N = 375:425,
d = .2)
Design
sim2 <- runSimulation(design=Design, replications=100,
generate=Generate, analyse=Analyse,
summarise=Summarise, store_results=TRUE,
progress=FALSE, control=list(print_RAM=FALSE))
sim2
sum(sim2$REPLICATIONS) # more replications in total
# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim2)
sim_results <- within(sim_results, sig <- p < .05)
sim_results
# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)
# plot the observed and SFA expected values
plot(p ~ N, sim2, las=1, pch=16, main='Rejection rates with R=100')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)
# fitted model + root-solved solution given f(.) = b,
# where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
b=.8, design=design, interval=c(100, 500))
sfa.root
# true root
pwr::pwr.t.test(power=.8, d=.2)
###################
# vary multiple parameters (e.g., sample size + effect size) to fit
# multi-parameter surrogate
Design <- createDesign(N = seq(from=10, to=500, by=10),
d = seq(from=.1, to=.5, by=.1))
Design
sim3 <- runSimulation(design=Design, replications=50,
generate=Generate, analyse=Analyse,
summarise=Summarise, store_results=TRUE,
progress=FALSE, control=list(print_RAM=FALSE))
sim3
sum(sim3$REPLICATIONS)
# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim3)
sim_results <- within(sim_results, sig <- p < .05)
sim_results
# additive effects (logit(sig) ~ N + d)
sfa0 <- SFA(sim_results, formula = sig ~ N+d)
sfa0
# multiplicative effects (logit(sig) ~ N + d + N:d)
sfa <- SFA(sim_results, formula = sig ~ N*d)
sfa
# multiplicative better fit (sample size interacts with effect size)
anova(sfa0, sfa, test = "LRT")
summary(sfa)
# plot the observed and SFA expected values
library(ggplot2)
sim3$pred <- predict(sfa, type = 'response', newdata=sim3)
ggplot(sim3, aes(N, p, color = factor(d))) +
geom_point() + geom_line(aes(y=pred)) +
facet_wrap(~factor(d))
# fitted model + root-solved solution given f(.) = b,
# where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
b=.8, design=design, interval=c(100, 500))
sfa.root
# true root
pwr::pwr.t.test(power=.8, d=.2)
# root prediction where d *not* used in original data
design <- data.frame(N=NA, d=.25)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
b=.8, design=design, interval=c(100, 500))
sfa.root
# true root
pwr::pwr.t.test(power=.8, d=.25)
## End(Not run)