R: Surrogate Function Approximation via the Generalized Linear...

SFA {SimDesign}

R Documentation

Surrogate Function Approximation via the Generalized Linear Model

Description

Given a simulation that was executed with runSimulation, potentially with the argument store_results = TRUE to store the unsummarised analysis results, fit a surrogate function approximation (SFA) model to the results and (optionally) perform a root-solving step to solve a target quantity. See Schoemann et al. (2014) for details.

Usage

SFA(
  results,
  formula,
  family = "binomial",
  b = NULL,
  design = NULL,
  CI = 0.95,
  interval = NULL,
  ...
)

## S3 method for class 'SFA'
print(x, ...)

Arguments

`results`	data returned from `runSimulation`. This can be the original results object or the extracted results stored when using `store_results = TRUE` included to store the analysis results.
`formula`	formula to specify for the regression model
`family`	character vector indicating the family of GLMs to use (see `family`)
`b`	(optional) Target quantity to use for root solving given the fitted surrogate function (e.g., find sample size associated with SFA implied power of .80)
`design`	(optional) `data.frame` object containing all the information relevant for the surrogate model (passed to `newdata` in `predict`) with an `NA` value in the variable to be solved
`CI`	advertised confidence interval of SFA prediction around solved target
`interval`	interval to be passed to `uniroot` if not specified then the lowest and highest values from `results` for the respective variable will be used
`...`	additional arguments to pass to `glm`
`x`	an object of class `SFA`

Author(s)

Phil Chalmers rphilip.chalmers@gmail.com

References

Chalmers, R. P., & Adkins, M. C. (2020). Writing Effective and Reliable Monte Carlo Simulations with the SimDesign Package. The Quantitative Methods for Psychology, 16(4), 248-280. doi:10.20982/tqmp.16.4.p248

Schoemann, A. M., Miller, P., Pornprasertmanit, S., and Wu, W. (2014). Using Monte Carlo simulations to determine power and sample size for planned missing designs. International Journal of Behavioral Development, SAGE Publications, 38, 471-479.

Examples

## Not run: 

# create long Design object to fit surrogate over
Design <- createDesign(N = 100:500,
                       d = .2)
Design

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 2 --- Define generate, analyse, and summarise functions

Generate <- function(condition, fixed_objects) {
    Attach(condition)
    group1 <- rnorm(N)
    group2 <- rnorm(N, mean=d)
    dat <- data.frame(group = gl(2, N, labels=c('G1', 'G2')),
                      DV = c(group1, group2))
    dat
}

Analyse <- function(condition, dat, fixed_objects) {
    p <- c(p = t.test(DV ~ group, dat, var.equal=TRUE)$p.value)
    p
}

Summarise <- function(condition, results, fixed_objects) {
    ret <- EDR(results, alpha = .05)
    ret
}

#~~~~~~~~~~~~~~~~~~~~~~~~
#### Step 3 --- Estimate power over N

# Use small number of replications given range of sample sizes
## note that due to the lower replications disabling the
## RAM printing will help reduce overhead

sim <- runSimulation(design=Design, replications=10,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim

# total of 4010 replication
sum(sim$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim, las=1, pch=16, main='Rejection rates with R=10')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design)
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)


################
# example with smaller range but higher precision
Design <- createDesign(N = 375:425,
                       d = .2)
Design

sim2 <- runSimulation(design=Design, replications=100,
                     generate=Generate, analyse=Analyse,
                     summarise=Summarise, store_results=TRUE,
                     progress=FALSE, control=list(print_RAM=FALSE))
sim2
sum(sim2$REPLICATIONS) # more replications in total

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim2)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# fitted model
sfa <- SFA(sim_results, formula = sig ~ N)
sfa
summary(sfa)

# plot the observed and SFA expected values
plot(p ~ N, sim2, las=1, pch=16, main='Rejection rates with R=100')
pred <- predict(sfa, type = 'response')
lines(sim_results$N, pred, col='red', lty=2)

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

###################
# vary multiple parameters (e.g., sample size + effect size) to fit
# multi-parameter surrogate

Design <- createDesign(N = seq(from=10, to=500, by=10),
                       d = seq(from=.1, to=.5, by=.1))
Design

sim3 <- runSimulation(design=Design, replications=50,
                      generate=Generate, analyse=Analyse,
                      summarise=Summarise, store_results=TRUE,
                      progress=FALSE, control=list(print_RAM=FALSE))
sim3
sum(sim3$REPLICATIONS)

# use the unsummarised results for the SFA, and include p.values < alpha
sim_results <- SimResults(sim3)
sim_results <- within(sim_results, sig <- p < .05)
sim_results

# additive effects (logit(sig) ~ N + d)
sfa0 <- SFA(sim_results, formula = sig ~ N+d)
sfa0

# multiplicative effects (logit(sig) ~ N + d + N:d)
sfa <- SFA(sim_results, formula = sig ~ N*d)
sfa

# multiplicative better fit (sample size interacts with effect size)
anova(sfa0, sfa, test = "LRT")
summary(sfa)

# plot the observed and SFA expected values
library(ggplot2)
sim3$pred <- predict(sfa, type = 'response', newdata=sim3)
ggplot(sim3, aes(N, p, color = factor(d))) +
  geom_point() + geom_line(aes(y=pred)) +
  facet_wrap(~factor(d))

# fitted model + root-solved solution given f(.) = b,
#   where b = target power of .8
design <- data.frame(N=NA, d=.2)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.2)

# root prediction where d *not* used in original data
design <- data.frame(N=NA, d=.25)
sfa.root <- SFA(sim_results, formula = sig ~ N * d,
                b=.8, design=design, interval=c(100, 500))
sfa.root

# true root
pwr::pwr.t.test(power=.8, d=.25)


## End(Not run)

[Package SimDesign version 2.16 Index]