R: Declare the size and features of the population

declare_model {DeclareDesign}

R Documentation

Declare the size and features of the population

Description

Declare the size and features of the population

Usage

declare_model(..., handler = fabricate, label = NULL)

Arguments

`...`	arguments to be captured, and later passed to the handler
`handler`	a tidy-in, tidy-out function
`label`	a string describing the step

Value

A function that returns a data.frame.

Examples

# declare_model is usually used when concatenating
# design elements with `+`

## Example: Two-arm randomized experiment
design <-
  declare_model(
    N = 500,
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ 0.2 * Z + X + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_assignment(Z = complete_ra(N = N, m = 250)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE")

# declare_model returns a function:
M <- declare_model(N = 100)
M()

# Declare a population from existing data
M <- declare_model(data = mtcars)
M()

# Resample from existing data
M <- declare_model(N = 100, data = mtcars, handler = resample_data)
M()

# Declare a model with covariates: 
# observed covariates X1 and X2 and 
# unobserved heterogeneity U that each affect 
# outcome Y
M <- declare_model(
  N = 100,
  U = rnorm(N),
  X1 = rbinom(N, size = 1, prob = 0.5),
  X2 = X1 + rnorm(N),
  Y = 0.1 * X1 + 0.2 * X2 + 0.1 * X1 * X2 + U
)
M()

if(require("MASS")) {

  # We can draw correlated variables using draw_multivariate
  M <-
  declare_model(
    draw_multivariate(c(X1, X2) ~ MASS::mvrnorm(
      n = 1000,
      mu = c(0, 0),
      Sigma = matrix(c(1, 0.3, 0.3, 1), nrow = 2)
    )))
  M()
  
}

# Declare potential outcomes model dependent on assignment Z
## Manually
M <- 
  declare_model(N = 100, 
                Y_Z_0 = rbinom(N, size = 1, prob = 0.5),
                Y_Z_1 = rbinom(N, size = 1, prob = 0.6)
  )
M()

## Using potential_outcomes
M <- 
  declare_model(N = 100, 
                potential_outcomes(Y ~ rbinom(N, size = 1, prob = 0.1 * Z + 0.5))
  )
M()
 
## we can draw from a distribution of effect sizes 
M <-
  declare_model(
    N = 100, 
    tau = runif(1, min = 0, max = 1), 
    U = rnorm(N), 
    potential_outcomes(Y ~ tau * Z + U)
  )
M()

## we can simulate treatment-by-covariate effect heterogeneity:
M <- 
  declare_model(
    N = 100, 
    U = rnorm(N), 
    X = rbinom(N, 1, prob = 0.5),
    potential_outcomes(Y ~  0.3 * Z + 0.2*X + 0.1*Z*X + U)
  )
M()

## potential outcomes can respond to two treatments:
M <- declare_model(
  N = 6,
  U = rnorm(N),
  potential_outcomes(Y ~ Z1 + Z2 + U, 
                     conditions = list(Z1 = c(0, 1), Z2 = c(0, 1))))

M()

# Declare a two-level hierarchical population
# containing varying numbers of individuals within
# households and an age variable defined at the individual
# level
M <- declare_model(
  households = add_level(
    N = 100, 
    N_members = sample(c(1, 2, 3, 4), N, 
                       prob = c(0.2, 0.3, 0.25, 0.25), 
                       replace = TRUE)
  ),
  individuals = add_level(
    N = N_members, 
    age = sample(18:90, N, replace = TRUE)
  )
)
M()

## Panel data have a more complex structure:
M <- 
  declare_model(
    countries = add_level(
      N = 196, 
      country_shock = rnorm(N)
    ),
    years = add_level(
      N = 100, 
      time_trend = 1:N,
      year_shock = runif(N, 1, 10), 
      nest = FALSE
    ),
    observation = cross_levels(
      by = join_using(countries, years),
      observation_shock = rnorm(N),
      Y = 0.01 * time_trend + country_shock + year_shock + observation_shock 
    )
  )
M()

# Declare a population using a custom function
# the default handler is fabricatr::fabricate,
# but you can supply any function that returns a data.frame
my_model_function <- function(N) {
  data.frame(u = rnorm(N))
}

M <- declare_model(N = 10, handler = my_model_function)
M()

[Package DeclareDesign version 1.0.10 Index]