COMBO_MCMC_2stage {COMBO} | R Documentation |
Jointly estimate \beta
, \gamma
, and \delta
parameters from the true outcome
first-stage observation, and second-stage observation mechanisms, respectively,
in a two-stage binary outcome misclassification model.
COMBO_MCMC_2stage(
Ystar,
Ytilde,
x,
z,
v,
prior,
beta_prior_parameters,
gamma_prior_parameters,
delta_prior_parameters,
naive_delta_prior_parameters,
number_MCMC_chains = 4,
MCMC_sample = 2000,
burn_in = 1000,
display_progress = TRUE
)
Ystar |
A numeric vector of indicator variables (1, 2) for the observed
outcome |
Ytilde |
A numeric vector of indicator variables (1, 2) for the second-stage
observed outcome |
x |
A numeric matrix of covariates in the true outcome mechanism.
|
z |
A numeric matrix of covariates in the observation mechanism.
|
v |
A numeric matrix of covariates in the second-stage observation mechanism.
|
prior |
A character string specifying the prior distribution for the
|
beta_prior_parameters |
A numeric list of prior distribution parameters
for the |
gamma_prior_parameters |
A numeric list of prior distribution parameters
for the |
delta_prior_parameters |
A numeric list of prior distribution parameters
for the |
naive_delta_prior_parameters |
A numeric list of prior distribution parameters
for the naive model |
number_MCMC_chains |
An integer specifying the number of MCMC chains to compute.
The default is |
MCMC_sample |
An integer specifying the number of MCMC samples to draw.
The default is |
burn_in |
An integer specifying the number of MCMC samples to discard
for the burn-in period. The default is |
display_progress |
A logical value specifying whether messages should be
displayed during model compilation. The default is |
COMBO_MCMC_2stage
returns a list of the posterior samples and posterior
means for both the binary outcome misclassification model and a naive logistic
regression of the observed outcome, Y*
, predicted by the matrix x
.
The list contains the following components:
posterior_sample_df |
A data frame containing three columns. The first
column indicates the chain from which a sample is taken, from 1 to |
posterior_means_df |
A data frame containing three columns. The first
column specifies the parameter associated with a given row. Parameters are
indexed as in the |
naive_posterior_sample_df |
A data frame containing three columns.
The first column indicates the chain from which a sample is taken, from
1 to |
naive_posterior_means_df |
A data frame containing three columns. The first
column specifies the naive parameter associated with a given row. Parameters are
indexed as in the |
# Helper functions
sum_every_n <- function(x, n){
vector_groups = split(x,
ceiling(seq_along(x) / n))
sum_x = Reduce(`+`, vector_groups)
return(sum_x)
}
sum_every_n1 <- function(x, n){
vector_groups = split(x,
ceiling(seq_along(x) / n))
sum_x = Reduce(`+`, vector_groups) + 1
return(sum_x)
}
# Example
set.seed(123)
n <- 1000
x_mu <- 0
x_sigma <- 1
z_shape <- 1
v_shape <- 1
true_beta <- matrix(c(1, -2), ncol = 1)
true_gamma <- matrix(c(.5, 1, -.5, -1), nrow = 2, byrow = FALSE)
true_delta <- array(c(1.5, 1, .5, .5, -.5, 0, -1, -1), dim = c(2, 2, 2))
x_matrix = matrix(rnorm(n, x_mu, x_sigma), ncol = 1)
X = matrix(c(rep(1, n), x_matrix[,1]), ncol = 2, byrow = FALSE)
z_matrix = matrix(rgamma(n, z_shape), ncol = 1)
Z = matrix(c(rep(1, n), z_matrix[,1]), ncol = 2, byrow = FALSE)
v_matrix = matrix(rgamma(n, v_shape), ncol = 1)
V = matrix(c(rep(1, n), v_matrix[,1]), ncol = 2, byrow = FALSE)
exp_xb = exp(X %*% true_beta)
pi_result = exp_xb[,1] / (exp_xb[,1] + 1)
pi_matrix = matrix(c(pi_result, 1 - pi_result), ncol = 2, byrow = FALSE)
true_Y <- rep(NA, n)
for(i in 1:n){
true_Y[i] = which(stats::rmultinom(1, 1, pi_matrix[i,]) == 1)
}
exp_zg = exp(Z %*% true_gamma)
pistar_denominator = matrix(c(1 + exp_zg[,1], 1 + exp_zg[,2]), ncol = 2, byrow = FALSE)
pistar_result = exp_zg / pistar_denominator
pistar_matrix = matrix(c(pistar_result[,1], 1 - pistar_result[,1],
pistar_result[,2], 1 - pistar_result[,2]),
ncol = 2, byrow = FALSE)
obs_Y <- rep(NA, n)
for(i in 1:n){
true_j = true_Y[i]
obs_Y[i] = which(rmultinom(1, 1,
pistar_matrix[c(i, n + i),
true_j]) == 1)
}
Ystar <- obs_Y
exp_vd1 = exp(V %*% true_delta[,,1])
exp_vd2 = exp(V %*% true_delta[,,2])
pi_denominator1 = apply(exp_vd1, FUN = sum_every_n1, n, MARGIN = 2)
pi_result1 = exp_vd1 / rbind(pi_denominator1)
pi_denominator2 = apply(exp_vd2, FUN = sum_every_n1, n, MARGIN = 2)
pi_result2 = exp_vd2 / rbind(pi_denominator2)
pitilde_matrix1 = rbind(pi_result1,
1 - apply(pi_result1,
FUN = sum_every_n, n = n,
MARGIN = 2))
pitilde_matrix2 = rbind(pi_result2,
1 - apply(pi_result2,
FUN = sum_every_n, n = n,
MARGIN = 2))
pitilde_array = array(c(pitilde_matrix1, pitilde_matrix2),
dim = c(dim(pitilde_matrix1), 2))
obs_Ytilde <- rep(NA, n)
for(i in 1:n){
true_j = true_Y[i]
obs_k = Ystar[i]
obs_Ytilde[i] = which(rmultinom(1, 1,
pitilde_array[c(i,n+ i),
obs_k, true_j]) == 1)
}
Ytilde <- obs_Ytilde
unif_lower_beta <- matrix(c(-5, -5, NA, NA), nrow = 2, byrow = TRUE)
unif_upper_beta <- matrix(c(5, 5, NA, NA), nrow = 2, byrow = TRUE)
unif_lower_gamma <- array(data = c(-5, NA, -5, NA, -5, NA, -5, NA),
dim = c(2,2,2))
unif_upper_gamma <- array(data = c(5, NA, 5, NA, 5, NA, 5, NA),
dim = c(2,2,2))
unif_upper_delta <- array(rep(c(5, NA), 8), dim = c(2,2,2,2))
unif_lower_delta <- array(rep(c(-5, NA), 8), dim = c(2,2,2,2))
unif_lower_naive_delta <- array(data = c(-5, NA, -5, NA, -5, NA, -5, NA),
dim = c(2,2,2))
unif_upper_naive_delta <- array(data = c(5, NA, 5, NA, 5, NA, 5, NA),
dim = c(2,2,2))
beta_prior_parameters <- list(lower = unif_lower_beta, upper = unif_upper_beta)
gamma_prior_parameters <- list(lower = unif_lower_gamma, upper = unif_upper_gamma)
delta_prior_parameters <- list(lower = unif_lower_delta, upper = unif_upper_delta)
naive_delta_prior_parameters <- list(lower = unif_lower_naive_delta,
upper = unif_upper_naive_delta)
MCMC_results <- COMBO_MCMC_2stage(Ystar, Ytilde,
x = x_matrix, z = z_matrix,
v = v_matrix,
prior = "uniform",
beta_prior_parameters = beta_prior_parameters,
gamma_prior_parameters = gamma_prior_parameters,
delta_prior_parameters = delta_prior_parameters,
naive_delta_prior_parameters = naive_delta_prior_parameters,
number_MCMC_chains = 2,
MCMC_sample = 200, burn_in = 100)
MCMC_results$posterior_means_df