bsitar {bsitar}R Documentation

Fit Bayesian SITAR growth curve model

Description

The bsitar() is the main function that fits the Bayesian version of the super imposition by translation and rotation (SITAR) model. The SITAR model is a nonlinear mixed effects model that has been used extensively to summarize growth processes (such as height and weight) from early childhood through the adulthood. The frequentist version of the SITAR model can be fit by using an already available R package, sitar (Cole 2022). Besides Bayesian implementation, the bsitar package greatly enhances the modelling capabilities of the SITAR. For example, in addition to the univariate analysis (i.e, modelling a single outcome), the bsitar allows univariate-by-subgroup and multivariate model fitting. In univariate-by-subgroup analysis, a single outcome is simultaneously analysed for subgroups defined by a factor variable such as gender. An advantage of univariate-by-subgroup analysis is that posterior draws for each sub group are part of a single model object that makes it possible to compare coefficients across groups and also test various hypotheses. The multivariate analysis involves simultaneous joint modelling of two or more outcomes.

Usage

bsitar(
  x,
  y,
  id,
  data,
  df = 4,
  knots = NA,
  fixed = a + b + c,
  random = a + b + c,
  xoffset = mean,
  bstart = xoffset,
  cstart = 0,
  xfun = NULL,
  yfun = NULL,
  bound = 0.04,
  terms_rhs = NULL,
  a_formula = ~1,
  b_formula = ~1,
  c_formula = ~1,
  d_formula = ~1,
  s_formula = ~1,
  a_formula_gr = ~1,
  b_formula_gr = ~1,
  c_formula_gr = ~1,
  d_formula_gr = ~1,
  a_formula_gr_str = NULL,
  b_formula_gr_str = NULL,
  c_formula_gr_str = NULL,
  d_formula_gr_str = NULL,
  d_adjusted = FALSE,
  sigma_formula = NULL,
  sigma_formula_gr = NULL,
  sigma_formula_gr_str = NULL,
  dpar_formula = NULL,
  autocor_formula = NULL,
  family = gaussian(),
  custom_family = NULL,
  custom_stanvars = NULL,
  group_arg = list(groupvar = NULL, by = NULL, cor = un, cov = NULL, dist = gaussian),
  sigma_group_arg = list(groupvar = NULL, by = NULL, cor = un, cov = NULL, dist =
    gaussian),
  univariate_by = list(by = NA, cor = un, terms = subset),
  multivariate = list(mvar = FALSE, cor = un, rescor = TRUE),
  a_prior_beta = student_t(3, ymean, ysd, autoscale = TRUE),
  b_prior_beta = student_t(3, 0, 3.5, autoscale = FALSE),
  c_prior_beta = student_t(3, 0, 1.5, autoscale = FALSE),
  d_prior_beta = student_t(3, 0, 1, autoscale = TRUE),
  s_prior_beta = student_t(3, 0, lm, autoscale = TRUE),
  a_cov_prior_beta = student_t(3, 0, 5, autoscale = FALSE),
  b_cov_prior_beta = student_t(3, 0, 1, autoscale = FALSE),
  c_cov_prior_beta = student_t(3, 0, 0.1, autoscale = FALSE),
  d_cov_prior_beta = student_t(3, 0, 1, autoscale = FALSE),
  s_cov_prior_beta = student_t(3, 0, 10, autoscale = FALSE),
  a_prior_sd = student_t(3, 0, ysd, autoscale = TRUE),
  b_prior_sd = student_t(3, 0, 2, autoscale = FALSE),
  c_prior_sd = student_t(3, 0, 1.25, autoscale = FALSE),
  d_prior_sd = student_t(3, 0, 1, autoscale = TRUE),
  a_cov_prior_sd = student_t(3, 0, 5, autoscale = FALSE),
  b_cov_prior_sd = student_t(3, 0, 1, autoscale = FALSE),
  c_cov_prior_sd = student_t(3, 0, 0.1, autoscale = FALSE),
  d_cov_prior_sd = student_t(3, 0, 1, autoscale = FALSE),
  a_prior_sd_str = NULL,
  b_prior_sd_str = NULL,
  c_prior_sd_str = NULL,
  d_prior_sd_str = NULL,
  a_cov_prior_sd_str = NULL,
  b_cov_prior_sd_str = NULL,
  c_cov_prior_sd_str = NULL,
  d_cov_prior_sd_str = NULL,
  sigma_prior_beta = student_t(3, 0, 1, autoscale = FALSE),
  sigma_cov_prior_beta = student_t(3, 0, 0.5, autoscale = FALSE),
  sigma_prior_sd = student_t(3, 0, 0.25, autoscale = FALSE),
  sigma_cov_prior_sd = student_t(3, 0, 0.15, autoscale = FALSE),
  sigma_prior_sd_str = NULL,
  sigma_cov_prior_sd_str = NULL,
  rsd_prior_sigma = exponential(ysd, autoscale = TRUE),
  dpar_prior_sigma = student_t(3, 0, ysd, autoscale = TRUE),
  dpar_cov_prior_sigma = student_t(3, 0, 1, autoscale = FALSE),
  autocor_prior_acor = uniform(-1, 1, autoscale = FALSE),
  autocor_prior_unstr_acor = lkj(1),
  gr_prior_cor = lkj(1),
  gr_prior_cor_str = lkj(1),
  sigma_prior_cor = lkj(1),
  sigma_prior_cor_str = lkj(1),
  mvr_prior_rescor = lkj(1),
  init = NULL,
  init_r = NULL,
  a_init_beta = lm,
  b_init_beta = 0,
  c_init_beta = 0,
  d_init_beta = 0,
  s_init_beta = lm,
  a_cov_init_beta = 0,
  b_cov_init_beta = 0,
  c_cov_init_beta = 0,
  d_cov_init_beta = 0,
  s_cov_init_beta = lm,
  a_init_sd = random,
  b_init_sd = random,
  c_init_sd = random,
  d_init_sd = random,
  a_cov_init_sd = random,
  b_cov_init_sd = random,
  c_cov_init_sd = random,
  d_cov_init_sd = random,
  sigma_init_beta = random,
  sigma_cov_init_beta = random,
  sigma_init_sd = random,
  sigma_cov_init_sd = random,
  gr_init_cor = random,
  sigma_init_cor = random,
  rsd_init_sigma = random,
  dpar_init_sigma = random,
  dpar_cov_init_sigma = random,
  autocor_init_acor = random,
  autocor_init_unstr_acor = random,
  mvr_init_rescor = random,
  r_init_z = random,
  vcov_init_0 = TRUE,
  jitter_init_beta = NULL,
  jitter_init_sd = NULL,
  jitter_init_cor = NULL,
  prior_data = NULL,
  init_data = NULL,
  init_custom = NULL,
  verbose = FALSE,
  expose_function = FALSE,
  get_stancode = FALSE,
  get_standata = FALSE,
  get_formula = FALSE,
  get_stanvars = FALSE,
  get_priors = FALSE,
  get_priors_eval = FALSE,
  get_init_eval = FALSE,
  validate_priors = FALSE,
  set_self_priors = NULL,
  set_replace_priors = NULL,
  set_same_priors_hierarchy = FALSE,
  outliers = NULL,
  unused = NULL,
  chains = 4,
  iter = 2000,
  warmup = floor(iter/2),
  thin = 1,
  cores = getOption("mc.cores", "optimize"),
  backend = getOption("brms.backend", "rstan"),
  threads = getOption("brms.threads", "optimize"),
  opencl = getOption("brms.opencl", NULL),
  normalize = getOption("brms.normalize", TRUE),
  algorithm = getOption("brms.algorithm", "sampling"),
  control = list(adapt_delta = 0.8, max_treedepth = 15),
  sample_prior = "no",
  save_pars = NULL,
  drop_unused_levels = TRUE,
  stan_model_args = list(),
  refresh = NULL,
  silent = 1,
  seed = 123,
  save_model = NULL,
  fit = NA,
  file = NULL,
  file_compress = TRUE,
  file_refit = getOption("brms.file_refit", "never"),
  future = getOption("future", FALSE),
  parameterization = "ncp",
  ...
)

Arguments

x

Predictor variable (typically age in years). For univariate model, the x is a single variable whereas for univariate_by and multivariate models, the x can be same for sub models, or different for each sub model. For example, when fitting a bivariate model, the x = list(x1, x2) specifies that x1 is the predictor variable for the first sub model, and x2 for the second sub model. To specify x1 as a common predictor variable for both sub models, the argument x is defined as x = list(x1) or simply x = x1.

y

Response variable (e.g., repeated height measurements). For univariate and univariate_by models, y is specified as a single variable. For univariate_by model, the response vector for each sub model is created and named internally based on the factor levels of the variable that is used to set up the univariate_by model. As an example, the model specified as univariate_by = sex creates response vectors Female and Male when Female is the first level and Male is the second level of the sex variable. For multivariate model, the response variables are specified as a list such as y = list(y1, y2) where y1 is the response variable for the first sub model and y2 for the second sub model. Note that for multivariate model, data are not stacked but rather response vectors are separate variables in the data and are of same length.

id

A factor variable uniquely identifying the groups (e.g., individuals) in the data frame. For univariate_by and multivariate models, the id can be same (typically) for sub models or different for each sub model (see argument x for details on setting different arguments for sub models).

data

Data frame containing variables such as x, y, id etc.

df

Degrees of freedom for the natural cubic spline design matrix (default 4). The df is internally used to construct the knots (quantiles of x distribution) that are then used in the construction of the spline design matrix. For univariate_by and multivariate models, the df can be same (e.g., df = 4) for sub models or different for each sub model such as df=list(4, 5) where df is 4 is for the first sub model, and 5 for the second sub model.

knots

A numeric vector vector specifying the knots for the natural cubic spline design matrix (default NULL) Note that df and knots can not be specified together, and also both of them can not be NULL. In other words, either df or knots must be specified. Like df, the knots can be same for sub models or different for each sub model when fitting univariate_by and multivariate models (see df for details).

fixed

A character string specifying the fixed effects structure (default 'a+b+c'). Note that different fixed effect structures can be specified when fitting univariate_by and multivariate models. As an example, fixed = list('a+b+c', 'a+b') implies that the fixed effect structure for the first sub model is 'a+b+c', and 'a+b' for the second sub model.

random

A character string specifying the random effects structure (default 'a+b+c'). The approach used in setting the random is same as described above for the fixed effects structure (see fixed).

xoffset

An optional character string, or a numeric value to set up the origin of the predictor variable, x (i.e., centering of x). The options available are 'mean' (mean of x, i.e., mean(x)), 'max' (maximum value of x, i.e., max(x)), 'min' (minimum value of x, i.e., min(x)), 'apv' (age at peak velocity estimated from the velocity curve derived from the simple linear model fit to the data), or any real number such as xoffset = 12. The default is xoffset = 'mean'. For univariate_by and multivariate models, the xoffset can be same for sub models or different for each sub model (see argument x for details on setting different arguments for sub models).

bstart

An optional character string, or a numeric value to set up the origin of the fixed effect parameter b. The argument bstart can be used to set up the location parameter for the location-scale based priors (such as normal()) via b_prior_beta argument and/or the initial value via the b_init_beta argument. The options available to set up the bstart are same as described above for the xoffset i.e., 'mean', 'min', 'max', 'apv' or a real number such as 12. The default is same as xoffset i.e., bstart = 'xoffset'. For univariate_by and multivariate models, the xoffset can be same for sub models (typically), or different for each sub model (see argument x for details on setting different arguments for sub models).

cstart

An optional character string, or a numeric value to set up the origin of the fixed effect parameter c. The argument cstart can be used to set up the location parameter for the location-scale based priors (such as normal()) via c_prior_beta argument and/or the initial value via the c_init_beta argument. The options available to set up the cstart are 'pv' (peak velocity estimated from the velocity curve derived from the simple linear model fit to the data), or a real number such as 1. Note that since parameter c is estimated on the exponential scale, the argument cstart should be adjusted accordingly. The default cstart is '0' i.e., cstart = '0'. For univariate_by and multivariate models, the xoffset can be same for sub models (typically), or different for each sub model (see argument x for details on setting different arguments for sub models).

xfun

An optional character string to specify the transformation of the predictor variable, The default is NULL indicating that no transformation is applied i.e., model is fit to the data with original scale of the x. Available transformation options are 'log' (logarithmic transformation) and 'sqrt' (square root transformation). For univariate_by and multivariate models, the xfun can be same for sub models (typically), or different for each sub model (see argument x for details on setting different arguments for sub models).

yfun

An optional character string to specify the transformation of the response variable, The default is NULL, indicating that no transformation is applied i.e., model is fit to the data with original scale of the y. Available transformation options are 'log' (logarithmic transformation) and 'sqrt' (square root transformation). For univariate_by and multivariate models, the xfun can be same for sub models (typically), or different for each sub model (see argument x for details on setting different arguments for sub models).

bound

An optional real number to extend the span of the predictor variable x by a small value (default 0.04). See package sitar::sitar() for details. For univariate_by and multivariate models, the bound can be same for sub models (typically), or different for each sub model (see argument x for details on setting different arguments for sub models).

terms_rhs

An optional character string (default NULL) to specify terms on the right hand side of the response variable (separated by |) but before the formula tilde sign i.e., ~. The terms_rhs is used when fitting a measurement error model. As an example, consider fitting a model with measurement error in the response variable which is specified in the brms::brmsformula() as brmsformula(y | mi(sdy) ~ ..). In this example, the mi(sdy) is passed to brms::brmsformula() as terms_rhs = mi(sdy). For multivariate model, each outcome can have its own measurement error variable that can be specified as follows:
terms_rhs = list(mi(sdy1), mi(sdy2)). Note that brms::brmsformula() does not allow combining mi() with the subset() formulation that is used for fitting univariate_by model.

a_formula

Formula for the fixed effect parameter, a (default ~ 1). User can specify different formula when fitting univariate_by and multivariate models. As an example a_formula = list(~1, ~1 + cov) implies that the a_formula for the first sub model includes an intercept only whereas the second sub model includes an intercept and a covariate, cov. The covariate(s) can be continuous variable(s) or factor variable(s). For factor covariates, dummy variables are created internally via the stats::model.matrix()). The formula can include any combination of continuous and factor variables as well as their interactions.

b_formula

Formula for the fixed effect parameter, b (default ~ 1). See a_formula for details.

c_formula

Formula for the fixed effect parameter, c (default ~ 1). See a_formula for details.

d_formula

Formula for the fixed effect parameter, d (default ~ 1). See a_formula for details.

s_formula

Formula for the fixed effect parameter, s (default ~ 1). The s_formula sets up the the spline design matrix. Typically, covariate(s) are not included in the s_formula to limit the population curve to be single curve for the whole data. In fact, the sitar::sitar() does not provide any option to include covariates in the s_formula, However, bsitar package allows inclusion of covariates but the user need to justify the modelling of separate curves for each category when covariate is a factor variable.

a_formula_gr

Formula for the random effect parameter, a (default ~ 1). Similar to a_formula, user can specify different formula when fitting univariate_by and multivariate models and formula can include continuous and/or factor variable(s) including their interactions as covariates (see a_formula for details). In addition to setting up the design matrix for the random effect parameter a, user can set up the group identifier and the correlation structure for random effects via the vertical bar || approach. For example, consider only an intercept for the random effects a, b, and c specified as a_formula_gr = ~1, b_formula_gr = ~1 and c_formula_gr = ~1. To specify the group identifier (e.g., id) and an unstructured correlation structure, the formula argument as specified as follows:
a_formula_gr = ~ (1|i|id)
b_formula_gr = ~ (1|i|id)
c_formula_gr = ~ (1|i|id)
where i within the vertical bars || is just a placeholder. A common identifier (i.e., i) shared across random effect formulas are modeled as unstructured correlated. For more details on the the vertical bar approach, please see brms::brm(). As explained below, an alternative approach to set up the group identifier and the correlation structure is to use group_by argument. In other words, to achieve the same set up as defined above by using the vertical bar approach, user can just specify the design matrix part of the formula as a_formula_gr = ~ 1
b_formula_gr = ~ 1
c_formula_gr = ~ 1
and use the group_by argument as group_by = list(groupvar = id, cor = un) where id specifies the group identifier and un sets up the unstructured correlation structure. See group_by argument for details.

b_formula_gr

Formula for the random effect parameter, b (default ~ 1). See a_formula_gr for details.

c_formula_gr

Formula for the random effect parameter, c (default ~ 1). See a_formula_gr for details.

d_formula_gr

Formula for the random effect parameter, d (default ~ 1). See a_formula_gr for details.

a_formula_gr_str

Formula for the random effect parameter, a (default NULL) when fitting a hierarchical model with three or more levels of hierarchy. An example is model applied to the data that comprise repeated measurements (level 1) on individuals (level 2) nested further within the growth studies (level 3). Note that When using a_formula_gr_str argument, only the vertical bar approach (see a_formula_gr) can be used to set up the group identifiers and the correlation structure. An example of setting up the formula for a three level model with random effect parameter a, b is as follows:
a_formula_gr_str = ~ (1|i|id:study) + (1|i2|study)
b_formula_gr_str = ~ (1|i|id:study) + (1|i2|study)
c_formula_gr_str = ~ (1|i|id:study) + (1|i2|study)
where |i| and |i2| set up the unstructured correlation structure for individual and study level random effects. Note that |i| and |i2| need to be distinct because random effect parameters are not allowed to be correlated across different levels of hierarchy. It is worth mentioning that user can set up model with any number of hierarchical levels and include covariate into the random effect formula.

b_formula_gr_str

Formula for the random effect parameter, b (default NULL) when fitting a hierarchical model with three or more levels of hierarchy. See a_formula_gr_str for details.

c_formula_gr_str

Formula for the random effect parameter, c (default NULL) when fitting a hierarchical model with three or more levels of hierarchy. See a_formula_gr_str for details.

d_formula_gr_str

Formula for the random effect parameter, d (default NULL) when fitting a hierarchical model with three or more levels of hierarchy. See a_formula_gr_str for details.

d_adjusted

A logical indicator to set up the scale of predictor variable x when fitting the model with random effect parameter d. The coefficient of parameter d is estimated as a linear function of x i.e., d * x. If FALSE (default), the original x is used. When d_adjusted = TRUE, the x is adjusted for the timing (b) and intensity (c) parameters as x - b) * exp(c) i.e., d * ((x-b)*exp(c)). The adjusted scale of x reflects individual developmental age rather than chronological age. This makes d more sensitive to the timing of puberty in individuals. See sitar::sitar() function for details.

sigma_formula

Formula for the fixed effect distributional parameter, sigma. The sigma_formula sets up the fixed effect design matrix that may include continuous and/or factor variables (and their interactions) as covariates(s) for the distributional parameter. In other words, setting up the covariates for sigma_formula is same as for any other fixed parameter such as a (see a_formula for details). Note that sigma_formula estimates sigma parameter at log scale. By default, the sigma_formula is NULL because the brms::brm() itself models the sigma as a residual standard deviation (RSD) parameter at the link scale. The sigma_formula along with the arguments sigma_formula_gr and sigma_formula_gr_str allow estimating the scale parameters as random effects for sigma. The set up to specify the fixed and random effects for sigma is similar to setting fixed and random effect structures for other model parameters such as a, b, and c. It is important to note that an alternative way to set up the fixed effect design matrix for the distributional parameter sigma is to use the dpar_formula argument. An advantage of dpar_formula over sigma_formula is that user can specify the linear and nonlinear formulation as allowed by the brms::lf() and brms::nlf() syntax. The brms::lf() and brms::nlf() offer flexibility in centering the predictors and also allows enabling/disabling of cell mean centering when excluding intercept via 0 + formulation. A disadvantage of dpar_formula approach is that it is not possible to include random effects for the sigma. Note that sigma_formula and dpar_formula can not be specified together. When either sigma_formula or dpar_formula is used, the default estimation of the RSD by brms::brm() is automatically turned off.

sigma_formula_gr

Formula for the random effect parameter, sigma (default NULL). See a_formula_gr for details.

sigma_formula_gr_str

Formula for the random effect parameter, sigma when fitting a hierarchical model with three or more levels of hierarchy. See a_formula_gr_str for details.

dpar_formula

Formula for the distributional fixed effect parameter, sigma (default NULL). See sigma_formula for details.

autocor_formula

Formula to set up the autocorrelation structure of residuals (default NULL). Allowed autocorrelation structures are:

  • autoregressive moving average (arma) of order p and q specified as autocor_formula = ~arms(p=1, q=1).

  • autoregressive (ar) of order p specified as autocor_formula = ~ar(p=1).

  • moving average (ma) of order q specified as autocor_formula = ~ma(q=1).

  • unstructured (unstr) over time (and individuals), The unstr structure is specified as autocor_formula = ~unstr(time, id)).

See brms::brm() for further details on modeling autocorrelation structure of residuals

family

Family distribution (default gaussian) and the link function (default identity). See brms::brm() for details on available distributions and link functions, and how to specify them. For univariate_by and multivariate models, the family can be same (e.g., family = gaussian()) for sub models or different for each sub model such as family = list(gaussian(), student()) which sets gaussian distribution for the first sub model and student_t distribution for the second sub model. Please note that argument family is ignored when use specifies custom_family i.e., custom_family is not NULL.

custom_family

Specify custom families (i.e. response distribution). Default NULL. Please see brms::custom_family() for details. It is important no note that user defined Stan functions must be expose by setting expose_functions = TRUE.

custom_stanvars

Prepare and pass user-defined variables that need to be added to the Stan's program blocks (default NULL). This is primarily useful when defining custom_family. Please see brms::custom_family() for details on specifying stanvars. Note that custom_stanvars are passed directly without conducting any sanity checks.

group_arg

Specify arguments for group-level random effects. The group_arg should be a named list that may include groupvar, dist, cor and by as described below:

  • The groupvar specifies the subject identifier. In case groupvar = NULL (default), the groupvar is automatically assigned based on the id argument.

  • The dist specifies the distribution from which the random effects are drawn (default gaussian). As per the brms::brm() documentation, the gaussian distribution is the only available distribution (as of now).

  • The by argument can be used to estimate separate variance covariance structure (i.e., standard deviation and correlation parameters) for random effect parameters (default NULL). If specified, variable used to set up the by argument must be a factor variable. For example, by = 'sex' implies that separate variance covariance structure are estimated for males and females.

  • The cor is used to set up the covariance (i.e., correlation) structure for random effect parameters. The default covariance is unstructured (i.e, cor = un) for all three model settings, i.e., univariate, univariate_by and multivariate. The alternative correlation structure available for univariate and univariate_by models is diagonal. While the cor = un models the full unstructured variance covariance structure, the cor = diagonal estimates only the variance (i.e, standard deviation) parameters and the covariance (i.e., correlation) parameters are set to zero. For multivariate model, options include un, diagonal and un_s. The un sets up the unstructured correlation implying that the group level random effects across response variables are drawn for a joint multivariate normal distribution with shared correlation parameters. The cor = diagonal specifies that only the variance parameter are estimates for each sub model whereas the correlation parameters set to zero. Option cor = un_s allows for estimating unstructured variance covariance parameters separately for each response variable.

Note that user need not to define all or any of these options (i.e., groupvar, dist, cor, or by) because if unspecified, they are are automatically set to their default values. Also note that only groupvar from the group_arg argument is passed on to the univariate_by and multivariate models because these model have their own additional options specified via the univariate_by and multivariate arguments. Lastly, the group_arg is completely ignored when user specify random effects via the vertical bar || approach (see a_formula_gr for details) or when fitting a hierarchical model with three or more levels of hierarchy (see a_formula_gr_str for details).

sigma_group_arg

Specify arguments for modelling distributional level random effects, sigma. The approach used in setting up the sigma_group_arg is exactly same as described above for the group level random effects (see group_arg for details).

univariate_by

Set up the univariate-by-subgroup model fitting (default NULL) via a named list as described below:

  • The by (an optional character string) is used to specify the variable (must be a factor variable) to define the sub models (default NA).

  • The cor (an optional character string) specifies the correlation structure. The options available are un and diagonal. The un = un (default) models the full unstructured variance covariance structure, whereas the cor = diagonal estimates only the variance (i.e, standard deviation) parameters and the covariance (i.e., correlation) parameters are set to zero.

  • The terms (an optional character string) specifies the method used in setting up the sub models. Options are 'subset' (default) and 'weights'. See brms::`addition-terms` for details.

multivariate

Set up the multivariate model fitting (default NULL) arguments as a named list:

  • The mvar (logical, default FALSE) indicates whether to fit a multivariate model.

  • The cor (an optional character string) sets up the correlation structure. The options available are un, diagonal and un_s. The un sets up the unstructured correlation implying that the group level random effects across response variables are drawn for a joint multivariate normal distribution with shared correlation parameters. The cor = diagonal specifies that only the variance parameter are estimates for each sub model whereas the correlation parameters set to zero. Option cor = un_s allows for estimating unstructured variance covariance parameters separately for each response variable.

  • The rescor (logical, default TRUE) indicates whether to estimate the residual correlation between response variables.

a_prior_beta

Specify priors for the fixed effect parameter, a. (default student_t(3, ymean, ysd, autoscale = TRUE)). The key points in prior specification that are applicable for all parameters are highlighted below. For full details on prior specification, please see brms::prior().

  • Allowed distributions are normal, student_t, cauchy, lognormal, uniform, exponential, gamma and inv_gamma (inverse gamma).

  • For each distribution, upper and lower bounds can be set via options lb and ub (default NA for both lb and ub).

  • For location-scale based distributions (such as normal, student_t, cauchy, and lognormal), an option autosclae (default FALSE) can be used to multiply the scale parameter by a numeric value. Both brms and rstanarm packages allow similar auto scaling under the hood. While rstanarm earlier used to set autosclae as TRUE which internally multiplied scale parameter by a value 2.5 (recently authors changed this behavior to FALSE), the brms package sets scaling factor as 1.0 or 2.5 depending on the standard deviation of the response variable (See brms::prior()). The bsitar package offers full flexibility in choosing the scaling factor as any real number instead of 1.0 or 2.5 (e.g., autosclae = 5.0). When autosclae = TRUE, 2.5 is the default scaling factor.

  • For location-scale based distributions such as normal, options fxl (function location) and fxs (function scale) are available to apply any function such as log and sqrt, or a function defined in the R environment to transform the location and scale parameters. For example, prior normal(2, 5, fxl = 'log', fxs = 'sqrt') will be translated internally as normal(log(2), sqrt(5)) implying that the actually prior assigned will be normal(0.693, 2.23). The default for both fxl and fxs is NULL.

  • Like fxl and fxs functions, another function fxls (function location scale) is available to transform location and scale parameters for the location-scale based distributions such as normal. Unlike fxl and fxs functions which transform location and scale parameters individually, the fxls function is used for those transformation for which both location and scale parameters are needed in the transformation of these parameters. For example, the transformation of location and scale parameters for the normal prior on log scale is as follows:
    log_location = log(location / sqrt(scale^2 / location^2 + 1)),
    log_scale = sqrt(log(scale^2 / location^2 + 1)),
    where location and scale are the original parameters supplied by the user and log_location and log_scale are the equivalent parameters on the log scale. The fxls can be set as a character string or a list comprised of two functions where first function of the list will be used to transform the location parameter and the second function will be for the scale transformation. If a character string is used such as fxls = 'log', then the above transformation for the log parametrization will be applied automatically. Note that if using a list, then the list must be crated within the R environment and then passed this to the fxls as:
    location_fun <- function(location, scale) { log(location / sqrt(scale^2 / location^2 + 1)) }
    scale_fun <- function(location, scale) { sqrt(log(scale^2 / location^2 + 1)) }
    fxls_fun <- list(location_fun = location_fun, scale_fun = scale_fun)
    fxls = 'fxls_fun'
    As an example, normal(2, 5, fxls = 'fxls_fun'. The default for fxls is NULL.

  • For strictly positive distributions such as exponential, gamma and inv_gamma, the lower bound (lb) is automatically set to zero i.e., lb = 0.

  • For uniform distribution, an option addrange is available to symmetrically widen the prior range. For example, prior uniform(a, b, addrange = 5) implies that the lower and upper limits will be evaluated as uniform(a-5, b+5).

  • For exponential distribution, the rate parameter is evaluated as inverse. In other words, prior set as exponential(10) is translated to 0.1 i.e., exponential(1.0/10.0).

  • User need not to specify each option explicitly because the missing options are set to their default values automatically. For example, the prior specified as a_prior_beta = normal(location = 5, scale = 1, lb = NA, ub = NA, addrange = NA, autosclae = FALSE, fxl = NULL, fxs = NULL)) is same as a_prior_beta = normal(5, 1)).

  • For univariate_by multivariate models, priors can be same for sub models (e.g., a_prior_beta = normal(5, 1)), or different for each sub such as a_prior_beta = list(normal(5,1), normal(10, 5)).

The location parameter for the location-scale based distributions can be specified as mean (by specifying 'ymean') or the median (by using 'ymedian') of the response variable. Similarly, the scale parameter can be set as the standard deviation (SD) or the median absolute deviation (MAD) of the response variable via 'ysd' and 'ymad' options. Another option available is to use the coefficients 'lm' from the simple linear model applied to the data (e.g., lm(y ~ age, data = data). This is true even when model has covariates i.e., lm(y ~ age + cov, data = data). A few examples of specifying priors using these options are:
a_prior_beta = normal(ymean, ysd),
a_prior_beta = normal(ymean, ysd),
a_prior_beta = normal(ymedian, ymad),
a_prior_beta = normal(lm, ysd),
Note that options 'ymean', 'ymedian', 'ysd', 'ymad', 'ymad' and 'lm' are available only for the fixed effect parameter, a and not for parameters b, c or d.

b_prior_beta

Specify priors for the fixed effect parameter, b. (default student_t(3, 0, 3.5, autoscale = FALSE)). See a_prior_beta for details.

c_prior_beta

Specify priors for the fixed effect parameter, c. (default student_t(3, 0, 1.5, autoscale = FALSE)). See a_prior_beta for details.

d_prior_beta

Specify priors for the fixed effect parameter, d. (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_prior_beta for details.

s_prior_beta

Specify priors for the fixed effect parameter, s (i.e., spline coefficients). (default student_t(3, 0, 'lm', autoscale = TRUE)). The general approach is same as described earlier for the fixed effect parameters (see a_prior_beta for details). A few key points are highlighted below:

  • When specifying location-scale based priors using 'lm' such as s_prior_beta = normal(lm, 'lm') , it sets spline coefficients obtained from the simple linear model fit as location parameter whereas scale parameter is based on the standard deviation of the spline design matrix. However, typically, the location parameter is set at '0' (default), and the autoscale option is set as TRUE.

  • For location-scale based priors, an option sethp (logical, default FALSE) is available to set up the hierarchical priors. To set sethp as TRUE, the prior is specified as s_prior_beta = normal(0, 'lm', autoscale = TRUE, sethp = TRUE)). When sethp = TRUE, instead of setting prior as s ~ normal(0, 'lm') the hierarchical priors are set as s ~ normal(0, 'hp') where 'hp' is defined as hp ~ normal(0, 'lm'). Note that the scale parameter for the hp ~ normal(0, 'lm') is automatically taken from the s ~ normal(0, 'hp'). Setting sethp = TRUE implies that the scale for spline coefficients is estimated from the data itself. The distribution of hierarchical priors is automatically matched with the prior set for the s parameter, or else can be set by the same sethp option. For example, s_prior_beta = normal(0, 'lm', sethp = cauchy) will be translated to s ~ normal(0, 'lm'), hp ~ cauchy(0, 'lm').

  • For uniform priors, the optionaddrange can be used to symmetrically expand the prior range.

It is observed that location scale based prior distributions (e.g, normal, student_t, and cauchy) perform well for the spline coefficients.

a_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect parameter, a (default student_t(3, 0, 5.0, autoscale = FALSE)). The approach is same as described earlier for the a_prior_beta except that options 'ymean', 'ymedian', 'ysd', and 'ymad' are not allowed. The Option 'lm' for the location parameter sets covariate(s) coefficient obtained from the simple linear model fit to the data. Note that option 'lm' is allowed only for the a_cov_prior_beta and not for the covariate(s) included in the other fixed or random effect parameters. Lastly, separate priors can be specified for sub models when fitting univariate_by and a_prior_beta models (see a_prior_beta).

b_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect parameter, b (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_cov_prior_beta for details.

c_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect parameter, c (default student_t(3, 0, 0.1, autoscale = FALSE)). See a_cov_prior_beta for details.

d_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect parameter, d (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_cov_prior_beta for details.

s_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect parameter, s (default student_t(3, 0, 10.0, autoscale = FALSE)). However, as described earlier, (see s_formual), the SITAR model does not allows for inclusion of covariate(s) in the spline design matrix. If and when covariate(s) are specified (see s_formual), the approach of setting priors for the covariate(s) included in the parameter, s via s_cov_prior_beta is same as described earlier for the fixed effect parameter a (see a_cov_prior_beta). For the location-scale based priors, the option 'lm' sets the location parameter same as the spline coefficients obtained from fitting a simple linear to the data.

a_prior_sd

Specify priors for the random effect parameter, a. (default student_t(3, 0, 'ysd', autoscale = FALSE)). Note that prior is on the standard deviation (which is the square root of the variance) and not on the variance itself. The approach of setting the prior is same as described earlier for the fixed effect parameter, a (See a_prior_beta) with the exception that location parameter is always zero. The lower bound 0 is automatically set by the brms::brm(). For univariate_by and multivariate models, priors can be same for sub models or different for each sub model (See a_prior_beta).

b_prior_sd

Specify priors for the random effect parameter, b (default student_t(3, 0, 2.0, autoscale = FALSE)). See a_prior_sd for details.

c_prior_sd

Specify priors for the random effect parameter, c (default student_t(3, 0, 1.25, autoscale = FALSE)). See a_prior_sd for details.

d_prior_sd

Specify priors for the random effect parameter, d (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_prior_sd for details.

a_cov_prior_sd

Specify priors for the covariate(s) included in the random effect parameter, a (default student_t(3, 0, 5.0, autoscale = FALSE)). The approach is same as described earlier for the a_cov_prior_beta except that no pre-defined option (e.g., 'lm') is allowed.

b_cov_prior_sd

Specify priors for the covariate(s) included in the random effect parameter, b (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_cov_prior_sd for details.

c_cov_prior_sd

Specify priors for the covariate(s) included in the random effect parameter, c (default student_t(3, 0, 0.1, autoscale = FALSE)). See a_cov_prior_sd for details.

d_cov_prior_sd

Specify priors for the covariate(s) included in the random effect parameter, d (default student_t(3, 0, 1.0, autoscale = FALSE)). See a_cov_prior_sd for details.

a_prior_sd_str

Specify priors for the random effect parameter, a when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_prior_sd).

b_prior_sd_str

Specify priors for the random effect parameter, b when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_prior_sd_str).

c_prior_sd_str

Specify priors for the random effect parameter, c when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_prior_sd_str).

d_prior_sd_str

Specify priors for the random effect parameter, d when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_prior_sd_str).

a_cov_prior_sd_str

Specify priors for the covariate(s) included in the random effect parameter, a when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_cov_prior_sd).

b_cov_prior_sd_str

Specify priors for the covariate(s) included in the random effect parameter, b when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_cov_prior_sd_str).

c_cov_prior_sd_str

Specify priors for the covariate(s) included in the random effect parameter, c when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_cov_prior_sd_str).

d_cov_prior_sd_str

Specify priors for the covariate(s) included in the random effect parameter, d when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier (see the a_cov_prior_sd_str).

sigma_prior_beta

Specify priors for the fixed effect distributional parameter, sigma (default student_t(3, 0, 1.0, autoscale = FALSE)). The approach is same as described earlier for the fixed effect parameter, a (See a_prior_beta for details).

sigma_cov_prior_beta

Specify priors for the covariate(s) included in the fixed effect distributional parameter, sigma (default student_t(3, 0, 0.5, autoscale = FALSE)). The approach is same as described earlier for the covariate(s) included the fixed effect parameter, a (see a_cov_prior_beta for details).

sigma_prior_sd

Specify priors for the random effect distributional parameter, sigma (default student_t(3, 0, 0.25, autoscale = FALSE)). The approach is same as described earlier the random effect parameter a (see a_prior_sd for details).

sigma_cov_prior_sd

Specify priors for the covariate(s) included in the random effect distributional parameter, sigma (default student_t(3, 0, 0.15, autoscale = FALSE)). The approach is same as described earlier for the covariate(s) included in the random effect parameter a (see a_cov_prior_sd for details).

sigma_prior_sd_str

Specify priors for the the random effect distributional parameter, sigma when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier for the random effect parameter, a (See a_prior_sd_str for details).

sigma_cov_prior_sd_str

Specify priors for the covariate(s) included in the random effect distributional parameter, sigma when fitting a hierarchical model with three or more levels of hierarchy (default NULL). The approach is same as described earlier for the covariate(s) included in the random effect parameter, a (See a_cov_prior_sd_str for details).

rsd_prior_sigma

Specify priors for the residual standard deviation parameter sigma (default exponential('ysd', autoscale = TRUE)). Note that this argument is evaluated only when both dpar_formula and sigma_formula are NULL. For location scale based distributions, user can use specify standard deviation (ysd) or the median absolute deviation (ymad) as scale parameter.

dpar_prior_sigma

Specify priors for the fixed effect distributional parameter sigma (default student_t(3, 0, 'ysd', autoscale = TRUE)). The argument is evaluated only when sigma_formula is NULL.

dpar_cov_prior_sigma

Specify priors for the covariate(s) included in the fixed effect distributional parameter sigma (default student_t(3, 0, 1.0, autoscale = FALSE)). The argument is evaluated only when sigma_formula is NULL.

autocor_prior_acor

Specify priors for the autocorrelation parameters when fitting a model with the 'arma', 'ar' or the 'ma' autocorrelation structures (see autocor_formula for details). The only allowed distribution is uniform distribution bounded between -1 and +1 (default uniform(-1, 1, autoscale = FALSE)). For the unstructured residual correlation structure, a separate argument autocor_prior_unstr_acor is used to specify the priors (see below).

autocor_prior_unstr_acor

Specify priors for the autocorrelation parameters when fitting a model with the unstructured ('un') autocorrelation structure (see autocor_formula for details). The only allowed distribution is the lkj (default lkj(1)). See gr_prior_cor below for details on setting up the lkj prior.

gr_prior_cor

Specify priors for the correlation parameter(s) of group-level random effects (default lkj(1)). The only allowed distribution is lkj that is specified via a single parameter eta (see brms::prior() for details).

gr_prior_cor_str

Specify priors for the correlation parameter(s) of group-level random effects when fitting a hierarchical model with three or more levels of hierarchy (default lkj(1)). The approach is same as described above (See gr_prior_cor).

sigma_prior_cor

Specify priors for the correlation parameter(s) of distributional random effects sigma (default lkj(1)). The only allowed distribution is lkj (see gr_prior_cor for details). Note that currently brms::brm() does not allow for setting different lkj priors for the group level and distributional random effects that share the same group identifier (id). Therefore, either create a copy of group identifier and use that but then this will not allow correlation parameter across group random effects and sigma.

sigma_prior_cor_str

Specify priors for the correlation parameter(s) of distributional random effects sigma when fitting a hierarchical model with three or more levels of hierarchy (default lkj(1)). The approach is same as described above (See sigma_prior_cor).

mvr_prior_rescor

Specify priors for the residual correlation parameter when fitting a multivariate model (default lkj(1)). The only allowed distribution is lkj (see gr_prior_cor for details).

init

Initial values for the sampler. If init = '0', all parameters are initialized to zero. For init = 'random', Stan will randomly generate initial values for each parameter within a range specified by the init_r (see below), or between -2 and 2 in unconstrained space when init_r = NULL. Another available option is init = 'prior' which sets initial values based on the prior specified for each parameter. Lastly, when init = NULL (default), initial value for each parameter is specified by the corresponding init arguments defined see below.

init_r

A positive real value to set range for the random generation of initial values (default NULL). This argument is evaluated only when init = 'random'.

a_init_beta

Initial values for the fixed effect parameter, a (default 'lm'). Options available are '0', 'random' and 'prior'. In addition, user can specify 'ymean' and 'ymedian' to set initial as the mean or the median of the response variable. Also, option 'lm' can be used to set coefficients obtained from the simple linear model fitted to the data as initial values for the fixed effect parameter, a. Note that this is similar to the location parameter for prior on the fixed effect parameter a (see a_prior_beta for details). These options ('ymean', 'ymedian', and 'lm') are available only for the fixed effect parameter a and not for other parameters described below. Lastly, For univariate_by and multivariate models, the initials can be same (e.g., a_init_beta = 0) for sub models or different for each sub model such as
list(a_init_beta = '0', a_init_beta = 'lm').

b_init_beta

Initial values for the fixed effect parameter, b (default '0'). See a_init_beta for details.

c_init_beta

Initial values for the fixed effect parameter, c (default '0'). See a_init_beta for details.

d_init_beta

Initial values for the fixed effect parameter, d (default '0'). See a_init_beta for details.

s_init_beta

Initial values for the fixed effect parameter, s (default 'lm'). Options available are '0', 'random', 'prior', and 'lm'.

a_cov_init_beta

Initial values for the covariate(s) included in the fixed effect parameter, a (default '0'). Options available are '0', 'random', 'prior' and 'lm'. The option 'lm' is available only for the a_cov_init_beta and not for the covariate(s) included in other fixed effect parameters b, c, or d.

b_cov_init_beta

Initial values for covariate(s) included in the fixed effect parameter, b (default '0'). See a_cov_init_beta for details.

c_cov_init_beta

Initial values for covariate(s) included in the fixed effect parameter, c (default '0'). See a_cov_init_beta for details.

d_cov_init_beta

Initial values for covariate(s) included in the fixed effect parameter, d (default '0'). See a_cov_init_beta for details.

s_cov_init_beta

Initial values for covariate(s) included in the fixed effect parameter, s (default 'lm'). See a_cov_init_beta for details. The option 'lm' will set the spline coefficients obtained from the simple linear model fitted to the data. Note that s_cov_init_beta is only a placeholder and is not valuated because covariate(s) are not allowed for the s parameter. See s_formula for details.

a_init_sd

Initial value for the standard deviation of group level random effect parameter, a (default 'random'). Options available are '0', 'random' and 'prior'. In addition, 'ysd', 'ymad', 'lme_sd_a', and 'lm_sd_a' can be used to specify initial values as described below:

  • The 'ysd' sets standard deviation (sd) of the response variable as an initial value.

  • The 'ymad' sets median absolute deviation (mad) of the response variable as an initial value.

  • The 'lme_sd_a' sets initial value based on the standard deviation of random Intercept obtained from the linear mixed model (nlme::lme()) fitted to the data. Note that in case nlme::lme() fails to converge, the option 'lm_sd_a' (see below) is set automatically.

  • The 'lm_sd_a' sets square root of the residual variance obtained from the simple linear model applied to the data as an initial value.

Note that these option described above ('ysd', 'ymad', 'lme_sd_a', and 'lm_sd_a') are available only for the random effect parameter a and not for other group level random effects. Lastly, when fitting univariate_by and multivariate models, user can set same initials for sub models, or different for each sub model.

b_init_sd

Initial value for the standard deviation of group level random effect parameter, b (default 'random'). See a_init_sd for details.

c_init_sd

Initial values for the group level random effect parameter, c (default 'random'). See a_init_sd for details.

d_init_sd

Initial value for the standard deviation of group level random effect parameter, d (default 'random'). See a_init_sd for details.

a_cov_init_sd

Initial values for the covariate(s) included in the random effect parameter, a (default 'random'). Options available are '0', 'random' and 'prior'.

b_cov_init_sd

Initial values for the covariate(s) included in the random effect parameter, b (default 'random'). See a_cov_init_sd for details.

c_cov_init_sd

Initial values for the covariate(s) included in the random effect parameter, c (default 'random'). See a_cov_init_sd for details.

d_cov_init_sd

Initial values for the covariate(s) included in the random effect parameter, d (default 'random'). See a_cov_init_sd for details.

sigma_init_beta

Initial values for the fixed effect distributional parameter, sigma (default 'random'). Options available are '0', 'random' and 'prior'.

sigma_cov_init_beta

Initial values for the covariate(s) included in the fixed effect distributional parameter, sigma (See sigma_init_beta for details).

sigma_init_sd

Initial value for the standard deviation of distributional random effect parameter, sigma (default 'random'). The approach is same as described earlier for the group level random effect parameters such as a (See a_init_sd for details).

sigma_cov_init_sd

Initial values for the covariate(s) included in the distributional random effect parameter, sigma (default 'random'). (See a_cov_init_sd for details).

gr_init_cor

Initial values for the correlation parameters of group-level random effects parameters (default 'random'). Allowed options are '0', 'random' and 'prior'.

sigma_init_cor

Initial values for the correlation parameters of distributional random effects parameter sigma (default 'random'). Allowed options are '0', 'random' and 'prior'.

rsd_init_sigma

Initial values for the residual standard deviation parameter, sigma (default 'random'). Options available are '0', 'random' and 'prior'. In addition, options 'lme_rsd' and 'lm_rsd' can be used as follows. The lme_rsd sets initial value based on the standard deviation of residuals obtained from the linear mixed model (nlme::lme()) fitted to the data. The initial value set by the 'lm_rsd' is the square root of the residual variance from the simple linear model applied to the data. Note that in case nlme::lme() fails to converge, then option 'lm_sd_a' is set automatically. The argument rsd_init_sigma is evaluated when dpar_formula and sigma_formula are set to NULL.

dpar_init_sigma

Initial values for the distributional parameter sigma (default 'random'). The approach and options available are same as described above for the rsd_init_sigma. This argument is evaluated only when dpar_formula is not NULL.

dpar_cov_init_sigma

Initial values for the covariate(s) included in the distributional parameter, sigma (default 'random'). Allowed options are '0', 'random', and 'prior'.

autocor_init_acor

Initial values for autocorrelation parameter (see autocor_formula for details). Allowed options are '0', 'random', and 'prior' (default 'random').

autocor_init_unstr_acor

Initial values for unstructured residual autocorrelation parameters (default 'random'). Allowed options are '0', 'random', and 'prior'. Note that the approach to set initials for autocor_init_unstr_acor is identical to the gr_init_cor.

mvr_init_rescor

Initial values for the residual correlation parameter when fitting a multivariate model (default 'random'). Allowed options are '0', 'random', and 'prior'.

r_init_z

Initial values for the standardized group level random effect parameters (default 'random'). These parameters are part of the Non-Centered Parameterization (NCP) approach used in the brms::brm().

vcov_init_0

A logical (default TRUE) to set initials for variance (i.e, standard deviation) and covariance (i.e., correlation) parameters as zero. This allows for setting custom initials for the fixed effects parameters but zero for variance covariance parameters.

jitter_init_beta

A value as proportion (between 0 and 1) to perturb the initial values for fixed effect parameters. The default is NULL indicating that same initials are used across all chains. A sensible option can be jitter_init_beta = 0.1 as it mildly perturb the initials. Note that jitter is not absolute but proportion of the specified initial value. For example, if initial value is 100, then jitter_init_beta = 0.1 implies that the perturbed initial value will be within 90 and 110. On the other hand, if initial values is 10, then the perturbed initial value will be within 9 and 11.

jitter_init_sd

A value as proportion (between 0 and 1) to perturb the initials for standard deviation of random effect parameters. The default is NULL indicating that same initials are used across all chains. An option of setting jitter_init_beta = 0.01 looked good during early testing.

jitter_init_cor

A value as proportion (between 0 and 1) to perturb the initials for correlation parameters of random effects. The default is NULL indicating that same initials are used across all chains. An option of setting jitter_init_beta = 0.001 looked good during early testing.

prior_data

An optional argument (a named list, default NULL) that can be used to pass information to the prior arguments for each parameter (e.g., a_prior_beta). The prior_data is particularly helpful in passing a long vector or a matrix as priors. These vectors and matrices can be created in the R framework and then passed using the prior_data. For example, to pass a vector of location and scale parameters when setting priors for covariate coefficients (with 10 dummy variables) included in the fixed effects parameter a, the following steps can be used to set covariate priors that each has scale parameter (sd) as 5 but mean values are drawn from a normal distribution with mean = 0 and sd = 1:

  • create the named objects prior_a_cov_location and prior_a_cov_scale in the R environment as follows:
    prior_a_cov_location <- rnorm(n = 10, mean = 0, sd = 1)
    prior_a_cov_scale <- rep(5, 10)

  • specify the above created objects prior_a_cov_location and prior_a_cov_scale in the prior_data as follows:
    prior_data = list(prior_a_cov_location = prior_a_cov_location, prior_a_cov_scale = prior_a_cov_scale).

  • now use the prior_data objects to set up the priors as:
    a_cov_prior_beta = normal(prior_a_cov_location, prior_a_cov_scale).

init_data

An optional argument (a named list, default NULL) that can be used to pass information to the initial arguments. The approach is the exact same as described above for the prior_data.

init_custom

Specify a custom initials object (a named list). The named list is directly passed to the init argument without checking for the dimensions and name matching. Note that in case initials are set for some parameter by using parameter specific argument (e.g., a_init_beta = 0), then init_custom is only passed to those parameters for which initials are missing. If user want to override this behaviors i.e., to pass all init_custom ignoring parameter specific initials, then init should be set as init = 'custom'.

verbose

An optional argument (logical, default FALSE) to indicate whether to print information collected during setting up the model formula priors, and initials. As an example, the user might be interested in knowing the response variables created for the sub model when fitting a univariate-by-subgroup model. This information can then be used in setting the desired order of options passed to each such model such as df, prior, initials etc.

expose_function

An optional argument (logical, default FALSE) to indicate whether to expose Stan function used in model fitting.

get_stancode

An optional argument (logical, default FALSE) to get the stancode (see brms::stancode() for details).

get_standata

An optional argument (logical, default FALSE) to get the standata (see brms::standata() for details).

get_formula

An optional argument (logical, default FALSE) to get the formula. (see brms::brmsformula() for details).

get_stanvars

An optional argument (logical, default FALSE) to get the stanvars (see brms::stanvar() for details).

get_priors

An optional argument (logical, default FALSE) to get the priors. (see brms::get_prior() for details).

get_priors_eval

An optional argument (logical, default FALSE) to get the priors specified by the user.

get_init_eval

An optional argument (logical, default FALSE) to get the initial values specified by the user.

validate_priors

An optional argument (logical, default FALSE) to validate the specified priors. (see brms::validate_prior() for details).

set_self_priors

An optional argument (default NULL) to manually specify the priors. Note that set_self_priors is passed directly to the brms::brm() without performing any checks.

set_replace_priors

An optional argument (default NULL) to replace part of prior object. This is for internal use only.

set_same_priors_hierarchy

An optional argument (default NULL) to replace part of the prior object. This is for internal use only.

outliers

An optional argument (default NULL) to remove outliers. The argument should be a named list which is passed directly to the sitar::velout() and sitar::zapvelout() functions. This is for internal use only.

unused

An optional formula that defines variables that are unused in the model but should still be stored in the model's data frame. This can be useful when variables are required during the post-processing.

chains

Number of Markov chains (default 4).

iter

Number of total iterations per chain, including warmup (default 2000)

warmup

A positive integer specifying the number of warmup (aka burnin) iterations. This also specifies the number of iterations used for stepsize adaptation, so warmup draws should not be used for inference. The number of warmup should not be larger than iter and the default is iter/2.

thin

A positive integer. Set thin > 1 to save memory and computation time if iter is large. The thin > 1 is often used in cases with high autocorrelation of MCMC draws An indication of high autocorrelation is poor mixing of chain ( i.e., high rhat values) despite the fact that model recovers the parameters well. An easy diagnostic to check for autocorrelation of MCMC draws is to use the mcmc_acf function from the bayesplot.

cores

Number of cores to be used when executing the chains in parallel. See brms::brm() for details. Note that unlike brms::brm(), which sets default cores argument as cores=getOption("mc.cores", 1), the default cores in bsitar package is cores=getOption("mc.cores", 'optimize') which optimizes the utilization of system resources. The maximum number of cores that can be deployed is calculated as the maximum number of available cores minus 1. When the number of available cores is greater than the number of chains (see chains), then number of cores is set equal to the number of chains. Another option is to set cores as getOption("mc.cores", 'maximise') which sets the number of cores as the maximum number of cores available from the system regardless of the number of chains specified. Note that the user can also set cores argument similar to the brms::brm() i.e., getOption("mc.cores", 1). All these three options can be set globally as options(mc.cores = x) where x can be 'optimize', 'maximise' or 1. Lastly, the cores can set by directly by specifying an integer e.g., cores = 4.

backend

A character string naming the package to be used when executing the the Stan model. Options are "rstan" (the default) or "cmdstanr". Can be set globally for the current R session via the "brms.backend". See brms::brm() for details.

threads

Number of threads to be used in within-chain parallelization. Note that unlike the brms::brm() which sets the threads argument as getOption("brms.threads", NULL) implying that no within-chain parallelization is used by default, the bsitar package, by default, sets threads as getOption("brms.threads", 'optimize') to utilize the available resources from the modern computing systems. The number of threads per chain is set as the maximum number of cores available minus 1. Another option is to set threads as getOption("brms.threads", 'maximise') which set the number threads per chains same as the maximum number of cores available. User can also set the threads similar to the brms i.e., getOption("brms.threads", NULL). All these three options can be set globally as options(brms.threads = x) where x can be 'optimize', 'maximise' or NULL. Alternatively, the number of threads can be set directly as threads = threading(x) where X is an integer. Other arguments that can be passed to the threads are grainsize and the static. See brms::brm() for further details on within-chain parallelization.

opencl

The platform and device IDs of the OpenCL device to use for fitting using GPU support. If you don't know the IDs of your OpenCL device, c(0,0) is most likely what you need. For more details, see opencl. Can be set globally for the current R session via the "brms.opencl" option.

normalize

Indicates whether normalization constants should be included in the Stan code (default TRUE). Setting it to FALSE requires Stan version >= 2.25. If FALSE, sampling efficiency may be increased but some post processing functions such as brms::bridge_sampler() will not be available. This option can be controlled globally via the brms.normalize option.

algorithm

Character string naming the estimation approach to use. Options are "sampling" for MCMC (the default), "meanfield" for variational inference with independent normal distributions, "fullrank" for variational inference with a multivariate normal distribution, or "fixed_param" for sampling from fixed parameter values. Can be set globally for the current R session via the "brms.algorithm" option (see options).

control

A named list to control the sampler's behavior. The default are same as brms::brm() with the exception that the max_treedepth has been increased form 10 to 12 to allow better exploration of typically challenging posterior geometry posed by the nonlinear model. However, another control parameter, the adpat_delta which is also often need to be increased for nonlinear model, has be set to default setting as in brms::brm() i.e, 0.8. This is to avoid unnecessarily increasing the sampling time. See brms::brm() for full details on control parameters and their default values.

sample_prior

Indicates whether to draw sample from priors in addition to the posterior draws. Options are "no" (the default), "yes", and "only". Among others, these draws can be used to calculate Bayes factors for point hypotheses via brms::hypothesis(). Please note that improper priors are not sampled, including the default improper priors used by brm. See brms::set_prior() on how to set (proper) priors. Please also note that prior draws for the overall intercept are not obtained by default for technical reasons. See brms::brmsformula() how to obtain prior draws for the intercept. If sample_prior is set to "only", draws are drawn solely from the priors ignoring the likelihood, which allows among others to generate draws from the prior predictive distribution. In this case, all parameters must have proper priors.

save_pars

An object generated by save_pars controlling which parameters should be saved in the model. The argument has no impact on the model fitting itself.

drop_unused_levels

Should unused factors levels in the data be dropped? Defaults to TRUE.

stan_model_args

A list of further arguments passed to rstan::stan_model for backend = "rstan" or backend = "cmdstanr", which allows to change how models are compiled.

refresh

An integer to set the printing of every nth iteration. Default NULL indicates that refresh will be set automatically by the brms::brm(). Setting refresh is useful especially when thin is greater than 1. In that case, the refresh is recalculated as (refresh * thin) / thin.

silent

Verbosity level between 0 and 2. If 1 (the default), most of the informational messages of compiler and sampler are suppressed. If 2, even more messages are suppressed. The actual sampling progress is still printed. Set refresh = 0 to turn this off as well. If using backend = "rstan" you can also set open_progress = FALSE to prevent opening additional progress bars.

seed

The seed for random number generation to make results reproducible. If NA (the default), Stan will set the seed randomly.

save_model

A character string or NULL (default). If not NULL, then the model's Stan code is saved via in a text file named after the string supplied in save_model.

fit

An instance of S3 class brmsfit derived from a previous fit; defaults to NA. If fit is of class brmsfit, the compiled model associated with the fitted result is re-used and all arguments modifying the model code or data are ignored. It is not recommended to use this argument directly, but to call the update method, instead.

file

Either NULL or a character string. In the latter case, the fitted model object is saved via saveRDS in a file named after the string supplied in file. The .rds extension is added automatically. If the file already exists, brm will load and return the saved model object instead of refitting the model. Unless you specify the file_refit argument as well, the existing files won't be overwritten, you have to manually remove the file in order to refit and save the model under an existing file name. The file name is stored in the brmsfit object for later usage.

file_compress

Logical or a character string, specifying one of the compression algorithms supported by saveRDS. If the file argument is provided, this compression will be used when saving the fitted model object.

file_refit

Modifies when the fit stored via the file argument is re-used. Can be set globally for the current R session via the "brms.file_refit" option (see options). For "never" (default) the fit is always loaded if it exists and fitting is skipped. For "always" the model is always refitted. If set to "on_change", brms will refit the model if model, data or algorithm as passed to Stan differ from what is stored in the file. This also covers changes in priors, sample_prior, stanvars, covariance structure, etc. If you believe there was a false positive, you can use brmsfit_needs_refit to see why refit is deemed necessary. Refit will not be triggered for changes in additional parameters of the fit (e.g., initial values, number of iterations, control arguments, ...). A known limitation is that a refit will be triggered if within-chain parallelization is switched on/off.

future

Logical; If TRUE, the future package is used for parallel execution of the chains and argument cores will be ignored. Can be set globally for the current R session via the "future" option. The execution type is controlled via plan (see the examples section below).

parameterization

A character string to specify Non-centered parameterization, NCP ('ncp') or the Centered parameterization, CP ('cp') to draw group level random effect. The NCP is generally recommended when likelihood is not strong (e.g., a few number of observations per individual). The NCP is the default (and only) approach implemented in the brms::brm(). The CP parameterization, on the other hand, is often considered more efficient than NCP when a relatively large number of observations are available across individual. The 'relatively large number' is not defined in the literature and we follow a general approach wherein CP parameterization is used when each individual provides at least 10 repeated measurements and NCP otherwise. Note this automatic behavior is set only when the argument parameterization = NULL. To set CP parameterization, use parameterization = 'cp'. The default is parameterization = 'ncp'. Note that since brms::brm() does not offer CP parameterization, the brms::brm() generated stancode is first edited internally and then the model is fit using the rstan::rstan() or cmdstanr, depending on the backend choice. Therefore, we caution that CP parameterization should be considered experimental and it may fail if structure of the brms::brm() generated stancode changes in future.

...

Further arguments passed to brms::brm()

Details

The SITAR is a shape-invariant nonlinear mixed effect growth curve model that fits a population average (i.e., mean average) curve to the data, and aligns each individual's growth trajectory to the underlying population average curve via a set of (typically) three random effects: the size, timing and intensity. Additionally, a slope parameter can be included as a random effect to estimate the variability in adult growth rate (See sitar::sitar() for details). The concept of shape invariant model (SIM) was first described by Lindstrom (1995) and later used by Beath (2007) to model infant growth data (birth to 2 years). The current version of the SITAR model is developed by Cole et al. (2010) and has been used extensively for modelling growth data (see Nembidzane et al. 2020 and Sandhu 2020).

The frequentist version of the SITAR model can be fit by using an already available R package, the sitar (Cole 2022). The framework of Bayesian implementation of the SITAR model in bsitar package is same as the sitar package with the exception that unlike the sitar package which uses B spline basis for the natural cubic spline design matrix (by calling the splines::ns()), the bsitar package uses the truncated power basis approach (see Harrell and others (2001), and Harrell Jr. (2022) for details) to construct the spline design matrix. Note that bsitar package builds the spline design matrix on the fly which is then included in the functions block of the Stan program and hence compiled (via the c++) during the model fit.

Like sitar package (Cole et al. 2010), the bsitar package fits SITAR model with (usually) up to three random effects: the size (parameter defined as a), the timing (parameter defined as b) and the intensity (parameter defined as c). In addition, there is a slope parameter (defined as d) that models the variability in the adult slope of the growth curve (See sitar::sitar() for details). Please note that author of the sitar package (Cole et al. 2010) enforces the inclusion of parameter d as a random effects only and therefore excludes it from the fixed fixed structure of the model. However, the bsitar package allows inclusion of parameter d in fixed and/or in the random effects structures of the SITAR model. For the three parameter version of the SITAR model (default), the fixed effects structure (i.e., population average trajectory) is specified as fixed = 'a+b+c', and the random effects structure that captures the deviation of individual trajectories from the population average curve is specified as random = 'a+b+c'. Note that user need not to include all the three parameters in the fixed or the random effect structure. For example, a fixed effect version of the SITAR model can be fit by setting randoms as an empty string i.e., random = ''. Furthermore, the fixed effect structure may include only a sub set of the parameters e.g., size and timing parameters (fixed = 'a+b') or the size and the intensity parameters (fixed = 'a+c'). The four parameters version of the SITAR model is fit by including parameter d in the fixed and/or the random arguments. Similar to the three parameter SITAR model, user can fit model with a sub set of the fixed and/or the random effects.

The sitar package internally depends on the brms package (see Bürkner 2022; Bürkner 2021). The brms can fit a wide range of hierarchical linear and nonlinear regression models including multivariate models. The brms itself depends on the Stan software program full Bayesian inference (see Stan Development Team 2023; Gelman et al. 2015). Like brms, the bsitar package allows a wide range of prior specifications that encourage the users to specify priors that actually reflect their prior knowledge about the human growth processes, (such as timing and intensity of the growth spurt). For prior specification, we follow the carefully crafted approaches used in the brms and rstanarm packages. For example, we follow the brms package in using the student_t distribution for the regression coefficients as well as the standard deviation for group level random effects, but set exponential distribution for the residual standard deviation as used in the rstanarm package. Like brms and rstanarm packages, the bsitar package allows for auto scaling of the scale parameter for the location-scale based distributions such as normal and student_t. While rstanarm earlier used to set autosclae as 2.5 (recently authors changed this behavior to FALSE), the brms package sets it as 1.0 or 2.5 depending on the standard deviation of the response variable (See brms::prior()). The bsitar package, on the other hand, offers full flexibility in choosing the scale factor as any real number (e.g., autosclae = 5.0). When autosclae = TRUE, the 2.5 is the default scaling factor. We strongly recommend to go through the well documented details on prior specifications used in brms and rstanarm packages.

Like brms package, the bsitar package offers a range of tools to evaluate the model fit that include posterior predictive check (see brms::pp_check()) and the leave one out (loo) cross validation (see brms::loo()). Furthermore, while the excellent post-processing support offered by the brms package is directly available to the users, the bsitar package includes many customized functions that allow for estimation and visualization of population average and individual specific distance (increase in size) and velocity (change in rate of growth), as well as computation of population average and individual specific growth parameters such as age at peak growth velocity (APGV) and the peak growth velocity (PGV).

Finally, the bsitar package allows three different types of model specifications: 'univariate', 'univariate_by' and 'multivariate'. A 'univariate' fitting involves a single model applied to an outcome whereas both 'univariate_by' and 'multivariate' specifications comprise two or more sub models. The 'univariate_by' fits two or more sub models to an outcome variable defined by a factor variable (e.g, sex). The data are typically stacked and the factor variable is used to set-up the sub models via the 'subset' option available in the brms::brm(). The 'multivariate' model allows simultaneous modelling of two or more outcomes with joint a distribution of the random effects. For both 'univariate_by' and 'multivariate' models, the bsitar package allows full flexibility in specifying separate arguments such as predictor variables (x), degree of freedom (df) for design matrix as well as the priors and the initial values. Furthermore, to enhance the ease of specifying different options and to make it user-friendly, there is no need to enclose the character option(s) in single or double quotes. For example to specify the 'univariate_by' for sex, the univariate_by = sex is same as univariate_by = 'sex' or univariate_by = "sex". The same applies for all character string options.

Value

An object of class brmsfit, bsiatr, that contains the posterior draws and other useful information about the model.

Note

The package is under continuous development and new models and post-processing features will be added soon.

Author(s)

Satpal Sandhu satpal.sandhu@bristol.ac.uk

References

Beath KJ (2007). “Infant growth modelling using a shape invariant model with random effects.” Statistics in Medicine, 26(12), 2547–2564. doi:10.1002/sim.2718, Type: Journal article.

Bürkner P (2021). “Bayesian Item Response Modeling in R with brms and Stan.” Journal of Statistical Software, 100(5), 1–54. doi:10.18637/jss.v100.i05.

Bürkner P (2022). brms: Bayesian Regression Models using Stan. R package version 2.18.0, https://CRAN.R-project.org/package=brms.

Cole T (2022). sitar: Super Imposition by Translation and Rotation Growth Curve Analysis. R package version 1.3.0, https://CRAN.R-project.org/package=sitar.

Cole TJ, Donaldson MDC, Ben-Shlomo Y (2010). “SITAR—a useful instrument for growth curve analysis.” International Journal of Epidemiology, 39(6), 1558–1566. ISSN 0300-5771, doi:10.1093/ije/dyq115, tex.eprint: https://academic.oup.com/ije/article-pdf/39/6/1558/18480886/dyq115.pdf.

Gelman A, Lee D, Guo J (2015). “Stan: A Probabilistic Programming Language for Bayesian Inference and Optimization.” Journal of Educational and Behavioral Statistics, 40(5), 530-543. doi:10.3102/1076998615606113.

Harrell FE, others (2001). Regression modeling strategies: with applications to linear models, logistic regression, and survival analysis, volume 608. Springer.

Harrell Jr. FE (2022). Hmisc: Harrell Miscellaneous. R package version 4.7-2, https://hbiostat.org/R/Hmisc/.

Lindstrom MJ (1995). “Self-modelling with random shift and scale parameters and a free-knot spline shape function.” Statistics in Medicine, 14(18), 2009-2021. doi:10.1002/sim.4780141807, https://pubmed.ncbi.nlm.nih.gov/8677401/.

Nembidzane C, Lesaoana M, Monyeki KD, Boateng A, Makgae PJ (2020). “Using the SITAR Method to Estimate Age at Peak Height Velocity of Children in Rural South Africa: Ellisras Longitudinal Study.” Children, 7(3), 17. ISSN 2227-9067, doi:10.3390/children7030017, https://www.mdpi.com/2227-9067/7/3/17.

Sandhu SS (2020). Analysis of longitudinal jaw growth data to study sex differences in timing and intensity of the adolescent growth spurt for normal growth and skeletal discrepancies. Thesis, University of Bristol.

Stan Development Team (2023). Stan Reference Manual version 2.31. https://mc-stan.org/docs/reference-manual/.

See Also

brms::brm() brms::brmsformula() brms::prior()

Examples


# Examples below fits SITAR model to the 'berkeley_exdata' which is a subset
# of the  Berkley height data. The same subset of the  Berkley height data
# has been used as an example data in the vignette for the 'sitar' package.
#
# The Berkley height data comprise of repeated growth measurements made on
# 66 boys and 70 girls (birth to 21 years). 
#
# The subset of the Berkley height data analysed here include growth 
# measurements for 70 girls (8 to 18 years).
#
# See 'sitar' package documentation for details on Berkley height data   
# (help file ?sitar::berkeley ). The details on subset data for 70 girls is 
# provided in the vignette('Fitting_models_with_SITAR', package = 'sitar').
  
# Fit frequentist SITAR model with df = 5 by using the sitar package 

# Get 'berkeley_exdata' data that has been already saved
berkeley_exdata <- getNsObject(berkeley_exdata)

model_ml <- sitar::sitar(x = age, y = height, id = id, 
                          df = 5, 
                          data = berkeley_exdata, 
                          xoffset = 'mean',
                          fixed = 'a+b+c', 
                          random = 'a+b+c',
                          a.formula = ~1, 
                          b.formula = ~1, 
                          c.formula = ~1
                          )


# Fit Bayesian SITAR model 

# To avoid mode estimation which takes time, the Bayesian SITAR model fit to  
# the 'berkeley_exdata' has been saved as an example fit ('berkeley_exfit').
# The model is fit using 2 chain (2000 iterations per) with thin set as 4 to 
# save time and memory.

# Check and confirm whether model fit object 'berkeley_exfit' exists
# berkeley_exfit <- bsitar:::berkeley_exfit
 berkeley_exfit <- getNsObject(berkeley_exfit)
 
 print(berkeley_exfit)
if(exists('berkeley_exfit')) {
  model <- berkeley_exfit
} else {
 # Fit model with default priors 
 # See documentation for prior on each parameter
  model <- bsitar(x = age, y = height, id = id, 
                  df = 3, 
                  data = berkeley_exdata,
                  xoffset = 'mean', 
                  fixed = 'a+b+c', 
                  random = 'a+b+c',
                  a_formula = ~1, 
                  b_formula = ~1, 
                  c_formula = ~1, 
                  threads = brms::threading(NULL),
                  chains = 2, cores = 2, iter = 6000, thin = 15)
                  
# Note that we can test for the sensitivity to the priors by re fitting the
# above model with flat (i.e., uniform) priors on the regression coefficients
# for parameters a, b and c.
model <- bsitar(x = age, y = height, id = id, 
                  df = 3, 
                  data = berkeley_exdata,
                  xoffset = 'mean', 
                  fixed = 'a+b+c', 
                  random = 'a+b+c',
                  a_formula = ~1, 
                  b_formula = ~1, 
                  c_formula = ~1, 
                  a_prior_beta = flat,
                  b_prior_beta = flat,
                  c_prior_beta = flat,
                  threads = brms::threading(NULL),
                  chains = 2, cores = 2, iter = 6000, thin = 15)
}

# Generate model summary
summary(model)

# Compare model summary with the maximum likelihood SITAR model
print(model_ml)


# Check model fit via posterior predictive checks. The plot_ppc is a based
# on the pp_check function from the brms package.  

plot_ppc(model, ndraws = 100)

# Plot distance and velocity curves using plot_conditional_effects() function.
# This function works exactly same as as conditional_effects() from the brms
# package with the exception that plot_conditional_effects allows for 
# plotting velocity curve also.

# Distance
plot_conditional_effects(model, deriv = 0)

# Velocity
plot_conditional_effects(model, deriv = 1)

# Plot distance and velocity curve along with the parameter estimates using 
# the plot_curves() function. This function works exactly the same way as 
# plot.sitar from the sitar package

plot_curves(model, apv = TRUE)

# Compare plot with the maximum likelihood SITAR model

plot(model_ml)




[Package bsitar version 0.2.1 Index]