csem {cSEM}  R Documentation 
csem(
.data = NULL,
.model = NULL,
.approach_2ndorder = c("2stage", "mixed"),
.approach_cor_robust = c("none", "mcd", "spearman"),
.approach_nl = c("sequential", "replace"),
.approach_paths = c("OLS", "2SLS"),
.approach_weights = c("PLSPM", "SUMCORR", "MAXVAR", "SSQCORR",
"MINVAR", "GENVAR","GSCA", "PCA",
"unit", "bartlett", "regression"),
.conv_criterion = c("diff_absolute", "diff_squared", "diff_relative"),
.disattenuate = TRUE,
.dominant_indicators = NULL,
.estimate_structural = TRUE,
.id = NULL,
.instruments = NULL,
.iter_max = 100,
.normality = FALSE,
.PLS_approach_cf = c("dist_squared_euclid", "dist_euclid_weighted",
"fisher_transformed", "mean_arithmetic",
"mean_geometric", "mean_harmonic",
"geo_of_harmonic"),
.PLS_ignore_structural_model = FALSE,
.PLS_modes = NULL,
.PLS_weight_scheme_inner = c("path", "centroid", "factorial"),
.reliabilities = NULL,
.starting_values = NULL,
.resample_method = c("none", "bootstrap", "jackknife"),
.resample_method2 = c("none", "bootstrap", "jackknife"),
.R = 499,
.R2 = 199,
.handle_inadmissibles = c("drop", "ignore", "replace"),
.user_funs = NULL,
.eval_plan = c("sequential", "multicore", "multisession"),
.seed = NULL,
.sign_change_option = c("none", "individual", "individual_reestimate",
"construct_reestimate"),
.tolerance = 1e-05
)
.data 
A 
.model 
A model in lavaan model syntax or a cSEMModel list. 
.approach_2ndorder 
Character string. Approach used for models containing second-order constructs. One of: "2stage", or "mixed". Defaults to "2stage". 
.approach_cor_robust 
Character string. Approach used to obtain a robust
indicator correlation matrix. One of: "none" in which case the standard
Bravais-Pearson correlation is used,
"spearman" for the Spearman rank correlation, or
"mcd" via 
.approach_nl 
Character string. Approach used to estimate nonlinear structural relationships. One of: "sequential" or "replace". Defaults to "sequential". 
.approach_paths 
Character string. Approach used to estimate the
structural coefficients. One of: "OLS" or "2SLS". If "2SLS", instruments
need to be supplied to .instruments. Defaults to "OLS".
.approach_weights 
Character string. Approach used to obtain composite weights. One of: "PLSPM", "SUMCORR", "MAXVAR", "SSQCORR", "MINVAR", "GENVAR", "GSCA", "PCA", "unit", "bartlett", or "regression". Defaults to "PLSPM". 
.conv_criterion 
Character string. The criterion to use for the convergence check. One of: "diff_absolute", "diff_squared", or "diff_relative". Defaults to "diff_absolute". 
.disattenuate 
Logical. Should composite/proxy correlations
be disattenuated to yield consistent loadings and path estimates if at least
one of the constructs is modeled as a common factor? Defaults to TRUE.
.dominant_indicators 
A character vector of 
.estimate_structural 
Logical. Should the structural coefficients
be estimated? Defaults to TRUE.
.id 
Character string or integer. A character string giving the name or
an integer of the position of the column of 
.instruments 
A named list of vectors of instruments. The names
of the list elements are the names of the dependent (LHS) constructs of the structural
equation whose explanatory variables are endogenous. The vectors
contain the names of the instruments corresponding to each equation. Note
that exogenous variables of a given equation must be supplied as
instruments for themselves. Defaults to NULL.
.iter_max 
Integer. The maximum number of iterations allowed.
If 
.normality 
Logical. Should joint normality of

.PLS_approach_cf 
Character string. Approach used to obtain the correction
factors for PLSc. One of: "dist_squared_euclid", "dist_euclid_weighted",
"fisher_transformed", "mean_arithmetic", "mean_geometric", "mean_harmonic",
"geo_of_harmonic". Defaults to "dist_squared_euclid".
Ignored if 
.PLS_ignore_structural_model 
Logical. Should the structural model be ignored
when calculating the inner weights of the PLS-PM algorithm? Defaults to FALSE.
.PLS_modes 
Either a named list specifying the mode that should be used for
each construct in the form 
.PLS_weight_scheme_inner 
Character string. The inner weighting scheme
used by PLSPM. One of: "centroid", "factorial", or "path".
Defaults to "path". Ignored if 
.reliabilities 
A character vector of "name" = value pairs, where value is a numeric value between 0 and 1. Defaults to NULL.
.starting_values 
A named list of vectors where the
list names are the construct names whose indicator weights the user
wishes to set. The vectors must be named vectors of "indicator_name" = value pairs, where value is the starting weight. Defaults to NULL.
.resample_method 
Character string. The resampling method to use. One of: "none", "bootstrap" or "jackknife". Defaults to "none". 
.resample_method2 
Character string. The resampling method to use when resampling
from a resample. One of: "none", "bootstrap" or "jackknife". For
"bootstrap" the number of draws is provided via .R2. Defaults to "none".
.R 
Integer. The number of bootstrap replications. Defaults to 499.
.R2 
Integer. The number of bootstrap replications to use when
resampling from a resample. Defaults to 199.
.handle_inadmissibles 
Character string. How should inadmissible results
be treated? One of "drop", "ignore", or "replace". If "drop", all
replications/resamples yielding an inadmissible result will be dropped
(i.e. the number of results returned will potentially be less than 
.user_funs 
A function or a (named) list of functions to apply to every
resample. The functions must take 
.eval_plan 
Character string. The evaluation plan to use. One of "sequential", "multicore", or "multisession". In the two latter cases all available cores will be used. Defaults to "sequential". 
.seed 
Integer or 
.sign_change_option 
Character string. Which sign change option should be used to handle flipping signs when resampling? One of "none","individual", "individual_reestimate", "construct_reestimate". Defaults to "none". 
.tolerance 
Double. The tolerance criterion for convergence.
Defaults to 1e-05.
Estimate linear, nonlinear, hierarchical or multigroup structural equation models using a composite-based approach. In cSEM any method or approach that involves linear compounds (scores/proxies/composites) of observables (indicators/items/manifest variables) is defined as composite-based. See the Get started section of the cSEM website for a general introduction to composite-based SEM and cSEM.
csem()
estimates linear, nonlinear, hierarchical or multigroup structural
equation models using a composite-based approach.
The .data
and .model
arguments are required. .data
must be given
a matrix
or a data.frame
with column names matching
the indicator names used in the model description. Alternatively,
a list
of data sets (matrices or data frames) may be provided
in which case estimation is repeated for each data set.
Possible column types/classes of the data provided are: "logical
",
"numeric
" ("double
" or "integer
"), "factor
" ("ordered
" and/or "unordered
"),
"character
", or a mix of several types. Character columns will be treated
as (unordered) factors.
Depending on the type/class of the indicator data provided cSEM computes the indicator
correlation matrix in different ways. See calculateIndicatorCor()
for details.
In the current version .data
must not contain missing values. Future versions
are likely to handle missing values as well.
To provide a model use the lavaan model syntax.
Note, however, that cSEM currently only supports the "standard" lavaan
model syntax (Types 1, 2, 3, and 7 as described on the help page).
Therefore, specifying e.g., a threshold or scaling factors is ignored.
Alternatively, a standardized (possibly incomplete) cSEMModel-list may be supplied.
See parseModel()
for details.
By default weights are estimated using the partial least squares path modeling
algorithm ("PLSPM"
).
A range of alternative weighting algorithms may be supplied to
.approach_weights
. Currently, the following approaches are implemented
(Default) Partial least squares path modeling ("PLSPM"
). The algorithm
can be customized. See calculateWeightsPLS()
for details.
Generalized structured component analysis ("GSCA"
) and generalized
structured component analysis with uniqueness terms (GSCAm). The algorithms
can be customized. See calculateWeightsGSCA()
and calculateWeightsGSCAm()
for details.
Note that GSCAm is called indirectly when the model contains constructs
modeled as common factors only and .disattenuate = TRUE
. See below.
Generalized canonical correlation analysis (GCCA), including
"SUMCORR"
, "MAXVAR"
, "SSQCORR"
, "MINVAR"
, "GENVAR"
.
Principal component analysis ("PCA"
)
Factor score regression using sum scores ("unit"
),
regression ("regression"
) or bartlett scores ("bartlett"
)
It is possible to supply starting values for the weighting algorithm
via .starting_values
. The argument accepts a named list of vectors where the
list names are the construct names whose indicator weights the user
wishes to set. The vectors must be named vectors of "indicator_name" = value
pairs, where value
is the starting weight. See the examples section below for details.
Composite-indicator and composite-composite correlations are properly disattenuated by default to yield consistent loadings, construct correlations, and path coefficients if any of the concepts are modeled as a common factor.
For PLSPM disattenuation is done using PLSc (Dijkstra and Henseler 2015).
For GSCA disattenuation is done implicitly by using GSCAm (Hwang et al. 2017).
Weights obtained by GCCA, unit, regression, bartlett or PCA are
disattenuated using Croon's approach (Croon 2002).
Disattenuation may be suppressed by setting .disattenuate = FALSE
.
Note, however, that quantities in this case are inconsistent
estimates for their construct level counterparts if any of the constructs in
the structural model are modeled as a common factor!
By default path coefficients are estimated using ordinary least squares (.approach_paths = "OLS"
).
For linear models, two-stage least squares ("2SLS"
) is available, however, only if
instruments are internal, i.e., part of the structural model. Future versions
will add support for external instruments if possible. Instruments must be supplied to
.instruments
as a named list where the names
of the list elements are the names of the dependent constructs of the structural
equations whose explanatory variables are believed to be endogenous.
The list consists of vectors of names of instruments corresponding to each equation.
Note that exogenous variables of a given equation must be supplied as
instruments for themselves.
If reliabilities are known they can be supplied as "name" = value
pairs to
.reliabilities
, where value
is a numeric value between 0 and 1.
Currently, only supported for "PLSPM".
If the model contains nonlinear terms csem()
estimates a polynomial structural equation model
using a non-iterative method of moments approach described in
Dijkstra and Schermelleh-Engel (2014). Nonlinear terms include interactions and
exponential terms. The latter is described in model syntax as an
"interaction with itself", e.g., xi^3 = xi.xi.xi
. Currently only exponential
terms up to a power of three (e.g., three-way interactions or cubic terms) are allowed:
- Single, e.g., eta1
- Quadratic, e.g., eta1.eta1
- Cubic, e.g., eta1.eta1.eta1
- Two-way interaction, e.g., eta1.eta2
- Three-way interaction, e.g., eta1.eta2.eta3
- Quadratic and two-way interaction, e.g., eta1.eta1.eta3
The current version of the package allows two kinds of estimation:
estimation of the reduced form equation (.approach_nl = "replace"
) and
sequential estimation (.approach_nl = "sequential"
, the default). The latter does not
allow for multivariate normality of all exogenous variables, i.e.,
the latent variables and the error terms.
Distributional assumptions are kept to a minimum (an i.i.d. sample from a population with finite moments for the relevant order); for higher order models, that go beyond interaction, we work in this version with the assumption that as far as the relevant moments are concerned certain combinations of measurement errors behave as if they were Gaussian. For details see: Dijkstra and Schermelleh-Engel (2014).
Second-order constructs are specified using the operators =~
and <~
. These
operators are usually used with indicators on their right-hand side. For
second-order constructs the right-hand side variables are constructs instead.
If c1, and c2 are constructs forming or measuring a higher-order
construct, a model would look like this:
my_model <- "
# Structural model
SAT  ~ QUAL
VAL  ~ SAT

# Measurement/composite model
QUAL =~ qual1 + qual2
SAT  =~ sat1 + sat2

c1 =~ x11 + x12
c2 =~ x21 + x22

# Second-order construct (in this case a second-order composite built by common
# factors)
VAL <~ c1 + c2
"
Currently, two approaches are explicitly implemented:
(Default) "2stage"
. The (disjoint) two-stage approach as proposed by Agarwal and Karahanna (2000).
Note that by default a correction for attenuation is applied if common factors are
involved in modeling second-order constructs. For instance, the three-stage approach
proposed by Van Riel et al. (2017) is applied in case of a second-order construct specified as a
composite of common factors. On the other hand, if no common factors are involved the two-stage approach
is applied as proposed by Schuberth et al. (2020).
"mixed"
. The mixed repeated indicators/two-stage approach as proposed by Ringle et al. (2012).
The repeated indicators approach as proposed by Joereskog and Wold (1982)
and the extension proposed by Becker et al. (2012) are
not directly implemented as they simply require a respecification of the model.
In the above example the repeated indicators approach
would require to change the model and to append the repeated indicators to
the data supplied to .data
. Note that the indicators need to be renamed in this case as
csem()
does not allow for one indicator to be attached to multiple constructs.
my_model <- "
# Structural model
SAT  ~ QUAL
VAL  ~ SAT
VAL  ~ c1 + c2

# Measurement/composite model
QUAL =~ qual1 + qual2
SAT  =~ sat1 + sat2
VAL  =~ x11_temp + x12_temp + x21_temp + x22_temp

c1 =~ x11 + x12
c2 =~ x21 + x22
"
According to the extended approach indirect effects of QUAL
on VAL
via c1
and c2
would have to be specified as well.
To perform a multigroup analysis provide either a list of data sets or one
data set containing a group-identifier-column whose column
name must be provided to .id
. Values of this column are taken as levels of a
factor and are interpreted as group
identifiers. csem()
will split the data by levels of that column and run
the estimation for each level separately. Note, the more levels
the group-identifier-column has, the more estimation runs are required.
This can considerably slow down estimation, especially if resampling is
requested. For the latter it will generally be faster to use
.eval_plan = "multisession"
or .eval_plan = "multicore"
.
Inference is done via resampling. See resamplecSEMResults()
and infer()
for details.
An object of class cSEMResults
with methods for all post-estimation generics.
Technically, a call to csem()
results in an object with at least
two class attributes. The first class attribute is always cSEMResults
.
The second is one of cSEMResults_default
, cSEMResults_multi
, or
cSEMResults_2ndorder
and depends on the estimated model and/or the type of
data provided to the .model
and .data
arguments. The third class attribute
cSEMResults_resampled
is only added if resampling was conducted.
For details see the cSEMResults help file.
assess()
Assess results using common quality criteria, e.g., reliability, fit measures, HTMT, R2 etc.
infer()
Calculate common inferential quantities, e.g., standard errors, confidence intervals.
predict()
Predict endogenous indicator scores and compute common prediction metrics.
summarize()
Summarize the results. Mainly called for its side-effect, the print method.
verify()
Verify/Check admissibility of the estimates.
Tests are performed using the test-family of functions. Currently the following tests are implemented:
testOMF()
Bootstrap-based test for overall model fit based on Beran and Srivastava (1985)
testMICOM()
Permutation-based test for measurement invariance of composites proposed by Henseler et al. (2016)
testMGD()
Several (mainly) permutation-based tests for multi-group comparisons.
testHausman()
Regression-based Hausman test to test for endogeneity.
Other miscellaneous post-estimation functions belong to the do-family of functions. Currently three do functions are implemented:
doIPMA()
Performs an importance-performance matrix analysis (IPMA).
doNonlinearEffectsAnalysis()
Perform a nonlinear effects analysis as described in e.g., Spiller et al. (2013)
doRedundancyAnalysis()
Perform a redundancy analysis (RA) as proposed by Hair et al. (2016) with reference to Chin (1998)
Agarwal R, Karahanna E (2000).
“Time Flies When You're Having Fun: Cognitive Absorption and Beliefs about Information Technology Usage.”
MIS Quarterly, 24(4), 665.
Becker J, Klein K, Wetzels M (2012).
“Hierarchical Latent Variable Models in PLS-SEM: Guidelines for Using Reflective-Formative Type Models.”
Long Range Planning, 45(5-6), 359–394.
doi:10.1016/j.lrp.2012.10.001.
Beran R, Srivastava MS (1985).
“Bootstrap Tests and Confidence Regions for Functions of a Covariance Matrix.”
The Annals of Statistics, 13(1), 95–115.
doi:10.1214/aos/1176346579.
Chin WW (1998).
“Modern Methods for Business Research.”
In Marcoulides GA (ed.), chapter The Partial Least Squares Approach to Structural Equation Modeling, 295–358.
Mahwah, NJ: Lawrence Erlbaum.
Croon MA (2002).
“Using predicted latent scores in general latent structure models.”
In Marcoulides GA, Moustaki I (eds.), Latent Variable and Latent Structure Models, chapter 10, 195–224.
Lawrence Erlbaum.
ISBN 080584046X, Pagination: 288.
Dijkstra TK, Henseler J (2015).
“Consistent and Asymptotically Normal PLS Estimators for Linear Structural Equations.”
Computational Statistics & Data Analysis, 81, 10–23.
Dijkstra TK, Schermelleh-Engel K (2014).
“Consistent Partial Least Squares For Nonlinear Structural Equation Models.”
Psychometrika, 79(4), 585–604.
Hair JF, Hult GTM, Ringle C, Sarstedt M (2016).
A Primer on Partial Least Squares Structural Equation Modeling (PLS-SEM).
Sage publications.
Henseler J, Ringle CM, Sarstedt M (2016).
“Testing Measurement Invariance of Composites Using Partial Least Squares.”
International Marketing Review, 33(3), 405–431.
doi:10.1108/imr-09-2014-0304.
Hwang H, Takane Y, Jung K (2017).
“Generalized structured component analysis with uniqueness terms for accommodating measurement error.”
Frontiers in Psychology, 8(2137), 1–12.
Joereskog KG, Wold HO (1982).
Systems under Indirect Observation: Causality, Structure, Prediction - Part II, volume 139.
North Holland.
Ringle CM, Sarstedt M, Straub D (2012).
“A Critical Look at the Use of PLS-SEM in MIS Quarterly.”
MIS Quarterly, 36(1), iii–xiv.
Schuberth F, Rademaker ME, Henseler J (2020).
“Estimating and assessing second-order constructs using PLS-PM: the case of composites of composites.”
Industrial Management & Data Systems, 120(12), 2211–2241.
doi:10.1108/imds-12-2019-0642.
Spiller SA, Fitzsimons GJ, Lynch JG, Mcclelland GH (2013).
“Spotlights, Floodlights, and the Magic Number Zero: Simple Effects Tests in Moderated Regression.”
Journal of Marketing Research, 50(2), 277–288.
doi:10.1509/jmr.12.0420.
Van Riel ACR, Henseler J, Kemeny I, Sasovova Z (2017).
“Estimating hierarchical constructs using Partial Least Squares: The case of second order composites of factors.”
Industrial Management & Data Systems, 117(3), 459–477.
doi:10.1108/IMDS-07-2016-0286.
args_default()
, cSEMArguments, cSEMResults, foreman()
, resamplecSEMResults()
,
assess()
, infer()
, predict()
, summarize()
, verify()
, testOMF()
,
testMGD()
, testMICOM()
, testHausman()
# ===========================================================================
# Basic usage
# ===========================================================================
### Linear model ----
# Most basic usage requires a dataset and a model. We use the
# `threecommonfactors` dataset.
## Take a look at the dataset
#?threecommonfactors
## Specify the (correct) model
# The model is a single character string in lavaan model syntax; it must be
# assigned with `<-` so that `model` exists for the calls below.
model <- "
# Structural model
eta2 ~ eta1
eta3 ~ eta1 + eta2
# (Reflective) measurement model
eta1 =~ y11 + y12 + y13
eta2 =~ y21 + y22 + y23
eta3 =~ y31 + y32 + y33
"
## Estimate
res <- csem(threecommonfactors, model)
## Post-estimation
verify(res)
summarize(res)
assess(res)
# Notes:
# 1. By default no inferential quantities (e.g. Std. errors, p-values, or
#    confidence intervals) are calculated. Use resampling to obtain
#    inferential quantities. See "Resampling" in the "Extended usage"
#    section below.
# 2. `summarize()` prints the full output by default. For a more condensed
#    output use:
print(summarize(res), .full_output = FALSE)
## Dealing with endogeneity ----
# See: ?testHausman()
### Models containing second-order constructs ----
## Take a look at the dataset
#?dgp_2ndorder_cf_of_c
model <- "
# Path model / Regressions
c4 ~ eta1
eta2 ~ eta1 + c4
# Reflective measurement model
c1 <~ y11 + y12
c2 <~ y21 + y22 + y23 + y24
c3 <~ y31 + y32 + y33 + y34 + y35 + y36 + y37 + y38
eta1 =~ y41 + y42 + y43
eta2 =~ y51 + y52 + y53
# Composite model (second order)
c4 =~ c1 + c2 + c3
"
# Estimate the same model with both explicitly implemented second-order
# approaches.
res_2stage <- csem(dgp_2ndorder_cf_of_c, model, .approach_2ndorder = "2stage")
res_mixed <- csem(dgp_2ndorder_cf_of_c, model, .approach_2ndorder = "mixed")
# The standard repeated indicators approach is done by 1.) respecifying the model
# and 2.) adding the repeated indicators to the data set
# 1.) Respecify the model
model_RI <- "
# Path model / Regressions
c4 ~ eta1
eta2 ~ eta1 + c4
c4 ~ c1 + c2 + c3
# Reflective measurement model
c1 <~ y11 + y12
c2 <~ y21 + y22 + y23 + y24
c3 <~ y31 + y32 + y33 + y34 + y35 + y36 + y37 + y38
eta1 =~ y41 + y42 + y43
eta2 =~ y51 + y52 + y53
# c4 is a common factor measured by composites
c4 =~ y11_temp + y12_temp + y21_temp + y22_temp + y23_temp + y24_temp +
y31_temp + y32_temp + y33_temp + y34_temp + y35_temp + y36_temp +
y37_temp + y38_temp
"
# 2.) Update data set
# Every column is duplicated and the copies get a "_temp" suffix, because an
# indicator may only be attached to one construct.
data_RI <- dgp_2ndorder_cf_of_c
coln <- c(colnames(data_RI), paste0(colnames(data_RI), "_temp"))
data_RI <- data_RI[, rep(seq_len(ncol(data_RI)), times = 2)]
colnames(data_RI) <- coln
# Estimate
res_RI <- csem(data_RI, model_RI)
summarize(res_RI)
### Multigroup analysis ----
# See: ?testMGD()
# ===========================================================================
# Extended usage
# ===========================================================================
# `csem()` provides defaults for all arguments except `.data` and `.model`.
# Below some common options/tasks that users are likely to be interested in.
# We use the threecommonfactors data set again:
model <- "
# Structural model
eta2 ~ eta1
eta3 ~ eta1 + eta2
# (Reflective) measurement model
eta1 =~ y11 + y12 + y13
eta2 =~ y21 + y22 + y23
eta3 =~ y31 + y32 + y33
"
### PLS vs PLSc and disattenuation
# In the model all concepts are modeled as common factors. If
# .approach_weights = "PLSPM", csem() uses PLSc to disattenuate
# composite-indicator and composite-composite correlations.
res_plsc <- csem(threecommonfactors, model, .approach_weights = "PLSPM")
# Inspect the object that was just estimated rather than an earlier `res`.
res_plsc$Information$Model$construct_type # all common factors
# To obtain "original" (inconsistent) PLS estimates use `.disattenuate = FALSE`
res_pls <- csem(
  threecommonfactors, model,
  .approach_weights = "PLSPM",
  .disattenuate = FALSE
)
s_plsc <- summarize(res_plsc)
s_pls <- summarize(res_pls)
# Compare
data.frame(
  "Path" = s_plsc$Estimates$Path_estimates$Name,
  "Pop_value" = c(0.6, 0.4, 0.35), # see ?threecommonfactors
  "PLSc" = s_plsc$Estimates$Path_estimates$Estimate,
  "PLS" = s_pls$Estimates$Path_estimates$Estimate
)
### Resampling ----
## Not run:
## Basic resampling
res_boot <- csem(threecommonfactors, model, .resample_method = "bootstrap")
res_jack <- csem(threecommonfactors, model, .resample_method = "jackknife")
# See ?resamplecSEMResults for more examples
### Choosing a different weighting scheme ----
# With the default `.disattenuate = TRUE` and all constructs modeled as common
# factors, "GSCA" indirectly calls GSCAm; `.disattenuate = FALSE` gives GSCA.
res_gscam <- csem(threecommonfactors, model, .approach_weights = "GSCA")
res_gsca <- csem(
  threecommonfactors, model,
  .approach_weights = "GSCA",
  .disattenuate = FALSE
)
s_gscam <- summarize(res_gscam)
s_gsca <- summarize(res_gsca)
# Compare
data.frame(
  "Path" = s_gscam$Estimates$Path_estimates$Name,
  "Pop_value" = c(0.6, 0.4, 0.35), # see ?threecommonfactors
  "GSCAm" = s_gscam$Estimates$Path_estimates$Estimate,
  "GSCA" = s_gsca$Estimates$Path_estimates$Estimate
)
## End(Not run)
### Fine-tuning a weighting scheme ----
## Setting starting values
# A named list: list names are construct names, each element is a named
# vector of "indicator_name" = starting weight pairs.
sv <- list("eta1" = c("y12" = 10, "y13" = 4, "y11" = 1))
res <- csem(threecommonfactors, model, .starting_values = sv)
## Choosing a different inner weighting scheme
#?args_csem_dotdotdot
res <- csem(
  threecommonfactors, model,
  .PLS_weight_scheme_inner = "factorial",
  .PLS_ignore_structural_model = TRUE
)
## Choosing different modes for PLS
# By default, concepts modeled as common factors use PLS Mode A weights.
modes <- list("eta1" = "unit", "eta2" = "modeB", "eta3" = "unit")
res <- csem(threecommonfactors, model, .PLS_modes = modes)
summarize(res)