generate_data {autoRasch}    R Documentation

Generate the artificial dataset

Description

This function generates simulated datasets with different attributes.

Usage

generate_data(
  responseType = "multidim.nocorrel",
  theta = c(-3, 3),
  sdtheta = 6,
  ntheta = 301,
  beta = c(-2.5, 2.5),
  sdbeta = 4,
  nitem = 6,
  alpha = c(1),
  sdlambda = 1,
  ncat = 5,
  thGap = 0.8,
  ndim = 3,
  randtype = "uniform",
  corLevel = 0,
  dim.members = c(),
  seed = NULL
)

Arguments

responseType

The type of the dataset. The types include multidim.nocorrel, multidim.withcorrel, discriminate, multidim.within, and testlets.

theta

A vector giving the range of the ability parameters, c(min.theta, max.theta). It applies when randtype = "uniform".

sdtheta

Standard deviation which is used to generate theta values using stats::rnorm() with n = ntheta, mean = 0, and sd = sdtheta. It applies when randtype = "normal".

ntheta

The number of the observations.

beta

A vector giving the range of the item difficulty parameters, c(min.beta, max.beta). It applies when randtype = "uniform".

sdbeta

Standard deviation which is used to generate item location values using stats::rnorm() with n = nitem, mean = 0, and sd = sdbeta. It applies when randtype = "normal".

nitem

The number of the items in each subgroup.

alpha

A vector of the discrimination parameters applied to each item.

sdlambda

A vector of standard deviations used to simulate the testlet (local dependency) effect. The effect is added using stats::rnorm() with n = ntheta, mean = 0, and sd = sdlambda.

ncat

The number of the response categories.

thGap

The difference between adjacent thresholds.

ndim

The number of subgroups (dimensions/testlets) created.

randtype

The randomization type, either "uniform" or "normal".

corLevel

The correlation between the two dimensions.

dim.members

The list of item members in each dimension.

seed

Integer seed for reproducibility.

Value

The generated dataset as a data.frame.

Examples

# 1. Multidimensional Polytomous Dataset with 0.2 Correlation
# Generate a multidimensional dataset with a correlation of 0.2 between the dimensions
correl02_multidim <- generate_data(
  responseType = "multidim.withcorrel", corLevel = 0.2, seed = 2021
)

# 2.  Within-item Multidimensional Polytomous Dataset
# Generate a multidimensional dataset in which some items relate to more than
# one dimension.
withinItem_multidim <- generate_data(
  responseType = "multidim.within", ndim = 3,
  dim.members = list(c(1:6,13),c(3,7:12),c(5,13:18)), seed = 2021
)

# 3. Multi-testlets Polytomous Dataset
# Generate a dataset which consists of two item bundles with different levels
# of local dependency effect.
testlets_dataset <- generate_data(
  responseType = "testlets", ndim = 2, sdlambda = c(0,4), seed = 2021
)

# 4a. Inhomogeneous Dichotomous Dataset
# Generate dataset with binary type responses containing three subsets
# with different discrimination values.

dicho_inh_dset <- generate_data(
  responseType = "discriminate", ncat = 2, seed = 2021,
  alpha = c(0.04,0.045,0.05,0.055,0.06,0.065,0.2,0.25,0.3,0.35,0.4,0.45,
            2.6,2.65,2.7,2.75,2.8,2.85)
)

# 4b. Inhomogeneous Polytomous Dataset
# Generate dataset with polytomous responses (five categories) containing
# three subsets with different discrimination values.

poly_inh_dset <- generate_data(
  responseType = "discriminate", ncat = 5, seed = 2021,
  alpha = c(0.04,0.045,0.05,0.055,0.06,0.065,0.2,0.25,0.3,0.35,0.4,0.45,
            2.6,2.65,2.7,2.75,2.8,2.85)
)

# 4c. Shorter Inhomogeneous Polytomous Dataset
short_poly_data <- generate_data(
  alpha = c(0.02,0.5,2), nitem = 3, ndim = 3, ncat = 5,
  theta = c(-6,6), beta = c(-4,4), ntheta = 151, seed = 2021
)

# 4d. Short Dataset containing DIF items
# Generate dataset with polytomous responses (three categories) containing
# three subsets with different discrimination values and two DIF-items.
seed <- c(54748,96765)
difset_short1 <- generate_data(responseType = "discriminate", ncat = 3,
                                ntheta = 50, nitem = 3, ndim = 1,
                                seed = seed[1], alpha = c(2))
difset_short2 <- generate_data(responseType = "discriminate", ncat = 3,
                                ntheta = 50, nitem = 2, ndim = 1,
                                seed = seed[2], alpha = c(0.8),
                                beta = c(-2.5,2.5))
shortDIF <- cbind(rbind(difset_short1,difset_short1),
                   c(difset_short2[,1],difset_short2[,2]))

# 5a. Uncorrelated Multidimensional Dichotomous Dataset
# Generate dataset with binary type responses containing three subsets which
# represent different uncorrelated dimensions.
dicho_md_dset <- generate_data(
  responseType = "multidim.nocorrel", ncat = 2, seed = 2021
)

# 5b. Uncorrelated Multidimensional Polytomous Dataset
# Generate dataset with polytomous responses (five categories) containing
# three subsets which represent different uncorrelated dimensions.
poly_md_dset <- generate_data(
  responseType = "multidim.nocorrel", ncat = 5, seed = 2021
)


[Package autoRasch version 0.2.2 Index]