simCRM {EstCRM}    R Documentation

Generating Data under the Continuous Response Model

Description

Generates simulated response data under the Continuous Response Model.

Usage

simCRM(thetas, true.param, max.item)

Arguments

thetas

a vector of length N with N denoting the number of examinees. Each element of the vector is the true ability level for an examinee

true.param

a matrix of true item parameters with m rows and three columns, with m denoting the number of items. The first column holds the a parameters, the second column holds the b parameters, and the third column holds the alpha parameters

max.item

a vector of length m indicating the maximum possible score for each hypothetical item.

Details

simCRM generates data under the Continuous Response Model as described in Shojima (2005). Given the true ability level $\theta_i$ of person i and the true item parameters $(a_j, b_j, \alpha_j)$ of item j, the transformed response of person i to item j follows a normal distribution with a mean of $\alpha_j(\theta_i - b_j)$ and a variance of $\alpha_j^2 / a_j^2$ (that is, a standard deviation of $\alpha_j / a_j$).

Value

a data frame with N rows and m columns with N denoting the number of observations and m denoting the number of items.

Author(s)

Cengiz Zopluoglu

References

Shojima, K. (2005). A Noniterative Item Parameter Solution in Each EM Cycle of the Continuous Response Model. Educational Technology Research, 28, 11-22.

See Also

EstCRMitem for estimating item parameters, EstCRMperson for estimating person parameters, fitCRM for computing item-fit statistics and drawing empirical 3D item response curves, and plotCRM for drawing theoretical 3D item category response curves.

Examples


# Example 1: Basic data generation and parameter recovery ----

# Draw the true ability parameters for 1000 examinees from a standard
# normal distribution.

true.thetas <- rnorm(1000)

# Build the true item parameter matrix for five hypothetical items.
# Each row is an item; the columns hold the a, b and alpha parameters.

true.par <- cbind(
  c(.5, 1, 1.5, 2, 2.5),
  c(-1, -.5, 0, .5, 1),
  c(1, .8, 1.5, .9, 1.2)
)
true.par

# Maximum possible score that a student can obtain on each item.

max.item <- rep(30, 5)

# Simulate the response matrix.

simulated.data <- simCRM(true.thetas, true.par, max.item)

# Take a first look at the simulated data.

head(simulated.data)
summary(simulated.data)

# Try to recover the item parameters.

min.item <- rep(0, 5)

CRM <- EstCRMitem(
  simulated.data, max.item, min.item,
  max.EMCycle = 500, converge = 0.01
)

# Put the true and the estimated item parameters side by side.
# The first three columns are the true item parameters and the last
# three columns are the estimated item parameters.

cbind(true.par, CRM$param)

# Recover the person parameters using the estimated item parameters.

par <- CRM$param
CRMthetas <- EstCRMperson(simulated.data, par, min.item, max.item)
theta.par <- CRMthetas$thetas

# Compare the true person ability parameters with the estimated ones.
# The first column is the true parameters and the second column is the
# estimated parameters.

thetas <- cbind(true.thetas, theta.par[, 2])
head(thetas)
cor(thetas)
plot(thetas[, 1], thetas[, 2])

# RMSE of the estimated person parameters.

sqrt(sum((thetas[, 1] - thetas[, 2])^2) / nrow(thetas))

# The RMSE is comparable to the standard error of the theta estimates.
# That standard error is the square root of the reciprocal of the total
# test information, which is the sum of the squared "a" parameters.

sqrt(1 / sum(CRM$param[, 1]^2))

# Example 2: Item-fit residuals, empirical and theoretical item ----
#            category response curves for the simulated data above

# Because of run-time constraints during package development, the fit
# analysis here is run on a subset of the data simulated above. The
# simulated data has 1000 examinees, but only the first 100 subjects are
# used. Please skip the following line to run the analysis on the whole
# data set.

simulated.data <- simulated.data[seq_len(100), ] # Ignore this line

# Item parameter estimates and score ranges used by the fit analysis.

par <- CRM$param
max.item <- rep(30, 5)
min.item <- rep(0, 5)

# Person parameter estimates for the (subsetted) data.

CRMthetas <- EstCRMperson(simulated.data, par, min.item, max.item)
theta.par <- CRMthetas$thetas
mean(theta.par[, 2])
sd(theta.par[, 2])
hist(theta.par[, 2])

fit <- fitCRM(simulated.data, par, CRMthetas, max.item, group = 10)

# Item-fit residuals

fit$fit.stat

# Empirical item category response curves

fit$emp.irf[[1]] # Item 1
fit$emp.irf[[5]] # Item 5

# Theoretical item category response curves

plotCRM(par, 1, min.item, max.item) # Item 1



# Example 3: Replication of Shojima's (2005) simulation study ----

# In Shojima's simulation study published in 2005:
#
#   - the true person parameters were generated from a standard normal
#     distribution;
#   - the natural logarithm of the true "a" parameters was generated
#     from a N(0,0.09);
#   - the true "b" parameters were generated from a N(0,1);
#   - the natural logarithm of the true "alpha" parameters was generated
#     from a N(0,0.09).
#
# The independent variables in the simulation study were the number of
# items and the sample size.
#
# There were 9 different conditions and 100 replications for each
# condition.
#
# In Table 1 (Shojima, 2005), the RMSD statistics were reported for each
# condition.
#
# The code below replicates the same study. The results are comparable
# to Table 1. The user should only specify the sample size and the
# number of items, and then run the rest of the code. At the end, RMSEa,
# RMSEb and RMSEalp are the item parameter recovery statistics, which
# are comparable to Table 1.

# Set the conditions for the simulation study.
# A large number of replications takes longer to run.

N <- 500           # sample size
n <- 10            # number of items
replication <- 1   # number of replications for each condition

############################################################
#       Run the rest of the code from START to END         #
############################################################

# START

# One list slot per replication for every object that is produced.
true.person <- vector("list", replication)
true.item <- vector("list", replication)
est.person <- vector("list", replication)
est.item <- vector("list", replication)
simulated.datas <- vector("list", replication)

# Generate the true person and item parameters.
# seq_len() is used instead of 1:replication so that the loops are
# skipped, rather than run for i = 1 and i = 0, when replication is 0.
for (i in seq_len(replication)) {
  true.person[[i]] <- rnorm(N, 0, 1)
  # NOTE(review): the third argument of rnorm() is a standard deviation.
  # If the N(0,0.09) quoted above denotes a variance, the value here
  # should be 0.3 rather than .09 -- confirm against Shojima (2005).
  true.item[[i]] <- cbind(
    exp(rnorm(n, 0, .09)),
    rnorm(n, 0, 1),
    1 / exp(rnorm(n, 0, .09))
  )
}

max.item <- rep(50, n)
min.item <- rep(0, n)

# Simulate one response data set per replication.
for (i in seq_len(replication)) {
  simulated.datas[[i]] <- simCRM(true.person[[i]], true.item[[i]], max.item)
}

# Estimate the item parameters for each replication.
for (i in seq_len(replication)) {
  CRM <- EstCRMitem(
    simulated.datas[[i]], max.item, min.item,
    max.EMCycle = 500, converge = 0.01
  )
  # Spell the element name out in full; "CRM$par" only worked through
  # partial matching of "$".
  est.item[[i]] <- CRM$param
}

# Estimate the person parameters from the estimated item parameters.
for (i in seq_len(replication)) {
  persontheta <- EstCRMperson(
    simulated.datas[[i]], est.item[[i]], min.item, max.item
  )
  est.person[[i]] <- persontheta$thetas[, 2]
}

# END
############################################################

# Root mean squared error between a vector of true values and a vector
# of estimates of the same length.
rmse <- function(truth, estimate) {
  sqrt(sum((truth - estimate)^2) / length(truth))
}

# RMSE for parameter "a"

RMSEa <- vapply(
  seq_len(replication),
  function(i) rmse(true.item[[i]][, 1], est.item[[i]][, 1]),
  numeric(1)
)

mean(RMSEa)

# RMSE for parameter "b"

RMSEb <- vapply(
  seq_len(replication),
  function(i) rmse(true.item[[i]][, 2], est.item[[i]][, 2]),
  numeric(1)
)

mean(RMSEb)

# RMSE for parameter "alpha"

RMSEalp <- vapply(
  seq_len(replication),
  function(i) rmse(true.item[[i]][, 3], est.item[[i]][, 3]),
  numeric(1)
)

mean(RMSEalp)

# RMSE for person parameter

RMSEtheta <- vapply(
  seq_len(replication),
  function(i) rmse(true.person[[i]], est.person[[i]]),
  numeric(1)
)

mean(RMSEtheta)


[Package EstCRM version 1.4 Index]