R: Bi-level selection of conditional main effects

cv.cmenet {cmenet}

R Documentation

Bi-level selection of conditional main effects

Description

The main function in this package. cv.cmenet performs variable selection of conditional main effects (CMEs) via a bi-level penalization framework, with penalty parameters tuned via cross-validation.

Usage

cv.cmenet(xme, xcme, y,
                    nfolds = 10, var.names = NULL,
                    nlambda.sib=20, nlambda.cou=20, lambda.min.ratio=1e-6,
                    ngamma=10, max.gamma=150, ntau=20,
                    max.tau=0.01, tau.min.ratio=0.01,
                    it.max=250, it.max.cv=25, warm.str="lasso")

Arguments

`xme`	An `n` x `p` binary model matrix for MEs.
`xcme`	An `n` x (4*choose(`p`,2)) model matrix for CMEs.
`y`	An `n`-length response vector.
`nfolds`	Number of folds for cross-validation.
`var.names`	A (`p`+4*choose(`p`,2))-length string vector for variable names.
`nlambda.sib`	Number of values for `lambda.sib`.
`nlambda.cou`	Number of values for `lambda.cou`.
`lambda.min.ratio`	Smallest value for `lambda.sib` and `lambda.cou`, as a fraction of `lambda.max` (the smallest penalty for which all coefficients are zero).
`ngamma`	Number of values for `gamma`.
`max.gamma`	Maximum value for `gamma`.
`ntau`	Number of values for `tau`.
`max.tau`	Maximum value for `tau`.
`tau.min.ratio`	Smallest value for `tau`, as a fraction of `max.tau`.
`it.max`	Number of optimization iterations.
`it.max.cv`	Number of optimization iterations in cross-validation.
`warm.str`	A string indicating which method should be used for warm-starting active variables. Current options include `"lasso"` (default) and `"hierNet"`.

Value

`cme.fit`	Fitted `cmenet` object.
`params`	Fitted penalty parameters (`lambda.sib`, `lambda.cou`, `gamma` and `tau`).
`select.names`	Selected ME and CME variables.
`select.idx`	Indices for `select.names`.

References

Mak and Wu (2018). cmenet: a new method for bi-level variable selection of conditional main effects. Journal of the American Statistical Association, to appear.

Examples

  ## Not run: 
library(MASS)
n <- 50 #number of observations
p <- 50 #number of main effects

## Simulate model matrix for MEs and CMEs
set.seed(1)
rho <- 0 #correlation
ones <- matrix(1,p,p)
covmtx <- rho*ones+(1-rho)*diag(p)
latmtx <- mvrnorm(n,p,mu=rep(0,p),Sigma=covmtx) #equicorrelated cov. matrix
memtx <- (latmtx>=0)-(latmtx<0) #simulate model matrix for MEs
model.mtx <- full.model.mtx(memtx)$model.mtx #generate model matrix for MEs and CMEs

## Set true model and generate response
num.act <- 2 # two siblings active
num.grp <- 4 # ... within four active groups
ind <- c()
for (ii in 1:num.grp){
  eff <- sample(seq(2*(p-1)),num.act)
  ind <- c(ind, p + eff + (ii-1)*(2*(p-1)))
}
colnames(model.mtx)[ind] # active CMEs

des.mtx <- model.mtx[,ind]
inter <- 12 #intercept
xbtrue <- inter + rowSums(des.mtx)
y <- xbtrue + rnorm(n,sd=1) #response
xme <- model.mtx[,1:p]
xcme <- model.mtx[,(p+1):ncol(model.mtx)]

#---------------------------------------------------------------
# Selection of MEs and CMEs:
#---------------------------------------------------------------

## cmenet (parameters tuned via cross-validation)
cv.cme <- cv.cmenet(xme, xcme, y, var.names=colnames(model.mtx))
fit.cme <- cv.cme$cme.fit
sel.cme <- cv.cme$select.idx
colnames(model.mtx)[ind] #true model
colnames(model.mtx)[sel.cme] #selected effects from cmenet
colnames(model.mtx)[setdiff(sel.cme,ind)] #selected effects not in true model
colnames(model.mtx)[setdiff(ind,sel.cme)] #true effects not in selected model

## lasso
library(glmnet)
cv.las <- cv.glmnet(cbind(xme,xcme),y)
fit.las <- glmnet(cbind(xme,xcme),y)
sel.las <- which(fit.las$beta[,which(cv.las$lambda==cv.las$lambda.min)]!=0)
colnames(model.mtx)[ind] #true model
colnames(model.mtx)[sel.las] #selected effects from lasso
colnames(model.mtx)[setdiff(sel.las,ind)] #selected effects not in true model
colnames(model.mtx)[setdiff(ind,sel.las)] #true effects not in selected model

## sparsenet
library(sparsenet)
cv.sn <- cv.sparsenet(cbind(xme,xcme),y)
fit.sn <- sparsenet(cbind(xme,xcme),y)
sel.sn <- which(fit.sn$coefficients[[cv.sn$which.min[2]]]$beta[,cv.sn$which.min[1]]!=0)
colnames(model.mtx)[ind] #true model
colnames(model.mtx)[sel.sn] #selected effects from sparsenet
colnames(model.mtx)[setdiff(sel.sn,ind)] #selected effects not in true model
colnames(model.mtx)[setdiff(ind,sel.sn)] #true effects not in selected model

#---------------------------------------------------------------
## Comparison:
#---------------------------------------------------------------

## (a) Misspecifications
length(setdiff(sel.cme,ind)) + length(setdiff(ind,sel.cme)) # cmenet:    25
length(setdiff(sel.las,ind)) + length(setdiff(ind,sel.las)) # lasso:     29
length(setdiff(sel.sn,ind)) + length(setdiff(ind,sel.sn))   # sparsenet: 60

## (b) MSPE
set.seed(1000)
ntst <- 20
latmtx <- mvrnorm(ntst,p,mu=rep(0,p),Sigma=covmtx)
memtx <- (latmtx>=0)-(latmtx<0)
tst.mtx <- full.model.mtx(memtx)$model.mtx
xbtst <- inter + rowSums(tst.mtx[,ind])
ytst <- xbtst + rnorm(ntst,sd=1)

pred.cme <- predictcme(fit.cme,newx=tst.mtx)[,which(cv.cme$lambda.sib==cv.cme$params[1]),
            which(cv.cme$lambda.cou==cv.cme$params[2])]
pred.las <- predict(fit.las,newx=tst.mtx)[,which(cv.las$lambda==cv.las$lambda.min)]
pred.sn <- predict(fit.sn,newx=tst.mtx)[[which(fit.sn$gamma==cv.sn$parms.min[1])]][,
           which(fit.sn$lambda==cv.sn$parms.min[2])]
mean( (ytst-pred.cme)^2 ) # cmenet:    3.61
mean( (ytst-pred.las)^2 ) # lasso:     4.22
mean( (ytst-pred.sn)^2 )  # sparsenet: 4.00


  
## End(Not run)

[Package cmenet version 0.1.2 Index]