pinferunemjmcmc {EMJMCMC}R Documentation

A wrapper for running the GLMM, BLR, or DBRM based inference and predictions in an expert but rather easy to use way

Description

A wrapper for running the GLMM, BLR, or DBRM based inference and predictions in an expert but rather easy to use way

Usage

pinferunemjmcmc(
  n.cores = 4,
  mcgmj = mcgmjpse,
  report.level = 0.5,
  simplify = FALSE,
  num.mod.best = 1000,
  predict = FALSE,
  test.data = 1,
  link.function = function(z) z,
  runemjmcmc.params
)

Arguments

n.cores

the maximal number of cores (and (R)(G)MJMCMC threads) to be addressed in the analysis

mcgmj

an mclapply like function for performing for performing parallel computing, do not change the default unless you are using Windows

report.level

a numeric value in (0,1) specifying the threshold for detections based on the marginal inclusion probabilities

simplify

a logical value specifying in simplification of the features is to be done after the search is completed

num.mod.best

the number of the best models in the thread to calculate marginal inclusion probabilities

predict

a logical value specifying if predictions should be done by the run of pinferunemjmcmc

test.data

covariates data.frame to be used for predictions

link.function

the link functions to be used to make predictions

runemjmcmc.params

a vector of parameters of runemjmcmc function, see the help of runemjmcmc for details

Value

a list of

feat.stat

detected features or logical expressions and their marginal inclusion probabilities

predictions

predicted values if they are required, NULL otherwise

allposteriors

all visited by (R)(G)MJMCMC features and logical expressions and their marginal inclusion probabilities

threads.stats

a vector of detailed outputs of individual n.cores threads of (R)(G)MJMCMC run

See Also

runemjmcmc LogrRegr DeepRegr LinRegr

Examples


# inference

X <- read.csv(system.file("extdata", "exa1.csv", package="EMJMCMC"))
data.example <- as.data.frame(X)

# specify the initial formula
formula1 <- as.formula(
  paste(colnames(X)[5], "~ 1 +", paste0(colnames(X)[-5], collapse = "+"))
)

# define the number or cpus
M <- 1L
# define the size of the simulated samples
NM <- 1000
# define \k_{max} + 1 from the paper
compmax <- 16
# define treshold for preinclusion of the tree into the analysis
th <- (10)^(-5)
# define a final treshold on the posterior marginal probability for reporting a
# tree
thf <- 0.05
# specify tuning parameters of the algorithm for exploring DBRM of interest
# notice that allow_offsprings=3 corresponds to the GMJMCMC runs and
# allow_offsprings=4 -to the RGMJMCMC runs

  res1 <- pinferunemjmcmc(
    n.cores = M, report.level = 0.5, num.mod.best = NM, simplify = TRUE,
    runemjmcmc.params = list(
      formula = formula1, data = data.example, estimator = estimate.gamma.cpen_2,
      estimator.args = list(data = data.example), recalc_margin = 249,
      save.beta = FALSE, interact = TRUE, outgraphs = FALSE,
      relations = c("to23", "expi", "logi", "to35", "sini", "troot", "sigmoid"),
      relations.prob = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
      interact.param = list(allow_offsprings = 3, mutation_rate = 250,
      last.mutation = 10000, max.tree.size = 5, Nvars.max = 15,
      p.allow.replace = 0.9, p.allow.tree = 0.01, p.nor = 0.9, p.and = 0.9),
      n.models = 10000, unique = TRUE, max.cpu = M, max.cpu.glob = M,
      create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE,
      burn.in = 100, print.freq = 1000,
      advanced.param = list(
        max.N.glob = as.integer(10),
        min.N.glob = as.integer(5),
        max.N = as.integer(3),
        min.N = as.integer(1),
        printable = FALSE
      )
    )
  )
  print(res1$feat.stat)


# prediction

compmax <- 21

# read in the train and test data sets
test <- read.csv(
  system.file("extdata", "breast_cancer_test.csv", package="EMJMCMC"),
  header = TRUE, sep = ","
)[, -1]
train <- read.csv(
  system.file("extdata", "breast_cancer_train.csv", package="EMJMCMC"),
  header = TRUE, sep = ","
)[, -1]

# transform the train data set to a data.example data.frame that EMJMCMC class
# will internally use
data.example <- as.data.frame(train)

# specify the link function that will be used in the prediction phase
g <- function(x) {
  return((x <- 1 / (1 + exp(-x))))
}

formula1 <- as.formula(
  paste(
    colnames(data.example)[31], "~ 1 +",
    paste0(colnames(data.example)[-31], collapse = "+")
  )
)


  # Defining a custom estimator function
  estimate.bas.glm.cpen <- function(
    formula, data, family, prior, logn, r = 0.1, yid=1,
    relat =c("cosi","sigmoid","tanh","atan","erf","m(")
  ) {
    #only poisson and binomial families are currently adopted
    X <- model.matrix(object = formula,data = data)
    capture.output({out <- BAS::bayesglm.fit(x = X, y = data[,yid], family=family,coefprior=prior)})
    fmla.proc<-as.character(formula)[2:3]
    fobserved <- fmla.proc[1]
    fmla.proc[2]<- stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "")
    fmla.proc[2]<- stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "")
    sj<-2*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = "*"))
    sj<-sj+1*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = "+"))
    for(rel in relat) {
      sj<-sj+2*(stringi::stri_count_fixed(str = fmla.proc[2], pattern = rel))
    }
    mlik = ((-out$deviance +2*log(r)*sum(sj)))/2
    return(
      list(
        mlik = mlik, waic = -(out$deviance + 2*out$rank),
        dic = -(out$deviance + logn*out$rank),
        summary.fixed = list(mean = coefficients(out))
      )
    )
  }
  res <- pinferunemjmcmc(
    n.cores = M, report.level = 0.5, num.mod.best = NM, simplify = TRUE,
    predict = TRUE, test.data = as.data.frame(test), link.function = g,
    runemjmcmc.params = list(
      formula = formula1, data = data.example, gen.prob = c(1, 1, 1, 1, 0),
      estimator = estimate.bas.glm.cpen,
      estimator.args = list(
        data = data.example, prior = BAS::aic.prior(), family = binomial(),
        yid = 31, logn = log(143), r = exp(-0.5)
      ), recalc_margin = 95, save.beta = TRUE, interact = TRUE,
      relations = c("gauss", "tanh", "atan", "sin"),
      relations.prob = c(0.1, 0.1, 0.1, 0.1),
      interact.param = list(
        allow_offsprings = 4, mutation_rate = 100, last.mutation = 1000,
        max.tree.size = 6, Nvars.max = 20, p.allow.replace = 0.5,
        p.allow.tree = 0.4, p.nor = 0.3, p.and = 0.9
      ), n.models = 7000, unique = TRUE, max.cpu = M, max.cpu.glob = M,
      create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE,
      burn.in = 100, print.freq = 1000,
      advanced.param = list(
        max.N.glob = as.integer(10), min.N.glob = as.integer(5),
        max.N = as.integer(3), min.N = as.integer(1), printable = FALSE
      )
    )
  )

  for (jjjj in 1:10)
  {
    resw <- as.integer(res$predictions >= 0.1 * jjjj)
    prec <- (1 - sum(abs(resw - test$X), na.rm = TRUE) / length(resw))
    print(prec)
    # FNR
    ps <- which(test$X == 1)
    fnr <- sum(abs(resw[ps] - test$X[ps])) / (sum(abs(resw[ps] - test$X[ps])) + length(ps))

    # FPR
    ns <- which(test$X == 0)
    fpr <- sum(abs(resw[ns] - test$X[ns])) / (sum(abs(resw[ns] - test$X[ns])) + length(ns))
  }


[Package EMJMCMC version 1.5.0 Index]