w_naive_bayes {rbooster} — R Documentation

Naive Bayes algorithm with case weights


Function for Naive Bayes algorithm classification with case weights.


w_naive_bayes(x_train, y_train, w = NULL, discretize = TRUE, breaks = 3)

w_gaussian_naive_bayes(x_train, y_train, w = NULL)

w_discrete_naive_bayes(x_train, y_train, breaks = 3, w = NULL)



x_train: explanatory variables.


y_train: a factor class variable.


w: a vector of case weights.


discretize: If TRUE, numerical variables are discretized and discrete naive Bayes is applied; if FALSE, Gaussian naive Bayes is applied.


breaks: number of break points for discretization. Ignored if discretize = FALSE.


w_naive_bayes calls w_gaussian_naive_bayes or w_discrete_naive_bayes.

If discretize = FALSE, w_gaussian_naive_bayes is called. It uses Gaussian densities with case weights and allows multiclass classification.

If discretize = TRUE, w_discrete_naive_bayes is called. It uses conditional probabilities for each category with Laplace smoothing and allows multiclass classification.


A w_naive_bayes object with the components below.


Number of cases in the input dataset.


Number of explanatory variables.


A list of datasets, which are x_train separated for each class.


Number of cases for each class in input dataset.


Number of classes in class variable.


Prior probabilities.


Names of classes in class variable.


Weighted mean estimations for each variable.


Weighted standard deviation estimations for each variable.


Labels for discretized variables.


Upper and lower boundaries for discretization.


Probabilities for each category of each variable.


## short functions for cross-validation and data simulation
#' Draw a stratified sample of training-row indices.
#'
#' For every distinct value of `y`, a share `train_proportion` of the
#' positions holding that value is sampled without replacement.
#'
#' @param y Vector of class labels.
#' @param train_proportion Fraction of each class to sample.
#' @return Integer vector of sampled positions in `y`, grouped by class in
#'   order of first appearance.
cv_sampler <- function(y, train_proportion) {
  unlist(lapply(unique(y), function(m) {
    class_idx <- which(y == m)
    # Round the scaled count, not the raw count: a fractional size would be
    # silently truncated by the sampler.
    n_take <- round(length(class_idx) * train_proportion)
    # Sample positions via sample.int() so that a class with one member is
    # not interpreted by sample() as the range 1:class_idx.
    class_idx[sample.int(length(class_idx), n_take)]
  }))
}

#' Simulate a k-class Gaussian dataset and split it into train and test sets.
#'
#' Every class receives round(n / k) rows of p normal variables with standard
#' deviation 2. Class means are evenly spaced between 0 and k * 1.5.
#'
#' @param n Target total number of cases (each class gets round(n / k) rows).
#' @param p Number of explanatory variables.
#' @param k Number of classes; labels are the first k lowercase letters.
#' @param train_proportion Forwarded to cv_sampler() to choose training rows.
#' @return A list with `data` (all rows, class in column `y`), `data_train`
#'   (rows chosen by cv_sampler()) and `data_test` (the remaining rows).
data_simulation <- function(n, p, k, train_proportion) {
  n_per_class <- round(n / k)
  means <- seq(0, k * 1.5, length.out = k)
  x <- do.call(rbind, lapply(means, function(m) {
    matrix(
      data = rnorm(n = n_per_class * p, mean = m, sd = 2),
      nrow = n_per_class
    )
  }))
  y <- factor(rep(letters[seq_len(k)], each = n_per_class))
  train_i <- cv_sampler(y, train_proportion)

  data <- data.frame(x, y = y)
  # Pick the test rows by set difference: data[-train_i, ] would yield zero
  # rows (instead of all rows) if train_i were empty.
  test_i <- setdiff(seq_len(nrow(data)), train_i)
  list(
    data = data,
    data_train = data[train_i, ],
    data_test = data[test_i, ]
  )
}

### Binary classification example ----
# Simulate a two-class problem and keep 80% of each class for training.
n <- 500
p <- 10
k <- 2
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# The first p columns are the predictors; column p + 1 is the class label.
x <- dat$data[, seq_len(p)]
y <- dat$data[[p + 1]]

x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[[p + 1]]

x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[[p + 1]]

## Discretized Naive Bayes ----
# Through the wrapper ...
mm1 <- w_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  discretize = TRUE,
  breaks = 4
)
preds1 <- predict(object = mm1, newdata = x_test, type = "pred")
table(y_test, preds1)

# ... or by calling the discrete fitter directly.
mm2 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4
)
preds2 <- predict(object = mm2, newdata = x_test, type = "pred")
table(y_test, preds2)

## Gaussian Naive Bayes ----
# Through the wrapper ...
mm3 <- w_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  discretize = FALSE
)
preds3 <- predict(object = mm3, newdata = x_test, type = "pred")
table(y_test, preds3)

# ... or by calling the Gaussian fitter directly.
mm4 <- w_gaussian_naive_bayes(x_train = x_train, y_train = y_train)
preds4 <- predict(object = mm4, newdata = x_test, type = "pred")
table(y_test, preds4)

## Multiclass example ----
# Same workflow as the binary case, now with five classes.
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# The first p columns are the predictors; column p + 1 is the class label.
x <- dat$data[, seq_len(p)]
y <- dat$data[[p + 1]]

x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[[p + 1]]

x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[[p + 1]]

# Discretized fit
mm5 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4
)
preds5 <- predict(object = mm5, newdata = x_test, type = "pred")
table(y_test, preds5)

# Gaussian fit
mm6 <- w_gaussian_naive_bayes(x_train = x_train, y_train = y_train)
preds6 <- predict(object = mm6, newdata = x_test, type = "pred")
table(y_test, preds6)

## example for case weights
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)
x <- dat$data[,1:p]
y <- dat$data[,p+1]

x_train <- dat$data_train[,1:p]
y_train <- dat$data_train[,p+1]

# Derive the hold-out set from this simulation. Without these two lines the
# predictions below would be scored on x_test / y_test left over from an
# earlier simulation, whose rows are not disjoint from this training set.
x_test <- dat$data_test[,1:p]
y_test <- dat$data_test[,p+1]

# discretized
# Classes "a" and "c" get full weight; every other class is down-weighted.
weights <- ifelse(y_train == "a" | y_train == "c", 1, 0.01)

mm7 <- w_discrete_naive_bayes(x_train = x_train, y_train = y_train, breaks = 4, w = weights)

preds7 <- predict(object = mm7, newdata = x_test, type = "pred")
table(y_test, preds7)

# gaussian
# Classes "b" and "d" get full weight; every other class is down-weighted.
weights <- ifelse(y_train == "b" | y_train == "d", 1, 0.01)

mm8 <- w_gaussian_naive_bayes(x_train = x_train, y_train = y_train, w = weights)

preds8 <- predict(object = mm8, newdata = x_test, type = "pred")
table(y_test, preds8)

[Package rbooster version 1.1.0 Index]