w_naive_bayes {rbooster} | R Documentation |
Naive Bayes algorithm with case weights
Description
Function for Naive Bayes algorithm classification with case weights.
Usage
w_naive_bayes(x_train, y_train, w = NULL, discretize = TRUE, breaks = 3)
w_gaussian_naive_bayes(x_train, y_train, w = NULL)
w_discrete_naive_bayes(x_train, y_train, breaks = 3, w = NULL)
Arguments
x_train |
explanatory variables. |
y_train |
a factor class variable. |
w |
a vector of case weights. |
discretize |
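If `TRUE`, variables are discretized and `w_discrete_naive_bayes` is called; if `FALSE`, `w_gaussian_naive_bayes` is called. |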
breaks |
number of break points for discretization. Ignored if `discretize = FALSE`. |
Details
w_naive_bayes
calls w_gaussian_naive_bayes
or w_discrete_naive_bayes
.
If discretize = FALSE
, w_gaussian_naive_bayes
is called. It uses Gaussian densities with case weights and allows
multiclass classification.
If discretize = TRUE
, w_discrete_naive_bayes
is called. It uses conditional probabilities for each category with
Laplace smoothing and allows multiclass classification.
Value
a w_naive_bayes
object with the components below.
n_train |
Number of cases in the input dataset. |
p |
Number of explanatory variables. |
x_classes |
A list of datasets, which are the rows of `x_train` separated by class. |
n_classes |
Number of cases for each class in input dataset. |
k_classes |
Number of classes in class variable. |
priors |
Prior probabilities. |
class_names |
Names of classes in class variable. |
means |
Weighted mean estimations for each variable. |
stds |
Weighted standard deviation estimations for each variable. |
categories |
Labels for discretized variables. |
boundaries |
Upper and lower boundaries for discretization. |
ps |
Probabilities for each category of each variable.
Examples
library(rbooster)
## short functions for cross-validation and data simulation
# Draw a stratified training index: for every distinct value of `y`, sample
# round(n_class * train_proportion) positions without replacement.
#
# y                 vector of class labels (typically a factor).
# train_proportion  fraction of each class to put in the training set,
#                   a single number in [0, 1].
#
# Returns an integer vector of positions in `y`, grouped by class in the
# order the classes first appear in `y`.
cv_sampler <- function(y, train_proportion) {
  stopifnot(
    is.numeric(train_proportion),
    length(train_proportion) == 1L,
    train_proportion >= 0,
    train_proportion <= 1
  )
  unlist(lapply(unique(y), function(m) {
    members <- which(y == m)
    # Round the product, not the count, so the sample size is a whole number
    # instead of a fraction that sample() would silently truncate.
    n_draw <- round(length(members) * train_proportion)
    # Index through sample.int(): sample(members, ...) would treat a single
    # member `k` as the range 1:k and could return rows of other classes.
    members[sample.int(length(members), n_draw)]
  }))
}
# Simulate a k-class Gaussian data set and split it into train and test parts.
#
# n                 approximate total number of rows; each class receives
#                   round(n / k) rows.
# p                 number of explanatory variables (columns of x).
# k                 number of classes; class labels are the first k letters.
# train_proportion  fraction of each class assigned to the training set,
#                   passed on to cv_sampler().
#
# Class means are spread evenly over [0, 1.5 * k]; every variable is drawn
# from a normal distribution with that class mean and sd = 2.
#
# Returns a list with `data` (all rows), `data_train` and `data_test`.
data_simulation <- function(n, p, k, train_proportion) {
  n_per_class <- round(n / k)
  class_means <- seq(0, k * 1.5, length.out = k)

  x <- do.call(rbind, lapply(class_means, function(m) {
    matrix(
      data = rnorm(n = n_per_class * p, mean = m, sd = 2),
      nrow = n_per_class
    )
  }))
  y <- factor(rep(letters[seq_len(k)], each = n_per_class))

  train_i <- cv_sampler(y, train_proportion)
  data <- data.frame(x, y = y)

  # Build the complement explicitly: data[-train_i, ] selects zero rows when
  # train_i is empty, which would leave the test set empty instead of full.
  test_i <- setdiff(seq_len(nrow(data)), train_i)

  list(
    data = data,
    data_train = data[train_i, ],
    data_test = data[test_i, ]
  )
}
### Binary classification example
# Simulate a two-class problem and keep 80% of each class for training.
n <- 500
p <- 10
k <- 2
dat <- data_simulation(n, p, k, train_proportion = 0.8)

# The first p columns are the explanatory variables, column p + 1 is the class.
x <- dat[["data"]][, seq_len(p)]
y <- dat[["data"]][, p + 1]
x_train <- dat[["data_train"]][, seq_len(p)]
y_train <- dat[["data_train"]][, p + 1]
x_test <- dat[["data_test"]][, seq_len(p)]
y_test <- dat[["data_test"]][, p + 1]

## Discretized Naive Bayes, through the general entry point
mm1 <- w_naive_bayes(
  x_train,
  y_train,
  discretize = TRUE,
  breaks = 4
)
preds1 <- predict(mm1, newdata = x_test, type = "pred")
table(y_test, preds1)

# ... or by calling the discrete variant directly
mm2 <- w_discrete_naive_bayes(x_train, y_train, breaks = 4)
preds2 <- predict(mm2, newdata = x_test, type = "pred")
table(y_test, preds2)

## Gaussian Naive Bayes, through the general entry point
mm3 <- w_naive_bayes(x_train, y_train, discretize = FALSE)
preds3 <- predict(mm3, newdata = x_test, type = "pred")
table(y_test, preds3)

# ... or by calling the Gaussian variant directly
mm4 <- w_gaussian_naive_bayes(x_train, y_train)
preds4 <- predict(mm4, newdata = x_test, type = "pred")
table(y_test, preds4)
## Multiclass example
# Same setup as the binary case, but with five classes.
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n, p, k, train_proportion = 0.8)

# The first p columns are the explanatory variables, column p + 1 is the class.
x <- dat[["data"]][, seq_len(p)]
y <- dat[["data"]][, p + 1]
x_train <- dat[["data_train"]][, seq_len(p)]
y_train <- dat[["data_train"]][, p + 1]
x_test <- dat[["data_test"]][, seq_len(p)]
y_test <- dat[["data_test"]][, p + 1]

# Discretized variant
mm5 <- w_discrete_naive_bayes(x_train, y_train, breaks = 4)
preds5 <- predict(mm5, newdata = x_test, type = "pred")
table(y_test, preds5)

# Gaussian variant
mm6 <- w_gaussian_naive_bayes(x_train, y_train)
preds6 <- predict(mm6, newdata = x_test, type = "pred")
table(y_test, preds6)
## Example for case weights
# Simulate a fresh five-class data set and keep 80% of each class for training.
n <- 500
p <- 10
k <- 5
dat <- data_simulation(n = n, p = p, k = k, train_proportion = 0.8)

# The first p columns are the explanatory variables, column p + 1 is the class.
x <- dat$data[, seq_len(p)]
y <- dat$data[, p + 1]
x_train <- dat$data_train[, seq_len(p)]
y_train <- dat$data_train[, p + 1]
# Take the test split from this simulation too, so the predictions below are
# scored on data belonging to the same draw as the training set.
x_test <- dat$data_test[, seq_len(p)]
y_test <- dat$data_test[, p + 1]

# Discretized: classes "a" and "c" get full weight, the others almost none
weights <- ifelse(y_train %in% c("a", "c"), 1, 0.01)
mm7 <- w_discrete_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  breaks = 4,
  w = weights
)
preds7 <- predict(object = mm7, newdata = x_test, type = "pred")
table(y_test, preds7)

# Gaussian: classes "b" and "d" get full weight, the others almost none
weights <- ifelse(y_train %in% c("b", "d"), 1, 0.01)
mm8 <- w_gaussian_naive_bayes(
  x_train = x_train,
  y_train = y_train,
  w = weights
)
preds8 <- predict(object = mm8, newdata = x_test, type = "pred")
table(y_test, preds8)