R: Augment a rulelist

augment.rulelist {tidyrules}

R Documentation

Augment a rulelist

Description

augment outputs a rulelist with an additional column named augmented_stats based on summary statistics calculated using attribute validation_data.

Usage

## S3 method for class 'rulelist'
augment(x, ...)

Arguments

`x`	A rulelist
`...`	(expressions) To be send to tidytable::summarise for custom aggregations. See examples.

Details

The dataframe-column augmented_stats will have these columns corresponding to the estimation_type:

For regression: support, IQR, RMSE
For classification: support, confidence, lift

along with custom aggregations.

Value

A rulelist with a new dataframe-column named augmented_stats.

Examples

# Examples for augment ------------------------------------------------------
library("magrittr")

# C5 ----
att = modeldata::attrition
set.seed(100)
train_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)

model_c5 = C50::C5.0(Attrition ~., data = att[train_index, ], rules = TRUE)
tidy_c5  =
  model_c5 %>%
  tidy() %>%
  set_validation_data(att[!train_index, ], "Attrition")

tidy_c5

augment(tidy_c5) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# augment with custom aggregator
augment(tidy_c5,output_counts = list(table(Attrition))) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# rpart ----
set.seed(100)
train_index = sample(c(TRUE, FALSE), nrow(iris), replace = TRUE)

model_class_rpart = rpart::rpart(Species ~ ., data = iris[train_index, ])
tidy_class_rpart  = tidy(model_class_rpart) %>%
  set_validation_data(iris[!train_index, ], "Species")
tidy_class_rpart

model_regr_rpart = rpart::rpart(Sepal.Length ~ ., data = iris[train_index, ])
tidy_regr_rpart  = tidy(model_regr_rpart) %>%
  set_validation_data(iris[!train_index, ], "Sepal.Length")
tidy_regr_rpart

# augment (classification case)
augment(tidy_class_rpart) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# augment (regression case)
augment(tidy_regr_rpart) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# party ----
pen = palmerpenguins::penguins %>%
  tidytable::drop_na(bill_length_mm)
set.seed(100)
train_index = sample(c(TRUE, FALSE), nrow(pen), replace = TRUE)

model_class_party = partykit::ctree(species ~ ., data = pen[train_index, ])
tidy_class_party  = tidy(model_class_party) %>%
  set_validation_data(pen[!train_index, ], "species")
tidy_class_party

model_regr_party =
  partykit::ctree(bill_length_mm ~ ., data = pen[train_index, ])
tidy_regr_party  = tidy(model_regr_party) %>%
  set_validation_data(pen[!train_index, ], "bill_length_mm")
tidy_regr_party

# augment (classification case)
augment(tidy_class_party) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# augment (regression case)
augment(tidy_regr_party) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

# cubist ----
att         = modeldata::attrition
set.seed(100)
train_index = sample(c(TRUE, FALSE), nrow(att), replace = TRUE)
cols_att    = setdiff(colnames(att), c("MonthlyIncome", "Attrition"))

model_cubist = Cubist::cubist(x = att[train_index, cols_att],
                              y = att[train_index, "MonthlyIncome"]
                              )

tidy_cubist = tidy(model_cubist) %>%
  set_validation_data(att[!train_index, ], "MonthlyIncome")
tidy_cubist

augment(tidy_cubist) %>%
  tidytable::unnest(augmented_stats, names_sep = "__") %>%
  tidytable::glimpse()

[Package tidyrules version 0.2.7 Index]