R: Control Polygon Reduction

cpr {cpr}

R Documentation

Control Polygon Reduction

Description

Run the Control Polygon Reduction Algorithm.

Usage

cpr(x, progress = c("cpr", "influence", "none"), ...)

Arguments

`x`	a `cpr_cp` object
`progress`	controls the level of progress messaging. See Details.
`...`	not currently used

Details

cpr runs the control polygon reduction algorithm.

The algorithm is generally speaking fast, but can take a long time to run if the number of interior knots of initial control polygon is high. To help track the progress of the execution you can have progress = "cpr" which will show a progress bar incremented for each iteration of the CPR algorithm. progress = "influence" will use a combination of messages and progress bars to report on each step in assessing the influence of all the internal knots for each iteration of the CPR algorithm. See influence_of_iknots for more details.

Value

a list of cpr_cp objects

Examples

#############################################################################
# Example 1: find a model for log10(pdg) = f(day) from the spdg data set

# need the lme4 package to fit a mixed effect model
require(lme4)

# construct the initial control polygon.  Forth order spline with fifty
# internal knots.  Remember degrees of freedom equal the polynomial order
# plus number of internal knots.
init_cp <- cp(log10(pdg) ~ bsplines(day, df = 24, bknots = c(-1, 1)) + (1|id),
              data = spdg, method = lme4::lmer)
cpr_run <- cpr(init_cp)
plot(cpr_run, color = TRUE)

s <- summary(cpr_run)
s
plot(s, type = "rse")

# preferable model is in index 5 by eye
preferable_cp <- cpr_run[["cps"]][[5]]


#############################################################################
# Example 2: logistic regression
# simulate a binary response Pr(y = 1 | x) = p(x)
p <- function(x) { 0.65 * sin(x * 0.70) + 0.3 * cos(x * 4.2) }

set.seed(42)
x <- runif(2500, 0.00, 4.5)
sim_data <- data.frame(x = x, y = rbinom(2500, 1, p(x)))

# Define the initial control polygon
init_cp <- cp(formula = y ~ bsplines(x, df = 24, bknots = c(0, 4.5)),
              data    = sim_data,
              method  = glm,
              method.args = list(family  = binomial())
              )

# run CPR
cpr_run <- cpr(init_cp)

# preferable model is in index 6
s <- summary(cpr_run)
plot(s, color = TRUE, type = "rse")

plot(
    cpr_run
  , color = TRUE
  , from = 5
  , to = 7
  , show_spline = TRUE
  , show_cp = FALSE
  )


# plot the fitted spline and the true p(x)
sim_data$pred_select_p <- plogis(predict(cpr_run[[7]], newdata = sim_data))
ggplot2::ggplot(sim_data) +
ggplot2::theme_bw() +
ggplot2::aes(x = x) +
ggplot2::geom_point(mapping = ggplot2::aes(y = y), alpha = 0.1) +
ggplot2::geom_line(
    mapping = ggplot2::aes(y = pred_select_p, color = "pred_select_p")
  ) +
ggplot2::stat_function(fun = p, mapping = ggplot2::aes(color = 'p(x)'))

# compare to gam and a binned average
sim_data$x2 <- round(sim_data$x, digits = 1)
bin_average <-
  lapply(split(sim_data, sim_data$x2), function(x) {
           data.frame(x = x$x2[1], y = mean(x$y))
         })
bin_average <- do.call(rbind, bin_average)

ggplot2::ggplot(sim_data) +
ggplot2::theme_bw() +
ggplot2::aes(x = x) +
ggplot2::stat_function(fun = p, mapping = ggplot2::aes(color = 'p(x)')) +
ggplot2::geom_line(
    mapping = ggplot2::aes(y = pred_select_p, color = "pred_select_p")
  ) +
ggplot2::stat_smooth(mapping = ggplot2::aes(y = y, color = "gam"),
                     method = "gam",
                     formula = y ~ s(x, bs = "cs"),
                     se = FALSE,
                     n = 1000) +
ggplot2::geom_line(data = bin_average
                   , mapping = ggplot2::aes(y = y, color = "bin_average"))

[Package cpr version 0.4.0 Index]