R: Estimate Y_A and Y_B bounds with Dvoretzky–Kiefer

get_Y_AB_bounds_DKW {RVCompare}

R Documentation

Estimate Y_A and Y_B bounds with Dvoretzky–Kiefer–Wolfowitz

Description

Estimate the confidence intervals for the cumulative distributions of Y_A and Y_B with Dvoretzky–Kiefer–Wolfowitz.

Usage

get_Y_AB_bounds_DKW(
  X_A_observed,
  X_B_observed,
  nOfEstimationPoints = 1000,
  alpha = 0.05,
  EPSILON = 1e-20,
  ignoreMinimumLengthCheck = FALSE
)

Arguments

`X_A_observed`	array of the observed samples (real values) of X_A.
`X_B_observed`	array of the observed samples (real values) of X_B.
`nOfEstimationPoints`	(optional, default 1000) the number of points in the interval [0,1] in which the density is estimated.
`alpha`	(optional, default value 0.05) the error of the confidence interval. If alpha = 0.05 then we have 95 percent confidence interval.
`EPSILON`	(optional, default value 1e-20) minimum difference between two values to be considered different.
`ignoreMinimumLengthCheck`	(optional, default value FALSE) wether to check for a minimum length in X_A and X_B.

Value

Returns a list with the following fields:

- p: values in the interval [0,1] that represent the nOfEstimationPoints points in which the densities are estimated. Useful for plotting.

- Y_A_cumulative_estimation: an array with the empirical cumulative diustribution function of Y_A from 0 to p[[i]].

- Y_A_cumulative_upper: an array with the upper bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_A_cumulative_lower: an array with the lower bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_B_cumulative_estimation: The same as Y_A_cumulative_estimation for Y_B.

- Y_B_cumulative_upper: The same as Y_A_cumulative_upper for Y_B

- Y_B_cumulative_lower: The same as Y_A_cumulative_lower for Y_B

- diff_estimation: Y_A_cumulative_estimation - Y_B_cumulative_estimation

- diff_upper: an array with the upper bounds of confidence 1 - alpha of the difference between the cumulative distributions

- diff_lower: an array with the lower bounds of confidence 1 - alpha of the difference between the cumulative distributions

Examples

library(ggplot2)
### Example 1 ###
X_A_observed <- rnorm(100, mean = 2, sd = 1)
X_B_observed <- rnorm(100, mean = 2.1, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)
fig1 = plot_Y_AB(res, plotDifference=FALSE) + ggtitle("Example 1")
print(fig1)


### Example 2 ###
# Comparing the estimations with the actual distributions for two normal distributions.
##################################
## sample size = 100 #############
##################################
X_A_observed <- rnorm(100,mean = 1, sd = 1)
X_B_observed <- rnorm(100,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(X_A_observed_large_sample,
 X_B_observed_large_sample, nOfEstimationPoints=1e4, EPSILON=1e-20)


actualDistributions$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values
actualDistributions$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+

ggtitle("100 samples used in the estimation")
print(fig)

###################################
## sample size = 300 ##############
###################################
X_A_observed <- rnorm(300,mean = 1, sd = 1)
X_B_observed <- rnorm(300,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(X_A_observed_large_sample,
 X_B_observed_large_sample, nOfEstimationPoints=1e4, EPSILON=1e-20)


actualDistributions$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values
actualDistributions$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
        p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
        data = actualDistributions)$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+
ggtitle("300 samples used in the estimation")
print(fig)

[Package RVCompare version 0.1.8 Index]