# get_Y_AB_bounds_DKW: Estimate Y_A and Y_B bounds with Dvoretzky–Kiefer–Wolfowitz In RVCompare: Compare Real Valued Random Variables

 get_Y_AB_bounds_DKW R Documentation

## Estimate Y_A and Y_B bounds with Dvoretzky–Kiefer–Wolfowitz

### Description

Estimate the confidence intervals for the cumulative distributions of Y_A and Y_B with Dvoretzky–Kiefer–Wolfowitz.

### Usage

get_Y_AB_bounds_DKW(
X_A_observed,
X_B_observed,
nOfEstimationPoints = 1000,
alpha = 0.05,
EPSILON = 1e-20,
ignoreMinimumLengthCheck = FALSE
)

### Arguments

 X_A_observed array of the observed samples (real values) of X_A. X_B_observed array of the observed samples (real values) of X_B. nOfEstimationPoints (optional, default 1000) the number of points in the interval [0,1] in which the density is estimated. alpha (optional, default value 0.05) the error of the confidence interval. If alpha = 0.05 then we have 95 percent confidence interval. EPSILON (optional, default value 1e-20) minimum difference between two values to be considered different. ignoreMinimumLengthCheck (optional, default value FALSE) wether to check for a minimum length in X_A and X_B.

### Value

Returns a list with the following fields:

- p: values in the interval [0,1] that represent the nOfEstimationPoints points in which the densities are estimated. Useful for plotting.

- Y_A_cumulative_estimation: an array with the empirical cumulative diustribution function of Y_A from 0 to p[[i]].

- Y_A_cumulative_upper: an array with the upper bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_A_cumulative_lower: an array with the lower bounds of confidence 1 - alpha of the cumulative density of Y_A

- Y_B_cumulative_estimation: The same as Y_A_cumulative_estimation for Y_B.

- Y_B_cumulative_upper: The same as Y_A_cumulative_upper for Y_B

- Y_B_cumulative_lower: The same as Y_A_cumulative_lower for Y_B

- diff_estimation: Y_A_cumulative_estimation - Y_B_cumulative_estimation

- diff_upper: an array with the upper bounds of confidence 1 - alpha of the difference between the cumulative distributions

- diff_lower: an array with the lower bounds of confidence 1 - alpha of the difference between the cumulative distributions

### Examples

library(ggplot2)
### Example 1 ###
X_A_observed <- rnorm(100, mean = 2, sd = 1)
X_B_observed <- rnorm(100, mean = 2.1, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)
fig1 = plot_Y_AB(res, plotDifference=FALSE) + ggtitle("Example 1")
print(fig1)

### Example 2 ###
# Comparing the estimations with the actual distributions for two normal distributions.
##################################
## sample size = 100 #############
##################################
X_A_observed <- rnorm(100,mean = 1, sd = 1)
X_B_observed <- rnorm(100,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(X_A_observed_large_sample,
X_B_observed_large_sample, nOfEstimationPoints=1e4, EPSILON=1e-20)

actualDistributions\$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
data = actualDistributions)\$fitted.values
actualDistributions\$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
data = actualDistributions)\$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+

ggtitle("100 samples used in the estimation")
print(fig)

###################################
## sample size = 300 ##############
###################################
X_A_observed <- rnorm(300,mean = 1, sd = 1)
X_B_observed <- rnorm(300,mean = 1.3, sd = 0.5)
res <- get_Y_AB_bounds_DKW(X_A_observed, X_B_observed)

X_A_observed_large_sample <- sort(rnorm(1e4, mean = 1, sd = 1))
X_B_observed_large_sample <- sort(rnorm(1e4, mean = 1.3, sd = 0.5))
actualDistributions <- getEmpiricalCumulativeDistributions(X_A_observed_large_sample,
X_B_observed_large_sample, nOfEstimationPoints=1e4, EPSILON=1e-20)

actualDistributions\$Y_A_cumulative_estimation <- lm(Y_A_cumulative_estimation ~
p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
data = actualDistributions)\$fitted.values
actualDistributions\$Y_B_cumulative_estimation <- lm(Y_B_cumulative_estimation ~
p + I(p^2) + I(p^3)+ I(p^4)+ I(p^5)+ I(p^6)+I(p^7)+ I(p^8),
data = actualDistributions)\$fitted.values

fig = plot_Y_AB(res, plotDifference=FALSE) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_A_cumulative_estimation, colour = "Actual Y_A", linetype="Actual Y_A")) +

geom_line(data=as.data.frame(actualDistributions),
aes(x=p, y=Y_B_cumulative_estimation, colour = "Actual Y_B", linetype="Actual Y_B")) +

scale_colour_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="#00BFC4", "X_B"="#F8766D", "Actual Y_A"="#0000FF", "Actual Y_B"="#FF0000"))+

scale_linetype_manual("", breaks = c("X_A", "X_B","Actual Y_A", "Actual Y_B"),
values = c("X_A"="solid", "X_B"="dashed", "Actual Y_A"="solid", "Actual Y_B"="solid"))+
ggtitle("300 samples used in the estimation")
print(fig)

RVCompare documentation built on Aug. 21, 2023, 5:13 p.m.