Nothing
#' Predict conditional probabilities
#'
#' @description The function \code{predict.qad()} can be utilized to predict the probabilities of the event that Y lies in
#' specific intervals given X=x, or vice versa. Thereby, the intervals are calculated as retransformed intervals (by using the empirical quantil function) defining the checkerboard grid. Additionally, the mass of the conditional distribution functions are plotted.
#' The prediction can be computed in the sample setting as well as in the copula setting (pseudo-observation in the unit square).
#'
#' @param object an object of class 'qad', which determines the underlying checkerboard aggregation.
#' @param values a vector containing the x or the y values for which the conditional probabilities should be predicted.
#' @param conditioned a character specifying on which variable is conditioned. Options are "x1" (default) or "x2".
#' @param nr_intervals an integer, which determines a different number of intervals for the prediction (only possible in the copula setting).
#' @param copula a logical (default = FALSE) determining whether the empirical checkerboard copula is used or the retransformed data.
#' @param prediction_interval a vector specifying the interval boundaries for which the conditional probability is computed. Options are NULL (default) to predict the conditional probabilities for all intervals or a vector c(lower boundary, upper boundary) indicating the boundaries.
#' @param pred_plot a logical indicating if the conditional probabilities are plotted.
#' @param panel.grid a logical indicating whether the panel.grid is plotted.
#' @param ... some methods for this generic require additional arguments. None are used in this method.
#'
#' @return a list containing a data.frame with the computed intervals (lower and upper boundaries) and the prediction probabilities (i.e., the probability that Y lies in the interval Ii given X = x).
#' Furthermore, a heatmap depicting the mass of the conditional distribution functions is returned.
#'
#' @note Predictions are only possible for values within the range of the sample (or between 0 and 1 in the copula setting). For given values exceeding the range NA is returned.
#'
#' @examples
#' set.seed(1)
#' n <- 100
#' x <- runif(n, -4 ,4)
#' y <- x^2 + rnorm(n, 0, 1)
#' sample <- data.frame(x, y)
#'
#' ##(Not Run)
#' qad.fit <- qad(sample)
#' predict.qad(qad.fit, values = c(-2,0.6), conditioned = "x1", pred_plot = TRUE)
#' predict.qad(qad.fit, values = c(1,9), conditioned = "x2", pred_plot = TRUE)
#' predict.qad(qad.fit, values = c(-2,0.6), conditioned = "x1", pred_plot = FALSE,
#' nr_intervals = 4)
#' predict.qad(qad.fit, values = c(-2,0.6), conditioned = "x1", pred_plot = FALSE,
#' prediction_interval = c(4,6))
#' predict.qad(qad.fit, values = c(4,0.6), conditioned = "x2", pred_plot = FALSE,
#' prediction_interval = c(2,3))
#'
#' qad.pred <- predict.qad(qad.fit, values = c(-2,0.6), conditioned = "x1", pred_plot = FALSE)
#' qad.pred$prediction
#' qad.pred$plot
#'
#'
#' @export predict qad
predict.qad <- function(object,
values,
conditioned = "x1",
nr_intervals = NULL,
prediction_interval = NULL,
copula = FALSE,
pred_plot = FALSE,
panel.grid = TRUE, ...) {
if(copula){
pred_cop <- .predict_qad_copula(values = values, conditioned = conditioned, qad_output = object,
nr_intervals = nr_intervals, prediction_interval = prediction_interval)
pred_prob <- pred_cop$pred_prob
grid <- seq(0,1,length.out = object$resolution+1)
index <- pred_cop$indices
#Prediction plot
p <- plot.qad(object, copula = TRUE, panel.grid = panel.grid)
if(conditioned == 'x1'){
df_rect <- data.frame(xmin = grid[index],
xmax = grid[index+1],
ymin = min(pred_prob$lowerBound),ymax = max(pred_prob$upperBound))
}else if(conditioned == "x2"){
df_rect <- data.frame(ymin = grid[index],
ymax = grid[index+1],
xmin = min(pred_prob$lowerBound), xmax = max(pred_prob$upperBound))
}
p <- p + geom_rect(data = df_rect, aes_(xmin = ~xmin, xmax = ~xmax, ymin = ~ymin, ymax = ~ymax), color = 'red', fill = NA, size = 1)
}else{
if(!is.null(nr_intervals)){
warning("nr_intervals only works in the copula setting. Checkerboard resolution is used instead.")
}
grid <- seq(0,1,length.out = object$resolution + 1)
GridX <- quantile(object$data$x1, grid, type = 1)
GridY <- quantile(object$data$x2, grid, type = 1)
if(conditioned == "x1"){
x_index <- as.numeric(cut(values, GridX, labels = 1:(length(GridX)-1), include.lowest = T))
new_values <- (grid[x_index] + grid[x_index + 1])/2
cInv <- "x2"
Grid <- GridX
}else if(conditioned == "x2"){
y_index <- as.numeric(cut(values, GridY, labels = 1:(length(GridY)-1), include.lowest = T))
new_values <- (grid[y_index] + grid[y_index + 1])/2
cInv <- "x1"
Grid <- GridY
}
pred_cop <- .predict_qad_copula(values = new_values, conditioned = conditioned, qad_output = object,
nr_intervals = NULL, prediction_interval = NULL)
pred_prob <- pred_cop$pred_prob
index <- pred_cop$indices
if(length(which(values > max(object$data[conditioned]) | values < min(object$data[conditioned])))>0){
pred_prob[,3+which(values > max(object$data[conditioned]) | values < min(object$data[conditioned]))] <- NA
exceeding <- which(values > max(object$data[conditioned]) | values < min(object$data[conditioned]))
}
names(pred_prob) <- c("Interval", "lowerBound", "upperBound", paste0(conditioned, "=",values))
if(conditioned == "x1"){
pred_prob$lowerBound <- GridY[1:NROW(pred_prob)]
pred_prob$upperBound <- GridY[2:(NROW(pred_prob)+1)]
}else if(conditioned == "x2"){
pred_prob$lowerBound <- GridX[1:NROW(pred_prob)]
pred_prob$upperBound <- GridX[2:(NROW(pred_prob)+1)]
}
if(!is.null(prediction_interval)){
pred_prob_new <- data.frame(Interval = "I", lowerBound = prediction_interval[1], upperBound = prediction_interval[2])
for(i in 4:(NCOL(pred_prob))){
f_int <- approxfun(c(pred_prob$lowerBound[1],pred_prob$upperBound), c(0,cumsum(pred_prob[,i])), method = "linear", yleft = 0, yright = 1)
pred_prob_new[1,names(pred_prob)[i]] <- f_int(prediction_interval[2])-f_int(prediction_interval[1])
}
pred_prob <- pred_prob_new
}
#Prediction plot
p <- plot.qad(object, copula = FALSE, panel.grid = panel.grid)
if(conditioned == 'x1'){
df_rect <- data.frame(xmin = Grid[index],
xmax = Grid[index+1],
ymin = min(pred_prob$lowerBound),ymax = max(pred_prob$upperBound))
if(length(which(values > max(object$data[conditioned]) | values < min(object$data[conditioned])))>0){
df_rect[exceeding, ] <- rep(NA, 4)
}
}else if(conditioned == "x2"){
df_rect <- data.frame(ymin = Grid[index],
ymax = Grid[index+1],
xmin = min(pred_prob$lowerBound), xmax = max(pred_prob$upperBound))
if(length(which(values > max(object$data[conditioned]) | values < min(object$data[conditioned])))>0){
df_rect[exceeding, ] <- rep(NA, 4)
}
}
p <- p + geom_rect(data = df_rect, aes_(xmin = ~xmin, xmax = ~xmax, ymin = ~ymin, ymax = ~ymax), color = 'red', fill = NA, size = 1)
}
print_prob <- data.frame(t(pred_prob[,4:NCOL(pred_prob)]))
names(print_prob) <- pred_prob$Interval
row.names(print_prob) <- names(pred_prob)[-c(1:3)]
cat(paste0("Intervalls for the variable ", setdiff(c("x1", "x2"), conditioned)," \n"))
print.data.frame(pred_prob[,1:3])
cat(paste0("\nProbability that ",toupper(setdiff(c("x1", "x2"), conditioned))," given ",toupper(conditioned),"=",conditioned," lies in the interval I_i :\n"))
print.data.frame(print_prob)
if(pred_plot){
print(p)
}
return(invisible(list(prediction = pred_prob,
plot = p)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.