R/densityPlot.R

Defines functions densityPlot

Documented in densityPlot

#' Lattice plot of conditional densities
#'
#' This function produces conditional density plots at input covariate values.
#' @param X the covariate matrix to predict conditional densities at; each row represents an observation vector.
#' @param minY the minimal possible response value. If \code{yGrid} is NULL, then the response grid will be generated by dividing \code{(minY, maxY)} into \code{nGrid} equal bins and using the mid-points as \code{yGrid}. Default is NULL.
#' @param maxY the maximal possible response value. For more, see \code{minY}. Default is NULL.
#' @param nGrid the number of response grid points. For more, see \code{minY}. Default is 100.
#' @param yGrid the response grid to evaluate conditional densities at. Default is NULL.
#' @param model a conditional density model with a \code{predict} method, e.g., a LinCDE boosting model. The call \code{predict(model, X, y)} should return a vector of estimated conditional densities f(yi | Xi). Default is NULL. To only plot the true conditional densities, use the default value.
#' @param trueDensity true conditional density function. The call \code{trueDensity(X, y)} should return a vector of true conditional densities f(yi | Xi). Default is NULL. To only plot LinCDE's estimated conditional densities, use the default value.
#' @param plot if \code{TRUE}, the density plots are printed. If \code{FALSE}, the data frame of true/estimated densities to generate the density plots are returned.
#' @param layout a numeric vector of length 2 giving the number of columns, rows in the multipanel display. In general, a conditioning plot in lattice consists of several panels arranged in a rectangular array and layout determines this arrangement. Default is to arrange panels in an approximately square array.
#' @param factor.levels vector of character strings or expressions giving the levels of the conditioning variable. The \code{factor.levels} will be written on the panel strips. Default is R (for "region") followed by the index of the region, e.g., R1.
#' @param strip.font.size a numeric value indicating the font size on strips in the multipanel density plots. Default is 0.8.
#' @param main a main title for the plot.
#'
#' @return The function outputs a lattice plot of conditional densities.
#' @export
densityPlot = function(X, minY = NULL, maxY = NULL, nGrid = 100, yGrid = NULL, model = NULL, trueDensity = NULL, plot = TRUE, layout = NULL, factor.levels = NULL, strip.font.size = NULL, main = NULL){
  # preprocess
  if(!is.null(model)){
    d = length(model$var.names)
    if(is.null(dim(X))){
      if(d == 1){X = matrix(X, ncol = 1)} # one covariate
      if(d > 1){X = matrix(X, nrow = 1)} # one data point
    }
    n = dim(X)[1]
    if(!is.null(colnames(X))){
      if(min(colnames(X) %in% model$var.names) == 0)
        stop("please input all covariates")
      X = X[, model$var.names, drop = FALSE]
    } else {
      if(dim(X)[2] != d)
        stop("please input all covariates and their names")
      colnames(X) = model$var.names
      # print("no input var.names and var.names from the LinCDE.boost model are used")
    }
  } else {
    if(is.null(dim(X))){X = matrix(X, ncol = 1)}
    n = dim(X)[1]
  }
  XGrid = X[rep(seq(1,n), rep(nGrid, n)), , drop = FALSE]
  if(is.null(yGrid)){
    if(is.null(model) && (is.null(minY)||is.null(maxY))){stop("please input a response grid or the range of responses")}
    if(is.null(minY)){minY = 1.02 * min(model$y) - 0.02 * max(model$y)}
    if(is.null(maxY)){maxY = 1.02 * max(model$y) - 0.02 * min(model$y)}
    yGrid = seq(minY, maxY, length.out = (nGrid+1))
    yGrid = (yGrid[1:nGrid] + yGrid[2:(nGrid+1)])/2
  } else{nGrid = length(yGrid)}
  yGrid = rep(yGrid, n)

  if(is.null(layout)){layout = c(ceiling(n/floor(sqrt(n))), floor(sqrt(n)))}
  if(is.null(factor.levels)){factor.levels = paste("R", seq(1,n))}
  if(is.null(strip.font.size)){strip.font.size = 0.8}

  # lattice plot of the true and estimated densities
  if(!is.null(trueDensity) && !is.null(model)){
    if(is.null(main)){main = "Estimated and true conditional densities"}
    trueDens = trueDensity(XGrid, yGrid)
    h = model$splitMidPointY[2] - model$splitMidPointY[1]
    splitPointYTest = seq(model$splitMidPointY[1] - h/2, tail(model$splitMidPointY,n=1) + h/2, length.out = 2 * nGrid)
    estDens = predict(object = model, X = XGrid, y = yGrid, splitPointYTest = splitPointYTest)

    latticeCDE = data.frame(rep(yGrid, 2)); colnames(latticeCDE) = "y"
    latticeCDE$density = c(trueDens, estDens)
    latticeCDE$method = factor(c(rep(c("truth", "estimated"), rep(nGrid * n,2))), levels=c("truth", "estimated"))
    latticeCDE$group = rep(rep(seq(1,n), rep(nGrid,n)), 2)
    key=list(space="top", columns = 2,
             lines=list(col=c("blue", "red"), lty=c(3,1), lwd=3),
             text=list(c("truth", "estimated")), cex = 1)
    strip = lattice::strip.custom(bg="lightgrey", factor.levels = factor.levels, par.strip.text=list(col="black", cex=strip.font.size, font=1))
    densityPlot = lattice::xyplot(density ~ y | factor(group), group = latticeCDE$method, data = latticeCDE, type = "l", col = c("blue", "red"), lty = c(3,1), lwd = 3, ylab=list("conditional density", cex = 1), xlab = list("y", cex = 1), key = key, aspect = 0.75, layout = layout, strip = strip, scales = list(cex = 1), main = main)
  }

  # lattice plot of the true densities
  if(!is.null(trueDensity) && is.null(model)){
    if(is.null(main)){main = "True conditional densities"}
    trueDens = trueDensity(XGrid, yGrid)

    latticeCDE = data.frame(yGrid); colnames(latticeCDE) = "y"
    latticeCDE$density = trueDens
    latticeCDE$group = rep(seq(1,n), rep(nGrid,n))
    strip = lattice::strip.custom(bg="lightgrey", factor.levels = factor.levels, par.strip.text=list(col="black", cex=strip.font.size, font=1))
    densityPlot = lattice::xyplot(density ~ y | factor(group), data = latticeCDE, type = "l", col = c("blue"), lty = 3, lwd = 3, ylab=list("conditional density", cex = 1), xlab = list("y", cex = 1), aspect = 0.75, layout = layout, strip = strip, scales = list(cex = 1), main = main)
  }

  # lattice plot of the estimated densities
  if(is.null(trueDensity) && !is.null(model)){
    if(is.null(main)){main = "Estimated conditional densities"}
    h = model$splitMidPointY[2] - model$splitMidPointY[1]
    splitPointYTest = seq(model$splitMidPointY[1] - h/2, tail(model$splitMidPointY,n=1) + h/2, length.out = 2 * nGrid)
    estDens = predict(object = model, X = XGrid, y = yGrid, splitPointYTest = splitPointYTest)

    latticeCDE = data.frame(yGrid); colnames(latticeCDE) = "y"
    latticeCDE$density = c(estDens)
    latticeCDE$group = rep(seq(1,n), rep(nGrid,n))
    strip = lattice::strip.custom(bg="lightgrey", factor.levels = factor.levels, par.strip.text=list(col="black", cex=strip.font.size, font=1))
    densityPlot = lattice::xyplot(density ~ y | factor(group), data = latticeCDE, type = "l", col = c("red"), lty = 1, lwd = 3, ylab=list("conditional density", cex = 1), xlab = list("y", cex = 1), aspect = 0.75, layout = layout, strip = strip, scales = list(cex = 1), main = main)
  }

  if(plot){print(densityPlot)} else {return(latticeCDE)}
}
ZijunGao/LinCDE documentation built on Jan. 2, 2023, 11:14 p.m.