R/vpcstats.R
In tidyvpc: VPC Percentiles and Prediction Intervals

Documented in binning binning.tidyvpcobj censoring censoring.tidyvpcobj observed observed.data.frame simulated simulated.tidyvpcobj stratify stratify.tidyvpcobj

#' Perform a Visual Predictive Check (VPC) computation
#'
#' These functions work together to calculate the statistics that are plotted
#' in a VPC. They would typically be chained together using the "pipe" operator
#' (see Examples).
#'
#' @param o A \code{tidyvpcobj}.
#' @param ... Additional arguments.
#'
#' @import data.table
#' @import ggplot2
#' @importFrom magrittr %>%
#' @importFrom utils packageVersion
#' @import quantreg
#' @import classInt
#' @importFrom mgcv gam
#' @importFrom stats median as.formula model.frame quantile setNames update AIC fitted loess na.exclude optimize resid time qnorm cov runif
#' @importFrom utils install.packages installed.packages
#' @name generics
NULL

#' Specify observed dataset and variables for VPC
#'
#' The observed function is the first function in the vpc piping chain and is used for specifying observed data and variables for VPC. Note: Observed
#' data must not contain missing DV and may require filtering \code{MDV == 0} before generating VPC.
#'
#' @param o A \code{data.frame} of observation data.
#' @param x Numeric x-variable, typically named TIME.
#' @param yobs Numeric y-variable, typically named DV.
#' @param pred Population prediction variable, typically named PRED.
#' @param blq Logical variable indicating below limit of quantification.
#' @param lloq Number or numeric variable in data indicating the lower limit of quantification.
#' @param alq Logical variable indicating above limit of quantification .
#' @param uloq Number or numeric variable in data indicating the upper limit of quantification.
#' @param ... Other arguments.
#' @return A \code{tidyvpcobj} containing both original data and observed data formatted with \code{x} and \code{y} variables as specified in function.
#'   Resulting data is of class \code{data.frame} and \code{data.table}.
#' @examples
#'
#' obs_data <- obs_data[MDV == 0]
#' sim_data <- sim_data[MDV == 0]
#'
#' vpc <- observed(obs_data, x=TIME, y=DV)
#'
#' @seealso \code{\link{simulated}} \code{\link{censoring}} \code{\link{stratify}} \code{\link{predcorrect}} \code{\link{binning}} \code{\link{binless}} \code{\link{vpcstats}}
#' @export
observed <- function(o, ...) UseMethod("observed")

#' @rdname observed
#' @export
observed.data.frame <- function(o, x, yobs, pred=NULL, blq=NULL, lloq=-Inf, alq=NULL, uloq=Inf, ...) {
  data <- o
  x    <- rlang::eval_tidy(rlang::enquo(x),    data)
  yobs <- rlang::eval_tidy(rlang::enquo(yobs), data)
  pred <- rlang::eval_tidy(rlang::enquo(pred), data)
  lloq <- rlang::eval_tidy(rlang::enquo(lloq), data)
  uloq <- rlang::eval_tidy(rlang::enquo(uloq), data)
  blq  <- rlang::eval_tidy(rlang::enquo(blq),  data)
  alq  <- rlang::eval_tidy(rlang::enquo(alq),  data)

  obs <- data.table(x, y=yobs, blq, lloq, alq, uloq)

  o <- structure(list(data=data), class="tidyvpcobj")
  o <- update(o, obs=obs, pred=pred)
  censoring(o)
}

#' Specify simulated dataset and variables for VPC
#'
#' The simulated function is used for specifying simulated input data and variables for VPC. Note: Simulated data must not
#' contain missing DV and may require filtering \code{MDV == 0} before generating VPC. The ordering of observed and simulated
#' data must also be consistent, with replicates in simulated data stacked on top of each other.
#'
#' @param o A \code{tidyvpcobj}.
#' @param data A \code{data.frame} of simulated data.
#' @param ysim Numeric y-variable, typically named DV.
#' @param xsim Numeric x-variable, typically named TIME.
#' @param ... Other arguments.
#' @return A \code{tidyvpcobj} containing simulated dataset \code{sim} formatted with columns \code{x}, \code{y}, and \code{repl}, which indicates the replicate number.
#'  The column \code{x} is used from the \code{observed()} function. Resulting dataset is of class \code{data.frame} and \code{data.table}.
#' @examples
#' require(magrittr)
#'
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV)
#'
#' @seealso \code{\link{observed}} \code{\link{censoring}} \code{\link{stratify}} \code{\link{predcorrect}} \code{\link{binning}} \code{\link{binless}} \code{\link{vpcstats}}
#' @name simulated
#' @export
simulated <- function(o, ...) UseMethod("simulated")

#' @rdname simulated
#' @export
simulated.tidyvpcobj <- function(o, data, ysim, ..., xsim) {
  ysim <- rlang::eval_tidy(rlang::enquo(ysim), data)
  obs  <- o$obs
  nrep <- length(ysim)/nrow(obs)
  if (nrep != as.integer(nrep)) {
    stop("The number of simulated rows is not a multiple of the number of observed rows.  Ensure that you filtered your observed data to remove MDV rows.")
  }
  repl <- rep(seq_len(nrep), each=nrow(obs))
  if (!missing(xsim)) {
    xsim <- rlang::eval_tidy(rlang::enquo(xsim), data)
    # generate a replication vector to confirm time matching
    xrep_vec <- rep(seq_along(obs$x), nrep)
    if (!all(xsim == obs$x[xrep_vec])) {
      stop("Values of `xsim` do not match observed data x-values.  Ensure that you filtered your observed data to remove MDV rows.")
    }
  } else {
    xsim <- obs$x
  }

  sim <- data.table(x=xsim, y=ysim, repl)
  update(o, sim=sim)
}

#' Censoring observed data for Visual Predictive Check (VPC)
#'
#' Specify censoring variable or censoring value for VPC.
#'
#' @param o A \code{tidyvpcobj}.
#' @param blq blq variable if present in observed data.
#' @param lloq Numeric value or numeric variable in data indicating the upper limit of quantification.
#' @param alq Logical variable indicating above limit of quantification.
#' @param uloq Numeric value or numeric variable in data indicating the upper limit of quantification.
#' @param data Observed data supplied in \code{observed()} function.
#' @param ... Other arguments to include.
#' @return Updates \code{obs} \code{data.frame} in \code{tidypcobj} with censored values for observed data which includes \code{lloq} and \code{uloq} specified
#'  values for lower/upper limit of quantification. Logicals for \code{blq} and \code{alq} are returned that indicate whether the DV value lies below/above limit
#'  of quantification.
#' @examples
#' \donttest{
#' require(magrittr)
#'
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     censoring(blq=(DV < 50), lloq=50) %>%
#'     binning(bin = "pam", nbins = 5) %>%
#'     vpcstats()
#'
#' #Using LLOQ variable in data with different values of LLOQ by Study:
#'
#' obs_data$LLOQ <- obs_data[, ifelse(STUDY == "Study A", 50, 25)]
#'
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     censoring(blq=(DV < LLOQ), lloq=LLOQ) %>%
#'     stratify(~ STUDY) %>%
#'     binning(bin = "kmeans", nbins = 4) %>%
#'     vpcstats()
#' }
#'
#' @seealso \code{\link{observed}} \code{\link{simulated}} \code{\link{stratify}} \code{\link{predcorrect}} \code{\link{binning}} \code{\link{binless}} \code{\link{vpcstats}}

#' @export
censoring <- function(o, ...) UseMethod("censoring")

#' @rdname censoring
#' @export
censoring.tidyvpcobj <- function(o, blq, lloq, alq, uloq, data=o$data, ...) {
  if (missing(blq)) {
    blq <- o$obs$blq
  } else {
    blq <- rlang::eval_tidy(rlang::enquo(blq), data)
  }
  if (missing(lloq)) {
    lloq <- o$obs$lloq
  } else {
    lloq <- rlang::eval_tidy(rlang::enquo(lloq), data)
  }
  if (missing(alq)) {
    alq <- o$obs$alq
  } else {
    alq <- rlang::eval_tidy(rlang::enquo(alq), data)
  }
  if (missing(uloq)) {
    uloq <- o$obs$uloq
  } else {
    uloq <- rlang::eval_tidy(rlang::enquo(uloq), data)
  }

  if (is.null(blq)) {
    blq <- FALSE
  }
  if (!is.logical(blq)) {
    stop("blq must be of type logical")
  }
  if (any(is.na(blq))) {
    stop("blq cannot contain missing values")
  }
  if (is.null(lloq)) {
    if (any(blq)) {
      stop("No lloq specified for blq")
    }
    lloq <- -Inf
  }
  if (!is.numeric(lloq)) {
    stop("lloq must be of type numeric")
  }
  if (any(blq & (is.na(lloq) | lloq == -Inf))) {
    stop("No lloq specified for blq")
  }

  if (is.null(alq)) {
    alq <- FALSE
  }
  if (!is.logical(alq)) {
    stop("alq must be of type logical")
  }
  if (any(is.na(alq))) {
    stop("alq cannot contain missing values")
  }
  if (is.null(uloq)) {
    if (any(alq)) {
      stop("No uloq specified for alq")
    }
    uloq <- Inf
  }
  if (!is.numeric(uloq)) {
    stop("uloq must be of type numeric")
  }
  if (any(alq & (is.na(uloq) | uloq == Inf))) {
    stop("No uloq specified for alq")
  }

  .blq <- blq
  .lloq <- lloq
  .alq <- alq
  .uloq <- uloq
  o$obs[, blq := rep(.blq, len=.N)]
  o$obs[, lloq := rep(.lloq, len=.N)]
  o$obs[, alq := rep(.alq, len=.N)]
  o$obs[, uloq := rep(.uloq, len=.N)]
  update(o, censoring=TRUE)
}

#' Stratification for Visual Predictive Check (VPC)
#'
#' Use to specify stratification variables for VPC.
#'
#' @param o A \code{tidyvpcobj}.
#' @param formula Formula for stratification.
#' @param data Observed data supplied in \code{observed()} function.
#' @param ... Other arguments to include.
#' @return Returns updated \code{tidyvpcobj} with stratification formula, stratification column(s), and strat.split datasets, which
#'   is \code{obs} split by unique levels of stratification variable(s). Resulting datasets are of class object \code{data.frame}
#'   and \code{data.table}.
#' @examples
#' \donttest{
#' require(magrittr)
#'
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     stratify(~ GENDER) %>%
#'     binning(NTIME) %>%
#'     vpcstats()
#'
#' # Example with 2-way stratification by GENDER and STUDY.
#'
#' vpc <- vpc %>%
#'     stratify(~ GENDER + STUDY) %>%
#'     binning(bin = "centers", centers = c(1,3,5,7,10)) %>%
#'     vpcstats()
#' }
#'
#' @seealso \code{\link{observed}} \code{\link{simulated}} \code{\link{censoring}} \code{\link{predcorrect}} \code{\link{binning}} \code{\link{binless}} \code{\link{vpcstats}}

#' @export
stratify <- function(o, ...) UseMethod("stratify")

#' @method stratify tidyvpcobj
#' @rdname stratify
#' @export
stratify.tidyvpcobj <- function(o, formula, data=o$data, ...) {
  if (!inherits(formula, "formula")) {
    stop("Expecting a formula")
  }
  flist <- as.list(formula)
  if (length(flist) == 3) {
    lhs <- as.call(c(flist[[1]], flist[[2]]))
    rhs <- as.call(c(flist[[1]], flist[[3]]))
    if (flist[[2]] == as.symbol(".")) {
      lhsmf <- NULL
    } else {
      lhsmf <- as.data.table(model.frame(lhs, data))
    }
    if (flist[[3]] == as.symbol(".")) {
      rhsmf <- NULL
    } else {
      rhsmf <- as.data.table(model.frame(rhs, data))
    }
    if (is.null(lhsmf) && is.null(rhsmf)) {
      stop("Invalid stratification formula: no variables specified")
    }
    strat <- cbind(lhsmf, rhsmf)
  } else {
    strat <- as.data.table(model.frame(formula, data))
  }

  reserved.names <- c("x", "y", "ypc", "pred", "blq", "lloq", "repl", "bin", "xbin", "qname", "lo", "md", "hi",
                      "nobs", "xmedian", "xmean", "xmin", "xmax", "xmid", "xleft", "xright", "xcenter")
  if (any(names(strat) %in% reserved.names)) {
    stop(paste0("The names of used for stratification must not include: ",
                paste0(reserved.names, collapse=", ")))
  }

  o$obs[, names(strat) := strat]

  strat.split <- split(o$obs, strat)

  strat.split <- strat.split[lapply(strat.split,NROW)>0]

  update(o, strat=strat, strat.split = strat.split, strat.formula=formula)
}

#' Binning methods for Visual Predictive Check (VPC)
#'
#' This function executes binning methods available in classInt i.e. "jenks", "kmeans", "sd", "pretty", "pam", "kmeans", "hclust", "bclust", "fisher", "dpih", "box", "headtails", and "maximum".
#' You may also bin directly on x-variable or alternatively specify "centers" or "breaks". For explanation of binning methods see \code{\link[classInt]{classIntervals}}.
#'
#' @param o A \code{tidyvpcobj}.
#' @param bin Character string indicating binning method or unquoted variable name if binning on x-variable.
#' @param data Observed data supplied in \code{observed()} function.
#' @param xbin Character string indicating midpoint type for binning.
#' @param centers Numeric vector of centers for binning. Use \code{bin = "centers"}, if supplying centers.
#' @param breaks Numeric vector of breaks for binning. Use \code{bin = "breaks"}, if supplying breaks.
#' @param nbins Numeric number indicating the number of bins to use.
#' @param altx  Unquoted variable name in observed data for alternative x-variable binning.
#' @param stratum List indicating the name of stratification variable and level, if using different binning methods by strata.
#' @param by.strata Logical indicating whether binning should be performed by strata.
#' @param ... Other arguments to include for \code{classIntervals}. See \code{...} usage for \code{style} in \code{?classIntervals}.
#' @return Updates \code{tidyvpcobj} with \code{data.frame} containing bin information including left/right boundaries and midpoint, as specified in \code{xbin} argument.
#' @seealso \code{\link{observed}} \code{\link{simulated}} \code{\link{censoring}} \code{\link{predcorrect}} \code{\link{stratify}} \code{\link{binless}} \code{\link{vpcstats}}
#' @examples
#' \donttest{
#' require(magrittr)
#'
#'  # Binning on x-variable NTIME
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     binning(bin = NTIME) %>%
#'     vpcstats()
#'
#'  # Binning using ntile and xmean for midpoint
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     binning(bin = "ntile", nbins = 8, xbin = "xmean") %>%
#'     vpcstats()
#'
#'  # Binning using centers
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     binning(bin = "centers", centers = c(1,3,5,7)) %>%
#'     vpcstats()
#'
#'  # Different Binning for each level of Strata
#' vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'     simulated(sim_data, y=DV) %>%
#'     stratify(~ GENDER) %>%
#'     binning(stratum = list(GENDER = "M"), bin = "jenks", nbins = 5, by.strata = TRUE) %>%
#'     binning(stratum = list(GENDER = "F"), bin = "pam", nbins = 4, by.strata = TRUE) %>%
#'     vpcstats()
#'
#'  # Binning Categorical DV using rounded time variable
#'
#'   vpc <- observed(obs_cat_data, x = agemonths, y = zlencat ) %>%
#'       simulated(sim_cat_data, y = DV) %>%
#'       binning(bin = round(agemonths, 0)) %>%
#'       vpcstats(vpc.type = "categorical")
#' }
#'
#' @export
binning <- function(o, ...) UseMethod("binning")

#' @rdname binning
#' @export
binning.tidyvpcobj <- function(o, bin, data=o$data, xbin="xmedian", centers, breaks, nbins, altx, stratum=NULL, by.strata=TRUE,  ...) {
  keep <- i <- ypc <- y <- NULL
  . <- list

  # If xbin is numeric, then that is the bin
  xbin <- rlang::eval_tidy(rlang::enquo(xbin), data)
  if (is.numeric(xbin)) {
    if (length(xbin) != nrow(o$obs)) {
      stop("A numeric xbin be of length equal to the number of observations")
    }
    bin <- xbin
  } else {
    if (missing(bin) && !missing(centers)) {
      bin <- "centers"
    } else if (missing(bin) && !missing(breaks)) {
      bin <- "breaks"
    } else {
      bin <- rlang::eval_tidy(rlang::enquo(bin), data)
    }
  }

  # If you don't want to bin on the observed x, you can specify an alternate x for binning
  if (missing(altx)) {
    x <- o$obs$x
  } else {
    x <- rlang::eval_tidy(rlang::enquo(altx), data)
  }

  if (!missing(nbins)) {
    nbins <- rlang::eval_tidy(rlang::enquo(nbins), data)
    if (is.numeric(nbins) && !is.null(o$strat) && (length(nbins) == nrow(o$strat))) {
      nbins <- data.table(nbins)[, .(nbins=unique(nbins)), by=o$strat]
    }
  }

  args <- lapply(rlang::enquos(...), rlang::eval_tidy, data=data)

  by.strata <- isTRUE(by.strata)

  # Check if specific stratum selected (can be more than 1), setup filter
  if (!is.null(stratum)) {
    if (!is.list(stratum)) {
      stop("stratum must be a list, data.frame or data.table")
    }
    if (is.null(o$strat)) {
      stop("No stratification has been specified")
    }
    if (!by.strata) {
      stop("by.strata must be TRUE when stratum is specified")
    }
    filter <- copy(o$strat)[, keep := F]
    filter[as.data.table(stratum), keep := T, on=names(stratum)]
    filter <- filter$keep
  } else {
    filter <- rep(T, nrow(o$obs))  # Keep all
  }

  # If xbin is numeric, then that is the bin
  if (is.numeric(xbin)) {
    if (length(xbin) != nrow(o$obs)) {
      stop("A numeric xbin be of length equal to the number of observations")
    }
    bin <- xbin
  } else if (is.character(bin) && length(bin) == 1) {

    known.classInt.styles <- c("fixed", "sd", "equal", "pretty", "quantile",
                               "kmeans", "hclust", "bclust", "fisher", "jenks",
                               "dpih", "headtails", "maximum", "box")
    if (bin == "centers") {
      if (missing(centers)) {
        stop("centers must be specified to use this binning method")
      }
      if (!by.strata && is.data.frame(centers)) {
        stop("by.strata must be TRUE when centers is a data.frame")
      }
      bin <- nearest(centers)
    } else if (bin == "breaks") {
      if (missing(breaks)) {
        stop("breaks must be specified to use this binning method")
      }
      if (!by.strata && is.data.frame(breaks)) {
        stop("by.strata must be TRUE when breaks is a data.frame")
      }
      bin <- cut_at(breaks)
    } else if (bin == "ntile") {
      if (missing(nbins)) {
        stop("nbins must be specified to use this binning method")
      }
      bin <- bin_by_ntile(nbins)
    } else if (bin == "eqcut") {
      if (missing(nbins)) {
        stop("nbins must be specified to use this binning method")
      }
      bin <- bin_by_eqcut(nbins)
    } else if (bin == "pam") {
      if (missing(nbins)) {
        stop("nbins must be specified to use this binning method")
      }
      bin <- bin_by_pam(nbins)
    } else if (bin %in% known.classInt.styles) {
      if (missing(nbins)) {
        nbins <- NULL
      }
      bin <- bin_by_classInt(bin, nbins)
    } else {
      stop(sprintf("Unknown binning method: %s", bin))
    }
  }

  if (is.function(bin)) {
    xdat <- data.table(i=1:nrow(o$obs), x=x)
    if (any(is.na(xdat[filter]$x))) {
      warning("x contains missing values, which could affect binning")
    }
    if (any(is.infinite(xdat[filter]$x))) {
      warning("x contains non-finite values, which could affect binning")
    }
    if (by.strata && !is.null(o$strat)) {
      sdat <- copy(o$strat)
      temp <- xdat[filter, .(i=i, j=do.call(bin, c(list(x), args, .BY))), by=sdat[filter]]
      j <- temp[order(i), j]
    } else {
      j <- xdat[filter, do.call(bin, c(list(x), args))]
    }
    if (length(j) != sum(filter)) {
      stop("The binning function did not return the right number of elements")
    }
  } else if (length(bin) == nrow(o$obs)) {
    j <- bin[filter]
  } else {
    stop("Incorrect binning specification")
  }
  o$obs[filter, bin := as.factor(j)]
  bin <- o$obs$bin

  if (!is.null(o$strat)) {
    stratbin <- data.table(o$strat, bin)
  } else {
    stratbin <- data.table(bin)
  }
  o <- update(o, .stratbin=stratbin, bin.by.strata=by.strata)

  # Assign an x value to each bin
  if (is.numeric(xbin)) {
    xbin <- data.table(xbin=xbin)[, .(xbin = unique(xbin)), by=stratbin]
  } else if (is.character(xbin) && length(xbin) == 1) {
    bi <- bininfo(o)
    xbin <- data.table(bi[, names(stratbin), with=FALSE], xbin=bi[[xbin]])
  } else if (is.function(xbin)) {
    xbin <- data.table(x=x)[, .(xbin = xbin(x)), by=stratbin]
  } else {
    stop("Invalid xbin")
  }
  vpc.method <- list(method = "binning")

  # check if user supplied predcorrect before binning
  if (!is.null(o$predcor) && o$predcor) {
    pred <- o$pred
    log <- o$predcor.log
    mpred <- data.table(stratbin, pred)[, mpred := median(pred), by = stratbin]$mpred

    if (log) {
      o$obs[, ypc := (mpred - pred) + y]
      o$sim[, ypc := (mpred - pred) + y]
    } else {
      o$obs[, ypc := ifelse(pred == 0, 0, (mpred / pred) * y)]
      o$sim[, ypc := ifelse(rep(pred, times = nrow(o$sim) / nrow(o$obs)) == 0, 0, (mpred / pred) * y)]
    }
  }

  update(o, xbin=xbin, vpc.method = vpc.method)
}

#' Prediction corrected Visual Predictive Check (pcVPC)
#'
#' Specify prediction variable for pcVPC.
#'
#' @param o A \code{tidyvpcobj}.
#' @param pred Prediction variable in observed data.
#' @param data Observed data supplied in \code{observed()} function.
#' @param ... Other arguments to include.
#' @param log Logical indicating whether DV was modeled in logarithmic scale.
#' @return Updates \code{tidyvpcobj} with required information to performing prediction correction, which includes the \code{predcor} logical indicating whether
#'   prediction corrected VPC is to be performed, the \code{predcor.log} logical indicating whether the DV is on a log-scale, and the \code{pred} prediction
#'   column from the original data.
#' @examples
#' \donttest{
#' require(magrittr)
#'
#' obs_data <- obs_data[MDV == 0]
#' sim_data <- sim_data[MDV == 0]
#'
#'  # Add PRED variable to observed data from first replicate of
#'  # simulated data
#'
#' obs_data$PRED <- sim_data[REP == 1, PRED]
#'
#'   vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'        simulated(sim_data, y=DV) %>%
#'        binning(bin = NTIME) %>%
#'        predcorrect(pred=PRED) %>%
#'        vpcstats()
#'
#'  # For binless loess prediction corrected, use predcorrect() before
#'  # binless() and set loess.ypc = TRUE
#'
#'   vpc <- observed(obs_data, x=TIME, y=DV) %>%
#'        simulated(sim_data, y=DV) %>%
#'        predcorrect(pred=PRED) %>%
#'        binless(loess.ypc = TRUE) %>%
#'        vpcstats()
#'        }
#'
#' @seealso \code{\link{observed}} \code{\link{simulated}} \code{\link{censoring}} \code{\link{stratify}} \code{\link{binning}} \code{\link{binless}} \code{\link{vpcstats}}

#' @export
predcorrect <- function(o, ...) UseMethod("predcorrect")

#' @rdname predcorrect
#' @export
predcorrect.tidyvpcobj <- function(o, pred, data=o$data, ..., log=FALSE) {

  ypc <- y <- NULL

  if (missing(pred)) {
    pred <- o$pred
  } else {
    pred <- rlang::eval_tidy(rlang::enquo(pred), data)
  }
  if (is.null(pred)) {
    stop("No pred specified")
  }

  stratbin <- o$.stratbin
  # predcorrect after binning, check if binning/binless has already been specified

  if (!is.null(o$vpc.method)) {
    if(o$vpc.method$method == "binless") {
      o$vpc.method$loess.ypc <- TRUE
    } else { #binning specified, perform ypc calculcation
      mpred <- data.table(stratbin, pred)[, mpred := median(pred), by = stratbin]$mpred

      if (log) {
        o$obs[, ypc := (mpred - pred) + y]
        o$sim[, ypc := (mpred - pred) + y]
      } else {
        o$obs[, ypc := ifelse(pred == 0, 0, (mpred / pred) * y)]
        o$sim[, ypc := ifelse(rep(pred, times = nrow(o$sim) / nrow(o$obs)) == 0, 0, (mpred / pred) * y)]
      }
    }
  }

  update(o, predcor=TRUE, predcor.log=log, pred=pred)
}

#' Remove prediction correction for Visual Predictive Check (VPC)
#'
#' Optional function to use indicating no pred correction for VPC.
#'
#' @param o A \code{tidyvpcobj}.
#' @param ... Other arguments to include.
#' @export
nopredcorrect <- function(o, ...) UseMethod("nopredcorrect")

#' @rdname nopredcorrect
#' @export
nopredcorrect.tidyvpcobj <- function(o, ...) {
  update(o, predcor=FALSE)
}

#' @export
update.tidyvpcobj <- function(object, ...) {
  args <- list(...)
  for (i in names(args)) {
    object[[i]] <- args[[i, exact=TRUE]]
  }
  object
}

#' Print a \code{tidyvpcobj}
#'
#' Print generic used to return information about VPC.
#'
#' @param x An \code{tidyvpcobj}.
#' @param ... Further arguments can be specified but are ignored.
#' @return Returns \code{x} invisibly.
#' @export
print.tidyvpcobj <- function(x, ...) {
  if (!is.null(x$sim)) {
    nrep <- nrow(x$sim)/nrow(x$obs)
    if (isTRUE(x$predcor)) {
      cat("Prediction corrected ")
    }
    cat(sprintf("VPC with %d replicates", nrep), "\n")
  }
  cat(sprintf("Stratified by: %s", paste0(names(x$strat), collapse=", ")), "\n")
  if (!is.null(x$stats)) {
    print(x$stats)
  }
  invisible(x)
}

#' Obtain information about the bins from a \code{tidyvpcobj}
#' @param o An object.
#' @param ... Additional arguments.
#' @return A `data.table` containing the following columns:
#' \itemize{
#'   \item \code{nobs}: Number of observed data points in the bin
#'   \item \code{xmedian}: Median x-value of the observed data points in the bin
#'   \item \code{xmean}: Mean x-value of the observed data points in the bin
#'   \item \code{xmax}: Maximum x-value of the observed data points in the bin
#'   \item \code{xmin}: Minimum x-value of the observed data points in the bin
#'   \item \code{xmid}: Value halfway between `xmin` and `xmax`.
#'   x-value of the observed data points in the bin
#'   \item \code{xleft}: Value halfway between the minimum x-value of the
#'   current bin and the maximum x-value of the previous bin to the left (for
#'   the left-most bin, it is the minimum x-value).
#'   \item \code{xright}: Value halfway between the maximum x-value of the
#'   current bin and the minimum x-value of the next bin to the right (for the
#'   right-most bin, it is the maximum x-value).
#'   \item \code{xcenter}: Value halfway between `xleft` and `xright`.
#' }
#' In addition, if stratification was performed, the stratification columns will
#' be included as well.
#' @export
bininfo <- function(o, ...) UseMethod("bininfo")

#' @describeIn bininfo Method for \code{tidyvpcobj}.
#' @param by.strata Should the calculations be done by strata? Defaults to what
#'  was specified when the binning was done.
#' @export
bininfo.tidyvpcobj <- function(o, by.strata=o$bin.by.strata, ...) {

  x <- xmin <- xmax <- bin <- NULL

  f1 <- function(x) {
    nobs    <- sum(!is.na(x))
    xmedian <- median(x, na.rm=TRUE)
    xmean   <- mean(x, na.rm=TRUE)
    xmin    <- min(x, na.rm=TRUE)
    xmax    <- max(x, na.rm=TRUE)
    xmid    <- 0.5*(xmin + xmax)
    data.table(nobs, xmedian, xmean, xmin, xmax, xmid)
  }

  # Compute xleft and xright
  f2 <- function(xmin, xmax) {
    xmin    <- c(xmin, xmax[length(xmax)])
    xmax    <- c(xmin[1], xmax)
    breaks  <- 0.5*(xmin + xmax)
    xleft   <- breaks[-length(breaks)]
    xright  <- breaks[-1]
    xcenter <- 0.5*(xleft + xright)
    data.table(xleft, xright, xcenter)
  }
  if (by.strata && !is.null(o$strat)) {
    bi <- o$obs[, f1(x), by=o$.stratbin]
    setkeyv(bi, c(names(o$strat), "xmin"))
    bi[, c(.SD, f2(xmin, xmax)), by=names(o$strat)]
  } else {
    bi <- o$obs[, f1(x), by=bin]
    setkeyv(bi, "xmin")
    bi <- cbind(bi, bi[, f2(xmin, xmax)])
    bi <- bi[unique(o$.stratbin), on="bin"]
    setkeyv(bi, "xmin")
    bi[, c(names(o$.stratbin), setdiff(names(bi), names(o$.stratbin))), with=FALSE]
  }
}

#' Different functions that perform binning.
#'
#' @param breaks A numeric vector of values that designate cut points between bins.
#' @param centers A numeric vector of values that designate the center of each bin.
#' @param nbins The number of bins to split the data into.
#' @param style a binning style (see \link[classInt]{classIntervals} for details).
#' @return Each of these functions returns a function of a single numeric
#' vector `x` that assigns each value of `x` to a bin.
#' @examples
#'
#' x <- c(rnorm(10, 1, 1), rnorm(10, 3, 2), rnorm(20, 5, 3))
#' centers <- c(1, 3, 5)
#' nearest(centers)(x)
#'
#' breaks <- c(2, 4)
#' cut_at(breaks)(x)
#'
#' bin_by_eqcut(nbins=4)(x)
#' bin_by_ntile(nbins=4)(x)
#'
#' \donttest{
#' bin_by_pam(nbins=4)(x)
#' bin_by_classInt("pretty", nbins=4)(x)
#' }
#'
#' @name binningfunctions
NULL

#' @rdname binningfunctions
#' @export
cut_at <- function(breaks) {
  breaks <- .check_breaks(breaks)
  function(x, ..., right=FALSE) {
    breaks <- .resolve_breaks(breaks, ...)
    breaks <- sort(unique(breaks))
    if (min(x) < min(breaks)) {
      breaks <- c(min(x), breaks)
    }
    if (max(x) > max(breaks)) {
      breaks <- c(breaks, max(x))
    }
    as.character(cut(x, breaks, include.lowest=TRUE, right=right))
  }
}

#' @rdname binningfunctions
#' @export
nearest <- function(centers) {
  centers <- .check_centers(centers)
  function(x, ...) {
    centers <- .resolve_centers(centers, ...)
    centers <- sort(unique(centers))
    dist <- function(a, b) abs(a - b)
    d <- outer(x, centers, dist)
    m <- apply(d, 1, which.min)
    centers[m]
  }
}

#' @rdname binningfunctions
#' @export
bin_by_ntile <- function(nbins) {
  nbins <- .check_nbins(nbins)
  function(x, ...) {
    nbins <- .resolve_nbins(nbins, ...)

    # Mimic the function from dplyr
    len <- sum(!is.na(x))
    r <- rank(x, ties.method="first", na.last="keep")
    as.integer(floor(nbins*(r - 1)/len + 1))
  }
}

#' @rdname binningfunctions
#' @export
bin_by_eqcut <- function(nbins) {
  nbins <- .check_nbins(nbins)
  function(x, ..., quantile.type=7) {
    nbins <- .resolve_nbins(nbins, ...)

    # Mimic the function from table1
    breaks <- quantile(x, probs=seq.int(nbins - 1)/nbins, na.rm=TRUE, type=quantile.type)
    cut_at(breaks)(x)
  }
}

#' @rdname binningfunctions
#' @export
bin_by_pam <- function(nbins) {
  has_cluster <- requireNamespace("cluster", quietly=TRUE)
  if (!has_cluster) {
    stop("Package 'cluster' is required to use the binning method. Please install it.")
  }
  nbins <- .check_nbins(nbins)
  function(x, ...) {
    nbins <- .resolve_nbins(nbins, ...)

    centers <- sort(cluster::pam(x, nbins)$medoids)
    nearest(centers)(x)
  }
}

#' @rdname binningfunctions
#' @export
bin_by_classInt <- function(style, nbins=NULL) {
  has_classInt <- requireNamespace("classInt", quietly=TRUE)
  if (!has_classInt) {
    stop("Package 'classInt' is required to use the binning method. Please install it.")
  } else {
    if (style == "box" && paste0(packageVersion("classInt")) < "0.4.8") {
      stop("'classInt >= 0.4-8' is required to use the 'box' binning method. Please update.")
    }
  }
  style <- style
  if (!is.null(nbins)) {
    nbins <- .check_nbins(nbins)
  }
  function(x, ...) {
    if (length(unique(x)) > 1) {
      args <- list(var=x, style=style)
      if (!is.null(nbins)) {
        nbins <- .resolve_nbins(nbins, ...)
        args$n <- nbins
      }
      args <- c(args, list(...))
      if (style %in% c("kmeans", "hclust", "dpih")) {
        # These don't accept '...' arguments
        args1 <- args[intersect(names(args), methods::formalArgs(classInt::classIntervals))]
        args2 <- if (style == "kmeans") {
          args[intersect(names(args), methods::formalArgs(stats::kmeans))]
        } else if (style == "hclust") {
          args[intersect(names(args), methods::formalArgs(stats::hclust))]
        } else if (style == "dpih") {
          has_KernSmooth <- requireNamespace("KernSmooth", quietly=TRUE)
          if (!has_KernSmooth) {
            stop("Package 'KernSmooth' is required to use the binning method. Please install it.")
          }
          args[intersect(names(args), methods::formalArgs(KernSmooth::dpih))]
        } else {
          list()
        }
        args <- c(args1, args2)
      }
      args <- args[!duplicated(args)]
      breaks <- do.call(classInt::classIntervals, args)$brks
    } else {
      # If a group has a single value, `classInt::classIntervals` gives an error
      breaks <- rep(1, length(x))
    }
    cut_at(breaks)(x)
  }
}

#' Perform a consistency check on observed and simulated data
#'
#' This function performs a simple consistency check on an observed and
#' simulated dataset to make sure they are consistent with respect to ordering
#' as required by the other functions used in the VPC calculation.
#'
#' The consistency check is performed by comparing a combination of unique
#' subject identifier (ID) and time. Both \code{data.frame} objects must be given with
#' those in positions 1 and 2, respectively.
#'
#' @param obs,sim A `data.frame` with 2 columns (see Details).
#' @param tol A tolerance for comparing time values.
#' @return The number of replicates contained in `sim`.
#' @seealso \code{\link{observed}}, \code{\link{simulated}}.
#' @examples
#'
#' \donttest{
#'
#'  require(data.table)
#'
#' check_order(obs_data[, .(ID, TIME)], sim_data[, .(ID, TIME)])
#' }
#' @export
check_order <- function(obs, sim, tol=1e-5) {
  if (nrow(sim) %% nrow(obs) != 0) {
    stop("Rows in sim is not a multiple of rows in obs")
  }
  if (is.numeric(obs[[1]])) obs[[1]] <- as.numeric(obs[[1]])
  if (is.numeric(sim[[1]])) sim[[1]] <- as.numeric(sim[[1]])
  if (is.factor(obs[[1]])) obs[[1]] <- as.character(obs[[1]])
  if (is.factor(sim[[1]])) sim[[1]] <- as.character(sim[[1]])
  if (!identical(rep(obs[[1]], len=nrow(sim)), sim[[1]])) {
    stop("ID columns are not identical")
  }
  if (!all(abs(rep(obs[[2]], len=nrow(sim)) - sim[[2]]) < tol)) {
    stop("Time columns are not equal")
  }
  nrow(sim) / nrow(obs)
}


# Internal function
.check_centers <- function(centers) {
  if (is.data.frame(centers)) {
    centers <- as.data.table(centers)
    if (is.null(centers$centers)) {
      stop("centers data.frame must contain column centers")
    }
    if (any(is.na(centers$centers))) {
      stop("centers cannot contain missing values")
    }
    keycols <- setdiff(names(centers), "centers")
    setkeyv(centers, keycols)
  } else if (is.numeric(centers)) {
    if (any(is.na(centers))) {
      stop("centers cannot contain missing values")
    }
  } else {
    stop("centers must be a numeric vector or data.frame")
  }
  centers
}

# Internal function
.resolve_centers <- function(centers, ...) {
  if (is.data.table(centers)) {
    keycols <- key(centers)
    key <- as.data.table(list(...)[keycols])
    centers <- unique(centers[key]$centers)
  }
  if (is.null(centers) || !is.numeric(centers) || any(is.na(centers))) {
    stop("invalid centers")
  }
  centers
}

# Internal function
.check_breaks <- function(breaks) {
  if (is.data.frame(breaks)) {
    breaks <- as.data.table(breaks)
    if (is.null(breaks$breaks)) {
      stop("breaks data.frame must contain column breaks")
    }
    if (any(is.na(breaks$breaks))) {
      stop("breaks cannot contain missing values")
    }
    keycols <- setdiff(names(breaks), "breaks")
    setkeyv(breaks, keycols)
  } else if (is.numeric(breaks)) {
    if (any(is.na(breaks))) {
      stop("breaks cannot contain missing values")
    }
  } else {
    stop("breaks must be a numeric vector or data.frame")
  }
  breaks
}

# Internal function
.resolve_breaks <- function(breaks, ...) {
  if (is.data.table(breaks)) {
    keycols <- key(breaks)
    key <- as.data.table(list(...)[keycols])
    breaks <- breaks[key]$breaks
  }
  if (is.null(breaks) || !is.numeric(breaks) || any(is.na(breaks))) {
    stop("invalid breaks")
  }
  breaks
}

# Internal function
.check_nbins <- function(nbins) {
  if (is.data.frame(nbins)) {
    nbins <- as.data.table(nbins)
    if (is.null(nbins$nbins)) {
      stop("nbins data.frame must contain column nbins")
    }
    if (any(is.na(nbins$nbins))) {
      stop("nbins cannot contain missing values")
    }
    keycols <- setdiff(names(nbins), "nbins")
    setkeyv(nbins, keycols)
  } else if (is.numeric(nbins) && length(nbins) == 1) {
    if (any(is.na(nbins))) {
      stop("nbins cannot contain missing values")
    }
  } else {
    stop("nbins must be a numeric vector of length 1 or data.frame")
  }
  nbins
}

# Internal function
.resolve_nbins <- function(nbins, ...) {
  if (is.data.table(nbins)) {
    keycols <- key(nbins)
    key <- as.data.table(list(...)[keycols])
    nbins <- unique(nbins[key]$nbins)
  }
  if (is.null(nbins) || !(is.numeric(nbins) && length(nbins) == 1 && !is.na(nbins))) {
    stop("nbins must be uniquely determined")
  }
  nbins
}

# Internal Functions ----

# Dispatches AIC.gam method
.gam_optimize <- function(sp, y, x, data){
  suppressWarnings(
  AIC(gam(y ~ s(x, k = 1), sp = sp, family = "binomial", data = data))
  )
}

.fitcatgam <- function(y, x, sp){
  suppressWarnings(
  fitted(gam(y ~ s(x, k = 1), family = "binomial", sp = c(sp)))
  )
}

#Below function used for fitting rqss
.fitobs <- function(obs, llam.qpred, qpred, l.ypc = FALSE) {
  rqsslo <- rqssmed <- rqsshi <- NULL
  qnames <- paste0("q", as.character(qpred))

  if(l.ypc) {
    obs[, rqsslo := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[1]])), tau = qpred[1], na.action = na.exclude, data = obs))]
    obs[, rqssmed := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[2]])), tau = qpred[2], na.action = na.exclude, data = obs))]
    obs[, rqsshi := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[3]])), tau = qpred[3], na.action = na.exclude, data = obs))]
    setnames(obs, c("rqsslo", "rqssmed", "rqsshi"), qnames)
    obs.fits <- melt(obs, id.vars = "x", measure.vars = qnames)
    obs.fits <- setnames(obs.fits, c("variable", "value"), c("qname", "fit"))
  } else {
    obs[, rqsslo := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[1]])), tau = qpred[1], na.action = na.exclude, data = obs))]
    obs[, rqssmed := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[2]])), tau = qpred[2], na.action = na.exclude, data = obs))]
    obs[, rqsshi := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[3]])), tau = qpred[3], na.action = na.exclude, data = obs))]
    setnames(obs, c("rqsslo", "rqssmed", "rqsshi"), qnames)
    obs.fits <- melt(obs, id.vars = "x", measure.vars = qnames)
    obs.fits <- setnames(obs.fits, c("variable", "value"), c("qname", "fit"))
  }
  return(obs.fits)
}

# Internal Function
.fitobs.strat <- function(strat.split, x.strat, llam.qpred, qpred, l.ypc = FALSE) {
  rqsslo <- rqssmed <- rqsshi <- NULL
  qnames <- paste0("q", as.character(qpred))

  if(l.ypc) {
    for (i in seq_along(strat.split)) {
      strat.split[[i]][, rqsslo  := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[1]][[i]][[1]])),tau= qpred[1], na.action = na.exclude, data = strat.split[[i]]))]
      strat.split[[i]][, rqssmed := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[2]][[i]][[1]])),tau= qpred[2], na.action = na.exclude, data = strat.split[[i]]))]
      strat.split[[i]][, rqsshi  := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[3]][[i]][[1]])),tau= qpred[3], na.action = na.exclude, data = strat.split[[i]]))]
    }
  } else {
    for (i in seq_along(strat.split)) {
      strat.split[[i]][, rqsslo  := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[1]][[i]][[1]])),tau= qpred[1], na.action = na.exclude, data = strat.split[[i]]))]
      strat.split[[i]][, rqssmed := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[2]][[i]][[1]])),tau= qpred[2], na.action = na.exclude, data = strat.split[[i]]))]
      strat.split[[i]][, rqsshi  := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[3]][[i]][[1]])),tau= qpred[3], na.action = na.exclude, data = strat.split[[i]]))]
    }
  }
  strat.obs.fits <- rbindlist(strat.split)
  strat.obs.fits <- setnames(strat.obs.fits, c("rqsslo", "rqssmed", "rqsshi"), qnames)
  strat.obs.fits <- melt(strat.obs.fits, id.vars = x.strat, measure.vars = qnames)
  strat.obs.fits <- setnames(strat.obs.fits, c("variable", "value"), c("qname", "fit"))
}

# Internal Function
.fitsim <- function(sim, llam.qpred, span = NULL, qpred, qconf, l.ypc = FALSE, log = FALSE) {
  . <- list
  rqsslo <- rqssmed <- rqsshi <- y <- pred <- repl <- NULL
  qnames <- paste0("q", as.character(qpred))

  if(l.ypc) {
    if(log) {
      sim[, l.ypc := y + (fitted(loess(pred ~ x, span = span, na.action = na.exclude, .SD)) - pred), by = .(repl)]
    } else {
      sim[, l.ypc := y * (fitted(loess(pred ~ x, span = span, na.action = na.exclude, .SD)) / pred), by = .(repl)]
    }
    suppressWarnings(sim[, rqsslo := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[1]])),
                                                 tau  = qpred[1], na.action = na.exclude, .SD)), by = .(repl)])
    suppressWarnings(sim[, rqssmed := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[2]])),
                                                  tau = qpred[2], na.action = na.exclude, .SD)), by = .(repl)])
    suppressWarnings(sim[, rqsshi := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[3]])),
                                                 tau  = qpred[3], na.action = na.exclude, .SD)), by = .(repl)])
    setnames(sim, c("rqsslo", "rqssmed", "rqsshi"), qnames)
  } else {
    suppressWarnings(sim[, rqsslo := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[1]])),
                                                 tau  = qpred[1], na.action = na.exclude, .SD)), by = .(repl)])
    suppressWarnings(sim[, rqssmed := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[2]])),
                                                  tau = qpred[2], na.action = na.exclude, .SD)), by = .(repl)])
    suppressWarnings(sim[, rqsshi := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[3]])),
                                                 tau  = qpred[3], na.action = na.exclude, .SD)), by = .(repl)])
    setnames(sim, c("rqsslo", "rqssmed", "rqsshi"), qnames)
  }

  sim.lb <- sim[, lapply(.SD, quantile, probs = qconf[[1]]), by = "x"] #CI lb
  sim.lb <- setnames(melt(sim.lb, id.vars = "x", measure.vars = qnames), c("x", "qname", "fit.lb"))  #Wide to long

  sim.ub <- sim[, lapply(.SD, quantile, probs = qconf[[3]]), by = "x"] #CI ub
  sim.ub <- setnames(melt(sim.ub, id.vars = "x", measure.vars = qnames), c("x", "qname", "fit.ub"))   #Wide to long

  sim <-  sim[, lapply(.SD, median, na.rm = TRUE), by = "x"] #Med fits
  sim <- setnames(melt(sim, id.vars = "x", measure.vars = qnames), c("x", "qname", "fit")) #wide to long

  sim <- sim[sim.lb, on = c("x", "qname")]
  sim <- sim[sim.ub, on = c("x", "qname")]
}

# Internal Function
.fitsim.strat <- function(strat.split.sim, x.strat, llam.qpred, span = NULL, qpred, qconf, l.ypc = FALSE, log = FALSE) {
  . <- list
  rqsslo <- rqssmed <- rqsshi <- y <- pred <- repl <- NULL
  qnames <- paste0("q", as.character(qpred))

  if(l.ypc) {
    if(log) {
      for (i in seq_along(strat.split.sim)) {
        strat.split.sim[[i]][, l.ypc := y + (fitted(loess(pred ~ x, span = span[[i]], na.action = na.exclude, .SD)) - pred), by = .(repl)]

        suppressWarnings(strat.split.sim[[i]][, rqsslo := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[1]][[i]][[1]])),
                                                                      tau  = qpred[1], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.lo = unlist(rqsslo))])

        suppressWarnings(strat.split.sim[[i]][, rqssmed := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[2]][[i]][[1]])),
                                                                       tau = qpred[2], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.med = unlist(rqssmed))])

        suppressWarnings(strat.split.sim[[i]][, rqsshi := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[3]][[i]][[1]])),
                                                                      tau  = qpred[3], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.hi = unlist(rqsshi))])
      }
    } else {
      for (i in seq_along(strat.split.sim)) {
        strat.split.sim[[i]][, l.ypc := y * (fitted(loess(pred ~ x, span = span[[i]], na.action = na.exclude, .SD)) / pred), by = .(repl)]

        suppressWarnings(strat.split.sim[[i]][, rqsslo := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[1]][[i]][[1]])),
                                                                      tau  = qpred[1], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.lo = unlist(rqsslo))])

        suppressWarnings(strat.split.sim[[i]][, rqssmed := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[2]][[i]][[1]])),
                                                                       tau = qpred[2], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.med = unlist(rqssmed))])

        suppressWarnings(strat.split.sim[[i]][, rqsshi := fitted(rqss(l.ypc ~ qss(x, lambda = exp(llam.qpred[[3]][[i]][[1]])),
                                                                      tau  = qpred[3], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.hi = unlist(rqsshi))])
      }
    }
  } else {
    for (i in seq_along(strat.split.sim)) {
      suppressWarnings(strat.split.sim[[i]][, rqsslo := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[1]][[i]][[1]])),
                                                                    tau  = qpred[1], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.lo = unlist(rqsslo))])

      suppressWarnings(strat.split.sim[[i]][, rqssmed := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[2]][[i]][[1]])),
                                                                     tau = qpred[2], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.med = unlist(rqssmed))])

      suppressWarnings(strat.split.sim[[i]][, rqsshi := fitted(rqss(y ~ qss(x, lambda = exp(llam.qpred[[3]][[i]][[1]])),
                                                                    tau  = qpred[3], na.action = na.exclude, .SD)), by = .(repl)][,.(fit.hi = unlist(rqsshi))])
    }
  }

  sim <- rbindlist(strat.split.sim)
  setnames(sim, c("rqsslo", "rqssmed", "rqsshi"), qnames)

  sim.lb <- sim[, lapply(.SD, quantile, probs = qconf[[1]]), by = x.strat, .SDcols = qnames]
  sim.lb <- setnames(melt(sim.lb, id.vars = x.strat, measure.vars = qnames), c("variable", "value"), c("qname", "fit.lb"))

  sim.ub <- sim[, lapply(.SD, quantile, probs = qconf[[3]]), by = x.strat, .SDcols = qnames]
  sim.ub <- setnames(melt(sim.ub, id.vars = x.strat, measure.vars = qnames), c("variable", "value"), c("qname", "fit.ub"))

  sim <- sim[, lapply(.SD, median, na.rm = TRUE), by = x.strat, .SDcols = qnames]
  sim <- setnames(melt(sim, id.vars = x.strat, measure.vars = qnames), c("variable", "value"), c("qname", "fit"))

  sim <- sim[sim.lb, on = c(x.strat, "qname")]
  sim <- sim[sim.ub, on = c(x.strat, "qname")]
}



# Internal function for optimizing loess fit
.aicc.loess <- function(fit){
  #compute AIC_C for a LOESS fit, from:
  #Hurvich, C.M., Simonoff, J.S., and Tsai, C. L. 1998. Smoothing
  #parameter selection in nonparametric regression using an improved
  #Akaike Information Criterion. Journal of the Royal Statistical
  #Society B 60: 271–293.

  stopifnot(inherits(fit, 'loess'))
  n <- fit$n
  trace <- fit$trace.hat
  sigma2 <- sum(resid(fit) ^ 2) / (n - 1)
  return(log(sigma2) + 1 + (2 * (trace + 1)) / (n - trace - 2))
}

# Internal function for optimizing loess fit
.autoloess <- function(fit, span=c(.1, .9), ...){
  #compute loess fit which has span minimizes AIC_C
  #fit = loess fit; span parameter value doesn't matter
  #span = a two-value vector representing the minimum and maximum span values
  #Returns LOESS fit with span minimizing the AIC_C function

  stopifnot(inherits(fit, 'loess')) #, length(span) == 2)

  #loss function in form to be used by optimize
  f <- function(span) .aicc.loess(update(fit, span=span))

  #find best loess according to loss function
  opt.fit  <- update(fit, span=optimize(f, span)$minimum)
  opt.span <- optimize(f, span)$minimum
  return(list(fit=opt.fit, span=opt.span))

}

# Internal function for returning optimized span by strata
.getspan <- function(x) {
  span <- vector("list", length(x))
  for (i in seq_along(x)) {
    span[[i]] <- x[[i]]$span
  }
  names(span) <- names(x)
  return(span)
}

# Internal function for returning l.ypc fits by strata
.getfitted <- function(x) {
  fits <- vector("list", length(x))
  for (i in seq_along(x)) {
    fits[[i]] <- as.data.table(x[[i]]$loess[[1]]$fit$fitted)
    fits[[i]] <- setnames(fits[[i]], "fitted")
  }
  names(fits) <- names(x)
  return(fits)
}


.getfit <- function(x) {
  fit <- vector("list", length(x))
  for (i in seq_along(x)) {
    fit[[i]] <- x[[i]]$loess[[1]]$fit
  }
  names(fit) <- names(x)
  return(fit)
}

.isCensored <- function(obs) {
  if(!is.null(obs$blq) && any(obs$blq)){
    ret <- TRUE
  } else if (!is.null(obs$alq) && any(obs$alq)){
    ret <- TRUE
  } else {
    ret <- FALSE
  }

  ret

}

.onAttach <- function(libname, pkgname) {
  packageStartupMessage(paste0("tidyvpc is part of Certara.R!\n",
                               "Follow the link below to learn more about PMx R package development at Certara.\n",
                               "https://certara.github.io/R-Certara/"))
}
Any scripts or data that you put into this service are public.
tidyvpc documentation built on Nov. 2, 2023, 6:26 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
tidyvpc
VPC Percentiles and Prediction Intervals

R/vpcstats.R
In tidyvpc: VPC Percentiles and Prediction Intervals

Defines functions binning.tidyvpcobj binning stratify.tidyvpcobj stratify censoring.tidyvpcobj censoring simulated.tidyvpcobj simulated observed.data.frame observed

Documented in binning binning.tidyvpcobj censoring censoring.tidyvpcobj observed observed.data.frame simulated simulated.tidyvpcobj stratify stratify.tidyvpcobj

Try the tidyvpc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tidyvpc VPC Percentiles and Prediction Intervals

R/vpcstats.R In tidyvpc: VPC Percentiles and Prediction Intervals

Defines functions binning.tidyvpcobj binning stratify.tidyvpcobj stratify censoring.tidyvpcobj censoring simulated.tidyvpcobj simulated observed.data.frame observed

Documented in binning binning.tidyvpcobj censoring censoring.tidyvpcobj observed observed.data.frame simulated simulated.tidyvpcobj stratify stratify.tidyvpcobj

Try the tidyvpc package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

tidyvpc
VPC Percentiles and Prediction Intervals

R/vpcstats.R
In tidyvpc: VPC Percentiles and Prediction Intervals