R/tools.R

Defines functions get_seeds print_fit_info print_truncation_info print.summary_df emp_cdf pseudo_obs correct_var_types in_set check_margin_controls get_npars_distr check_distr depth multiplot check_and_match_family_set expand_family expand_family_set process_family_set args2bicop if_vec_to_matrix

Documented in emp_cdf pseudo_obs

#' Internal: Turn vector input into a matrix with two columns
#'
#' @param u input data
#' @param to_col if `u` is a vector, then `to_col = FALSE` (respectively
#' `to_col = TRUE`) transforms it into a matrix with a single row (respectively
#' single column)
#'
#'
#' @return either a matrix, or an error if u is neither a matrix, data.frame,
#' or a vector
#'
#' @noRd
if_vec_to_matrix <- function(u, to_col = FALSE) {
  if (is.null(u))
    return(NULL)
  assert_that(is.numeric(u) | is.data.frame(u))
  if (NCOL(u) == 1) {
    if (to_col) {
      u <- matrix(u, length(u), 1)
    } else {
      u <- matrix(u, 1, length(u))
    }
  }
  if (!is.matrix(u)) {
    u <- as.matrix(u)
  }

  u
}

#' Internal: Convert arguments to `bicop_dist` object.
#' @param family the family as passed in function call.
#' @param rotation the rotation as passed in function call.
#' @param parameters the parameters as passed in function call.
#' @return A `bicop_dist` object.
#' @noRd
args2bicop <- function(family, rotation, parameters, var_types = c("c", "c")) {
  if (all(inherits(family, "bicop_dist"))) {
    return(family)
  } else {
    if (missing(rotation)) {
      rotation <- 0
    }
    if (missing(parameters)) {
      parameters <- numeric(0)
    }
    assert_that(is.string(family), is.number(rotation), is.numeric(parameters))
    return(bicop_dist(family, rotation, parameters, var_types))
  }
}

process_family_set <- function(family_set, par_method) {
  family_set <- check_and_match_family_set(family_set)
  family_set <- expand_family_set(family_set)
  if (par_method == "itau") {
    if (any(!(family_set %in% family_set_itau))) {
      warning("Only families (",
        paste(family_set_itau, collapse = ", "),
        ") can be used with ", "'par_method = ", '"itau"', "'; ",
        "reducing family set.",
        call. = FALSE
      )
      family_set <- intersect(family_set, family_set_itau)
    }
  }

  family_set
}

#' Internal: Expand shortcuts in the familyset.
#' @noRd
expand_family_set <- function(family_set) {
  unique(unlist(lapply(family_set, expand_family)))
}

expand_family <- function(family) {
  switch(
    family,
    "archimedean" = family_set_archimedean,
    "elliptical" = family_set_elliptical,
    "bbs" = family_set_bb,
    "oneparametric" = family_set_onepar,
    "twoparametric" = family_set_twopar,
    "parametric" = family_set_parametric,
    "nonparametric" = family_set_nonparametric,
    "itau" = family_set_itau,
    "all" = family_set_all,
    family # default is no expansion
  )
}

#' Internal: Checks whether all families are known (including partial matching).
#' @noRd
check_and_match_family_set <- function(family_set) {
  matched_fams <- family_set_all_defs[pmatch(family_set, family_set_all_defs)]
  if (any(is.na(matched_fams))) {
    stop(
      "unknown families in family_set: ",
      paste0('"', family_set[is.na(matched_fams)], '"', collapse = ", ")
    )
  }
  matched_fams
}

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot
# objects) - cols:   Number of columns in layout - layout: A matrix specifying
# the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), then
# plot 1 will go in the upper left, 2 will go in the upper right, and 3 will go
# all the way across the bottom.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots <- length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots / cols)),
      ncol = cols, nrow = ceiling(numPlots / cols)
    )
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(grid::viewport(
      layout =
        grid::grid.layout(
          nrow(layout),
          ncol(layout)
        )
    ))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]],
        vp = grid::viewport(
          layout.pos.row = matchidx$row,
          layout.pos.col = matchidx$col
        )
      )
    }
  }
}

# Get the depth of a list
depth <- function(this) ifelse(is.list(this), 1L + max(sapply(this, depth)), 0L)

supported_distributions <- c(
  "beta", "cauchy", "chisq", "exp", "f", "gamma",
  "lnorm", "norm", "t", "unif", "weibull"
)

#' @importFrom stats pbeta qbeta qbeta dcauchy pcauchy qcauchy dchisq pchisq
#' @importFrom stats qchisq dexp pexp qexp df pf qf dgamma pgamma qgamma
#' @importFrom stats dlnorm plnorm qlnorm dt pt qt dunif punif qunif
#' @importFrom stats dweibull pweibull qweibull
check_distr <- function(distr) {

  ## if provided with a kde1d object, then there is nothing to do
  if (inherits(distr, "kde1d")) {
    return(TRUE)
  }

  ## basic sanity checks
  if (!is.list(distr)) {
    return("a distribution should be a kde1d object or a list")
  }
  if (!any(is.element(names(distr), "distr"))) {
    return("a distribution should be a kde1d object or a list with a 'distr' element")
  }
  nn <- distr[["distr"]]
  if (!is.element(nn, supported_distributions)) {
    return("the provided name does not belong to supported distributions")
  }

  ## check that the provided parameters are consistent with the distribution
  qfun <- get(paste0("q", nn))
  par <- distr[names(distr) != "distr"]
  par$p <- 0.5
  e <- tryCatch(do.call(qfun, par), error = function(e) e)
  if (any(class(e) == "error")) {
    return(e$message)
  }

  return(TRUE)
}

get_npars_distr <- function(distr) {
  switch(distr$distr,
    beta = 2,
    cauchy = 2,
    chisq = ifelse("ncp" %in% names(distr), 2, 1),
    exp = 1,
    f = 3,
    gamma = 2,
    lnorm = 2,
    norm = 2,
    t = ifelse("ncp" %in% names(distr), 3, 2),
    unif = 2,
    weibull = ifelse("scale" %in% names(distr), 2, 1)
  )
}

#' @param data the data (after expand_factors() was called).
#' @param ctrl the margin controls object (after expand_margin_controls() was
#'   called).
#' @noRd
check_margin_controls <- function(data, ctrl) {
  nms <- colnames(data)
  if (is.null(nms)) {
    nms <- as.character(seq_len(ncol(data)))
  }
  lapply(seq_len(NCOL(data)), function(k) {
    msg_var <- paste0("Problem with margin_controls for variable ", nms[k], ": ")
    tryCatch(
      assert_that(
        is.numeric(ctrl$mult[k]), ctrl$mult[k] > 0,
        is.numeric(ctrl$xmin[k]), is.numeric(ctrl$xmax[k]),
        is.na(ctrl$bw[k]) | (is.numeric(ctrl$bw[k]) & (ctrl$bw[k] > 0)),
        is.numeric(ctrl$deg[k])
      ),
      error = function(e) stop(msg_var, e$message)
    )

    if (is.ordered(data[, k]) & (!is.nan(ctrl$xmin[k]) | !is.nan(ctrl$xmax[k]))) {
      stop(msg_var, "xmin and xmax are not meaningful for x of type ordered.")
    }

    if (!is.nan(ctrl$xmax[k]) & !is.nan(ctrl$xmin[k])) {
      if (ctrl$xmin[k] > ctrl$xmax[k]) {
        stop(msg_var, "xmin is larger than xmax.")
      }
    }
    if (!is.nan(ctrl$xmin[k])) {
      if (any(data[, k] < ctrl$xmin[k])) {
        stop(msg_var, "not all data are larger than xmin.")
      }
    }
    if (!is.nan(ctrl$xmax[k])) {
      if (any(data[, k] > ctrl$xmax[k])) {
        stop(msg_var, "not all data are samller than xmax.")
      }
    }
    if (!(ctrl$deg[k] %in% 0:2)) {
      stop(msg_var, "deg must be either 0, 1, or 2.")
    }
  })
}


#' @noRd
#' @importFrom assertthat assert_that on_failure<-
#' @importFrom assertthat is.number is.string is.flag is.scalar
in_set <- function(el, set) {
  all(el %in% set)
}


on_failure(in_set) <- function(call, env) {
  paste0(
    deparse(call$el),
    " must be one of {",
    paste0(eval(call$set, env), collapse = ", "),
    "}."
  )
}

correct_var_types <- function(var_types, data) {
  is.character(var_types) && in_set(var_types, c("c", "d"))
}

on_failure(correct_var_types) <- function(call, env) {
  paste0("var_types must be vector with elements 'c' or 'd'.")
}

#' Pseudo-Observations
#'
#' Compute the pseudo-observations for the given data matrix.
#'
#' @param x vector or matrix random variates to be converted (column wise) to
#' pseudo-observations.
#' @param ties_method similar to `ties.method` of [rank()] (only `"average"`,
#' `"first"` and `"random"` currently available).
#' @param lower_tail `logical` which, if `FALSE``, returns the
#'   pseudo-observations when applying the empirical marginal survival
#'   functions.
#' @details
#' Given `n` realizations \eqn{x_i=(x_{i1}, \ldots,x_{id})},
#' \eqn{i \in \left\lbrace 1, \ldots,n \right\rbrace }
#' of a random vector `X`, the pseudo-observations are defined via
#' \eqn{u_{ij}=r_{ij}/(n+1)} for
#' \eqn{i \in \left\lbrace 1, \ldots,n \right\rbrace}
#' and
#' \eqn{j \in \left\lbrace 1, \ldots,d \right\rbrace }, where
#' \eqn{r_{ij}} denotes the rank of \eqn{x_{ij}} among all \eqn{x_{kj}},
#' \eqn{k \in \left\lbrace 1, \ldots,n \right\rbrace }.
#'
#' The pseudo-observations can thus also be computed by component-wise applying
#' the empirical distribution functions to the data and scaling the result by
#' \eqn{n/(n+1)}. This asymptotically negligible scaling factor is used to force
#' the variates to fall inside the open unit hypercube, for example, to avoid
#' problems with density evaluation at the boundaries.
#'
#' When `lower_tail = FALSE`, then `pseudo_obs()` simply returns
#' `1 - pseudo_obs()`.
#'
#' @return
#' a vector of matrix of the same dimension as the input containing the
#' pseudo-observations.
#' @examples
#' # pseudo-observations for a vector
#' pseudo_obs(rnorm(10))
#'
#' # pseudo-observations for a matrix
#' pseudo_obs(cbind(rnorm(10), rnorm(10)))
#' @export
pseudo_obs <- function(x, ties_method = "average", lower_tail = TRUE) {
  assert_that(is.scalar(lower_tail) && is.logical(lower_tail))
  assert_that(is.character(ties_method) && is.scalar(ties_method))
  assert_that(in_set(ties_method, c("average", "first", "random")))
  assert_that(is.numeric(x) || is.matrix(x) || is.data.frame(x))

  x[] <- pseudo_obs_cpp(if_vec_to_matrix(x, TRUE), ties_method)
  if (!lower_tail) {
    x <- 1 - x
  }
  x
}

#' Corrected Empirical CDF
#'
#' The empirical CDF with tail correction, ensuring that its output is never
#' 0 or 1.
#'
#' @details The corrected empirical CDF is defined as
#' \deqn{
#' F_n(x) = \frac{1}{n + 1} \min\biggl\{1, \sum_{i = 1}^n 1(X_i \le x)\biggr\}
#' }
#'
#' @param x numeric vector of observations
#'
#' @return A function with signature `function(x)` that returns \eqn{F_n(x)}.
#'
#' @importFrom stats ecdf
#' @export
#' @examples
#' # fit ECDF on simulated data
#' x <- rnorm(100)
#' cdf <- emp_cdf(x)
#'
#' # output is bounded away from 0 and 1
#' cdf(-50)
#' cdf(50)
emp_cdf <- function(x) {
  assert_that(is.numeric(x))
  n <- length(x)
  Fn <- ecdf(x)
  function(xx) pmax(n * Fn(xx), 1) / (n + 1)
}

#' Truncates output of model data frames.
#'
#' @param x a `data.frame` whose print output should be truncated.
#' @noRd
#' @export
print.summary_df <- function(x, ..., rows = 1:10) {
  assert_that(is.numeric(rows), all(rows > 0))
  rows <- intersect(seq_len(nrow(x)), rows)
  x_print <- x[rows, ]
  cat("# A data.frame:", nrow(x), "x", ncol(x), "\n")
  print.data.frame(x_print, digits = 2, row.names = FALSE)
  if (nrow(x) > length(rows)) {
    cat("# ... with", nrow(x) - length(rows), "more rows\n")
  }
  invisible(x)
}

#' internal function
#' @noRd
print_truncation_info <- function(x) {
  if (dim(x)[2] < dim(x)[1] - 1) {
    cat(", ", dim(x)[2], "-truncated", sep = "")
  }
  cat("\n")
}

#' internal function
#' @noRd
print_fit_info <- function(x) {
  ll <- logLik(x)
  cat("nobs =", x$nobs, "  ")
  cat("logLik =", round(ll[1], 2), "  ")
  cat("npars =", round(attr(ll, "df"), 2), "  ")
  cat("AIC =", round(-2 * ll[1] + 2 * attr(ll, "df"), 2), "  ")
  cat("BIC =", round(-2 * ll[1] + log(x$nobs) * attr(ll, "df"), 2), "  ")
  cat("\n")
}

#' internal function : synchronize C++ random number generators with R
#' @importFrom stats runif
#' @noRd
get_seeds <- function() {
  as.numeric(sprintf("%20.0f", runif(20, 1e6, 1e7)))
}

Try the rvinecopulib package in your browser

Any scripts or data that you put into this service are public.

rvinecopulib documentation built on March 7, 2023, 6:20 p.m.