R/distribution_methods.R
In reservr: Fit Distributions and Neural Networks to Censored and Truncated Data

Documented in fit.Distribution quantile.Distribution

#' Quantiles of Distributions
#'
#' Produces quantiles corresponding to the given probabilities with configurable distribution parameters.
#'
#' @param x A `Distribution`.
#' @param probs Quantiles to compute.
#' @param with_params Optional list of distribution parameters. Note that if `x$has_capability("quantile")` is false,
#' `with_params` is assumed to contain only one set of parameters.
#' @param ... ignored
#' @param .start Starting value if quantiles are computed numerically. Must be within the support of `x`.
#'
#' @details
#' If `x$has_capability("quantile")` is true, this returns the same as `x$quantile(probs, with_params = with_params)`.
#' In this case, `with_params` may contain separate sets of parameters for each quantile to be determined.
#'
#' Otherwise, a numerical estimation of the quantiles is done using the density and probability function.
#' This method assumes `with_params` to cantain only one set of parameters.
#' The strategy uses two steps:
#'
#' 1. Find the smallest and largest quantiles in `probs` using a newton method starting from `.start`.
#' 2. Find the remaining quantiles with bisection using [stats::uniroot()].
#'
#' @return The quantiles of `x` corresponding to `probs` with parameters `with_params`.
#'
#' @examples
#' # With quantiles available
#' dist <- dist_normal(sd = 1)
#' qqs <- quantile(dist, probs = rep(0.5, 3), with_params = list(mean = 1:3))
#' stopifnot(all.equal(qqs, 1:3))
#'
#' # Without quantiles available
#' dist <- dist_erlangmix(shapes = list(1, 2, 3), scale = 1.0)
#' my_probs <- c(0, 0.01, 0.25, 0.5, 0.75, 1)
#' qqs <- quantile(
#'   dist, probs = my_probs,
#'   with_params = list(probs = list(0.5, 0.3, 0.2)), .start = 2
#' )
#'
#' all.equal(dist$probability(qqs, with_params = list(probs = list(0.5, 0.3, 0.2))), my_probs)
#' # Careful: Numerical estimation of extreme quantiles can result in out-of-bounds values.
#' # The correct 0-quantile would be 0 in this case, but it was estimated < 0.
#' qqs[1L]
#'
#' @export
quantile.Distribution <- function(x, probs = seq(0, 1, 0.25),
                                  with_params = list(), ..., .start = 0.0) {
  assert_that(
    is.Distribution(x),
    msg = "`x` must be a Distribution."
  )
  assert_that(
    is.numeric(probs),
    all(probs >= 0),
    all(probs <= 1),
    msg = "`probs` must be a numeric vector with elements in [0, 1]."
  )

  if (x$has_capability("quantile")) {
    x$quantile(probs, with_params = with_params)
  } else {
    # Numeric quantile approximation.
    # Idea:
    # 1. Start at init, find q0_min and q0_max, the finite quantiles of the
    #    smallest and largest probability using nloptr::tnewton (probs 0 and
    #    1 might be infinite)
    # 2. All other quantiles are either in [q0_min, init] or in [init, q0_max].
    #    Find these remaining quantiles using uniroot

    x$require_capability(
      c("probability", "density"),
      fun_name = "quantile.Distribution() without capability 'quantile'"
    )
    assert_that(
      is_scalar_double(.start),
      x$is_in_support(.start, with_params = with_params),
      msg = "`start` must be a single numeric within the support of `x`."
    )

    p0 <- x$probability(.start, with_params = with_params)
    out <- rep_len(NA_real_, length(probs))

    out[probs == p0] <- .start

    if (1.0 %in% probs && x$is_in_support(.Machine$double.xmax)) {
      out[probs == 1.0] <- Inf
    }

    if (0.0 %in% probs && x$is_in_support(-.Machine$double.xmax)) {
      out[probs == 0.0] <- -Inf
    }

    d_squared <- function(q, prob_target) {
      delta_prob <- x$probability(q, with_params = with_params) - prob_target
      list(
        objective = delta_prob^2,
        gradient = 2 * delta_prob * x$density(q, with_params = with_params)
      )
    }

    diff_prob <- function(q, prob_target) {
      x$probability(q, with_params = with_params) - prob_target
    }

    if (any(is.na(out) & probs < p0)) {
      p0_min <- min(probs[is.na(out) & probs < p0])
      q0_min <- nloptr::tnewton(.start, fn = d_squared, prob_target = p0_min)$par
      if (diff_prob(q0_min, p0_min) > 0.0) {
        out[probs == p0_min] <- q0_min
      }

      for (prob in sort(unique(probs[is.na(out) & probs < p0]))) {
        q0_min <- uniroot(
          diff_prob, lower = q0_min, upper = .start, prob_target = prob
        )$root
        out[probs == prob] <- q0_min
      }
    }

    if (any(is.na(out) & probs > p0)) {
      p0_max <- max(probs[is.na(out) & probs > p0])
      q0_max <- nloptr::tnewton(.start, fn = d_squared, prob_target = p0_max)$par
      if (diff_prob(q0_max, p0_max) < 0.0) {
        out[probs == p0_max] <- q0_max
      }

      remaining_probs <- sort(
        unique(probs[is.na(out) & probs > p0]),
        decreasing = TRUE
      )
      for (prob in remaining_probs) {
        q0_max <- uniroot(
          diff_prob, lower = .start, upper = q0_max, prob_target = prob
        )$root
        out[probs == prob] <- q0_max
      }
    }

    out
  }
}

#' @rdname fit_dist
#' @param object same as parameter `dist`
#' @export
fit.Distribution <- function(object, obs, start, ...) {
  fit_dist(dist = object, obs = obs, start = start, ...)
}

#' @export
fit_dist.Distribution <- function(dist, obs, start, ...) {
  obs <- as_trunc_obs(obs)
  start <- .check_fit_dist_start(dist, obs, start, ...)

  n <- nrow(obs)
  init_flat <- flatten_params(start)
  param_names <- names(flatten_params(dist$get_placeholders()))

  # Ensure consistent ordering.
  init_flat <- init_flat[param_names]

  i_cens <- is.na(obs$x)
  i_obs <- !i_cens

  bounds <- flatten_bounds(dist$get_param_bounds())
  bounds$lower <- bounds$lower[param_names]
  bounds$upper <- bounds$upper[param_names]
  constraint <- dist$get_param_constraints()

  neg_loglik <- .fit_dist_objective(
    dist, obs, i_obs, i_cens, param_names, bounds
  )

  if (!is.null(constraint)) {
    constraint_impl <- constraint

    constraint <- function(par) {
      names(par) <- param_names
      wp <- inflate_params(par)
      constraint_impl(wp)
    }

  }

  if (length(init_flat) == 1L && is.null(constraint)) {
    xopt <- muffle_nans_produced(nloptr::tnewton(
      x0 = init_flat,
      fn = neg_loglik,
      lower = bounds$lower,
      upper = bounds$upper
    ))
  } else if (is.null(constraint)) {
    xopt <- muffle_nans_produced(nloptr::lbfgs(
      x0 = init_flat,
      fn = neg_loglik,
      lower = bounds$lower,
      upper = bounds$upper
    ))
  } else {
    xopt <- muffle_nans_produced(nloptr::slsqp(
      x0 = init_flat,
      fn = neg_loglik,
      lower = bounds$lower,
      upper = bounds$upper,
      heq = constraint
    ))
  }
  names(xopt$par) <- param_names

  list(
    params = inflate_params(xopt$par),
    opt = xopt,
    logLik = structure(
      -xopt$value, class = "logLik",
      df = dist$get_dof(),
      nobs = n
    )
  )
}

#' @export
format.Distribution <- function(x, short = FALSE, ...) {
  dists <- x$get_components()
  if (length(dists)) {
    dists <- vapply(dists, format, character(1L), short = TRUE)
    prefix <- gsub("Distribution$", "", class(x)[1L])
    dist <- paste0(prefix, "<", paste(dists, collapse = ", "), ">")
  } else {
    dist <- class(x)[1L]
  }
  if (short) return(dist)
  a_an <- if (substr(dist, 1L, 1L) %in% c("A", "E", "I", "O", "U")) "An" else "A"
  paste(a_an, dist, "with", x$get_dof(), "dof")
}

#' @export
format.ErlangMixtureDistribution <- function(x, short = FALSE, ...) {
  free_shapes <- "shapes" %in% names(x$get_placeholders())
  spec <- if (free_shapes) {
    paste0("k = ", length(x$get_components()))
  } else {
    paste(unlist(x$default_params$shapes), collapse = ", ")
  }
  dist <- paste0("ErlangMixture<", spec, ">")
  if (short) return(dist)
  paste("An", dist, "with", x$get_dof(), "dof")
}

#' @export
format.DiscreteDistribution <- function(x, short = FALSE, ...) {
  free_probs <- length(x$get_placeholders()$probs)
  spec <- if (free_probs) {
    paste0("size = ", length(x$default_params$probs))
  } else {
    p <- unlist(x$default_params$probs)
    p <- p / sum(p)
    paste(p, collapse = ", ")
  }
  dist <- paste0("Discrete<", spec, ">")
  if (short) return(dist)
  paste("A", dist, "with", x$get_dof(), "dof")
}

#' @export
format.BDEGPDistribution <- function(x, short = FALSE, ...) {
  free_shapes <- length(x$get_placeholders()$dists[[2]]$dists[[1]]$dist$shapes)
  free_xi <- "xi" %in% names(x$get_placeholders()$dists[[2]]$dists[[2]])
  free_sigmau <- "sigmau" %in% names(x$get_placeholders()$dists[[2]]$dists[[2]])
  spec <- paste0(
    "n = ", x$default_params$dists[[1]]$default_params$dist$default_params$size, ", ",
    if (free_shapes > 0L) {
      paste0("m = ", free_shapes)
    } else {
      paste0(
        "m = <",
        paste(
          unlist(x$default_params$dists[[2]]$default_params$dists[[1]]$default_params$dist$default_params$shapes),
          collapse = ", "
        ),
        ">"
      )
    }, ", ",
    "u = ", x$default_params$dists[[2]]$default_params$breaks[[1L]], ", ",
    "epsilon = ", x$default_params$dists[[2]]$default_params$bandwidths[[1L]],
    if (!free_xi) {
      paste0(", xi = ", x$default_params$dists[[2]]$default_params$dists[[2]]$default_params$xi)
    },
    if (!free_sigmau) {
      paste0(", sigmau = ", x$default_params$dists[[2]]$default_params$dists[[2]]$default_params$sigmau)
    }
  )
  dist <- paste0("BDEGP<", spec, ">")
  if (short) return(dist)
  paste("A", dist, "with", x$get_dof(), "dof")
}

Any scripts or data that you put into this service are public.

reservr documentation built on June 24, 2024, 5:10 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

reservr
Fit Distributions and Neural Networks to Censored and Truncated Data

R/distribution_methods.R
In reservr: Fit Distributions and Neural Networks to Censored and Truncated Data

Defines functions format.BDEGPDistribution format.DiscreteDistribution format.ErlangMixtureDistribution format.Distribution fit_dist.Distribution fit.Distribution quantile.Distribution

Documented in fit.Distribution quantile.Distribution

Try the reservr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

reservr Fit Distributions and Neural Networks to Censored and Truncated Data

R/distribution_methods.R In reservr: Fit Distributions and Neural Networks to Censored and Truncated Data

Defines functions format.BDEGPDistribution format.DiscreteDistribution format.ErlangMixtureDistribution format.Distribution fit_dist.Distribution fit.Distribution quantile.Distribution

Documented in fit.Distribution quantile.Distribution

Try the reservr package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

reservr
Fit Distributions and Neural Networks to Censored and Truncated Data

R/distribution_methods.R
In reservr: Fit Distributions and Neural Networks to Censored and Truncated Data