R/metalog.R

Defines functions is_metalog_valid rmetalog pmetalog dqmetalog fmetalog qmetalog approx_max_metalog approx_metalog fit_metalog metalog_prepare_Y metalog_prepare_z

Documented in approx_max_metalog approx_metalog dqmetalog fit_metalog fmetalog is_metalog_valid pmetalog qmetalog rmetalog

# internal function for preparing the vector z for metalog.
metalog_prepare_z <- function(q, bl, bu){
  if(is.finite(bl) || is.finite(bu)){
    z <-log((q-bl)/(bu-q))
    if(is.infinite(bu)) z<-log(q-bl) # bl is defined
    if(is.infinite(bl)) z<-(-log(bu-q))# bu is defined
  } else {return(q)}
  z
}

# internal function for preparing the matrix Y for metalog.
metalog_prepare_Y <- function(p, n, nterms){
  log_odds <- log(p)-log1p(-p) #stats::qlogis(p) #log(p/(1-p))
  pmhalf <- p-0.5
  Y <- matrix(NA_real_, n, nterms)
  Y[,1] <- 1
  odd <- TRUE
  if (nterms>1) Y[,2] <- log_odds;
  if (nterms>2) Y[,3] <- pmhalf*log_odds;
  if (nterms>3) Y[,4] <- pmhalf;
  if (nterms>4)
    for (m in 5:nterms){
      if(odd){
        pmhalf <- pmhalf*(p-0.5);
        Y[,m] <- pmhalf;
      } else {
        Y[,m] <- pmhalf*log_odds
      }
      odd <- isFALSE(odd)
    }
  Y
}

#' @title Fitting the metalog functions
#' @description Functions for fitting and sampling from metalog distribution
#' @details
#' `fit_metalog` is for fitting the metalog function to the set of QP values.
#' Number of metalog terms will match the number of QP pairs.
#' `approx_metalog` is for approximating metalog function to the set of data.
#' `is_metalog_valid` is a function for checking if the metalog is valid
#' @param a vector of `a`-coefficient parameters of metalog distribution
#' @param p vector of cumulative probabilities corresponding to quantile values
#' @param q vector of quantile values (data)
#' @param bl real value of lower boundary (for bounded metalog). Default -Inf
#' @param bu real value of upper boundary (for bounded metalog). Default Inf
#' @param log.p are probabilities provided on log scale. Default FALSE
#' @param tol tolerance for solve(). Default is .Machine$double.eps^2
#' @rdname fit_metalog
#' @export
#' @examples
#' p <- c(0.1, 0.5, 0.9)
#' q <- c(4, 9, 12)
#' fit_metalog(p,q)
fit_metalog <- function(p, q, bl=-Inf, bu=Inf, log.p=FALSE, tol=.Machine$double.eps^2){
  n <- length(q)
  if(length(p)!=n) stop("Length of p and q must be equal")
  if(log.p) p <- exp(p)
  z <- metalog_prepare_z(q, bl, bu)
  Y <- metalog_prepare_Y(p, n, n)
  solve(Y, z, tol=tol)
}

#' @param nterms integer number of terms for approximating metalog. Default is 3
#' @param p the grid of probability values the quantiles q correspond to. This would be specified if metalog is fitted to the empirical CDF. Default is NULL.
#' @param thin logical. Should original data be thinned. Default is FALSE.
#' @param n_grid in case data thinning is performed, integer number of quantiles to extract from data, if data vector `q` is longer than this value
#' @param s_grid in case data thinning is performed, probability grid shape parameter passed to `qpd::make_pgrid()`. Default is 10.
#' @param tol tolerance for solve() and qr.solve(), default is .Machine$double.eps^2
#' @rdname fit_metalog
#' @importFrom stats quantile
#' @export
approx_metalog <- function(q, nterms=3L, bl=-Inf, bu=Inf, p=NULL, thin=FALSE, n_grid=1e3, s_grid=2L, tol=.Machine$double.eps^2){
  stopifnot("Metalog should have at least 2 terms!"=nterms>1)
  n <- length(q)
  if (is.null(p)){ # do it if p is not specified
    if(thin && (n > n_grid)){
      n <- n_grid
      p <- make_pgrid(n_grid, s=s_grid, trim=TRUE)
      qs <- unname(stats::quantile(q, probs=p))
    } else {
      qs <- sort(q)
      # ecdf assignment trick to avoid 0 and 1
      p <- (seq_along(qs)-0.5)/n
    }
  } else {
    stopifnot("Lengths of q and p vectors do not match"=length(p)==n)
    idx <- p!=0 & p!=1
    p <-p[idx]
    qs <- q[idx]
  }

  if(length(nterms)!=1L) stop("Incorrectly specified number of metalog terms")
  z <- matrix(metalog_prepare_z(qs[is.finite(qs)], bl, bu), ncol=1, byrow=FALSE)
  Y <- metalog_prepare_Y(p[is.finite(qs)], n, nterms)
  M4i <- t(Y) %*% Y
  Mi <- tryCatch({
     #message("Trying to invert the matrix with solve()")
     solve(M4i, tol=tol)
    },
    error = function(e) {
      #message("Solve failed. Trying Cholesky decomposition.")
      res <- tryCatch(
        chol2inv(chol(M4i)),
        error=function(ee){
        #  message("Cholesky also failed. Trying QR decomposition.")
          return(qr.solve(M4i, tol=tol))
        }
      )
      return(res)
    }#,
    #finally = {
    #  message("Solved!")
    #}
    )
  ((Mi %*% t(Y)) %*% z)[,1]
}

#' @param max_nterms Maximum number of terms to be used for fitting
#' @rdname fit_metalog
#' @importFrom stats  median
#' @export
approx_max_metalog <- function(q, p=NULL, bl=-Inf, bu=Inf, max_nterms=Inf, thin=FALSE, n_grid=1e3, s_grid=2L, tol=1e-7){
  nterms <- 2
  a <- stats::median(q)
  metalog_valid <- TRUE
  while(metalog_valid && (nterms<=max_nterms)){
    tmp_a <- tryCatch({
      approx_metalog(q, nterms =nterms, bl=bl, bu=bu, p=p, thin=thin, n_grid=n_grid, s_grid=s_grid, tol=tol)
    }, error=function(e){
      message("Failed to fit. Returning null")
      NULL
    })
    if(is.null(tmp_a)){
      metalog_valid <- FALSE
    }else{
      metalog_valid <- is_metalog_valid(tmp_a, bl=bl, bu=bu, n_grid=n_grid, s_grid=s_grid)
    }
    if(!is.na(metalog_valid) && isTRUE(metalog_valid)) a <- tmp_a
    nterms <- nterms+1L
  }
  a
}

#' @title Metalog distribution functions
#' @description Functions for sampling from metalog distribution
#' @details
#' `qmetalog` is a quantile function.
#' `fmetalog` is a quantile density function q(u).
#' `dqmetalog` is the reciprocal of it is density quantile function f(Q(p)).
#' `pmetalog` is an approximation of the cumulative density function.
#' `rmetalog` is an RNG.
#' @param a vector of `a`-coefficient parameters of metalog distribution
#' @param p vector of cumulative probabilities corresponding to quantile values
#' @param bl real value of lower boundary (for bounded metalog). Default -Inf
#' @param bu real value of upper boundary (for bounded metalog). Default Inf
#' @param log.p are probabilities provided on log scale. Default FALSE
#' @param log should the log density be returned. Default FALSE
#' @rdname metalog
#' @export
#' @examples
#' a <- c(9,  1.8, -1.13)
#' p <- c(0.1, 0.5, 0.9)
#' qmetalog(p, a)
qmetalog <- function(p, a, bl=-Inf, bu=Inf, log.p=FALSE){
  n <- length(a)
  if(log.p) p <- exp(p)
  res <- a[1]
  logitp <- log(p/(1-p)) #stats::qlogis(p) #log(p/(1-p))
  pmhalf <- p-0.5
  odd <- TRUE
  #stopifnot(n>0)
  if(n>1) res <- res+a[2]*logitp
  if(n>2) res <- res+a[3]*pmhalf*logitp
  if(n>3) res <- res+a[4]*pmhalf
  if(n>4)
   for(m in 5:n){
    if(odd){
      res = res+a[m]*pmhalf^((m-1)/2)
    }else{
      res = res+a[m]*pmhalf^(m/2-1)*logitp
    }
    odd=isFALSE(odd)
   }
  res <- ifelse(p==0, bl,
                  ifelse(p==1, bu, res))
  if(is.infinite(bl) && is.infinite(bu)) return(res)
  if(is.infinite(bu)) return(ifelse(p==0, bl, bl+exp(res)))# bl is defined
  if(is.infinite(bl)) return(ifelse(p==1, bu, bu-exp(-res)))# bu is defined
  (bl+bu*exp(res))/(1+exp(res))  #logitmetalog case
}

#' @rdname metalog
#' @export
#' @examples
#' fmetalog(p, a)
fmetalog <- function(p, a, bl=-Inf, bu=Inf, log.p=FALSE, log=FALSE){
  n <- length(a)
  if(log.p) p <- exp(p)
  pt1mp <- p*(1-p)
  logitp <- log(p/(1-p)) #stats::qlogis(p) #log(p/(1-p))
  pmhalf <- p-0.5
  odd <- TRUE
  #stopifnot(n>0)
  if(n>1) res <- a[2]/pt1mp
  if(n>2) res <- res+a[3]*(pmhalf/pt1mp+logitp)
  if(n>3) res <- res+a[4]
  if(n>4)
   for(m in 5:n){
    if(odd){
      res <- res+a[m]*(m-1)/2*pmhalf^((m-3)/2)
    }else{
      res<- res+a[m]*(pmhalf^(m/2-1)/pt1mp+(m/2-1)*pmhalf^(m/2-2)*logitp)
    }
    odd=isFALSE(odd)
   }
  res <- ifelse(p==0, bl,
                ifelse(p==1, bu, res))
  if(is.infinite(bl) && is.infinite(bu)){
     if(log) return(ifelse(is.finite(res),log(res),res)) else return(res)}
  eQm <- exp(qmetalog(p,a))
  if(is.infinite(bu)) {res <- ifelse(p==0, 0, (res*eQm));
     if(log) return(ifelse(is.finite(res),log(res),res)) else return(res)}# bl is defined
  if(is.infinite(bl)) {res <- ifelse(p==1, 0, (res/eQm));
     if(log) return(ifelse(is.finite(res),log(res),res)) else return(res)}# bu is defined
    #both are defined, logitmetalog case
   res <- res*(bu-bl)*eQm/(1+eQm)^2
  #res <- ifelse(p==0 | p==1, 0, res*(bu-bl)*eQm/(1+eQm)^2)
  if(log) return(ifelse(is.finite(res),log(res),res))
  res
}

#' @rdname metalog
#' @export
#' @examples
#' dqmetalog(p, a)
dqmetalog <- function(p, a, bl=-Inf, bu=Inf, log.p=FALSE, log=FALSE){
  res <- fmetalog(p,a,bl, bu, log.p, log=FALSE)
  if(log) return(ifelse(is.finite(res),-log(res),res))
  1/res
}

#' @param q real vector of values
#' @param n_grid integer size of helper grid to be passed to `make_pgrid`. Default is 50
#' @param s_grid integer beta shape of helper grid to be passed to `make_pgrid`. Default is 2
#' @param tol tolerance value, default is 1e-6
#' @param maxiter maximum number of iterations for approximation, default is 1e6
#' @param log.p should log probability be returned
#' @rdname metalog
#' @export
#'
#' @examples
#' x <- c(5, 9, 14)
#' pmetalog(x, a)
#' @importFrom stats approx
pmetalog <- function(q, a, bl=-Inf, bu=Inf, n_grid=50L, s_grid=2L, tol=1e-15, maxiter=1e3, log.p=FALSE){

  afun <- function(u, q, a, bl, bu) {q - qmetalog(u, a, bl, bu)}
  p_grd <- sort(c(tol, qpd::make_pgrid(n=n_grid, s=s_grid), 1-tol))
  q_grd <- qmetalog(p_grd, a, bl, bu)
  idx_lower <- findInterval(q, q_grd, all.inside = TRUE)
  idx_upper <- idx_lower+1L
  int_lower <- p_grd[idx_lower]
  int_upper <- p_grd[idx_upper]
  ps <- mapply(function(.q, .il, .iu) {
    tmp_us <- NULL
    tmp_us <- stats::uniroot(afun, q=.q, a=a, bl=bl, bu=bu,
                             interval=c(.il, .iu), extendInt="no", check.conv=TRUE, tol = tol, maxiter = maxiter)
    if(is.null(tmp_us)) res <- NA else res <- tmp_us$root
    #tmp_ps
  },  q, int_lower, int_upper)
  ps <- pmin(1, pmax(0, ps))

  ps[!is.finite(ps)] <- NA
  if(log.p) return(log(ps)) else return(ps)
}

#' @param n integer value correponding to number of samples to draw
#' @rdname metalog
#' @export
#'
#' @examples
#' a <- c(2.4, 0.4, -0.08)
#' rmetalog(100, a)
#' @importFrom stats runif
rmetalog <- function(n, a, bl=-Inf, bu=Inf){
  qmetalog(stats::runif(n), a, bl,bu)
}

#' @rdname fit_metalog
#' @examples
#' a <- c(9,  1.8, -1.13)
#' is_metalog_valid(a)
#' @export
is_metalog_valid <- function(a, bl=-Inf, bu=Inf, n_grid=50L, s_grid=2L){
 grd <- make_pgrid(n_grid, s_grid)
 all(fmetalog(grd, a, bl, bu)>0)
}
dmi3kno/qpd documentation built on Sept. 29, 2024, 6:39 p.m.