R/core.R

Defines functions summary.cumulcalib summary.cumulcalib plot.cumulcalib plot.cumulcalib qMAD_BM_c pMAD_BM_c qKolmogorov qMAD_BM pMAD_BM cumulcalib

Documented in cumulcalib plot.cumulcalib pMAD_BM pMAD_BM_c qKolmogorov qMAD_BM qMAD_BM_c summary.cumulcalib

#' Cumulative calibration assessment
#'
#' This is the core function for performing cumulative calibration assessment
#'
#' @return an objective of class cumulcalib that can be printed or plotted
#  @seealso [stringi::stri_length()] which this function wraps.
#' @param y vector of binary responses
#' @param p vector of predicted probabilities.
#' @param method string with either BB (Brownian bridge test, default method), BM (Brownian motion test), BM2p (two-part BM test - experimental), BB1p (one-part BB test wit only the 'bridge' component). Multiple methods can be specified. The first one will be the 'main' method (e.g., when submitting the resulting object to plot()). Default is c("BB","BM")
#' @param ordered if TRUE, y and p are already ordered based on ascending values of p. This is to speed up simulations.
#' @param n_sim if >0, indicates a simulation-based test is requested for inference.
#' @examples
#' pi <- rbeta(1000,1,2)
#' Y <- rbinom(length(pi),1,pi)
#' res <- cumulcalib(Y, pi, method="BB")
#' summary(res)
#' plot(res)
#' @export
cumulcalib <- function(y, p, method=c("BB","BM"), ordered=FALSE, n_sim=0)
{
  out <- list()
  methods <- list()

  if(!ordered) #Order ascendingly based on p, if not already ordered
  {
    o <- order(p)
    p <- p[o]
    y <- y[o]
  }

  n <- length(p)

  #The time component of the random walk
  T_ <- sum(p*(1-p)) #Total 'time'
  if(T_<30) warning("Total obsered time (sum(pi*(1-pi))) is less than 30; the data might be too small for reliable inference.")
  t <- cumsum(p*(1-p))/T_ #time values at each p
  X <- p #Predicted values

  #Scaled cumulative sum (divided by n)
  C <- cumsum(y-p)/n  #Scaled partial sum of prediction errors
  C_n <- C[n] #Mean calibration error
  C_star <- max(abs(C))  #Macimum distance

  #This is the S process as described in the paper. The function returns all these metrics regardless of which method is used. But inference is only done per specified method(s)
  scale <- n/sqrt(T_)
  S <- scale*C
  S_star <- scale*C_star
  B_star <- max(abs(S-S[n]*t))
  S_n <- scale*C_n

  #Inference part. We loop over the different methods requested by the user
  for(i in 1:length(method))
  {
    mt <- method[i]

    if(mt %in% c('BM2p','BB')) #Two-part BM and BB both generate component-specific p-values
    {
      stat1 <- S[n]
      pval1 <- 2*stats::pnorm(-abs(S[n]),0,1) #Two-sided z test for mean calibration

      if(mt=='BB')
      {
        loc <- which.max(S-S[n]*t)  #The bridge component of the BB test
        stat2 <- B_star
        pval2 <- 1-pKolmogorov(stat2)
      }
      else
      {
        loc <- which.max(abs(S))
        stat2 <- max(abs(S))
        pval2 <- 1-pMAD_BM_c(stat2, w1=S[n])
      }

      fisher <- -2*(log(pval1)+log(pval2))  #Fisher's method for combining p-values
      pval <- 1-stats::pchisq(fisher,4)

      methods[[mt]]$stat <- fisher
      methods[[mt]]$pval <- pval
      methods[[mt]]$stat_by_component <- c(mean=stat1, distance=stat2)
      methods[[mt]]$pval_by_component <- c(mean=pval1, distance=pval2)
      methods[[mt]]$loc <- loc
    }

    if(mt %in% c('BM','BB1p'))  #These are one-part methods
    {
      if(mt=='BB1p')
      {
        S2 <- S-S[n]*t
        loc <- which.max(abs(S2))
        stat <- max(abs(S2))
        pval <- 1-pKolmogorov(stat)
        methods[[mt]]$stat <- stat
        methods[[mt]]$pval <- pval
        methods[[mt]]$loc <- loc
      }
      else
      {
        stat <- max(abs(S))
        loc <- which.max(abs(S))
        pval <- 1-pMAD_BM(stat)
        methods[[mt]]$stat <- stat
        methods[[mt]]$pval <- pval
        methods[[mt]]$loc <- loc
      }
    }
  }

  out$data <- cbind(X=X,t=t,C=C, S=S) #Returns the generated random-walk
  out$method <- names(methods[1]) #The base method is the first requested one

  out$scale <- scale
  out$C_n <- C_n
  out$S_n <- S_n
  out$C_star <- C_star
  out$S_star <- S_star
  out$B_star <- B_star

  #Copy the first method results to root of the list
  for(nm in names(methods[[1]]))
  {
    out[nm] <- methods[[1]][nm]
  }
  out$by_method <- methods

  class(out) <- "cumulcalib"
  return(out)
}





#' CDF of the distribution of the maximum absolute deviation of Brownian motion in \[0,1\] interval
#' @return a scalar value
#' @param q the quantity at which CDF will be evaluated. Currently accepts only a scalar
#' @param summands maximum number of terms to be evaluated in the infinite series (default=100)
#' @export
pMAD_BM <- function(q, summands=100)
{
  pi <- base::pi
  m<-0:summands
  out <- sum((-1)^m/(2*m+1)*exp(-(2*m+1)^2*pi^2/8/q^2))

  return(4/pi*out)
}


#' Quantile function of the distribution of the maximum absolute deviation of Brownian motion in \[0,1\] interval
#' @return a scalar value
#' @param p the quantity at which the quantile function will be evaluated. Currently accepts only a scalar
#' @export
qMAD_BM <- function(p)
{
  x <- stats::uniroot(function(x) {pMAD_BM(x)-p}, interval=c(0,10))
  unname(x$root)
}




#
# W2CDF <- function(q, n_terms=10)
# {
#   n <- 0:n_terms
#   A1<- gamma(-0.5+1)/(gamma(n+1)*gamma(-0.5-n+1))
#   A2 <- erfc((4*n+1)/(2*sqrt(2*q)))
#
#   sqrt(2)*sum(A1*A2)
# }
#
#
# W2PDF <- function(x, n_terms=10)
# {
#   n <- 0:n_terms
#   A1<- gamma(-0.5+1)/(gamma(n+1)*gamma(-0.5-n+1))
#   A2 <- (4*n+1)*exp(-((4*n+1)^2)/(8*x))
#
#   1/(2*sqrt(base:::pi*x^3))*sum(A1*A2)
# }



#' CDF of the Kolmogorov distribution
#' @return a scalar value
#' @param q the quantity at which CDF will be evaluated. Currently accepts only a scalar
#' @param summands maximum number of terms to be evaluated in the infinite series (default=ceiling(q*sqrt(72)+3/2))
#' @export
pKolmogorov <- function (q, summands=ceiling(q*sqrt(72)+3/2))
{
  #if(!is.null(summands)) q <- q*(1+0.12/sqrt(summands)+0.11/summands)

  sqrt(2 * pi) * sapply(q, function(x) {
    if (x > 0) {
      sum(exp(-(2 * (1:summands) - 1)^2 * pi^2/(8 * x^2)))/x
    }
    else {
      0
    }
  })
}


#' Quantile function of the Kolmogorov distribution
#' @return a scalar value
#' @param p the quantity at which the quantile function will be evaluated. Currently accepts only a scalar
#' @export
qKolmogorov <- function(p)
{
  x <- stats::uniroot(function(x) {pKolmogorov(x)-p}, interval=c(0,10))
  unname(x$root)
}




#' CDF of the distribution of the maximum absolute deviation of Brownian motion in \[0,1\] interval, conditional on its terminal value
#' @return a scalar value
#' @param q the quantity at which CDF will be evaluated. Currently accepts only a scalar
#' @param w1 the terminal value
#' @param method different infinite series to use (1,2,3)
#' @param exp_tolerance numerical tolerance as the stopping rule when evaluating the infinite sum (default -30 on the exponential scale)
#' @param summands number of terms to evaluate (default is ceiling(q * sqrt(72) + 3/2))
#' @export
pMAD_BM_c <- function(q, w1, method=1, exp_tolerance=-30, summands = ceiling(q * sqrt(72) + 3/2))
{
  if(q<=max(0,w1)) return(0)
  out <- c()
  if(1 %in% method)
  {
    A <- -2*q*q
    B <- 2*q*w1
    C <- -exp_tolerance
    D <- sqrt(B*B-4*A*C)

    n <- seq(round((-B+D)/A/2,0), round((-B-D)/A/2,0))
    un <- 2*n*q

    terms <- un*w1-un^2/2
    out <- c(out,sum((-1)^n*exp(terms)))
  }
  if(2 %in% method)
  {
    out <- c(out,sqrt(2*pi)/q*sum(exp(w1^2/2-(2*(1:summands)-1)^2*pi^2/(8*q^2))*(cos((2*(1:summands)-1)*pi*w1/2/q))))
  }
  if(3 %in% method)
  { #Wrong
    n <- -1000:1000
    x <- sum(stats::dnorm(w1+4*n*q)-stats::dnorm(w1+4*n*q+2*q))
    out <- c(out,x)
  }

  out
}




#' Quantile function of the distribution of the maximum absolute deviation of Brownian motion in \[0,1\] interval, conditional on its terminal value
#' @return a scalar value
#' @param p the quantity at which the quantile function will be evaluated. Currently accepts only a scalar
#' @param w1 the terminal value
#' @export
qMAD_BM_c <- function(p, w1)
{
  x <- stats::uniroot(function(x) {pMAD_BM_c(x,w1)-p}, interval=c(0,10))
  unname(x$root)
}






#' Generates cumulative calibration plots
#' @return None
#' @param x A cumulcalib object
#' @param ... Parameters to be passed to plot()
#' @export
plot.cumulcalib <- function(x,...)
{
  UseMethod("plot",x)
}

#' @return None
#' @param x An object of class cumulcalib generated by cumulcalib()
#' @param method Which method to use. Options are BB (Brownian bridge test), BM (Brownian motion test), BB1p (1-part Brownian bridge test), and BM2p (2-part Brownian bridge test). If unspecified, returns the default method used in the cumulcalib() call
#' @param draw_stat Should the statistic (terminal value an/or maximum drift, depending on method) be drawn? Default is TRUE
#' @param stat_col The color(s) to draw the stat. Default is c('blue','red'). For single-part tests (BM and BB1p) only the first element is used
#' @param draw_sig Whether significance lines should be drawn. Default is T. Colors will be the same as stat_col
#' @param sig_level If to draw significance lines, at what level? Default is c(0.95,0.95). For single-part tests (BM and BB1p) only the first element is used
#' @param x2axis If true, draws a second x-axis (on top) showing predicted risks
#' @param y2axis If true, draws a second y-axis (on right) showing scaled partial sums
#' @param ... Parameters to be passed to plot()
#' @method plot cumulcalib
plot.cumulcalib <- function(x, method=NULL, draw_stat=TRUE, stat_col=c('blue','red'), draw_sig=TRUE, sig_level=c(0.95,0.95), x2axis=TRUE, y2axis=TRUE, ...)
{
  if(is.null(method))
  {
    method <- x$method
  }
  else
  {
    if(length(method)>1)
    {
      stop("Only one method can be equested.")
    }
    if(!(method %in% names(x$by_method)))
      stop(paste("The requested method has not been provided by the original cumulcalib() call. You asked for", method, "but the submitted object has method(s)", paste(names(x$by_method),collapse=",")))
  }

  oldpar <- par(no.readonly=TRUE)
  on.exit(par(oldpar))

  args <- list(...)

  t_ <- x$data[,'t']
  X <- x$data[,'X']
  W <- x$data[,'S']
  n <- length(W)
  loc <- x$by_method[[method]]$loc

  sign_p1 <- sign(W[n])
  if(method %in% c("BB","BB1p","BB2p"))
  {
    sign_p2 <- sign(W[loc]-t_[loc]*W[n])
  }
  else
  {
    sign_p2 <- sign(W[loc])
  }


  sig_p1 <- sig_p2 <- 0 #0 indicates do not draw signifcance lines
  if(draw_sig)
  {
    sig_p1 <- stats::qnorm(1-(1-sig_level[1])/2)

    if(method %in% c("BM"))
    {
      sig_p2 <- qMAD_BM(sig_level[2])
    }
    if(method %in% c("BM2p"))
    {
      sig_p2 <- qMAD_BM_c(sig_level[2],w1=unname(x$by_method$BM2p$stat_by_component['mean']))
    }
    if(method %in% c("BB1p","BB"))
    {
      sig_p2 <- qKolmogorov(sig_level[2])
    }
  }

  i <- match("xlim",names(args))
  if(is.na(i))
  {
    args$xlim<-c(-0.03,1.01)
  }
  i <- match("ylim",names(args))
  if(is.na(i))
  {
    args$ylim<- c(min(W,-sig_p1*(sign_p1==-1),-sig_p2*(sign_p2==-1),-1),max(W,sig_p1*(sign_p1==1),sig_p2*(sign_p2==1),1))
  }
  i <- match("type",names(args))
  if(is.na(i))
  {
    args$type<-'l'
  }
  i <- match("xaxt",names(args))
  if(is.na(i))
  {
    args$xaxt<-'n'
  }
  i <- match("yaxt",names(args))
  if(is.na(i))
  {
    args$yaxt<-'n'
  }
  i <- match("xlab",names(args))
  if(is.na(i))
  {
    args$xlab<-'Time'
  }
  i <- match("ylab",names(args))
  if(is.na(i))
  {
    args$ylab<-'Standardized cumulative sum'
  }

  args$x<-t_
  args$y<-W

  args$xaxs<-'i'

  par(mar=c(3,3,ifelse(x2axis,3,1),ifelse(y2axis,3,1)))

  do.call(plot, args)

  #Axes
  axis(1, line=0,  at=(0:10)/10, labels=(0:10)/10, padj=0)
  mtext(args$xlab, 1, line=2)

  axis(side=2, at=pretty(args$ylim), padj=0)
  mtext(args$ylab, 2, line=2)

  if(x2axis)
  {
    xs <- round(unlist(lapply((0:10)/10, function (z) {x$data[which.min((x$data[,'t']-z)^2),'X']})),3)
    axis(side=3, at=(0:10)/10, labels=xs, padj=0)
    mtext("Predicted values", 3, line=2) #expression(pi) for label will generate the symbol!
  }

  if(y2axis)
  {
    y2p <- pretty(args$ylim/x$scale)
    axis(side=4, at=y2p*x$scale, labels=y2p, padj=0)
    mtext('Scaled cumulative sum',4, line=2)
  }


  lines(c(0,1),c(0,0),col="grey")

  #Triangle
  {
    polygon(x=c(0,-0.03,-0.03), y=c(0,-1,1), col = 'black')
  }

  #P1 lines
  if(method %in% c("BM2p","BB"))
  {
    if(draw_stat) lines(c(1,1),c(0,W[n]), col=stat_col[1])
    if(draw_sig) lines(c(0,1),c(sign_p1*sig_p1,sign_p1*sig_p1),col=stat_col[1],lty=3)
  }

  #P2 lines
  if(method %in% c('BB')) #If 2p bridge test then adjust the length of the red line and draw the bridge line
  {
    lines(c(0,1),c(0,W[n]),col="gray", lty=2)
    if(draw_stat) lines(c(t_[loc],t_[loc]),c(t_[loc]/t_[n]*W[n],W[loc]),col=stat_col[2])
    if(draw_sig) lines(c(0,1),c(sign_p2*sig_p2,sign_p2*sig_p2+W[n]),col=stat_col[2],lty=3)
  }
  else #BM or BM2p
  {
    if(draw_stat) lines(c(t_[loc],t_[loc]),c(0,W[loc]),col=stat_col[2])
    if(draw_sig)  lines(c(0,1),c(sign_p2*sig_p2,sign_p2*sig_p2),col=stat_col[2],lty=3)
  }
}





#' Summarizes a cumulcalib object
#' @return None
#' @param object An object of class cumulcalib generated by cumulcalib()
#' @param ... Other parameters passed to summary()
#' @export
summary.cumulcalib <- function(object, ...)
{
  UseMethod("summary",object)
}



#' Summarizes a cumulcalib object
#' @return None
#' @param object An object of class cumulcalib generated by cumulcalib()
#' @param method Which method to use. Options are BB (Brownian bridge test), BM (Brownian motion test), BB1p (1-part Brownian bridge test), and BM2p (2-part Brownian bridge test). If unspecified, returns the default method used in the cumulcalib() call
#' @param ... Other parameters passed to summary()
#' @method summary cumulcalib
summary.cumulcalib <- function(object, method=NULL, ...)
{
  if(is.null(method))
  {
    method <- object$method
  }
  else
  {
    if(length(method)>1)
    {
      stop("Only one method can be equested.")
    }
    if(!(method %in% names(object$by_method)))
      stop(paste("The requested method has not been provided by the original cumulcalib() call. You asked for", method, "but the submitted object has method(s)", paste(names(object$by_method),collapse=",")))
  }

  n <- dim(object$data)[1]
  writeLines(paste("C_n (mean calibration error):",object$C_n))
  writeLines(paste("C* (maximum absolute cumulative calibration error):",object$C_star))

  if(method=="BM" || method=="BM1p")
  {
    writeLines("Method: One-part Brownian Motion (BM)")
    writeLines(paste("S* (test statistic for cumulative calibration error):",object$S_star))
    writeLines(paste("p-value:",object$by_method$BM$pval))
    writeLines(paste("Location of maximum drift:",object$by_method$BM$loc,
                " | time value:", object$data[object$by_method$BM$loc,'t'],
                " | predictor value:", object$data[object$by_method$BM$loc,'X']))
  }
  if(method=="BM2p")
  {
    writeLines("Method: Two-part Brownian Motion (BM2p)")
    writeLines(paste("S_n (Z score for mean calibration error)",object$S_n))
    writeLines(paste("S* (test statistic for cumulative calibration error):",object$S_star))
    writeLines(paste0("Component-wise p-values: mean calibration=", object$by_method$BM2p$pval_by_component[1], " | Distance=", object$by_method$BM2p$pval_by_component[2]))
    writeLines(object$by_method$BM2p$pval_by_component)
    writeLines(paste("Combined p-value (Fisher's method):",object$by_method$BM2p$pval))
    writeLines(paste("Location of maximum drift:",object$by_method$BM2p$loc,
                " | time value:", object$data[object$by_method$BM2p$loc,'t'],
                " | predictor value:", object$data[object$by_method$BM2p$loc,'X']))
  }
  if(method=="BB1p")
  {
    writeLines("Method: One-part Brownian Bridge (BB1p)")
    writeLines(paste("B* (test statistic for maximum absolute bridged calibration error):",object$B_star))
    writeLines(paste("Test statistic value:",object$by_method$BB1p$stat))
    writeLines(paste("p-value:",object$by_method$BB1p$pval))
    writeLines(paste("Location of maximum drift:",object$by_method$BB1p$loc,
                " | time value:", object$data[object$by_method$BB1p$loc,'t'],
                " | predictor value:", object$data[object$by_method$BB1p$loc,'X']))
  }
  if(method=="BB" || method=="BB2p")
  {
    writeLines("Method: Two-part Brownian Bridge (BB)")
    writeLines(paste("S_n (Z score for mean calibration error)",object$S_n))
    writeLines(paste("B* (test statistic for maximum absolute bridged calibration error):",object$B_star))
    writeLines(paste0("Component-wise p-values: mean calibration=", object$by_method$BB$pval_by_component[1], " | Distance (bridged)=", object$by_method$BB$pval_by_component[2]))
    writeLines(paste("Combined p-value (Fisher's method):",object$by_method$BB$pval))
    writeLines(paste("Location of maximum drift:",object$by_method$BB$loc,
                " | time value:", object$data[object$by_method$BB$loc,'t'],
                " | predictor value:", object$data[object$by_method$BB$loc,'X']))
  }
}

Try the cumulcalib package in your browser

Any scripts or data that you put into this service are public.

cumulcalib documentation built on June 22, 2024, 10:36 a.m.