#' Z-scores based mean absolute calibration error
#'
#' @param Z (vector) z-scores
#' @param intrv (list) intervals generated by `genIntervals`
#'
#' @return The mean abolute calibration error
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' intrv = ErrViewLib::genIntervals(uE, 30)
#' zceFun(E/uE, intrv)
#' }
zceFun <- function(Z, intrv) {
# Z-scores based mean calibration error
AE = c()
for(i in 1:intrv$nbr) {
sel = intrv$lwindx[i]:intrv$upindx[i]
AE[i] = abs(log(mean(Z[sel]^2)))
}
return(mean(AE))
}
#' ENCE and UCE
#'
#' @param E (vector) errors
#' @param u (vector) prediction uncertainties
#' @param intrv (list) intervals generated by `genIntervals`
#'
#' @return A list with ence and uce values
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' intrv = ErrViewLib::genIntervals(uE, 30)
#' eceFun(E, uE, intrv)
#' }
eceFun = function(E, uE, intrv) {
# Errors based mean calibration error
MV = MSE = c()
for(i in 1:intrv$nbr) {
sel = intrv$lwindx[i]:intrv$upindx[i]
MV[i] = mean(uE[sel]^2)
MSE[i] = mean(E[sel]^2)
}
return(
list(
ence = mean(abs(sqrt(MV) - sqrt(MSE)) / sqrt(MV)),
uce = mean(abs(MV - MSE))
)
)
}
#' Negative Log Likelihood and components
#'
#' @param E (vector) errors
#' @param u (vector) prediction uncertainties
#'
#' @return A list with the nll and the calibration and sharpness scores
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' nllFun(E, uE)
#' }
nllFun = function(E, u) {
# Negative Log Likelihood
calib = mean( E^2/u^2 )
sharp = mean( log(u^2) )
nll = 0.5*(calib + sharp + 1.837877)
return(
list(
calib = calib,
sharp = sharp,
nll = nll
)
)
}
#' Calibration, sharpness, consistency and adaptativity scores
#'
#' @param E (vector) errors
#' @param u (vector) prediction uncertainties
#' @param X (vector/matrix) input features / coordinates (default: `NULL`)
#' @param nBin (integer) number of bins used for statistics
#' (default: `sqrt(length(E))`). Superseded by `intrv$nbr` if present.
#' @param intrv (list) intervals generated by `genIntervals` (default: `NULL`)
#'
#' @return A list containing z-scores based statistics(Scal, Su, SX and Stot),
#' ENCE (enceu, enceX) and UCE (uceu, uceX) calibration errors, the NLL and
#' its calibration and sharpness components (nll, calib, sharp).
#' The presence and length of SX, enceX and uceX match the presence and
#' column number of X.
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' calibScores(E, uE)
#' }
calibScores <- function(
E, u,
X = NULL,
nBin = NULL, intrv = NULL) {
Z = E / u
if(is.null(intrv)) {
if(is.null(nBin))
nBin = sqrt(length(u))
# Generate equal-size bins
intrv = ErrViewLib::genIntervals(u, nBin)
}
# Global stats
Scal = abs(log(mean(Z^2)))
nl = nllFun(E, u)
nll = nl$nll
calib = nl$calib
sharp = nl$sharp
# Local stats
## Consistency
iou = order(u)
Su = zceFun(Z[iou], intrv)
en = eceFun(E[iou], u[iou], intrv)
enceu = en$ence
uceu = en$uce
Stot = Scal + Su
## Adaptivity
SX = enceX = uceX = NA
if(!is.null(X)) {
if(NCOL(X) > 1) {
SX = enceX = uceX = c()
for(i in 1:ncol(X)) {
ioXi = order(X[,i])
SX[i] = zceFun(Z[ioXi], intrv)
en = eceFun(E[ioXi], u[ioXi], intrv)
enceX[i] = en$ence
uceX[i] = en$uce
}
} else {
ioXi = order(X)
SX = zceFun(Z[ioXi], intrv)
en = eceFun(E[ioXi], u[ioXi], intrv)
enceX = en$ence
uceX = en$uce
}
Stot = Stot + sum(SX)
}
return(
list(
Scal = Scal,
Su = Su,
SX = SX,
Stot = Stot,
enceu = enceu,
enceX = enceX,
uceu = uceu,
uceX = uceX,
nll = nll,
calib = calib,
sharp = sharp
)
)
}
#' Probabilistic reference values for calibration scores
#'
#' @param E (vector) errors
#' @param u (vector) prediction uncertainties
#' @param X (vector/matrix) input features / coordinates (default: `NULL`)
#' @param nBin (integer) number of bins used for statistics
#' (default: `sqrt(length(E))`). Superseded by `intrv$nbr` if present.
#' @param intrv (list) intervals generated by `genIntervals` (default: `NULL`)
#' @param nMC (integer) sampling size (default: 1000)
#' @param dist (string) model error distribution to generate the reference
#' values. One of 'Normal' (default), 'Uniform', 'Normp4', 'Laplace' or 'T4'
#' @param quant (logical) if TRUE, return 95\% confidence interval limits
#'
#' @return A list containing two lists: meanRefScores is a list of mean scores
#' with the same structure as the output of `calibScores` and sdRefScores
#' contains the standard deviations with the same structure
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' calibScoresProbRef(E, uE)
#' }
#'
calibScoresProbRef <- function(
E, u, X = NULL,
nBin = NULL, intrv = NULL,
nMC = 1000,
dist = c('Normal','Uniform','Normp4','Laplace','T4'),
quant = FALSE
) {
dist = match.arg(dist)
M = length(u)
if(is.null(intrv)) {
if(is.null(nBin))
nBin = sqrt(length(E))
# Generate equal-size bins
intrv = ErrViewLib::genIntervals(u, nBin)
}
# Unit-variance distributions
Normal = function(N)
rnorm(N)
T4 = function(N, df = 4)
rt(N, df = df) / sqrt(df/(df-2))
Uniform = function(N)
runif(N, -sqrt(3), sqrt(3))
Laplace = function(N, df = 1)
normalp::rnormp(N, p = df) / sqrt(df^(2/df)*gamma(3/df)/gamma(1/df))
Normp4 = function(N, df = 4)
normalp::rnormp(N, p = df) / sqrt(df^(2/df)*gamma(3/df)/gamma(1/df))
# Get distribution function from arg name
distFun = get(dist)
# Nominal values
gs = ErrViewLib::calibScores(E, u, X = X, intrv = intrv)
# Generate sample
tSim = matrix(NA, ncol = length(unlist(gs)), nrow = 1 + nMC)
colnames(tSim) = names(unlist(gs))
tSim[1,] = unlist(gs)
for (i in 1:nMC) {
Esim = u * distFun(M) # Generate errors from uncertainties
gs = ErrViewLib::calibScores(Esim, u, X = X, intrv = intrv)
tSim[i+1,] = unlist(gs)
}
meanScores = apply(tSim, 2, mean, na.rm = TRUE)
sdScores = apply(tSim, 2, sd, na.rm = TRUE)
if(quant)
qScores = apply(tSim, 2, quantile, probs= c(0.025,0.975), na.rm = TRUE)
names(meanScores) = names(sdScores) = names(unlist(gs))
colnames(qScores) = names(unlist(gs))
return(
list(
meanRefScores = as.list(meanScores),
sdRefScores = as.list(sdScores),
qRefScoreslw = as.list(qScores[1,]),
qRefScoresup = as.list(qScores[2,])
)
)
}
#' Table of calibration scores with probabilistic references
#'
#' @param E (vector) errors
#' @param u (vector) prediction uncertainties
#' @param X (vector/matrix) input features / coordinates (default: `NULL`)
#' @param nBin (integer) number of bins used for statistics
#' (default: `sqrt(length(E))`). Superseded by `intrv$nbr` if present.
#' @param intrv (list) intervals generated by `genIntervals` (default: `NULL`)
#' @param nMC (integer) sampling size (default: 1000)
#' @param dist (string) model error distribution to generate the reference
#' values. One of 'Normal' (default), 'Uniform', 'Normp4', 'Laplace' or 'T4'
#' @param rawUnc (logical) if TRUE, output unformatted reference uncertainties
#' @param quant (logical) if TRUE, return 95\% confidence interval limits
#'
#' @return A table with two lines, three if rawUnc is TRUE,
#' and two more if quant is TRUE.
#'
#' @export
#'
#' @examples
#' \donttest{
#' uE = sqrt(rchisq(1000, df = 4)) # Re-scale uncertainty
#' E = rnorm(uE, mean=0, sd=uE) # Generate errors
#' tabScoresRef(E, uE)
#' }
#'
tabScoresRef <- function(
E, u, X = NULL,
nBin = NULL, intrv = NULL,
nMC = 1000,
dist = c('Normal','Uniform','Normp4','Laplace','T4'),
rawUnc = FALSE,
quant = FALSE
) {
res = ErrViewLib::calibScores(E, u, X, nBin, intrv)
ref = ErrViewLib::calibScoresProbRef(E, u, X, nBin, intrv, nMC, dist, quant)
v1 = signif(unlist(res),3)
v2 = unlist(ref$meanRefScores)
v3 = unlist(ref$sdRefScores)
if(rawUnc) {
tab = rbind(v1, v2, v3)
rownames(tab) = c('Data','Ref','uRef')
} else {
v4 = apply(cbind(v2,v3), 1,
function(x) ErrViewLib::prettyUnc(x[1],x[2],numDig = 1))
tab = rbind(v1,v4)
rownames(tab) = c('Data','Ref(uRef)')
}
rn = rownames(tab)
if(quant) {
v1 = unlist(ref$qRefScoreslw)
v2 = unlist(ref$qRefScoresup)
tab = rbind(tab, v1, v2)
rownames(tab) = c(rn,'lwQ','upQ')
}
return(tab)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.