#' Split conformal prediction intervals with Multivariate Response
#'
#' Compute prediction intervals using split conformal inference with multivariate
#' response.
#'
#' @param x The feature variables, a matrix n x p.
#' @param y The matrix of multivariate responses (dimension n x q)
#' @param x0 The new points to evaluate, a matrix of dimension n0 x p.
#' @param train.fun A function to perform model training, i.e., to produce an
#' estimator of E(Y|X), the conditional expectation of the response variable
#' Y given features X. Its input arguments should be x: matrix of features,
#' and y: matrix of responses.
#' @param predict.fun A function to perform prediction for the (mean of the)
#' responses at new feature values. Its input arguments should be out: output
#' produced by train.fun, and newx: feature values at which we want to make
#' predictions.
#' @param alpha Miscoverage level for the prediction intervals, i.e., intervals
#' with coverage 1-alpha are formed. Default for alpha is 0.1.
#' @param split Indices that define the data-split to be used (i.e., the indices
#' define the first half of the data-split, on which the model is trained).
#' Default is NULL, in which case the split is chosen randomly.
#' @param seed Integer to be passed to set.seed before defining the random
#' data-split to be used. Default is FALSE, which effectively sets no seed.
#' If both split and seed are passed, the former takes priority and the latter
#' is ignored.
#' @param randomized Should the randomized approach be used? Default is FALSE.
#' @param seed_tau The seed for the randomized version. Default is FALSE.
#' @param verbose Should intermediate progress be printed out? Default is FALSE.
#' @param training_size Split proportion between training and calibration set.
#' Default is 0.5.
#' @param score The non-conformity measure. It can either be "max", "l2", "mahalanobis".
#' The default is "l2".
#' @param s_type The type of modulation function.
#' Currently we have 3 options: "identity","st-dev","alpha-max". Default is "st-dev"
#' @param mad.train.fun A function to perform training on the absolute residuals
#' i.e., to produce an estimator of E(R|X) where R is the absolute residual
#' R = |Y - m(X)|, and m denotes the estimator produced by train.fun.
#' This is used to scale the conformal score, to produce a prediction interval
#' with varying local width. The input arguments to mad.train.fun should be
#' x: matrix of features, y: vector of absolute residuals, and out: the output
#' produced by a previous call to mad.train.fun, at the \emph{same} features
#' x. The function mad.train.fun may (optionally) leverage this returned
#' output for efficiency purposes. See details below. The default for
#' mad.train.fun is NULL, which means that no training is done on the absolute
#' residuals, and the usual (unscaled) conformal score is used. Note that if
#' mad.train.fun is non-NULL, then so must be mad.predict.fun (next).
#' @param mad.predict.fun A function to perform prediction for the (mean of the)
#' absolute residuals at new feature values. Its input arguments should be
#' out: output produced by mad.train.fun, and newx: feature values at which we
#' want to make predictions. The default for mad.predict.fun is NULL, which
#' means that no local scaling is done for the conformal score, i.e., the
#' usual (unscaled) conformal score is used.
#'
#' @return A list with the following components: x0,pred,k_s,s_type,s,alpha,randomized,tau,
#' average_width,lo,up. In particular pred, lo, up are the matrices of
#' dimension n0 x q, k_s is a scalar, s_type is a string, s is a vector of length q,
#' alpha is a scalar between 0 and 1, randomized is a logical value,
#' tau is a scalar between 0 and 1,and average_width is a positive scalar.
#'
#' @details If the two mad functions are provided they take precedence over the s_type parameter,
#' and they force a local scoring via the mad function predicted values.
#'
#' @importFrom stats mad mahalanobis
#'
#' @seealso \code{\link{conformal.multidim.full}}
#'
#' @references The s_regression and the "max" score are taken from "Conformal Prediction Bands
#' for Multivariate Functional Data" by Diquigiovanni, Fontana, Vantini (2021).
#'
#' @example inst/examples/ex.split.R
#' @export conformal.multidim.split
conformal.multidim.split = function(x,y, x0, train.fun, predict.fun, alpha=0.1,
split=NULL, seed=FALSE, randomized=FALSE,seed_tau=FALSE,
verbose=FALSE, training_size=0.5,
score="l2",s_type="st-dev", mad.train.fun = NULL,
mad.predict.fun = NULL) {
## Check Data and Splits
check.split(x=x,y=y,x0=x0,train.fun=train.fun,
predict.fun=predict.fun, alpha=alpha, seed=seed, training_size
=training_size, seed.tau=seed.tau, randomized=randomized,
mad.train.fun = mad.train.fun, mad.predict.fun = mad.predict.fun, score = score)
n=dim(x)[1]
p=dim(x)[2]
q=dim(y)[2]
n0=nrow(x0)
flag = FALSE #in general mad funs are not present
# If mad funs exist, they take precedence
if(is.function(mad.train.fun) && is.function(mad.predict.fun)){
score = "identity"
flag = TRUE
}
if (verbose == TRUE) txt = ""
if (verbose != TRUE && verbose != FALSE) {
txt = verbose
verbose = TRUE
}
if(is.null(split)){
if(ceiling(n*training_size) !=n )
m=ceiling(n*training_size)
else
m=ceiling(n*training_size)-1
l=n-m
if(seed!=FALSE){set.seed(seed)}
training=sample(1:n,m)
}
else{
training=split
}
calibration=setdiff(1:n,training)
if(randomized==FALSE) {tau=1} else{
if(seed_tau!=FALSE){set.seed(seed_tau)}
tau=stats::runif(n=1,min=0,max=1)
}
check.tau(alpha=alpha,tau=tau,l=l)
###### TRAINING & RESIDUALS COMPUTATION
if (verbose) {
cat(sprintf("%sComputing models on first part ...\n",txt))
}
out = train.fun(x[training,],y[training,])
fit = predict.fun(out,x)
pred = predict.fun(out,x0)
if (verbose) {
cat(sprintf("%sComputing residuals and quantiles on second part ...\n",txt))
}
res = y - fit
s=computing_s_regression(mat_residual=res[training,],type=s_type,
alpha=alpha,tau=tau)
resc = t(t(res[calibration,] )/ s)
if(flag){ # with mad
mad.out = mad.train.fun(x[training,],res[training,])
resc = resc / mad.predict.fun(mad,out,x[calibration,])
mad.x0 = mad.predict.fun(mad.out,x0)
}
switch(score,
"max"={rho=apply(resc,1,function(x) max(abs(x)))},
"l2" = {rho=rowSums(resc^2)},
"mahalanobis" = {rho = mahalanobis(resc,colMeans(resc),cov(resc),tol=1e-2)}
)
k_s=sort(rho,decreasing=FALSE)[ceiling(l+tau-(l+1)*alpha)]
average_width = mean(2*k_s*s)
if(flag)
band = mad.x0 * k_s
else
band=k_s*matrix(rep(s,n0),nrow = n0,byrow = TRUE)
up=pred+band
lo=pred-band
return(structure(.Data=list(x0,pred,k_s,s_type,s,alpha,randomized,tau,
average_width,
lo,up),
names=c("x0","pred","k_s","s_type","s","alpha","randomized","tau"
,"average_width", "lo", "up")))
}
utils::globalVariables(c("pval", "seed.tau"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.