Nothing
#' Functional Split Conformal Prediction Intervals
#'
#' Compute prediction intervals using split conformal inference.
#'
#' @param x The input variable, a list of n elements. Each element is composed by a list
#' of p vectors(with variable length, since the evaluation grid may change).
#' If x is NULL, the function will sample it from a gaussian.
#' @param t_x The grid points for the evaluation of function x. It is a list of vectors.
#' If the x data type is "fData" or "mfData" is must be NULL.
#' @param t_y The grid points for the evaluation of function y_val. It is a list of vectors.
#' If the y_val data type is "fData" or "mfData" is must be NULL.
#' @param y The response variable. It is either, as with x, a list of list of
#' vectors or an fda object (of type fd, fData, mfData).
#' @param x0 The new points to evaluate, a list of n0 elements. Each element is composed
#' by a list of p vectors(with variable length).
#' @param train.fun A function to perform model training, i.e., to produce an
#' estimator of E(Y|X), the conditional expectation of the response variable
#' Y given features X. Its input arguments should be x: list of features,
#' and y: list of responses.
#' @param predict.fun A function to perform prediction for the (mean of the)
#' responses at new feature values. Its input arguments should be out: output
#' produced by train.fun, and newx: feature values at which we want to make
#' predictions.
#' @param alpha Miscoverage level for the prediction intervals, i.e., intervals
#' with coverage 1-alpha are formed. Default for alpha is 0.1.
#' @param split Indices that define the data-split to be used (i.e., the indices
#' define the first half of the data-split, on which the model is trained).
#' Default is NULL, in which case the split is chosen randomly.
#' @param seed Integer to be passed to set.seed before defining the random
#' data-split to be used. Default is FALSE, which effectively sets no seed.
#' If both split and seed are passed, the former takes priority and the latter
#' is ignored.
#' @param randomized Should the randomized approach be used? Default is FALSE.
#' @param seed.rand The seed for the randomized version.Default is FALSE.
#' @param verbose Should intermediate progress be printed out? Default is FALSE.
#' @param rho Split proportion between training and calibration set.
#' Default is 0.5.
#' @param s.type The type of modulation function.
#' Currently we have 3 options: "identity","st-dev","alpha-max". Default is "std-dev".
#'
#' @return A list with the following components: t,pred,average_width,lo, up.
#' t is a list of vectors,
#' pred has the same interval structure of y_val, but the outside list is of length n0,
#' lo and up are lists of length n0 of lists of length
#' p, each containing a vector of lower and upper bounds respectively.
#'
#'
#' @example inst/examples/ex.split.R
#' @export
conformal.fun.split = function(x, t_x, y,t_y, x0, train.fun, predict.fun, alpha=0.1,
split=NULL, seed=FALSE, randomized=FALSE,seed.rand=FALSE,
verbose=FALSE, rho=0.5,s.type="st-dev") {
############ DATA PREPARATION #############################
check.null.data(y)
conv = convert2data(x,t_x,y,t_y,x0)
x = conv$x
y = conv$y
t_x = conv$t_x
t_y = conv$t_y
x0 = conv$x0
n0<-length(x0)
n=length(y)
p=length(y[[1]])
grid_size=vapply(y[[1]],function(x) length(x),integer(1))
# Check input arguments
check.args(x=x, t_y=t_y,y=y,x0=x0,train.fun=train.fun,
predict.fun=predict.fun, alpha=alpha, seed=seed, training_size
=rho, seed.tau=seed.tau, randomized=randomized)
# Users may pass in a string for the verbose argument
if (verbose == TRUE) txt = ""
if (verbose != TRUE && verbose != FALSE) {
txt = verbose
verbose = TRUE
}
## Splitting Data
if(is.null(split)){
if(ceiling(n*rho) !=n ){
m=ceiling(n*rho)
}
else{
m=ceiling(n*rho)-1
}
l=n-m
if(seed!=FALSE){set.seed(seed)}
training=sample(1:n,m)
}
else{
training=split
}
calibration=setdiff(1:n,training)
if(randomized==FALSE) {tau=1} else{
if(seed.rand!=FALSE){set.seed(seed.rand)}
tau=stats::runif(n=1,min=0,max=1)
}
check.tau(alpha=alpha,tau=tau,l=l)
# TRAINING & RESIDUALS COMPUTATION ######################
if (verbose) {
cat(sprintf("%sComputing models on first part ...\n",txt))
}
out = train.fun(x[training,drop=F],t_y,y[training])
fit = predict.fun(out,x,t_y)
pred = predict.fun(out,x0,t_y)
#print(out)
if (verbose) {
cat(sprintf("%sComputing residuals and quantiles on second part ...\n",txt))
}
vect_y=t(vapply(y,unlist,numeric(sum(grid_size))))
vect_fit=t(vapply(fit,unlist,numeric(sum(grid_size))))
vect_residuals_y=vect_y-vect_fit
if (verbose) {
cat(sprintf("%sComputing modulation function ...\n",txt))
}
s=computing_s_regression(vec_residual=vect_residuals_y[training,],type=s.type,
alpha=alpha,tau=tau,grid_size=grid_size)
vect_s=unlist(s)
############## COMPUTATION OF THE NCS #################
rho=apply(vect_residuals_y[calibration,],1,function(x) max(abs(x)/vect_s))
k_s=sort(rho,decreasing=FALSE)[ceiling(l+tau-(l+1)*alpha)]
if ((ceiling(l+tau-(l+1)*alpha))==1) v=0
else{v=sum(sort(rho,decreasing=FALSE)[1:(ceiling(l+tau-(l+1)*alpha)-1)]==k_s)}
if ((ceiling(l+tau-(l+1)*alpha))==l) r=0
else{r=sum(sort(rho,decreasing=FALSE)[(ceiling(l+tau-(l+1)*alpha)+1):length(rho)]==k_s)}
average_width=mean(2*k_s*vect_s)
##################### BUILD BOUNDS #######################
acc_grid=c(1,cumsum(grid_size))
lo<-lapply(1:n0, function(i) lapply(1:p, function(j) pred[[i]][[j]]-k_s* s[[j]]))
up<-lapply(1:n0, function(i) lapply(1:p, function(j) pred[[i]][[j]]+k_s* s[[j]]))
return(structure(.Data=list(t_y,pred,lo,up),
names=c("t","pred","lo","up")))
}
utils::globalVariables(c("seed.tau", "td", "vec_residual"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.