### performanceResample.R ---
##----------------------------------------------------------------------
## Author: Brice Ozenne
## Created: mar  3 2022 (12:01) 
## Version: 
## Last-Updated: jul  4 2023 (18:46) 
##           By: Brice Ozenne
##     Update #: 188
##----------------------------------------------------------------------
## 
### Commentary: 
## 
### Change Log:
##----------------------------------------------------------------------
## 
### Code:
## * performanceResample (documentation)
##' @title Uncertainty About Performance of a Classifier (EXPERIMENTAL)
##' @description Use resampling to quantify uncertainties about the performance of one or several binary classifiers evaluated via cross-validation.
##'
##' @param object a \code{glm} or \code{ranger} object, or a list of such objects.
##' @param data [data.frame] the training data.
##' @param name.response [character] The name of the response variable (i.e. the one containing the categories).
##' @param type.resampling [character] Should non-parametric bootstrap (\code{"bootstrap"}) or permutation of the outcome (\code{"permutation"}) be used.
##' @param n.resampling [integer,>0] Number of bootstrap samples or permutations.
##' @param fold.repetition [integer,>0] Number of folds used in the cross-validation. Should be strictly positive.
##' @param conf.level [numeric, 0-1] confidence level for the confidence intervals.
##' @param cpus [integer, >0] the number of CPUs to use. When strictly greater than 1, resampling is performed in parallel. 
##' @param seed [integer, >0] Random number generator (RNG) state used when starting resampling.
##' If \code{NULL} no state is set.
##' @param trace [logical] Should the execution of the function be traced.
##' @param filename [character] Prefix for the files containing each result.
##' @param ... arguments passed to \code{\link{performance}}.
##'
##' @details WARNING: using bootstrap after cross-validation may not provide valid variance/CI/p-value estimates.
##' 
##' @return An S3 object of class \code{performance}.
##' @keywords htest
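##'
##' @examples
##' \dontrun{
##' ## Illustration on simulated data (a minimal sketch; the variables Y, X1, X2 are arbitrary):
##' ## permutation-based uncertainty about the performance of a logistic regression.
##' set.seed(10)
##' df <- data.frame(Y = rbinom(100, size = 1, prob = 0.5),
##'                  X1 = rnorm(100),
##'                  X2 = rnorm(100))
##' fit <- glm(Y ~ X1 + X2, data = df, family = binomial)
##' performanceResample(fit, data = df, name.response = "Y",
##'                     type.resampling = "permutation", n.resampling = 100, seed = 10)
##' }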
## * performanceResample (code)
##' @export
performanceResample <- function(object, data = NULL, name.response = NULL,
                                type.resampling = "permutation", n.resampling = 1000, fold.repetition = 0, conf.level = 0.95,
                                cpus = 1, seed = NULL, trace = TRUE, filename = NULL, ...){
    ## ** Normalize arguments
    type.resampling <- match.arg(type.resampling, c("permutation", "bootstrap"))
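    ## n.resampling may also be given as a vector of sample indices: samples are then generated up to
    ## max(n.resampling) but only those whose index appears in the vector are kept (cf. test.factice below).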
    if(length(n.resampling)==1){
        vec.resampling <- 1:n.resampling
    }else{
        vec.resampling <- n.resampling
        n.resampling <- length(vec.resampling)
    }
    ## ** fix randomness
    if(!is.null(seed)){
        tol.seed <- 10^(floor(log10(.Machine$integer.max))-1)
        if(n.resampling>tol.seed){
            stop("Cannot set a seed per sample when considering more than ",tol.seed," samples. \n")
        }
        if(!is.null(get0(".Random.seed"))){ ## avoid error when .Random.seed does not exist, e.g. fresh R session with no call to the RNG
            old <- .Random.seed # to save the current seed
            on.exit(.Random.seed <<- old) # restore the current seed (before the call to the function)
        }else{
            on.exit(rm(.Random.seed, envir=.GlobalEnv))
        }
        set.seed(seed)
        seqSeed <- sample.int(tol.seed, max(vec.resampling),  replace = FALSE) 
    }else{
        seqSeed <- NULL
    }
    ## ** Point estimate
    initPerf <- performance(object, data = data, name.response = name.response,
                            fold.repetition = fold.repetition, se = FALSE, trace = FALSE, seed = seqSeed[1], ...)
    if(!is.null(filename)){
        if(!is.null(seed)){
            filename <- paste0(filename,"-seed",seqSeed[1])
        }
        saveRDS(initPerf, file = paste0(filename,".rds"))
    }
                
    if(is.null(data)){
        data <- initPerf$data
    }
    if(is.null(name.response)){
        name.response <- initPerf$args$name.response
    }
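    ## remove the internal response column possibly left by the initial call to performance()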
    data[["XXresponseXX"]] <- NULL
    ## ** single run function
    if(type.resampling=="permutation"){
        dataResample <- as.data.frame(data)
        attr(dataResample,"internal") <- attr(data,"internal") ## only do CV
        warperResampling <- function(i){
            test.factice <- i %in% vec.resampling == FALSE ## discard samples whose index was not requested
            dataResample[[name.response]] <- sample(data[[name.response]]) ## permute the outcome
            iPerf <- try(suppressWarnings(performance(object, data = dataResample, name.response = name.response, fold.repetition = fold.repetition,
                                                      trace = trace-1, se = FALSE, seed = seqSeed[i], ...)),
                         silent = FALSE)
            if(inherits(iPerf, "try-error") || test.factice){
                return(NULL)
            }else{
                dt.iPerf <- as.data.table(iPerf, type = "performance")
                if(!is.null(filename)){
                    saveRDS(cbind(sample = i, dt.iPerf[,c("method","metric","model","estimate")]), file = paste0(filename,"-",type.resampling,i,".rds"))
                }
                return(cbind(sample = i, dt.iPerf[,c("method","metric","model","estimate")]))
            }
        }
    }else if(type.resampling=="bootstrap"){
        warperResampling <- function(i){
            test.factice <- i %in% vec.resampling == FALSE ## discard samples whose index was not requested
            dataResample <- data[sample(NROW(data), size = NROW(data), replace = TRUE),,drop=FALSE] ## non-parametric bootstrap of the rows
            attr(dataResample,"internal") <- attr(data,"internal") ## only do CV
            iPerf <- try(suppressWarnings(performance(object, data = dataResample, name.response = name.response, fold.repetition = fold.repetition,
                                                      trace = trace-1, se = FALSE, seed = seqSeed[i], ...)),
                         silent = FALSE)
            if(inherits(iPerf, "try-error") || test.factice){
                return(NULL)
            }else{
                dt.iPerf <- as.data.table(iPerf, type = "performance")
                if(!is.null(filename)){
                    saveRDS(cbind(sample = i, dt.iPerf[,c("method","metric","model","estimate")]), file = paste0(filename,"-",type.resampling,i,".rds"))
                }
                return(cbind(sample = i, dt.iPerf[,c("method","metric","model","estimate")]))
            }
        }
    }
    ## warperResampling(5)
    
    ## serial calculations
    if(cpus==1){
        ## ** method to loop
        if (trace > 0) {
            requireNamespace("pbapply")
            method.loop <- pbapply::pblapply
        }else{
            method.loop <- lapply
        }
        ## ** loop
        ls.resampling <- do.call(method.loop,
                                 args = list(X = 1:max(vec.resampling),
                                             FUN = warperResampling)
                                 )
    }else{ ## parallel calculations
        ## define cluster
        cl <- parallel::makeCluster(cpus)
        if(trace>0){
            pb <- utils::txtProgressBar(max = max(vec.resampling), style = 3)          
            progress <- function(n){utils::setTxtProgressBar(pb, n)}
            opts <- list(progress = progress)
        }else{
            opts <- list()
        }
        ## link to foreach
        doSNOW::registerDoSNOW(cl)
        ## seed
        if (!is.null(seed)) {
            parallel::clusterExport(cl, varlist = "seqSeed", envir = environment())
        }         
        ## export package
        parallel::clusterCall(cl, fun = function(x){
            suppressPackageStartupMessages(library(BuyseTest, quietly = TRUE, warn.conflicts = FALSE, verbose = FALSE))
        })
        toExport <- NULL
        iB <- NULL ## [:forCRANcheck:] foreach        
        ls.resampling <- foreach::`%dopar%`(
                                      foreach::foreach(iB=1:max(vec.resampling),
                                                       .export = toExport,
                                                       .packages = "data.table",
                                                       .options.snow = opts),                                            
                                      {                                           
                                          return(warperResampling(iB))
                                      })
        parallel::stopCluster(cl)
        if(trace>0){close(pb)}
    }
    ## ** statistical inference
    dt.resampling <- data.table::as.data.table(do.call(rbind, ls.resampling[sapply(ls.resampling,length)>0]))
    new.performance <- .performanceResample_inference(performance = initPerf$performance[,c("method","metric","model","estimate")],
                                                      resampling = dt.resampling,
                                                      type.resampling = type.resampling,
                                                      conf.level = conf.level)
    ## ** gather results
    out <- list(call = match.call(),
                response = initPerf$response,
                performance = new.performance,
                prediction = initPerf$prediction,
                resampling = dt.resampling,
                auc = initPerf$auc,
                brier = initPerf$brier,
                data = initPerf$data,
                args = initPerf$args
                )
    out$args$transformation <- NA
    out$args$null <- NULL
    out$args$conf.level <- conf.level
    out$args$n.resampling <- n.resampling
    out$args$type.resampling <- type.resampling
    out$args$filename <- filename
    ## ** export
    class(out) <- append("performance",class(out))
    return(out)
}
## * .performanceResample_inference
.performanceResample_inference <- function(performance, resampling, type.resampling, conf.level){
    resampling <- data.table::copy(resampling)
    if(type.resampling=="permutation"){
        data.table::setnames(resampling, old = "estimate", new ="estimate.perm")
        dt.merge <- resampling[performance, on = c("method","metric","model")]
        if(length(unique(dt.merge$model))>1){
            dt.merge[,c("delta","delta.perm") := list(c(NA,.SD$estimate[-1]-.SD$estimate[-length(.SD$estimate)]),
                                                      c(NA,.SD$estimate.perm[-1]-.SD$estimate.perm[-length(.SD$estimate.perm)])),
                     by = c("sample","metric","method")]
            
            
        }else{
            dt.merge[,c("delta","delta.perm") := as.numeric(NA)]
        }
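        ## permutation p-values with add-one correction: (#{permuted values at least as extreme as the observed one} + 1)/(B + 1),
        ## where "extreme" means a larger AUC (higher is better) or a smaller Brier score (lower is better) under permutation.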
        out <- rbind(dt.merge[dt.merge$metric == "auc", list(estimate = mean(.SD$estimate), resample = mean(.SD$estimate.perm), se.resample = stats::sd(.SD$estimate.perm),
                                                             p.value = (sum(.SD$estimate<=.SD$estimate.perm) + 1)/(NROW(.SD)+1),
                                                             p.value_comp = (sum(abs(.SD$delta)<=abs(.SD$delta.perm)) + 1)/(NROW(.SD)+1)),
                              by = c("method","metric","model")],
                     dt.merge[dt.merge$metric == "brier", list(estimate = mean(.SD$estimate), resample = mean(.SD$estimate.perm), se.resample = stats::sd(.SD$estimate.perm),
                                                               p.value = (sum(.SD$estimate>=.SD$estimate.perm) + 1)/(NROW(.SD)+1),
                                                               p.value_comp = (sum(abs(.SD$delta)<=abs(.SD$delta.perm)) + 1)/(NROW(.SD)+1)),
                              by = c("method","metric","model")]
                     )
    }else if(type.resampling=="bootstrap"){
        vec.estimate <- performance[["estimate"]]
        M.resampling <- as.matrix(dcast(resampling, value.var = "estimate", formula = sample~method+metric+model))[,-1,drop=FALSE]
        out.method <- sapply(strsplit(colnames(M.resampling), split = "_", fixed = TRUE),"[[",1)
        out.metric <- sapply(strsplit(colnames(M.resampling), split = "_", fixed = TRUE),"[[",2)
        out.model <- as.character(mapply(x = paste0("^",out.method,"_",out.metric,"_"), y = colnames(M.resampling), FUN = function(x,y){gsub(pattern = x, replacement = "", x = y)}))
        out <- data.frame(method = out.method, metric = out.metric, model = out.model,
                          confint_percentileBootstrap(Delta = vec.estimate,
                                                      Delta.resampling = M.resampling,
                                                      null = c(auc = 0.5, brier = NA)[out.metric], alternative = "two.sided", alpha = 1-conf.level,
                                                      endpoint = colnames(M.resampling), backtransform.delta = function(x){x}))
        vecauc.estimate <- vec.estimate[out.metric=="auc"]
        vecbrier.estimate <- vec.estimate[out.metric=="brier"]
        Mauc.resampling <- M.resampling[,out.metric=="auc",drop=FALSE]
        Mbrier.resampling <- M.resampling[,out.metric=="brier",drop=FALSE]
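        ## with several models, also derive bootstrap p-values for the difference in AUC/Brier between consecutive models (p.value_comp)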
        if(length(vecauc.estimate)>1){
            deltaout <- confint_percentileBootstrap(Delta = c(0,vecauc.estimate[-length(vecauc.estimate)] - vecauc.estimate[-1],
                                                              0,vecbrier.estimate[-length(vecbrier.estimate)] - vecbrier.estimate[-1]),
                                                    Delta.resampling = cbind(0,Mauc.resampling[,-ncol(Mauc.resampling)] - Mauc.resampling[,-1],
                                                                             0,Mbrier.resampling[,-ncol(Mbrier.resampling)] - Mbrier.resampling[,-1]),
                                                    null = c(NA, rep(0, length(vecauc.estimate)-1), NA, rep(0, length(vecbrier.estimate)-1)), alternative = "two.sided", alpha = 1-conf.level,
                                                    endpoint = colnames(M.resampling), backtransform.delta = function(x){x})
            out$p.value_comp <- deltaout[,"p.value"]
        }else{
            out$p.value_comp <- NA
        }
        names(out)[names(out) == "lower.ci"] <- "lower"
        names(out)[names(out) == "upper.ci"] <- "upper"
        out$null <- NULL
    }
    ## ** export
    return(as.data.frame(out))
}
##----------------------------------------------------------------------
### performanceResample.R ends here