R/utils.jackknife.R

Defines functions utils.jackknife

Documented in utils.jackknife

#' @name utils.jackknife
#' @title Conducts jackknife resampling using a genlight object
#' @description
#' Jackknife resampling is a statistical procedure where for a dataset of sample 
#' size n, subsamples of size n-1 are used to compute a statistic. The collection 
#' of the values obtained can be used to evaluate the variability around the point 
#' estimate. This function can take the loci, the individuals or the populations 
#' as units over which to conduct resampling.
#' 
#' bold{Note} that when n is very small, jackknife resampling is not recommended.
#' 
#' Parallel computation is implemented. The argument code{n.cores} indicates the 
#' number of core to use. If "auto" [default], it will use all but one available 
#' cores. If the number of units is small (e.g. a few populations), there is not 
#' real advantage in using parallel computation. On the other hand, if the number 
#' of units is large (e.g. thousands of loci), even with parallel computation, 
#' this function can be very slow.
#' 
#' @inheritParams gl.drop.ind
#' @param FUN the name of the function to be used to calculate the statistic
#' @param unit The unit to use for resampling. One of c("loc", "ind", "pop"): 
#' loci, individuals or populations
#' @param n.cores The number of cores to use. If "auto" [default], it will 
#' use all but one available cores.
#' @param ... any additional arguments to be passed to FUN
#' @return A list of length n where each element is the output of FUN
#'
#' @author Custodian: Carlo Pacioni -- Post to
#' \url{https://groups.google.com/d/forum/dartr}
#'
#' @examples
#' require("dartR.data")
#' platMod.gl <- gl.filter.allna(platypus.gl) 
#' chk.pop <- utils.jackknife(x=platMod.gl, FUN="gl.alf", unit="pop", 
#' recalc = FALSE, mono.rm = FALSE, n.cores = 1, verbose=0)
#' @export

utils.jackknife <- function(x, 
                            FUN, 
                            unit="loc", 
                            recalc = FALSE, 
                            mono.rm = FALSE, 
                            n.cores = "auto",
                            verbose = NULL, 
                            ...) {
  
  # SET VERBOSITY
  verbose <- gl.check.verbosity(verbose)
  
  # FLAG SCRIPT START
  funname <- match.call()[[1]]
  utils.flag.start(func = funname,
                   build = "Jody",
                   verbosity = verbose)
  
  # CHECK DATATYPE
  datatype <- utils.check.datatype(x, verbose = verbose)
  
  # FUNCTION SPECIFIC ERROR CHECKING
  if(!is.character(unit)){
    stop(error("  The argument 'unit' should be character vector"))
  } 
  
  if(length(unit == 1)) {
    if(!unit %in% c("loc", "ind", "pop")) {
      stop(error('  The argument "unit" should one of the following: "loc", "ind", "pop"'))
    }
    } else {
      stop(error('  The argument "unit" should be of length 1'))
  }
  
  # set variables based on unit
  if(unit == "loc") {
    subsetFUN <- "gl.drop.loc"
    subsetList <- locNames(x)
    } else {
    if(unit == "ind") {
      subsetFUN <- "gl.drop.ind"
      subsetList <- indNames(x)
      } else {
        subsetFUN <- "gl.drop.pop"
        subsetList <- unique(pop(x))
      }
    }
  argmts <- list(...)
  #subsetList.item <- subsetList[1]
  
  # PUT CHECKS HERE for correct unit and x and FUN is a char, and ... is a list
  jacknife <- function(gl, jckfun, subsetFUN, subsetList.item, rec = recalc, 
                       mono = mono.rm, opt.argt=argmts) {
    xsub <- do.call(subsetFUN, 
                    args = if(subsetFUN == "gl.drop.loc") {
                      list(gl, subsetList.item, verbose=0)
                      } else {
                        list(gl, subsetList.item, recal=rec, mono.rm=mono, verbose=0)
                      }
                    )
    oldVerb <- gl.check.verbosity()
    gl.set.verbosity(0)
    on.exit(gl.set.verbosity(oldVerb))
    res <- do.call(jckfun, c(list(x=xsub), opt.argt))
    return(res)
  }
  
  if(n.cores != 1) {
    if(n.cores == "auto"){
      n.cores <- parallel::detectCores() - 1
    }
    
    if(length(subsetList) < n.cores){
      n.cores <- length(subsetList)
    } 
    
    cl <- parallel::makeCluster(n.cores)
    on.exit(expr=parallel::stopCluster(cl))
    catch <- parallel::clusterEvalQ(cl, library("dartR"))
    
    parallel::clusterExport(cl, 
                  varlist=c("subsetList", "x", "FUN", "subsetFUN"), 
                  envir=environment()) 
    
    jck <- parallel::parLapply(cl = cl, X = subsetList, fun = jacknife, 
                               gl=x, 
                               jckfun=FUN, subsetFUN = subsetFUN)
    
  } else {
    jck <- lapply(subsetList, jacknife, gl=x, jckfun=FUN, subsetFUN = subsetFUN)
  }
    return(jck)
  
}

Try the dartR package in your browser

Any scripts or data that you put into this service are public.

dartR documentation built on June 8, 2023, 6:48 a.m.