R/sampling.R

#' Perform rarefaction analysis.
#' 
#' \code{rarefy} performs user-defined rarefaction at a range of subsampling depths
#' specified by the user.
#' 
#' @param   vec             a vector based on which rarefaction is to be performed.
#' @param   rarefyFunc      a function that defines rarefaction to be performed on (a subset of)
#'                          \code{vec}. E.g.: \code{length(unique(vec))}.
#' @param   subsampleDepth  a vector specifying subsampling depths. Must be between 1 and 
#'                          \code{length(vec)} inclusive.
#' @param   numReplicate    number of times of subsampling to be performed at each depth.
#' @param   seed            if set, result would be reproducible. Default is \code{NULL}.
#' 
#' @return  A \code{matrix}. Rows correspond to be replicates. Columns and their names 
#'          correspond to subsampling depths as specified by \code{subsampleDepth}.
#'
#' @details Interpretation of the values in the returned \code{matrix} depends on the
#'          user-supplied \code{rarefyFunc}. For example, if \code{rarefyFunc} is set to
#'          be \code{length(unique(vec))}, each returned value will be the number of unique 
#'          items subsampled from \code{vec} at a given subsampling depth during one 
#'          replicate.
#'
#' @examples
#' # rarefyFunc
#' countUniq = function(x) {length(unique(x))}
#' 
#' vec = c("a", "a", "a", "b", "c", "d", "d", "d", "d", "e", "f", "h", 'h')
#' rarefy(vec=vec, rarefyFunc=countUniq, subsampleDepth=c(2,4,6,8), 
#'        numReplicate=5, seed=10204)
#' 
#' @export
rarefy = function(vec, rarefyFunc, subsampleDepth, numReplicate, seed=NULL){
    # subsampling depth be between 1 & total number of samples incl. 
    stopifnot( (min(subsampleDepth) >= 1) & 
                   (max(subsampleDepth) <= length(vec)) )
    
    # reproducible?
    if (!is.null(seed)) {set.seed(seed)}
    
    # subsample numReplicate times
    # pre-transposition, replicate returns each column as one replicate
    # post-transposition, each row is a replicate
    rarefiedMtx = t(replicate(n=numReplicate, 
                              subsampleHelper(vec, rarefyFunc, subsampleDepth)))
    stopifnot(is.matrix(rarefiedMtx))
    colnames(rarefiedMtx) = as.character(subsampleDepth)
    
    return(rarefiedMtx)
}

# helper to rarefy()
# perform subsampling once at every subsampleDepth on vec
subsampleHelper = function(vec, rarefyFunc, subsampleDepth) {
    sapply(subsampleDepth, function(m){
        rarefyFunc(vec[sample(x=1:length(vec), size=m, replace=FALSE)])
    })
}
julianqz/igtracker documentation built on May 20, 2019, 4:21 a.m.