R/balanced.folds.R

Defines functions balanced.folds

#==========================================================================================#
#===============#
# Usage         :
#===============#
#                    balanced.folds(y, 
#                                   nfolds=min(min(table(y)), 10))
#
#===============#
# Description   :
#===============#
#
#===============#
# Arguments     :
#===============#
#
#===============#
# Values        :
#===============#
#
#==========================================================================================#

balanced.folds <- function(y, 
                           nfolds=min(min(table(y)), 10)) {

    totals <- table(y)
    fmax <- max(totals)
    nfolds <- min(nfolds, fmax)                  # makes no sense to have more folds than the max class size
    folds <- as.list(seq(nfolds))
    yids <- split(seq(y), y)                     # nice to get the ids in a list, split by class

    # Make a big matrix, with enough rows to get in all the folds per class
    
    bigmat <- matrix(NA, ceiling(fmax/nfolds) * nfolds, length(totals))
    for(i in seq(totals)) {
        bigmat[seq(totals[i]), i] <- sample(yids[[i]],size=length(yids[[i]]))
    }
    smallmat <- matrix(bigmat, nrow=nfolds)      # reshape the matrix
    
    # Now do a clever unlisting to mix up the NAs
    # the "clever" unlist doesn't work when there are no NAs
    #       apply(smallmat, 2, function(x)
    #        x[!is.na(x)])
    
    smallmat <- permute.rows(t(smallmat))
    
    res <-vector("list", nfolds)
    for (j in 1:nfolds) {
        jj <- !is.na(smallmat[, j])
        res[[j]] <- smallmat[jj, j]
    }
    
    return(res)
    
}
#==========================================================================================#

Try the superpc package in your browser

Any scripts or data that you put into this service are public.

superpc documentation built on Oct. 24, 2020, 1:07 a.m.