#' Produce indexes to split data into training and test groups K times
#'
#' Builds row indexes for K-fold cross-validation of a data frame, optionally
#' stratified by a grouping variable so that the rows of every level of that
#' variable are spread across the folds.
#'
#' @param DF data frame which is to be split
#' @param K number of 'folds' to split the data into; a single whole number
#'   greater than or equal to 1
#' @param Strat name or column index of the grouping variable in \code{DF} to
#'   stratify by, or \code{NULL} (the default) for no stratification
#'
#' @details Fold assignment is random; call \code{\link{set.seed}} beforehand
#' to get reproducible folds. A group of \code{n} rows (one stratum, or the
#' whole data frame when \code{Strat} is \code{NULL}) receives the fold labels
#' \code{seq_len(n) \%\% K + 1} in random order, so within a group the fold
#' sizes differ by at most one. Rows whose value of the stratification
#' variable is \code{NA} belong to no stratum; they are left out of every
#' fold and a warning is issued.
#'
#' @return (invisibly) a list of \code{K} elements named \code{G1}, \code{G2},
#' ..., each of which is a list of two sorted integer vectors, \code{train}
#' and \code{test}, holding the row indexes of \code{DF} to use for training
#' and for testing in that fold.
#'
#' @examples
#' set.seed(1)
#' folds <- Kcross(iris, K = 5, Strat = "Species")
#' train_1 <- iris[folds$G1$train, ]
#' test_1 <- iris[folds$G1$test, ]
Kcross <- function(DF, K = 10, Strat = NULL) {
  if (!is.numeric(K) || length(K) != 1L || !is.finite(K) ||
        K < 1 || K != round(K)) {
    stop("'K' must be a single whole number >= 1", call. = FALSE)
  }

  # Fold labels for a group of n rows, in random order. The labels are
  # permuted by indexing with sample.int() rather than by calling sample() on
  # them: sample() treats a length-one numeric argument x as 1:x, so a group
  # with a single row would get back more labels than it has rows and every
  # later label would be attached to the wrong row.
  shuffled_labels <- function(n) {
    labels <- seq_len(n) %% K + 1
    labels[sample.int(n)]
  }

  if (!is.null(Strat)) {
    # drop = TRUE extracts the column as a plain vector for data frames,
    # tibbles and matrices alike; factor() also discards unused levels.
    strata <- factor(DF[, Strat, drop = TRUE])
    if (anyNA(strata)) {
      warning("rows with a missing value in 'Strat' are left out of every fold",
              call. = FALSE)
    }
    # Row indexes per level, in level order (rows with NA are dropped here).
    rows_by_level <- split(seq_len(nrow(DF)), strata)
    row_idx <- as.integer(unlist(rows_by_level, use.names = FALSE))
    # One shuffled label vector per level, concatenated in the same order as
    # row_idx so that position i of fold_id is the fold of row row_idx[i].
    fold_id <- as.numeric(
      unlist(lapply(lengths(rows_by_level), shuffled_labels), use.names = FALSE)
    )
  } else {
    n_rows <- nrow(DF)
    # Shuffling the row order is redundant with shuffling the labels, but it
    # is kept so that the random number stream, and hence the folds obtained
    # for a given seed, stay the same as before.
    row_idx <- sample.int(n_rows)
    fold_id <- shuffled_labels(n_rows)
  }

  folds <- lapply(seq_len(K), function(k) {
    in_test <- fold_id == k
    list(train = sort(row_idx[!in_test]), test = sort(row_idx[in_test]))
  })
  names(folds) <- paste0("G", seq_len(K))
  invisible(folds)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.