#' KFold
#'
#' \code{KFold} Creates a K-Fold cross-validation set.
#'
#' @section Methods:
#' \itemize{
#' \item{\code{new()}}{Not implemented.}
#' }
#'
#' @param x Corpus object.
#' @param k Numeric of cvSets
#' @param stratify Logical. If TRUE (default), KFolds will be taken from
#' each document in accordance with the proportions or counts indicated
#' in the 'n' parameter.
#' @param seed Numeric used to initialize a pseudorandom number generator.
#'
#' @docType class
#' @author John James, \email{jjames@@datasciencesalon.org}
#' @family Corpus Sample Family of Classes
#' @family CorpusStudio Family of Classes
#' @export
KFold <- R6::R6Class(
classname = "KFold",
lock_objects = FALSE,
lock_class = FALSE,
inherit = Sample0,
private = list(
processFold = function(k, name) {
# Instantiate cvSet
cvSet <- CVSet$new(name)
cvSet <- Copy$new()$this(x = private$..corpus, to = cvSet)
train <- Clone$new()$this(x = private$..corpus, reference = FALSE)
test <- Clone$new()$this(x = private$..corpus, reference = FALSE)
# Set names
if (is.null(name)) name <- private$..corpus$getName()
cvSet$setName(paste0(name, " Fold #", k))
train$setName(paste0(name, " Fold #", k, " Training Set"))
test$setName(paste0(name, " Fold #", k, " Test Set"))
# Set cross-validation type
cvSet$setMeta(key = 'cv', value = paste0("Fold #", k), type = 'f')
train$setMeta(key = 'cv', value = 'training', type = 'f')
test$setMeta(key = 'cv', value = 'test', type = 'f')
# Create and add training and test Documents to Corpora object
corpusDocs <- private$..corpus$getDocuments()
for (i in 1:length(corpusDocs)) {
id <- corpusDocs[[i]]$getId()
trainIdx <- private$..indices$n[private$..indices$document == id & private$..indices$label != k]
testIdx <- private$..indices$n[private$..indices$document == id & private$..indices$label == k]
trainDoc <- Clone$new()$this(x = corpusDocs[[i]], content = FALSE)
testDoc <- Clone$new()$this(x = corpusDocs[[i]], content = FALSE)
trainDoc$setName(paste0(corpusDocs[[i]]$getName(), "Training Document"))
testDoc$setName(paste0(corpusDocs[[i]]$getName(), "Test Document"))
trainDoc$setMeta(key = 'cv', value = 'training', type = 'f')
testDoc$setMeta(key = 'cv', value = 'test', type = 'f')
trainDoc$content <- corpusDocs[[i]]$content[trainIdx]
testDoc$content <- corpusDocs[[i]]$content[testIdx]
train$addDocument(trainDoc)
test$addDocument(testDoc)
}
cvSet$addCorpus(train)
cvSet$addCorpus(test)
private$..KFolds$addCVSet(cvSet)
event <- paste0("Fold #", k, " created.")
private$logR$log(method = 'processFold', event = event, level = "Info")
return(TRUE)
},
buildFolds = function(k, name) {
cvSetName <- name
if (is.null(cvSetName)) cvSetName <- paste0(private$..corpus$getName(),
" KFold CV Set")
private$..KFolds <- CVSetKFold$new(name = cvSetName)
for (i in 1:k) {
private$processFold(k = i, name = name)
}
},
validate = function(x, k, stratify) {
private$..params <- list()
private$..params$classes$name <- list('x','k', 'stratify')
private$..params$classes$objects <- list(x, k, stratify)
private$..params$classes$valid <- list('Corpus', 'numeric', 'logical')
private$..params$logicals$variables <- c('stratify')
private$..params$logicals$values <- c(stratify)
v <- private$validator$validate(self)
if (v$code == FALSE) {
private$logR$log(method = 'execute', event = v$msg, level = "Error")
return(FALSE)
}
if (length(k) > 1) {
event <- paste0("Invalid parameter 'k'. Parameter must be of length one.")
private$logR$log(method = 'execute', event = event, level = "Error")
return(FALSE)
}
return(TRUE)
}
),
public = list(
#-------------------------------------------------------------------------#
# Constructor #
#-------------------------------------------------------------------------#
initialize = function() {
private$loadServices("KFold")
invisible(self)
},
#-------------------------------------------------------------------------#
# KFold Method #
#-------------------------------------------------------------------------#
execute = function(x, k, name = NULL, stratify = TRUE, seed = NULL) {
if (!private$validate(x, k, stratify)) stop()
private$..corpus <- x
private$segment(k, stratify, seed)
private$buildFolds(k, name)
invisible(self)
},
#-------------------------------------------------------------------------#
# Get KFolds #
#-------------------------------------------------------------------------#
getKFolds = function() private$..KFolds,
#-------------------------------------------------------------------------#
# Visitor Method #
#-------------------------------------------------------------------------#
accept = function(visitor) {
visitor$splitKFold(self)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.