#' Get train/test splits of the phenotypic MET dataset based on CV2.
#'
#' @description Get train/test splits of the phenotypic MET dataset based on a
#' number of random k-fold partitions determined by the user, according to the
#' type CV2. Creation of the list of train/test splits based on phenotypic data,
#' so that all the Year x Location phenotypic observations from the phenotypic
#' MET dataset are assigned randomly to k-fold partitions (prediction of
#' incomplete field trials).
#'
#' @param pheno_data \code{data.frame} Dataset containing phenotypic outcome
#'   data, as well as the predictor variables. Its rows are the units that are
#'   assigned to folds.
#'
#' @param nb_folds \code{numeric} Number of folds in the CV process (passed to
#'   \code{rsample::vfold_cv()} as \code{v}).
#'
#' @param reps \code{numeric} Number of repeats of the k-folds CV (passed to
#'   \code{rsample::vfold_cv()} as \code{repeats}).
#'
#' @param seed Value passed to \code{set.seed()} before the folds are drawn, so
#'   that the random fold assignment can be reproduced. Note that this resets
#'   the state of the random number generator of the calling R session.
#'
#' @return a \code{cv_object} object which contains nb_folds x reps elements.
#'   Each element of the object corresponds to a \code{split} object with two
#'   elements:
#'   \describe{
#'     \item{training}{\code{data.frame} Dataset with all observations for the
#'     training set.}
#'     \item{test}{\code{data.frame} Dataset with all observations for the test
#'     set.}
#'   }
#' @author Cathy C. Westhues \email{cathy.jubin@@hotmail.com}
#' @references
#' \insertRef{jarquin2017increasing}{learnMET}
#' \insertRef{jarquin2014reaction}{learnMET}
#' @export
predict_cv2 <- function(pheno_data,
                        nb_folds,
                        reps,
                        seed) {
  # Seed the RNG first: the fold assignment drawn below is random, and the
  # seed is what makes it reproducible.
  set.seed(seed)

  # Repeated k-fold cross-validation over the rows of `pheno_data`: for each
  # resample, the analysis set consists of k-1 folds and the assessment set is
  # the remaining fold.
  folds <- rsample::vfold_cv(
    data = pheno_data,
    v = nb_folds,
    repeats = reps
  )

  # Convert one rsample split into a `split` object that holds the
  # materialised training (analysis) and test (assessment) data.
  as_train_test <- function(rsplit) {
    structure(
      list(
        training = rsample::analysis(rsplit),
        test = rsample::assessment(rsplit)
      ),
      class = "split"
    )
  }

  # One `split` per element of the resampling object (nb_folds x reps in all).
  train_test_splits <- purrr::map(folds$splits, as_train_test)
  class(train_test_splits) <- "cv_object"
  train_test_splits
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.