#' Data dimensionality reduction using PCA on a split object.
#'
#' @description
#' Fit PCA on the training set and apply the same transformation to the test
#' set. The goal is to use principal components in prediction models as a
#' smaller number of variables instead of all the marker predictors.
#'
#' @param split An object of class \code{split}, corresponding to one element of
#' the total `cv_object` generated by one of the functions [predict_cv0()],
#' [predict_cv00()], [predict_cv1()], or [predict_cv2()],
#' and containing the following items:
#' * **training**: \code{data.frame} Training dataset
#' * **test**: \code{data.frame} Test dataset
#'
#' @param geno \code{data.frame} It corresponds to a `geno` element
#' within an object of class `METData`.
#'
#' @param threshold \code{numeric} A fraction of the total variance that
#' should be covered by the components
#'
#' @return pc_values A \code{data.frame} containing the principal components
#' in columns and the names of all lines used in the study is contained in the
#' first column 'geno_ID'. PCs for the lines present in the test set were
#' computed based on the transformation done on the training set.
#'
#' @author Cathy C. Westhues \email{cathy.jubin@@hotmail.com}
#' @export
#'
#'
#'
apply_pca <- function(split,
geno,
threshold = 0.95,
...) {
geno <- as.data.frame(geno)
geno$geno_ID = row.names(geno)
geno_training = geno[geno$geno_ID%in%unique(split[['training']][,'geno_ID']),]
geno_training = unique(geno_training)
geno_test = geno[geno$geno_ID%in%unique(split[['test']][,'geno_ID']),]
geno_test = unique(geno_test)
rec <- recipes::recipe(geno_ID ~ . ,
data = geno_training) %>%
recipes::update_role(geno_ID, new_role = 'outcome') %>%
recipes::step_nzv(recipes::all_predictors()) %>%
recipes::step_pca(recipes::all_predictors(),
threshold = threshold,
options = list(center = T, scale. = T))
norm_obj <- recipes::prep(rec, training = geno_training,strings_as_factors = FALSE)
training_pca <- recipes::bake(norm_obj, geno_training)
test_pca <- recipes::bake(norm_obj, geno_test)
training <-
plyr::join(split[[1]], training_pca, by = 'geno_ID')
test <-
plyr::join(split[[2]], test_pca, by = 'geno_ID')
return(list(training,test))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.