R/analyze_factor.R

Defines functions analyze_factor

Documented in analyze_factor

#' @title analyze_factor
#'
#' @description Analyze a particular factor in the matrix of factor scores by
#' loading the metadata for grouped observations of positive scores and
#' negative scores defined by the score threshold. Looking into this subset
#' of metadata may provide an idea of the latent variable, and whether it
#' could be confounding the analysis (e.g. all observations being male or
#' female if the cancer is not gender specific).
#'
#' @details Rows of \code{fi_mat} are matched to rows of
#' \code{clean_metadata} by position, so both must describe the same
#' observations in the same order. Observations whose score is missing are
#' placed in neither group.
#'
#' @param clean_metadata Clean metadata returned by standardize_metadata().
#' Must be a data frame carrying the "metaStandard" class.
#' @param fi_mat Full matrix of factor scores for the observations, with one
#' row per row of \code{clean_metadata}.
#' @param factor_num The factor to analyze, given either as a single column
#' index of \code{fi_mat} or as a single column name. Default value is 1
#' (the first factor).
#' @param score_threshold Score cutoff for positive and negative scores, as
#' a single number. Default value is 0.75.
#'
#' @return A list containing 2 metadata data frames.
#' \itemize{
#'   \item positive_group - Observations whose score is strictly greater
#'   than \code{score_threshold}, or NULL if there are none.
#'   \item negative_group - Observations whose score is strictly less than
#'   \code{-score_threshold}, or NULL if there are none.
#' }
#'
#'
#' @examples
#' # Using tcga_meta_clean from the package.
#' library(MetaConIdentifier)
#' ca_info <- run_ca(tcga_meta_clean)
#'
#' # Collect the metadata of observations scoring beyond +/- 0.75 on the
#' # first factor.
#' obs_groupings <- analyze_factor(tcga_meta_clean, ca_info$fi_mat,
#' factor_num = 1, score_threshold = 0.75)
#'
#'
#' @export
#'
analyze_factor <- function(clean_metadata, fi_mat, factor_num = 1,
                           score_threshold = 0.75) {
  if (!is.data.frame(clean_metadata) ||
      !inherits(clean_metadata, "metaStandard")) {
    stop("Metadata is either not a data frame or did not undergo cleaning.")
  }

  if (!is.matrix(fi_mat)) {
    stop("Factor scores are not in a matrix.")
  }

  # Metadata rows are selected by row position in fi_mat, so a size mismatch
  # would silently return the wrong (or all-NA) observations.
  if (nrow(fi_mat) != nrow(clean_metadata)) {
    stop("Factor scores and metadata differ in number of observations.")
  }

  # Exactly one factor must be selected; negative, zero, logical or vector
  # subscripts would otherwise pick several columns (or none) without error.
  if (length(factor_num) != 1 || is.na(factor_num)) {
    stop("Factor number must be a single, non-missing value.")
  }
  if (is.character(factor_num)) {
    if (!(factor_num %in% colnames(fi_mat))) {
      stop("Factor name is not a column of the factor score matrix.")
    }
  } else if (!is.numeric(factor_num) ||
             factor_num != trunc(factor_num) ||
             factor_num < 1 || factor_num > ncol(fi_mat)) {
    stop("Factor number must be a whole number between 1 and ",
         ncol(fi_mat), ".")
  }

  if (!is.numeric(score_threshold) || length(score_threshold) != 1 ||
      is.na(score_threshold)) {
    stop("Score threshold must be a single, non-missing number.")
  }

  scores <- fi_mat[, factor_num]

  # which() drops NA comparisons, so missing scores land in neither group.
  positive_obs <- which(scores > score_threshold)
  negative_obs <- which(scores < -score_threshold)

  # A group with no qualifying observation is reported as NULL rather than
  # as a zero-row data frame.
  select_rows <- function(rows) {
    if (length(rows) > 0) {
      clean_metadata[rows, , drop = FALSE]
    } else {
      NULL
    }
  }

  list(positive_group = select_rows(positive_obs),
       negative_group = select_rows(negative_obs))
}

# [END]
ahnjedid/MetaConIdentifier documentation built on Dec. 18, 2021, 11:26 p.m.