R/Features_MinimumFeatures.R

Defines functions Features_MinimumFeatures

Documented in Features_MinimumFeatures

#' The minimum set of the most important features.
#'
#' \code{Features_MinimumFeatures} automates the feature selection workflow, i.e. preprocessing, filtering by correlation, filtering by importance, and extracting the features shared between the ascending and descending models. \cr
#' The workflow is run twice: once on the preprocessed feature dataframes as they are ("ascending"), and once after passing them through \code{inverseColOrderDFList} ("descending"). The feature sets extracted from the two runs are then combined using \code{criteria}.
#'
#' @param featureDFList A list of feature dataframes generated by \code{Features}, or imported by \code{readFeatureDFList}.
#' @param metadataDF A dataframe containing metadata, consisting of "Peptide", "Immunogenicity", and "Cluster" columns.
#' @param seedSet A set of random seeds.
#' @param corThreshold The threshold of correlation to eliminate features.
#' @param featureNSet The number of the features that should be retained. Can also be a vector of numbers. In that case, a list of minimum sets of features would be returned. The first element is used for the initial importance filtering; the remaining elements are applied as reductions of that first result.
#' @param criteria The criteria of feature extraction. Can either be "intersect" or "union". Any other value raises an error.
#' @param returnImpDFList Logical. Whether the lists of feature importance dataframes should be returned as well.
#' @return If \code{returnImpDFList} is \code{TRUE}, a list with the elements "MinimumFeatureSet", "ImpDFList_Asc", and "ImpDFList_Desc". Otherwise, the minimum feature set only. The minimum feature set is a single set when \code{featureNSet} has one element, and a list of sets named after \code{featureNSet} when it has two or more.
#' @export
#' @rdname Features_MinimumFeatures
#' @name Features_MinimumFeatures
Features_MinimumFeatures <- function(
  featureDFList,
  metadataDF,
  seedSet=1:5,
  corThreshold=0.75,
  featureNSet=100,
  criteria="intersect",
  returnImpDFList=TRUE
){
  # Validate the inputs before any expensive work is started.
  # The criteria string used to be pasted into code and evaluated; it is now
  # checked against the documented choices and mapped to the set function directly.
  criteria <- match.arg(criteria, choices=c("intersect", "union"))
  combineSets <- switch(criteria, "intersect"=intersect, "union"=union)
  if(length(featureNSet)<1){
    stop("'featureNSet' must contain at least one value.", call.=FALSE)
  }

  # Preprocessing & feature selection
  impDFList_Asc <- featureDFList %>%
    Features_Preprocess(metadataDF, seedSet) %>%
    Features_CorFilter(corThreshold) %>%
    Features_Importance(featureNSet[[1]])
  minFeatureSet_Asc <- Features_ExtractFeatures(impDFList_Asc, criteria)
  gc();gc()

  # Preprocessing & feature selection [Inversely ordered]
  impDFList_Desc <- featureDFList %>%
    Features_Preprocess(metadataDF, seedSet) %>%
    inverseColOrderDFList() %>%
    Features_CorFilter(corThreshold) %>%
    Features_Importance(featureNSet[[1]])
  minFeatureSet_Desc <- Features_ExtractFeatures(impDFList_Desc, criteria)
  gc();gc()

  # The minimum set of features
  minFeatureSet <- combineSets(minFeatureSet_Asc, minFeatureSet_Desc)

  # Feature reduction (optional)
  # Each additional value in featureNSet reduces the importance dataframes
  # computed above, so the preprocessing is not repeated.
  if(length(featureNSet)>=2){
    reduceAndExtract <- function(impDFList, featureN){
      impDFList %>%
        Features_Importance_Reduce(featureN=featureN) %>%
        Features_ExtractFeatures(criteria)
    }
    reducedSets <- lapply(featureNSet[-1], function(featureN){
      combineSets(
        reduceAndExtract(impDFList_Asc, featureN),
        reduceAndExtract(impDFList_Desc, featureN)
      )
    })
    minFeatureSet <- c(list(minFeatureSet), reducedSets)
    names(minFeatureSet) <- featureNSet
  }

  # Output
  if(returnImpDFList){
    res <- list("MinimumFeatureSet"=minFeatureSet, "ImpDFList_Asc"=impDFList_Asc, "ImpDFList_Desc"=impDFList_Desc)
  }else{
    res <- minFeatureSet
  }
  return(res)
}
masato-ogishi/Repitope documentation built on Feb. 14, 2023, 5:47 a.m.