R/Features_ExtractFeatures.R

Defines functions Features_ExtractParameters Features_ExtractFeatures

Documented in Features_ExtractFeatures Features_ExtractParameters

#' Extract features from a list of feature dataframes.
#'
#' \code{Features_ExtractFeatures} obtains a set of features from a list of preprocessed/feature-selected dataframes. When the list has five or fewer elements, it also calls \code{vennDiagram} on the per-element feature sets.\cr
#' \code{Features_ExtractParameters} obtains a set of parameters.\cr
#'
#' @param preprocessedDFList A list of feature dataframes generated by \code{Features_Preprocess}, \code{Features_CorFilter}, \code{Features_FeatureSelect}, \code{Features_Importance}, or \code{Features_Importance_Reduce}. The column names of each element's \code{dt} component are read as candidate feature names.
#' @param criteria The criteria of feature extraction. Can either be "intersect" or "union". A two-argument set function is also accepted and is passed to \code{Reduce} as is.
#' @export
#' @rdname Features_ExtractFeatures
#' @name Features_ExtractFeatures
Features_ExtractFeatures <- function(preprocessedDFList, criteria="intersect"){
  # Validate string criteria up front: Reduce() would otherwise resolve ANY
  # function name (e.g. "setdiff", "c") and silently return a meaningless set.
  # Function objects are passed through untouched.
  if(is.character(criteria)) criteria <- match.arg(criteria, c("intersect", "union"))

  # Columns that are carried alongside the features and must not be reported as features
  metaCols <- c("DataType", "Peptide", "Immunogenicity", "Cluster")
  featureSet <- lapply(preprocessedDFList, function(l){setdiff(colnames(l$dt), metaCols)})

  # The diagram is only attempted for five or fewer sets
  if(length(preprocessedDFList)<=5) vennDiagram(featureSet, show_category_names=FALSE)

  # Fold the per-element feature sets into a single set
  Reduce(criteria, featureSet)
}

#' @export
#' @rdname Features_ExtractFeatures
#' @name Features_ExtractFeatures
Features_ExtractParameters <- function(preprocessedDFList, criteria="intersect"){
  # Decodes the parameter sets encoded in (a) the names of preprocessedDFList,
  # read as three "."-separated fields (library type, depth, seed), and
  # (b) the names of the extracted features, read as "_"-separated fields.
  # Each decoded set is reported through message() and returned in a named list.

  # Splits every string in `x` on the literal separator `sep` and returns the
  # k-th field of each. Fails with an explicit message when a string has fewer
  # than k fields, rather than an opaque subscript/dimension error later on.
  fieldAt <- function(x, sep, k, what){
    parts <- stringr::str_split(x, stringr::fixed(sep))
    tooShort <- lengths(parts)<k
    if(any(tooShort)){
      stop(what, " must have at least ", k, " '", sep, "'-separated fields: ",
           paste0(x[tooShort], collapse=", "), call.=FALSE)
    }
    vapply(parts, function(p){p[[k]]}, character(1))
  }

  # Decode parameter sets
  paramSet <- names(preprocessedDFList)
  if(is.null(paramSet)){
    stop("preprocessedDFList must be a named list; parameters are decoded from its names.", call.=FALSE)
  }
  fragLibTypeSet <- unique(fieldAt(paramSet, ".", 1, "Names of preprocessedDFList"))
  fragDepthSet <- as.numeric(unique(fieldAt(paramSet, ".", 2, "Names of preprocessedDFList")))
  seedSet <- as.numeric(unique(fieldAt(paramSet, ".", 3, "Names of preprocessedDFList")))
  message("Fragment library types: ", paste0(fragLibTypeSet, collapse=", "))
  message("Fragment library depths: ", paste0(fragDepthSet, collapse=", "))
  message("Random seeds: ", paste0(seedSet, collapse=", "))

  # Decode additional parameter sets from the minimally selected features
  # NOTE(review): the two patterns below are unanchored, so they match anywhere
  # in a feature name; presumably they are meant as prefixes -- confirm.
  featureSet <- Features_ExtractFeatures(preprocessedDFList, criteria=criteria)
  featureSet_pept <- grep("PeptDesc_", featureSet, value=TRUE)
  if(length(featureSet_pept)>=1){
    # The 4th "_"-separated field is read as the fragment length
    fragLenSet_pept <- as.numeric(unique(fieldAt(featureSet_pept, "_", 4, "PeptDesc feature names")))
  }else{
    fragLenSet_pept <- numeric(0)
  }
  featureSet_CPP <- grep("CPP_", featureSet, value=TRUE)
  if(length(featureSet_CPP)>=1){
    # The 4th field is read as the fragment length; the 2nd field, with any
    # trailing "inv" removed, is read as the AAIndex scale identifier
    fragLenSet_CPP <- as.numeric(unique(fieldAt(featureSet_CPP, "_", 4, "CPP feature names")))
    aaIndexIDSet <- sort(unique(gsub("inv$", "", fieldAt(featureSet_CPP, "_", 2, "CPP feature names"))))
  }else{
    fragLenSet_CPP <- numeric(0)
    aaIndexIDSet <- character(0)
  }
  fragLenSet <- sort(union(fragLenSet_pept, fragLenSet_CPP))
  message("Fragment lengths: ", paste0(fragLenSet, collapse=", "))
  message("AAIndex scales: ", paste0(aaIndexIDSet, collapse=", "))

  # Output
  list("featureSet"=featureSet,
       "aaIndexIDSet"=aaIndexIDSet,
       "fragLenSet"=fragLenSet,
       "fragDepthSet"=fragDepthSet,
       "fragLibTypeSet"=fragLibTypeSet,
       "seedSet"=seedSet)
}
masato-ogishi/Repitope documentation built on Feb. 14, 2023, 5:47 a.m.