R/EpitopePrioritization.R

Defines functions EpitopePrioritization

Documented in EpitopePrioritization

#' Epitope prioritization based on immunogenicity scores and escape potentials.
#'
#' @param featureDF A pre-computed feature dataframe for model construction.
#' @param metadataDF A metadata dataframe, consisting only of "Peptide" and "Immunogenicity" columns, for model construction.
#' @param peptideSet A set of new peptide sequences.
#' @param peptideLengthSet A set of allowed amino acid sequence lengths. Peptides not falling in this range will be discarded.
#' @param fragLib Either the dataframe of fragment libraries generated by \code{CPP_FragmentLibrary} or the path to the fst file.
#' @param aaIndexIDSet A set of AAIndex IDs indicating the AACP scales to be used. Set "all" to shortcut the selection of all available AACP scales.
#' @param fragLenSet A set of sliding window sizes.
#' @param fragDepth A fragment library size. Must be an integer vector of length one.
#' @param fragLibType A string indicating the types of fragment libraries to be used. Must be a character vector of length one.
#' @param featureSet A minimum set of features. Combinations of fragment lengths and AAIndex IDs are internally extracted for calculating CPPs.
#' @param seedSet A set of random seeds.
#' @param coreN The number of cores to be used for parallelization.
#' @param outDir Destination directory to save the results.
#' @export
#' @rdname EpitopePrioritization
#' @name EpitopePrioritization
EpitopePrioritization <- function(
  featureDF,
  metadataDF,
  peptideSet,
  peptideLengthSet=8:11,
  fragLib,
  aaIndexIDSet="all",
  fragLenSet=3:8,
  fragDepth=10000,
  fragLibType="Weighted",
  featureSet=NULL,
  seedSet=1:5,
  coreN=parallel::detectCores(logical=F),
  outDir=paste0("./Output_", format(Sys.time(), "%Y.%m.%d.%H.%M.%S"))
){
  ## Preparations before starting...
  dir.create(file.path(outDir, "Temp"), showWarnings=F, recursive=T)
  seed <- eval(parse(text=paste0(as.character(seedSet), collapse="")))
  set.seed(seed)

  cat("#1. In silico mutagenesis.\n")
  peptideSet_ISM <- sort(InSilicoMutagenesis(peptideSet=unique(peptideSet), peptideLengthSet=peptideLengthSet, coreN=coreN)) ## including original peptides

  cat("#2. Feature computation.\n")
  featureDT_ISM_Filename <- list.files(pattern="FeatureDF.fst", path=outDir, full.names=T)
  if(length(featureDT_ISM_Filename)==1){
    message(" - Skipping computation and importing the previously computed file.")
    featureDT_ISM <- fst::read_fst(featureDT_ISM_Filename, as.data.table=T)
  }else{
    featureDT_ISM <- Features(
      peptideSet=peptideSet_ISM,
      fragLib=fragLib,
      aaIndexIDSet=aaIndexIDSet,
      fragLenSet=fragLenSet,
      fragDepth=fragDepth,
      fragLibType=fragLibType,
      featureSet=featureSet,
      seedSet=seedSet,
      coreN=coreN,
      tmpDir=file.path(outDir, "Temp")
    )[[1]]
    fst::write_fst(featureDT_ISM, file.path(outDir, "FeatureDF.fst"))
  }
  gc();gc()

  cat("#3. Immunogenicity prediction.\n")
  ert <- Immunogenicity_TrainModels(
    featureDF=featureDF,
    metadataDF=metadataDF,
    featureSet=featureSet,
    seedSet=seedSet
  )
  scoreDT_ISM <- Immunogenicity_Predict(list(featureDT_ISM), ert)[[1]]
  scoreDT_ISM[Peptide %in% peptideSet_ISM, Peptide_Type:="InSilicoMutated"]
  scoreDT_ISM[Peptide %in% peptideSet, Peptide_Type:="Original"]
  readr::write_csv(scoreDT_ISM, file.path(outDir, "ScoreDF.csv"))
  gc();gc()

  cat("#4. Neighbor network analysis and escape potential prediction.\n")
  nnet_ISM <- neighborNetwork(scoreDT_ISM$"Peptide", scoreDT_ISM$"ImmunogenicityScore")
  nnet_ISM_cluster <- neighborNetwork_Cluster_Batch(nnet_ISM, scoreDT_ISM[,.(Peptide,ImmunogenicityScore)], seed=seed, coreN=coreN)
  nnet_ISM_cluster_featureDF <- neighborNetwork_Cluster_FeatureDF(nnet_ISM_cluster, coreN=coreN)
  nnet_ISM_cluster_featureDF[Peptide %in% peptideSet_ISM, Peptide_Type:="InSilicoMutated"]
  nnet_ISM_cluster_featureDF[Peptide %in% peptideSet, Peptide_Type:="Original"]
  saveRDS(nnet_ISM, file.path(outDir, "NeighborNetwork.rds"))
  saveRDS(nnet_ISM_cluster, file.path(outDir, "NeighborNetwork_Cluster.rds"))
  readr::write_csv(nnet_ISM_cluster_featureDF, file.path(outDir, "NeighborNetwork_Cluster_FeatureDF.csv"))
  gc();gc()

  cat("#5. Epitope prioritization.\n")
  summaryDT <- merge(scoreDT_ISM[Peptide %in% peptideSet,], nnet_ISM_cluster_featureDF[Peptide %in% peptideSet,], by=c("Peptide","Peptide_Type","ImmunogenicityScore"))
  summaryDT[,EscapePotential:=ImmunogenicityScore_Cluster_Diff_Min]
  summaryDT <- summaryDT[Peptide_Type=="Original", .(Peptide, ImmunogenicityScore, ImmunogenicityScore_Cluster_Average, EscapePotential)]
  data.table::setorder(summaryDT, -ImmunogenicityScore, -ImmunogenicityScore_Cluster_Average, EscapePotential)
  readr::write_csv(summaryDT, file.path(outDir, "EpitopePrioritizationSummary.csv"))
  return(summaryDT)
}
masato-ogishi/Repitope documentation built on Feb. 14, 2023, 5:47 a.m.