# R/predictTilState_function.R

#' Predict state of tumor-infiltrating CD8 T-cells from scRNA-seq data
#'
#' \code{predictTilState} evaluates a logistic regression model to predict the state of individual CD8 tumor-infiltrating lymphocytes (mouse or human) based on their transcriptomes (scRNA-seq data)
#'
#' @author Santiago J. Carmona  <santiago.carmona@@unil.ch>
#'
#' @param tpm expression matrix (or data.frame) using TPM (Transcripts per Million) normalization, where rows correspond to genes and columns to single-cells. Row names must correspond to gene symbols in uppercase (e.g. LAG3, LGALS1, ANXA2). A single-column (one cell) input is accepted.
#' Only the following genes are necessary for the function to work: LAG3, LGALS1, ANXA2, SELL, BHLHE40, PDCD1, TIGIT, KLRK1, TCF7, LEF1, S1PR1, RPS24, EMB, IL7R, GPR18, PIK3IP1, ST6GAL1, RPS6, KLRG1, GZMA, RPS15A, RPS26, S1PR5, RPS25, RPL39, NRP2, DTX1, FBXL2, CXCR6, CCL4, HAVCR2, TMSB4X, CCL3, TOX, ADAM8, ATXN1, GLRX, LAT2, PRF1, HILPDA, SLC37A2, ST14, CCNA2, CDCA8, ITGB7, CCR7, ADAM19, ISG20, NUP43, PADI2, FANCI, NCOR2, LSM3, ORC1, DUSP4, TNFRSF4, ITGB1, CCL5, IL2RA, CD83, TNFSF11, XCL1, CRTAM, RAMP3, LAD1, LTA, PPIL1, TM4SF5, RAMP1, WRAP53, STMN1, CCNB2, KIF20A, CDCA2, FAM83D, NEIL3, PTPRC, SIVA1, 1500009L16RIK, SLC9A5
#' @param ignore if set to TRUE, up to 10 missing genes will be imputed to 0s (NOT RECOMMENDED, UNKNOWN PREDICTIVE PERFORMANCE). If FALSE (default), any missing gene raises an error that lists the missing genes.
#'
#' @return a two-element list containing 1) \emph{predictedState}, a factor with one entry per cell holding the predicted states
#' (naive, terminal effector, exhausted, memory-like, cycling effector or "unknown" if no class had a score above a threshold of 0.5), and
#' 2) \emph{stateProbabilityMatrix}, a matrix of number_of_cells x number_of_states (5) of probabilities of cell c belonging to class s
#'
#' @examples
#' data(B16CD8TILs_tpm)
#' x <- predictTilState(B16CD8TILs_tpm)
#' table(x$predictedState)
#' head(x$stateProbabilityMatrix)
#' @export
predictTilState <- function(tpm, ignore = FALSE) {

  # Model genes that are absent from the row names of the input.
  missingGenes <- predictorGenes[!predictorGenes %in% row.names(tpm)]

  if (length(missingGenes) > 0) {
    missingList <- paste(missingGenes, collapse = ",")

    if (ignore == FALSE) {
      # The list of missing genes is part of the error itself so that it is
      # not lost when standard output is discarded or the error is caught.
      stop(
        "ERROR: some required genes are missing in input expression matrix or gene names are different than expected (e.g. CD8A is a valid gene name). ",
        "Please provide an input expression matrix including the following row names: ",
        missingList, ". ",
        "Use ignore=TRUE to ignore missing genes (allows up to 10 missing genes) (WARNING: UNKNOWN PERFORMANCE)"
      )
    }

    if (length(missingGenes) > 10) {
      stop(
        "ERROR: more than 10 required genes are missing in input expression matrix or gene names are different than expected. ",
        "Please provide an input expression matrix including the following row names: ",
        missingList
      )
    }

    # Impute the missing genes as all-zero rows. Assigning to non-existent
    # row names only works for data.frames; on a matrix it fails with
    # "subscript out of bounds", so matrix-like input gets the rows appended.
    if (is.data.frame(tpm)) {
      tpm[missingGenes, ] <- 0
    } else {
      zeroRows <- matrix(
        0,
        nrow = length(missingGenes),
        ncol = ncol(tpm),
        dimnames = list(missingGenes, colnames(tpm))
      )
      tpm <- rbind(tpm, zeroRows)
    }
  }

  nCells <- ncol(tpm)
  nStates <- length(classNames)

  # One column of scores per state: exp() of the weighted sum of the
  # expression of the genes named in that state's coefficient vector.
  scoreM <- matrix(NA_real_, nrow = nCells, ncol = nStates)
  for (i in seq_len(nStates)) {
    w <- pred[[i]]
    # drop = FALSE keeps a genes x cells table even for a single cell, so
    # colSums() still receives a two-dimensional object.
    scoreM[, i] <- exp(colSums(tpm[names(w), , drop = FALSE] * w))
  }

  # Normalise the scores of each cell so that they sum to 1 across states.
  probM <- scoreM / rowSums(scoreM)
  colnames(probM) <- classNames

  # Only states with probability >= 0.5 are eligible for assignment.
  probM.un <- probM
  probM.un[probM < 0.5] <- NA

  # Index of the winning state for each cell, or NA when no state is
  # eligible. vapply() always yields exactly one value per cell, including
  # when every cell is below the threshold.
  topIdx <- vapply(
    seq_len(nCells),
    function(i) {
      idx <- which.max(probM.un[i, ])
      if (length(idx) == 0L) NA_integer_ else idx
    },
    integer(1)
  )

  cellClass <- classNames[topIdx]
  cellClass[is.na(cellClass)] <- "unknown"
  cellClass <- factor(cellClass)

  list(predictedState = cellClass, stateProbabilityMatrix = probM)
}


#' B16 CD8 Tumor-infiltrating T-cells from Singer et al 2016
#'
#' This dataset was used to train the logistic regression model and is provided as an example for testing
#' (it is the input passed to \code{predictTilState} in that function's examples).
#' It is a subset (filtered genes and cells) of the scRNA-seq dataset of Singer M et al., available at NCBI GEO under accession GSE85947
#'
#' @references Singer M, Wang C, Cong L, Marjanovic ND et al. A Distinct Gene Module for Dysfunction Uncoupled from Activation in Tumor-Infiltrating T Cells. Cell 2016 Sep 8;166(6):1500-1511.e9. \url{https://www.ncbi.nlm.nih.gov/pubmed/27610572}
#' @references \url{https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE85947}
"B16CD8TILs_tpm"
# GfellerLab/TILAtlas documentation built on May 17, 2019, 8:03 a.m.