R/mathScore.R

Defines functions math.score

Documented in math.score

#' Calculates MATH (Mutant-Allele Tumor Heterogeneity) score.
#'
#' @description Calculates MATH scores from variant allele frequencies (VAFs). Mutant-Allele Tumor Heterogeneity (MATH) score is a measure of intra-tumor genetic heterogeneity.
#' High MATH scores are related to lower survival rates. This function requires VAFs.
#'
#' @details Only non-synonymous variants are used. For every sample the score is computed as
#' \code{100 * 1.4826 * median(abs(vaf - median(vaf))) / median(vaf)}.
#' VAFs are taken from column 't_vaf' if it exists; otherwise from \code{vafCol} if given; otherwise they are
#' estimated as \code{t_alt_count / (t_ref_count + t_alt_count)}.
#' If any VAF is greater than 1, all VAFs are assumed to be percentages and are divided by 100.
#' Samples with fewer than 5 usable variants (non-missing VAF at or above \code{vafCutOff}) are skipped with a message.
#'
#' @references Mroz, Edmund A. et al. Intra-Tumor Genetic Heterogeneity and Mortality in Head and Neck Cancer: Analysis of Data from The Cancer Genome Atlas. Ed. Andrew H. Beck. PLoS Medicine 12.2 (2015): e1001786.
#'
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param vafCol manually specify column name for vafs. Only used when column 't_vaf' is absent. Default NULL, which looks for column 't_vaf'.
#' @param sampleName sample name(s) for which MATH score is to be calculated. If NULL, calculates for all samples.
#' @param vafCutOff minimum vaf for a variant to be considered for score calculation. Default 0.075
#' @return \code{data.table} with one row per scored Tumor_Sample_Barcode and columns
#' \code{Tumor_Sample_Barcode}, \code{MedianAbsoluteDeviation} (median absolute deviation of the VAFs, multiplied by 100)
#' and \code{MATH}. Skipped samples are not included.
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' laml.math <- math.score(maf = laml, vafCol = 'i_TumorVAF_WU',
#' sampleName = c('TCGA-AB-3009', 'TCGA-AB-2849', 'TCGA-AB-3002', 'TCGA-AB-2972'))
#' @export
math.score = function(maf, vafCol = NULL, sampleName = NULL, vafCutOff = 0.075){

  if(is.null(sampleName)){
    sampleName = as.character(getSampleSummary(maf)[,Tumor_Sample_Barcode])
  }

  dat = subsetMaf(maf, includeSyn = FALSE, tsb = sampleName, mafObj = FALSE)

  # Resolve the VAF column. Precedence: existing 't_vaf' > vafCol > read counts.
  if('t_vaf' %in% colnames(dat)){
    if(!is.null(vafCol) && !identical(vafCol, 't_vaf')){
      # Keep the historical precedence, but do not ignore the argument silently.
      message("Found t_vaf column; using it and ignoring vafCol = '", paste(vafCol, collapse = ", "), "'.")
    }
  }else if(!is.null(vafCol)){
    if(length(vafCol) != 1 || !vafCol %in% colnames(dat)){
      # Fail here with a clear message rather than later with "object 't_vaf' not found".
      stop("Column '", paste(vafCol, collapse = ", "), "' specified by vafCol was not found in the MAF.")
    }
    data.table::setnames(dat, old = vafCol, new = 't_vaf')
  }else if(all(c('t_ref_count', 't_alt_count') %in% colnames(dat))){
    message("t_vaf field is missing, but found t_ref_count & t_alt_count columns. Estimating vaf..")
    dat[,t_ref_count := as.numeric(as.character(t_ref_count))]
    dat[,t_alt_count := as.numeric(as.character(t_alt_count))]
    dat[,t_vaf := t_alt_count/(t_ref_count + t_alt_count)]
  }else{
    # Show the columns that were actually searched, to help pick a vafCol.
    print(colnames(dat))
    stop('t_vaf field is missing. Use vafCol to manually specify vaf column name.')
  }

  # VAFs may arrive as character or factor; go through character so that
  # factors convert by label rather than by internal integer code.
  if(!is.numeric(dat[['t_vaf']])){
    dat[, t_vaf := as.numeric(as.character(t_vaf))]
  }

  # Any value above 1 means VAFs are percentages; rescale to fractions.
  # any() is used instead of max() so empty/all-NA input raises no warning.
  if(any(dat[['t_vaf']] > 1, na.rm = TRUE)){
    dat[, t_vaf := t_vaf / 100]
  }

  # Drop missing VAFs and those below the cutoff; keep only what is needed.
  dat = dat[!is.na(t_vaf) & t_vaf >= vafCutOff, .(Tumor_Sample_Barcode, t_vaf)]

  # One numeric vector of VAFs per sample, named by sample barcode.
  vaf.spl = split(dat[['t_vaf']], as.character(dat[['Tumor_Sample_Barcode']]))

  math.dt = lapply(X = names(vaf.spl), FUN = function(tsb){

    vaf = vaf.spl[[tsb]]

    if(length(vaf) < 5){
      message("Not enough mutations in ", tsb, ". Skipping..")
      return(NULL) # NULL entries are dropped by rbindlist below
    }

    vaf.median = median(vaf)
    pat.mad = median(abs(vaf - vaf.median)) * 100 # median absolute deviation, as percentage
    pat.math = pat.mad * 1.4826 / vaf.median # 1.4826 is the scaling constant used for the MATH score

    data.table::data.table(Tumor_Sample_Barcode = tsb,
                           MedianAbsoluteDeviation = pat.mad,
                           MATH = pat.math)
  })

  math.dt = data.table::rbindlist(math.dt, use.names = TRUE, fill = TRUE)
  return(math.dt)
}
PoisonAlien/maftools documentation built on April 7, 2024, 2:49 a.m.