R/cancerhsAggr.R

Defines functions cancerhotspotsAggr

Documented in cancerhotspotsAggr

#' Aggregate cancerhotspots reports
#' @description Takes tsv files generated by \code{\link{cancerhotspots}} and aggregates them into an MAF for downstream analysis
#' @details Every file is read with \code{data.table::fread}. A `t_depth` column is computed as the per-row sum of the
#' `A`, `T`, `G`, `C`, `Ins` and `Del` columns (missing values ignored), and rows with `t_depth` below `minDepth` or `VAF` below `minVaf` are dropped.
#' The `loci` column is split on ":" into `Chromosome` and `Start_Position` (`End_Position` is a copy of `Start_Position`),
#' and the `NT_change` column is split on ">" into `Reference_Allele` and `Tumor_Seq_Allele2`.
#' `Variant_Type` is set to "INDEL" when `Variant_Classification` is "INS" or "DEL", and to "SNP" otherwise.
#' Files in which no variant passes the filters are skipped with a message instead of aborting the aggregation.
#' @param tsvs TSV files generated by \code{\link{cancerhotspots}}. Required; character vector with at least one entry.
#' @param minVaf Min. VAF threshold. Default 0.02
#' @param minDepth Min. depth of coverage. Default 15
#' @param sampleNames samples for each tsv file. Default NULL. Parses from file names. If provided, must have one entry per file in `tsvs`.
#' @param maf Return as an MAF object. Default TRUE.
#' @param ... Additional arguments passed to \code{\link{read.maf}} if `maf` is TRUE.
#' @return \code{\link{MAF}} object if `maf` is TRUE and at least two samples are available. Otherwise a `data.table` of the aggregated variants (a warning is raised if `maf` is TRUE but fewer than two samples are available).
#' @seealso \code{\link{cancerhotspots}}
#' @export
#'
cancerhotspotsAggr = function(tsvs = NULL, minVaf = 0.02, minDepth = 15, sampleNames = NULL, maf = TRUE, ...){

  # Fail early with a clear message instead of an obscure error from basename()/fread()
  if(!is.character(tsvs) || length(tsvs) == 0){
    stop("`tsvs` must be a character vector containing at least one tsv file.", call. = FALSE)
  }

  if(is.null(sampleNames)){
    sampleNames = gsub(pattern = "\\.tsv$",
                       replacement = "",
                       x = basename(path = tsvs))
  }else if(length(sampleNames) != length(tsvs)){
    # A shorter vector would otherwise silently produce NA sample names
    stop("`sampleNames` must have one entry per file in `tsvs`.", call. = FALSE)
  }

  depthCols = c("A", "T", "G", "C", "Ins", "Del")

  canhs = lapply(tsvs, function(tsv){
    x = data.table::fread(tsv)

    # Total depth = sum of all base/indel counts; columns are selected by name
    # (strings) so that the column `T` can never be confused with TRUE
    x[, t_depth := rowSums(.SD, na.rm = TRUE), .SDcols = depthCols]
    x = x[t_depth >= minDepth & VAF >= minVaf]

    # tstrsplit() returns an empty list for zero rows, which can not be turned
    # into the two expected columns. Skip such samples (handled by the caller).
    if(nrow(x) == 0){
      return(NULL)
    }

    x[, Variant_Type := ifelse(
      test = Variant_Classification %in% c("INS", "DEL"),
      yes = "INDEL",
      no = "SNP"
    )]

    # loci is "<chromosome>:<position>"
    coords = data.table::setDT(data.table::tstrsplit(x = x$loci, split = ":"))
    data.table::setnames(coords, c("Chromosome", "Start_Position"))
    coords[, End_Position := Start_Position]

    # NT_change is "<ref>><alt>"
    alleles = data.table::setDT(data.table::tstrsplit(x = x$NT_change, split = ">"))
    data.table::setnames(alleles, c("Reference_Allele", "Tumor_Seq_Allele2"))

    # Column order: alleles, coordinates, original columns, t_depth, Variant_Type
    cbind(alleles, coords, x)
  })

  names(canhs) = sampleNames

  noVariants = vapply(canhs, is.null, logical(1))
  if(any(noVariants)){
    message("No variants passed the filters for: ", paste(sampleNames[noVariants], collapse = ", "))
  }

  canhs = data.table::rbindlist(l = canhs[!noVariants], idcol = "Tumor_Sample_Barcode", use.names = TRUE)

  if(maf){
    # `$` returns NULL (i.e. zero samples) when every file was skipped
    nSamples = length(unique(canhs$Tumor_Sample_Barcode))
    if(nSamples < 2){
      warning("Min. 2 samples required. Coercing into MAF object is not possible.")
    }else{
      canhs = read.maf(maf = canhs, ...)
    }
  }

  canhs
}
PoisonAlien/maftools documentation built on April 7, 2024, 2:49 a.m.