R/titv.R

Defines functions titv

Documented in titv

#' Classifies SNPs into transitions and transversions
#' @description takes output generated by read.maf and classifies Single Nucleotide Variants into Transitions and Transversions.
#'
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param useSyn Logical. Whether to include synonymous variants in analysis. Defaults to FALSE.
#' @param plot plots a titv fractions. default TRUE.
#' @param file basename for output file name. If given writes summaries to output file. Default NULL.
#' @return list of \code{data.frame}s with Transitions and Transversions summary.
#' @seealso  \code{\link{plotTiTv}}
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' laml.titv = titv(maf = laml, useSyn = TRUE)
#'
#' @export


titv = function(maf, useSyn = FALSE, plot = TRUE, file = NULL){

  mafDat = subsetMaf(maf = maf, query = "Variant_Type == 'SNP'", mafObj = FALSE, includeSyn = useSyn, fields = "Variant_Type")

  if(nrow(mafDat) == 0){
    stop('No more single nucleotide variants left after filtering for SNP in Variant_Type field.')
  }

  #mafDat = mafDat[,.(Hugo_Symbol, Start_Position, End_Position, Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode)]
  mafDat$con = paste(mafDat[,Reference_Allele], mafDat[,Tumor_Seq_Allele2], sep = '>')

  conv = c("T>C", "T>C", "C>T", "C>T", "T>A", "T>A", "T>G", "T>G", "C>A", "C>A", "C>G", "C>G")
  names(conv) = c('A>G', 'T>C', 'C>T', 'G>A', 'A>T', 'T>A', 'A>C', 'T>G', 'C>A', 'G>T', 'C>G', 'G>C')
  conv.class = c('Ti', 'Ti', 'Tv', 'Tv', 'Tv', 'Tv')
  names(conv.class) = c("T>C", "C>T", "T>A", "T>G", "C>A", "C>G")

  if(nrow(mafDat[!con %in% names(conv)]) > 0){
    warning(paste0("Non standard Ti/Tv class: ", nrow(mafDat[!con %in% names(conv)]), immediate. = TRUE))
    mafDat = mafDat[con %in% names(conv)]
  }

  maf.con.summary = mafDat[,.N, by = .(Tumor_Sample_Barcode, con)]
  maf.con.summary$con.class = conv[as.character(maf.con.summary$con)]
  # maf.con.summary$con.class = suppressWarnings(as.character(factor(maf.con.summary$con, levels = c("A-G", "T-C", "C-T", "G-A", "A-T", "T-A", "A-C", "T-G", "C-A", "G-T", "C-G", "G-C"),
  #                                                                  labels = c("T-C", "T-C", "C-T", "C-T", "T-A", "T-A", "T-G", "T-G", "C-A", "C-A", "C-G", "C-G"))))

  maf.con.class.summary = maf.con.summary[,sum(N), by = .(Tumor_Sample_Barcode, con.class)]
  colnames(maf.con.class.summary)[ncol(maf.con.class.summary)] = 'nVars'
  maf.con.class.summary[,fract := (nVars/sum(nVars))*100, by = .(Tumor_Sample_Barcode)]

  maf.con.class.summary$TiTv = conv.class[maf.con.class.summary$con.class]
  maf.con.class.summary$con.class = factor(x = maf.con.class.summary$con.class,
                                           levels = c("C>A", "C>G", "C>T", "T>C", "T>A", "T>G"))
  maf.con.class.summary$TiTv = factor(x = maf.con.class.summary$TiTv, levels = c('Ti', 'Tv'))
  # maf.con.class.summary$TiTv = suppressWarnings(as.character(factor(x = maf.con.class.summary$con.class,
  #                                                                   levels = c("T>C", "C>T", "T>A", "T>G", "C>A", "C>G"), labels = c('Ti', 'Ti', 'Tv', 'Tv', 'Tv', 'Tv'))))

  fract.classes = data.table::dcast(data = maf.con.class.summary, formula = Tumor_Sample_Barcode ~ con.class, value.var = 'fract', fill = 0, drop = FALSE)
  raw.classes = data.table::dcast(data = maf.con.class.summary, formula = Tumor_Sample_Barcode ~ con.class, value.var = 'nVars', fill = 0, drop = FALSE)
  titv.summary = maf.con.class.summary[,sum(fract), by = .(Tumor_Sample_Barcode, TiTv)]
  titv.summary = data.table::dcast(data = titv.summary, Tumor_Sample_Barcode ~ TiTv, value.var = 'V1', fill = 0, drop = FALSE)

  titv.res = list(fraction.contribution = fract.classes, raw.counts = raw.classes, TiTv.fractions = titv.summary)

  if(plot){
    plotTiTv(res = titv.res)
  }

  if(!is.null(file)){
    write.table(x = fract.classes,file = paste(file, '_fraction_contribution.txt', sep = ''), quote = FALSE, row.names = FALSE, sep = '\t')
    write.table(x = raw.classes,file = paste(file, '_fraction_counts.txt', sep = ''), quote = FALSE, row.names = FALSE, sep = '\t')
    write.table(x = titv.summary,file = paste(file, '_TiTv_fractions.txt', sep = ''), quote = FALSE, row.names = FALSE, sep = '\t')
  }

  return(titv.res)
}

Try the maftools package in your browser

Any scripts or data that you put into this service are public.

maftools documentation built on Feb. 6, 2021, 2 a.m.