R/prepAscat_t.R

Defines functions prepAscat_t

Documented in prepAscat_t

#'Prepare input files for ASCAT tumor only samples
#' @description Function takes the output from \code{\link{gtMarkers}} and generates `logR` and `BAF` files required for ASCAT analysis.
#' @details The function will filter SNPs with low coverage (default <15), estimate BAF, logR, and generates the input files for ASCAT. Tumor `logR` file will be normalized for median depth of coverage. Alternatively, logR file can be segmented with \code{\link{segmentLogR}}
#' @param t_counts read counts from tumor generated by \code{\link{gtMarkers}}
#' @param sample_name Sample name. Used as a basename for output files. Default NA, parses from `t_counts` file.
#' @param min_depth Min read depth required to consider a marker. Default 15
#' @return Generates logR and BAF files required by ASCAT
#' @seealso \code{\link{gtMarkers}} \code{\link{prepAscat}} \code{\link{segmentLogR}}
#' @references Van Loo P, Nordgard SH, Lingjærde OC, et al. Allele-specific copy number analysis of tumors. Proc Natl Acad Sci U S A. 2010;107(39):16910-16915. doi:10.1073/pnas.1009843107
#' @export

prepAscat_t = function(t_counts = NULL, sample_name = NA, min_depth = 15){

  if(any(is.null(t_counts))) stop("Missing tumor or normal read counts!")

  if(is.na(sample_name)){
    sample_name = gsub(pattern = "\\.tsv$", replacement = "", x = basename(path = t_counts))
  }

  counts = c(t_counts)

  #library sizes
  tot_map_reads = lapply(counts, function(x){
    x = data.table::fread(input = x,nrows = 1)
    as.numeric(x$V2)
  })
  names(tot_map_reads) = c("tumor")
  message("Library sizes:")
  message("Tumor: ", tot_map_reads$tumor)

  counts = lapply(counts, function(x){
    message("Counts file: ", basename(x))
    x = data.table::fread(file = x)
    x[,loci := gsub(pattern = "^chr", replacement = "", x = loci)]
    message("Markers: ", nrow(x))
    if(nrow(x[duplicated(loci)]) > 0){
      message("Removed ", nrow(x[duplicated(loci)]), " duplicated loci")
      x = x[!duplicated(loci)]
    }
    x[, tot_depth := apply(x[,.(A, T, G, C)], 1, sum)]
    x = x[tot_depth > min_depth]
    message("Markers > ", min_depth, ": ", nrow(x))
    x[, baf := apply(x[,.(A, T, G, C)], 1, function(r) {r = sort(r); r[4]/sum(r[4], r[3])})]
    #x[, baf := ifelse(test = baf < 0.5, yes = baf, no = 1 - baf)]
    #x$baf = ifelse(x$baf <0.5,x$baf,1-x$baf)
    x$baf = ifelse(runif(length(x$baf))<0.5,x$baf,1-x$baf)
    x = data.frame(data.table::tstrsplit(x = x$loci, split = ":"), x$baf, x$tot_depth, row.names = x$loci)
    colnames(x) = c("chr", "pos", "BAF", "depth")
    x$pos = as.numeric(as.character(x$pos))
    x
  })
  names(counts) = c("tumor")
  counts = counts$tumor

  med_cov = median(counts[counts$chr %in% c(1:22), "depth"], na.rm = TRUE)
  message("Median depth of coverage (autosomes): ", med_cov)
  message("------")

  counts$logR = round(log2(counts$depth) - log2(med_cov), digits = 3)

  data.table::fwrite(x = counts[,c("chr", "pos", "BAF")], file = paste0(sample_name, ".tumour.BAF.txt"), sep = "\t", row.names = TRUE)
  data.table::fwrite(x = counts[,c("chr", "pos", "logR")], file = paste0(sample_name, ".tumour.logR.txt"), sep = "\t", row.names = TRUE)

  message("Generated following files:")
  message(paste0(sample_name, ".tumour.BAF.txt"))
  message(paste0(sample_name, ".tumour.logR.txt"))
  message("------")

  # ascat_obj = NULL
  # if("ASCAT" %in% rownames(installed.packages())){
  #   message("Running ASCAT::ascat.loadData:")
  #   ascat_obj = ASCAT::ascat.loadData(Tumor_LogR_file = paste0(sample_name, ".tumour.logR.txt"),
  #                                     Tumor_BAF_file = paste0(sample_name, ".tumour.BAF.txt"),
  #                                     chrs = c(1:22, "X", "Y"), sexchromosomes = c("X", "Y"))
  #   message("Running ASCAT::ascat.plotRawData()")
  #   ASCAT::ascat.plotRawData(ASCATobj = ascat_obj, img.prefix = sample_name)
  #   #message("Running ASCAT::ascat.predictGermlineGenotypes() for tumor only")
  #   #ascat.gg = ASCAT::ascat.predictGermlineGenotypes(ASCATobj = ascat.bc, platform = PLATFORM)
  #
  #   message("Returned ASCAT object!")
  # }else{
  #   warning("ASCAT package not found!")
  # }
  #
  # ascat_obj
}
PoisonAlien/maftools documentation built on April 7, 2024, 2:49 a.m.