R/prepAscat_t.R

Defines functions prep_ascat_t

Documented in prep_ascat_t

#'Prepare input files for ASCAT tumor only samples
#' @param t_counts read counts from tumor generated by `get_counts`
#' @param sample_name Sample name. Used as a basename for output files. Default NA, parses from t_counts file.
#' @param min_depth Min read depth required to consider a marker. Default 30
#' @param PLATFORM Default AffySNP6. Only change if you have used custom loci. See here for available options https://www.crick.ac.uk/research/labs/peter-van-loo/software
#' @return An \code{\link{ascat.loadData}} object; ascat data structure
#' @export

prep_ascat_t = function(t_counts = NULL, sample_name = NA, min_depth = 30, PLATFORM = "AffySNP6"){
  
  if(any(is.null(t_counts))) stop("Missing tumor or normal read counts!")
  
  if(is.na(sample_name)){
    sample_name = gsub(pattern = "\\.tsv$", replacement = "", x = basename(path = t_counts))
  }
  
  counts = c(t_counts)
  
  #library sizes
  tot_map_reads = lapply(counts, function(x){
    x = data.table::fread(input = x,nrows = 1)
    as.numeric(x$V2)
  })
  names(tot_map_reads) = c("tumor")
  message("Library sizes:")
  message("Tumor: ", tot_map_reads$tumor)
  
  counts = lapply(counts, function(x){
    message("Counts file: ", basename(x))
    x = data.table::fread(file = x)
    x[,loci := gsub(pattern = "^chr", replacement = "", x = loci)]
    message("Markers: ", nrow(x))
    if(nrow(x[duplicated(loci)]) > 0){
      message("Removed ", nrow(x[duplicated(loci)]), " duplicated loci")
      x = x[!duplicated(loci)]
    }
    x[, tot_depth := apply(x[,.(A, T, G, C)], 1, sum)]
    x = x[tot_depth > min_depth]
    message("Markers > ", min_depth, ": ", nrow(x))
    x[, baf := apply(x[,.(A, T, G, C)], 1, function(r) {r = sort(r); r[4]/sum(r[4], r[3])})]
    #x[, baf := ifelse(test = baf < 0.5, yes = baf, no = 1 - baf)]
    #x$baf = ifelse(x$baf <0.5,x$baf,1-x$baf)
    x$baf = ifelse(runif(length(x$baf))<0.5,x$baf,1-x$baf)
    x = data.frame(data.table::tstrsplit(x = x$loci, split = ":"), x$baf, x$tot_depth, row.names = x$loci)
    colnames(x) = c("chr", "pos", "BAF", "depth")
    x$pos = as.numeric(as.character(x$pos))
    x
  })
  names(counts) = c("tumor")
  counts = counts$tumor
  
  med_cov = median(counts[counts$chr %in% c(1:22), "depth"], na.rm = TRUE)
  message("Median depth of coverage (autosomes): ", med_cov)
  message("------")
  
  counts$logR = round(log2(counts$depth) - log2(med_cov), digits = 3)
  
  data.table::fwrite(x = counts[,c("chr", "pos", "BAF")], file = paste0(sample_name, ".tumour.BAF.txt"), sep = "\t", row.names = TRUE)
  data.table::fwrite(x = counts[,c("chr", "pos", "logR")], file = paste0(sample_name, ".tumour.logR.txt"), sep = "\t", row.names = TRUE)
  
  message("Generated following files:")
  message(paste0(sample_name, ".tumour.BAF.txt"))
  message(paste0(sample_name, ".tumour.logR.txt"))
  message("------")
  
  ascat_obj = NULL
  if("ASCAT" %in% rownames(installed.packages())){
    message("Running ASCAT::ascat.loadData:")
    ascat_obj = ASCAT::ascat.loadData(Tumor_LogR_file = paste0(sample_name, ".tumour.logR.txt"),
                                      Tumor_BAF_file = paste0(sample_name, ".tumour.BAF.txt"),
                                      chrs = c(1:22, "X", "Y"), sexchromosomes = c("X", "Y"))
    message("Running ASCAT::ascat.plotRawData()")
    ASCAT::ascat.plotRawData(ASCATobj = ascat_obj, img.prefix = sample_name)
    #message("Running ASCAT::ascat.predictGermlineGenotypes() for tumor only")
    #ascat.gg = ASCAT::ascat.predictGermlineGenotypes(ASCATobj = ascat.bc, platform = PLATFORM) 
    
    message("Returned ASCAT object!")
  }
  
  ascat_obj
}
CompEpigen/ezASCAT documentation built on Dec. 17, 2021, 3:04 p.m.