R/segment_logR.R

Defines functions .dnaCopy_plotter segmentLogR

Documented in segmentLogR

#' Segment and plot log ratio values with DNACopy
#' @description The function takes logR file generated by \code{\link{prepAscat}} or \code{\link{prepAscat_t}} and performs segmentation with \code{\link{DNAcopy}}
#' @param tumor_logR logR.txt file generated by \code{\link{prepAscat}} or \code{\link{prepAscat_t}}
#' @param sample_name Default NULL. Parses from `tumor_logR` file
#' @param build Reference genome. Default hg19. Can be hg18, hg19, or hg38
#' @return Invisibly returns \code{\link{DNAcopy}} object
#' @seealso \code{\link{gtMarkers}} \code{\link{prepAscat}}
#' @export
#' @import DNAcopy
segmentLogR = function(tumor_logR = NULL, sample_name = NULL, build = "hg19"){

  if(is.null(sample_name)){
    sample_name = gsub(pattern = "\\.logR\\.txt", replacement = "", x = basename(path = tumor_logR))
  }

  tn = data.table::fread(input = tumor_logR)
  colnames(tn)[1:4] = c('SNP', 'contig', 'pos', 'logR')
  tn$contig = gsub(pattern = 'chr', replacement = '', x = tn$contig, fixed = TRUE)
  tnxy = tn[contig %in% c('X', 'Y')]
  tn = tn[!contig %in% c('X', 'Y')]
  #tn = tn[!contig == 'Y']
  tn = tn[order(as.numeric(tn$contig)),]
  tn = rbind(tn, tnxy)

  #samp.name = gsub(pattern = '.denoisedCR.tsv', replacement = '', x = copynumber_file)
  cn = DNAcopy::CNA(genomdat = tn[,logR], chrom = tn[,contig], maploc = tn[,pos],
                    data.type = "logratio", sampleid= sample_name, presorted = TRUE)

  cn = DNAcopy::smooth.CNA(cn)
  cn = DNAcopy::segment(cn, alpha = 0.01, nperm = 10000, p.method = 'hybrid', min.width = 5, kmax = 25, nmin = 210,
                        eta = 0.05, trim = 0.025, undo.SD = 3, undo.prune = 0.05, undo.splits = 'sdundo', verbose = 2)

  segs = cn$output
  colnames(segs) = c("Sample",'Chromosome','Start','End','Num_Probes','Segment_Mean')
  write.table(segs, paste(sample_name, '_cbs.seg', sep=''), quote = FALSE, row.names = FALSE, sep='\t')

  png(filename = paste(sample_name, '_cbs.png', sep=''), width = 8, height = 4, bg = "white", units = "in", res = 70)
  .dnaCopy_plotter(dc = cn, genome = build)
  dev.off()

  message("Segments are written to: ", paste(sample_name, '_cbs.seg', sep=''))
  message("Segments are plotted to: ", paste(sample_name, '_cbs.png', sep=''))

  invisible(cn)
  #save(cn, file = paste(sample_name, '_cbs.RData', sep=''))
}


.dnaCopy_plotter = function(dc, genome = "hg19"){
  dc.dat = dc$output

  tn = data.table::data.table(name = unique(dc$output[,1]) ,contig = dc$data$chrom, start = dc$data$maploc, stop = dc$data$maploc, ratio = dc$data[,3])
  #colnames(tn)[5] = unique(dc$output[,ID])
  tn$contig = gsub(pattern = 'chr', replacement = '', x = tn$contig, fixed = T)
  tn.xy = tn[contig %in% c("X", "Y")]
  tn = tn[!contig %in% c("X", "Y")]
  tn = tn[order(as.numeric(as.character(contig)))]
  # tn = tn[-grep(pattern = 'X',x = tn$contig),]
  # tn = tn[-grep(pattern = 'Y', x = tn$contig),]
  tn = rbind(tn, tn.xy)

  seg = dc$output
  data.table::setDT(x = seg)
  colnames(seg) = c('Sample', 'Chromosome', 'Start', 'End', 'Num_Probes', 'Segment_Mean')
  seg$Chromosome = gsub(pattern = 'chr', replacement = '', x = seg$Chromosome, fixed = T)
  seg.xy = seg[Chromosome %in% c("X", "Y")]
  seg = seg[!Chromosome %in% c("X", "Y")]
  # seg = seg[-grep(pattern = 'X', x = seg$Chromosome),]
  # seg = seg[-grep(pattern = 'Y', x = seg$Chromosome),]
  seg = seg[order(as.numeric(Chromosome))]
  seg = rbind(seg, seg.xy)

  tn$contig = factor(x = tn$contig, levels = c(as.character(1:22), "X", "Y"))
  seg$Chromosome = factor(x = seg$Chromosome, levels = c(as.character(1:22), "X", "Y"))

  tn.spl = split(tn, as.factor(tn$contig))
  seg.spl = split(seg, as.factor(seg$Chromosome))

  chr.lens = getContigLens(build = genome)

  tn.spl.tranformed = tn.spl[[1]]
  seg.spl.transformed = seg.spl[[1]]

  chr.lens.sumsum = cumsum(chr.lens)

  for(i in 2:length(tn.spl)){
    x = tn.spl[[i]]
    x$start = x$start + chr.lens.sumsum[i-1]
    x$stop = x$stop + chr.lens.sumsum[i-1]
    tn.spl.tranformed = rbind(tn.spl.tranformed, x)

    x.seg = seg.spl[[i]]
    x.seg$Start = x.seg$Start + chr.lens.sumsum[i-1]
    x.seg$End = x.seg$End + chr.lens.sumsum[i-1]
    seg.spl.transformed = rbind(seg.spl.transformed, x.seg)
  }

  #tn.spl.tranformed$contig = factor(x = tn.spl.tranformed$contig, levels = c(1:22, "X", "Y"))
  #y.max = round(max(tn.spl.tranformed[,5]))
  samp.name = colnames(tn.spl.tranformed)[5]

  data.table::setDF(x = tn.spl.tranformed)
  data.table::setDF(x = seg.spl.transformed)

  #pdf(file = paste0(samp.name, ".pdf"), width = 8, height = 4, bg = "white", paper = "special")
  #ylims = c(floor(min(tn.spl.tranformed[,5], na.rm = TRUE)), ceiling(max(tn.spl.tranformed[,5], na.rm = TRUE)))
  ylims = c(-2, 2)
  par(mar = c(3, 3, 2, 2))
  plot(tn.spl.tranformed[,3], tn.spl.tranformed[,5], xlim = c(0, max(chr.lens.sumsum)), pch = 16,
       cex = 0.1, frame.plot = FALSE, axes = FALSE, xlab = NA, ylab = NA, ylim = ylims, col = "gray70")
  abline(v = chr.lens.sumsum, lty = 2, col = "gray70")
  abline(h = 0, lwd = 1, lty = 2, col = "black")
  segments(x0 = seg.spl.transformed$Start, y0 = seg.spl.transformed$Segment_Mean,
           x1 = seg.spl.transformed$End, y1 = seg.spl.transformed$Segment_Mean, col = "maroon")
  axis(side = 1, at = c(0, chr.lens.sumsum), labels = c(0, 1:22, "X", "Y"), font = 2, cex.axis = 0.7, las = 2)
  axis(side = 2, at = c(ylims[1], -1, 0, 1, ylims[2]), font = 2, las = 2)
  #dev.off()
}
PoisonAlien/maftools documentation built on April 7, 2024, 2:49 a.m.