#' Summarize CBS segmentation results
#' @details
#' A handy function to summarize CBS segmentation results. Takes segmentation results generated by DNAcopy package \code{\link{segment}} and summarizes the CN for each cytoband and chromosomal arms.
#'
#' @param seg segmentation results generated from \code{DNAcopy} package \code{\link{segment}}. Input should be a multi-sample segmentation file or a data.frame. First six columns should correspond to sample name, chromosome, start, end, Num_Probes, Segment_Mean in log2 scale. (default output format from DNAcopy)
#' @param build genome build. Default hg19. Can be hg19, hg38. If other than these, use `cytoband` argument
#' @param cytoband cytoband data from UCSC genome browser. Only needed if `build` is other than `hg19` or `hg38`
#' @param thr threshold to call amplification and deletion. Any cytobands or chromosomal arms with median logR above or below this will be called. Default 0.3
#' @param verbose Default TRUE
#' @param maf optional MAF
#' @param genes Add mutation status of these genes as an annotation to the heatmap
#' @param topanno annotation for each sample. This is passed as an input to `annotation_col` of `pheatmap`
#' @param topannocols annotation cols for `topanno`. This is passed as an input to `annotation_colors` of `pheatmap`
#' @return List of median CN values for each cytoband and chromosomal arm along with the plotting matrix
#' @export
#' @examples
#' laml.seg <- system.file("extdata", "LAML_CBS_segments.tsv.gz", package = "maftools")
#' segSummarize(seg = laml.seg)
#'
#' #Heighlight some genes as annotation
#' laml.maf = system.file("extdata", "tcga_laml.maf.gz", package = "maftools") #MAF file
#' laml.clin = system.file('extdata', 'tcga_laml_annot.tsv', package = 'maftools') #clinical data
#' laml = read.maf(maf = laml.maf, clinicalData = laml.clin)
#'
#' segSummarize(seg = laml.seg, maf = laml, genes = c("FLT3", "DNMT3A"))
segSummarize = function(seg = NULL, build = "hg19", cytoband = NULL, thr = 0.3, verbose = TRUE, maf = NULL, genes = NULL, topanno = NULL, topannocols = NA){
if(is.null(cytoband)){
build = match.arg(arg = build, choices = c("hg19", "hg38"))
cytoband <- system.file("extdata", paste0(build, "_cytobands.tsv.gz"), package = "maftools")
if(!file.exists(cytoband)){
stop("Cytoband file does not exist!")
}
}
if (is(object = seg, class2 = "data.frame")) {
seg = data.table::as.data.table(seg)
}else {
seg = data.table::fread(input = seg)
}
colnames(seg)[1:6] = c("Sample", "Chromosome", "Start", "End", "Num_Probes", "Segment_Mean")
seg$Chromosome = gsub(pattern = "^chr", replacement = "", x = seg$Chromosome)
data.table::setkey(x = seg, Chromosome, Start, End)
if(nrow(seg[,.N,Sample]) < 3){
stop("Not enough samples to proceed (N: ", nrow(seg[,.N,Sample]), ")")
}
cy = data.table::fread(input = cytoband)
colnames(cy) = c("Chromosome", "Start", "End", "name", "stain")
cy$arm = substr(cy$name, 1, 1)
cy$Chromosome = gsub(pattern = "^chr", replacement = "", x = cy$Chromosome)
data.table::setkey(x = cy, Chromosome, Start, End)
seg_events = lapply(split(seg, seg$Sample), function(x){
.seg2arm(se = x, cy = cy, thr = thr)
})
cy[, id := paste(Chromosome, name, sep = "_")]
cy[, arm := paste(Chromosome, substr(name, 1, 1), sep = "_")]
#Summarize CN per chromosomal cytoband
cytoband_events = lapply(seg_events, function(x) x$seg) |> data.table::rbindlist(idcol = "Tumor_Sample_Barcode")
cytoband_events = data.table::dcast(data = cytoband_events, formula = chromosome+arm ~ Tumor_Sample_Barcode, value.var = "cn")
cytoband_events[, id := paste(chromosome, arm, sep = "_")]
cytoband_events$chromosome = NULL
cytoband_events$arm = NULL
data.table::setDF(x = cytoband_events, cytoband_events$id)
cytoband_events$id = NULL
cytoband_events = cytoband_events[cy[id %in% rownames(cytoband_events), id],,]
cytoband_events[cytoband_events > 4] = 4
cytoband_events = cytoband_events[complete.cases(cytoband_events),]
##row anno
cn_band_anno = as.data.frame(cy[id %in% rownames(cytoband_events)])
cn_band_anno = data.frame(row.names = cn_band_anno$id, chr = cn_band_anno$Chromosome, arm = substr(x = cn_band_anno$name, 1, 1))
#order by chromosome for human build
chr_ord = intersect(c(1:22, "X", "Y"), names(split(cn_band_anno, cn_band_anno$chr)))
cn_band_anno = cn_band_anno[unlist(lapply(split(cn_band_anno, cn_band_anno$chr)[chr_ord], rownames), use.names = FALSE), , drop = FALSE]
cytoband_events = cytoband_events[rownames(cn_band_anno),, drop = FALSE]
##Heatmap colors
cols_chromosome = setNames(object = rep(c("#95a5a6", "#7f8c8d"), length(unique(cn_band_anno$chr))), nm = unique(cn_band_anno$chr))
cols_chromosome = cols_chromosome[unique(cn_band_anno$chr)]
cols_chromosome_arms = setNames(object = c("#ecf0f1", "#bdc3c7"), nm = c("p", "q"))
gene2barcode = gene2barcode_cols = NA
show_annotation_legend = FALSE
if(!is.null(maf)){
if(!is.null(genes)){
gene2barcode = genesToBarcodes(maf = maf, genes = genes, justNames = TRUE)
print(gene2barcode)
gene2barcode = lapply(gene2barcode, function(x) data.table::data.table(Tumor_Sample_Barcode = x, status = 'yes')) |> data.table::rbindlist(idcol = "Hugo_Symbol") |> data.table::dcast(formula = Tumor_Sample_Barcode ~ Hugo_Symbol, value.var = "status")
data.table::setDF(x = gene2barcode, rownames = gene2barcode$Tumor_Sample_Barcode)
gene2barcode = gene2barcode[, 2:ncol(gene2barcode), drop = FALSE]
gene2barcode_cols = lapply(X = colnames(gene2barcode), function(x) setNames(object = "#34495e", nm = "yes"))
names(gene2barcode_cols) = colnames(gene2barcode)
}
}
if(!is.null(topanno)){
if(!is.null(nrow(gene2barcode))){
gene2barcode = merge(gene2barcode, topanno, by = "row.names", all = TRUE)
rownames(gene2barcode) = gene2barcode$`Row.names`
gene2barcode$`Row.names` = NULL
}else{
gene2barcode = topanno
}
show_annotation_legend = TRUE
}
pheatmap::pheatmap(
cytoband_events,
cluster_rows = FALSE,
cluster_cols = TRUE,
show_colnames = FALSE,
annotation_row = cn_band_anno,
color = grDevices::colorRampPalette(rev(brewer.pal(
n = 7, name = "PiYG"
)))(100),
annotation_colors = c(list(
chr = cols_chromosome,
arm = cols_chromosome_arms
), gene2barcode_cols, topannocols),
show_rownames = FALSE,
border_color = 'black', annotation_legend = show_annotation_legend, legend_breaks = seq(0, 4, 1), legend_labels = c("0", "1", "2", "3", ">4"), annotation_col = gene2barcode, na_col = "gray"
)
#Arm events
arm_events = lapply(seg_events, function(x) x$arm) |> data.table::rbindlist(fill = TRUE, use.names = TRUE, idcol = "Tumor_Sample_Barcode")
arm_events = arm_events[!is.na(Variant_Classification)][order(chromosome)]
arm_events_n = arm_events[,.N,.(arm,Variant_Classification)][!is.na(Variant_Classification)][order(-N)]
if(verbose){
message("Recurrent chromosomal arm aberrations")
print(arm_events_n)
}
#Focal band events
band_events = lapply(seg_events, function(x) x$seg) |> data.table::rbindlist(fill = TRUE, use.names = TRUE, idcol = "Tumor_Sample_Barcode")
band_events = band_events[!is.na(Variant_Classification)][order(chromosome)]
colnames(band_events)[3] = "cytoband"
band_events_n = band_events[,.N,.(chromosome, cytoband,Variant_Classification)][!is.na(Variant_Classification)][order(-N)]
attr(cytoband_events, "meta") = list(top_anno = gene2barcode, top_anno_cols = gene2barcode_cols)
list(heatmap_matrix = cytoband_events, arm_events = arm_events, band_events = band_events)
}
.seg2arm = function (se, cy = NULL, thr = 0.3) {
data.table::setkey(x = se, Chromosome, Start, End)
se_olaps = data.table::foverlaps(x = cy, y = se)
cytoband_mean = se_olaps[, median(Segment_Mean, na.rm = TRUE),
.(Chromosome, name)]
colnames(cytoband_mean) = c("chromosome", "arm", "logR")
cytoband_mean[, `:=`(cn, 2 * (2^logR))]
cytoband_mean$Variant_Classification = ifelse(cytoband_mean$logR >
thr, "Amp", no = ifelse(cytoband_mean$logR < -thr, yes = "Del",
no = NA))
cytoband_mean = cytoband_mean[!chromosome %in% c("chrX",
"chrY")]
arm_mean = se_olaps[, median(Segment_Mean, na.rm = TRUE),
.(Chromosome, arm)]
colnames(arm_mean) = c("chromosome", "arm", "logR")
arm_mean[, `:=`(cn, 2 * (2^logR))]
arm_mean$Variant_Classification = ifelse(arm_mean$logR >
thr, "Gain", no = ifelse(arm_mean$logR < -thr, yes = "Loss",
no = NA))
arm_mean$arm = paste(arm_mean$chromosome, arm_mean$arm, sep = "_")
arm_mean = arm_mean[!chromosome %in% c("chrX", "chrY")]
list(arm = arm_mean, seg = cytoband_mean)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.