Nothing
#' @title plotCNA
#'
#' @param seg Object generated by \code{\link{readSegment}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param sampleOrder A named list which contains the sample order used in plotting the final profile. Default NULL.
#' @param chrSilent Chromosomes excluded in the analysis. e.g, 1, 2, 3. Default NULL.
#' @param refBuild Human reference genome versions of hg18, hg19 or hg38 by UCSC. Default "hg19".
#' @param sample.text.size Fontsize of sample name. Default 11.
#' @param chrom.text.size Fontsize of chromosome text. Default 3.
#' @param legend.text.size Fontsize of legend text. Default 9.
#' @param legend.title.size Fontsize of legend title. Default 11.
#' @param annot.text.size Fontsize of cytoband or gene symbols. Default 3.
#' @param sample.bar.height Bar height of each sample. Default 0.5.
#' @param chrom.bar.height Bar height of each chromosome. Default 0.5.
#' @param showRownames Logical (Default: TRUE). Show sample names of rows.
#' @param removeEmptyChr Remove empty chromosomes that do not exist in all samples. Default TRUE.
#' @param showCytoband Logical (Default: FALSE). Show cytobands on the plot. Only when the seg object is created with GISTIC results, this parameter can be TRUE.
#' @param showGene Logical (Default: FALSE). Show gene symbols on the plot. Only when the seg object is created with txdb, this parameter can be TRUE.
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' with 'Tumor_Sample_Label'.
#'
#' @examples
#' segFile <- system.file("extdata", "CRC_HZ.seg.txt", package = "MesKit")
#' seg <- readSegment(segFile = segFile)
#' plotCNA(seg)
#'
#' ## showCytoband
#' gisticAmpGenesFile <- system.file("extdata", "COREAD_amp_genes.conf_99.txt", package = "MesKit")
#' gisticDelGenesFile <- system.file("extdata", "COREAD_del_genes.conf_99.txt", package = "MesKit")
#' gisticAllLesionsFile <- system.file("extdata", "COREAD_all_lesions.conf_99.txt", package = "MesKit")
#' seg <- readSegment(segFile = segFile,
#' gisticAmpGenesFile = gisticAmpGenesFile,
#' gisticDelGenesFile = gisticDelGenesFile,
#' gisticAllLesionsFile = gisticAllLesionsFile)
#' plotCNA(seg, showCytoband = TRUE)
#'
#'
#' @return a heatmap plot of CNA profile
#' @import cowplot RColorBrewer
#' @importFrom Biostrings start end
#' @export plotCNA
#'
plotCNA <- function(seg,
patient.id = NULL,
sampleOrder = NULL,
chrSilent = NULL,
refBuild = "hg19",
sample.text.size = 11,
chrom.text.size = 3,
legend.text.size = 9,
legend.title.size = 11,
annot.text.size = 3,
sample.bar.height = 0.5,
chrom.bar.height = 0.5,
showRownames = TRUE,
removeEmptyChr = TRUE,
showCytoband = FALSE,
showGene = FALSE,
use.tumorSampleLabel = FALSE
){
## combine data frame
if(is(seg, "list")){
seg <- dplyr::bind_rows(seg) %>% as.data.table()
}
if(use.tumorSampleLabel){
if(!"Tumor_Sample_Label" %in% colnames(seg)){
stop("Tumor_Sample_Label was not found. Please check seg file or let use.tumorSampleLabel be 'FALSE'")
}
seg <- seg %>%
dplyr::mutate(Tumor_Sample_Barcode = .data$Tumor_Sample_Label)
}
if(!is.null(chrSilent)){
chrSilent <- gsub(pattern = 'chr', replacement = '', x = chrSilent, fixed = TRUE)
seg <- seg[!seg$Chromosome %in% chrSilent]
chrSilent <- gsub(pattern = 'X', replacement = '23', x = chrSilent, fixed = TRUE)
chrSilent <- gsub(pattern = 'Y', replacement = '24', x = chrSilent, fixed = TRUE)
}
seg$Chromosome = gsub(pattern = 'X', replacement = '23', x = seg$Chromosome, fixed = TRUE)
seg$Chromosome = gsub(pattern = 'Y', replacement = '24', x = seg$Chromosome, fixed = TRUE)
## select patients in patient.id
if(is.null(patient.id)|length(patient.id) > 1){
if(length(patient.id) > 1){
patient.setdiff <- setdiff(patient.id, unique(seg$Patient_ID))
if(length(patient.setdiff) > 0){
stop(paste0(patient.setdiff, " cannot be found in your data"))
}
seg <- seg[seg$Patient_ID %in% patient.id, ]
seg$patient <- factor(seg$Patient_ID, levels = rev(patient.id))
}else{
seg$patient <- seg$Patient_ID
}
patient_num <- length(unique(seg$Patient_ID))
seg <- dplyr::select(seg, -"Patient_ID")
seg$Patient_ID <- "ALL"
}else if(length(patient.id) == 1){
patient.setdiff <- setdiff(patient.id, unique(seg$Patient_ID))
if(length(patient.setdiff) > 0){
stop(paste0(patient.setdiff, " cannot be found in your data"))
}
patient_num <- 1
seg <- seg[seg$Patient_ID %in% patient.id, ]
seg$patient <- seg$Patient_ID
seg <- dplyr::select(seg, -"Patient_ID")
}
# if(show.GISTIC.gene){
# if(!"Gistic_type" %in% colnames(seg)){
# stop("Error: GISTIC genes information were not found. Please check readSegment")
# }
# }
# if("Start" %in% colnames(seg)){
# seg <- dplyr::rename(seg,Start_Position = Start, End_Position = End)
# }
ref.options <- c("hg19","hg18","hg38")
if(!refBuild %in% ref.options){
stop("refBuild should be either 'hg18','hg19' or 'hg38'.")
}
if(refBuild == 'hg19'){
chrLens = c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663,
146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540,
102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566,
155270560, 59373566)
} else if(refBuild == 'hg18'){
chrLens = c(247249719, 242951149, 199501827, 191273063, 180857866, 170899992,
158821424, 146274826, 140273252, 135374737, 134452384, 132349534,
114142980, 106368585, 100338915, 88827254, 78774742, 76117153,
63811651, 62435964, 46944323, 49691432, 154913754, 57772954)
} else if(refBuild == 'hg38'){
chrLens = c(248956422, 242193529, 198295559, 190214555, 181538259, 170805979,
159345973, 145138636, 138394717, 133797422, 135086622, 133275309,
114364328, 107043718, 101991189, 90338345, 83257441, 80373285,
58617616, 64444167, 46709983, 50818468, 156040895, 57227415)
}
chrLabels <- seq_len(24)
chrTable <- data.table::data.table(chr = as.character(chrLabels),
start = as.numeric(chrLens),
end = as.numeric(chrLens))
if(removeEmptyChr){
chr_seg <- unique(as.character(seg$Chromosome))
chrTable <- dplyr::filter(chrTable, .data$chr %in% chr_seg)
}
chrTable <- chrTable %>%
dplyr::filter(!.data$chr %in% chrSilent)
seg <- seg %>%
dplyr::filter(!.data$Chromosome %in% chrSilent)
chrLens <- append(cumsum(chrTable$start),1,after = 0)
chrTable <- chrTable %>%
dplyr::mutate(
start = chrLens[seq_len(length(chrLens)-1)],
end = chrLens[2:length(chrLens)]
)
chrLabels <- chrTable$chr
# chr_bar_color <- c()
# chr_text_color <- c()
chr_bar_color <- lapply(seq_len(length(as.numeric(chrLabels) )), function(i){
if((i %% 2) == 0){
return("white")
}else{
return("black")
}
}) %>% unlist()
chr_text_color <- lapply(seq_len(length(as.numeric(chrLabels))), function(i){
if((i %% 2) == 0){
return("black")
}else{
return("white")
}
}) %>% unlist()
chrTable$color = chr_bar_color
chr_start_pos <- as.numeric(chrTable$start)
names(chr_start_pos) <- paste0("chr",chrTable$chr)
seg <- seg %>%
dplyr::mutate(
Update_Start = as.numeric(.data$Start_Position) + chr_start_pos[paste0("chr",.data$Chromosome)],
Update_End = as.numeric(.data$End_Position) + chr_start_pos[paste0("chr",.data$Chromosome)]
)
seg$hmin <- 0
seg$hmax <- 0
h <- chrom.bar.height
if("patient" %in% colnames(seg)& length(unique(seg$patient)) > 1){
## sort sampleid
seg <- seg %>%
dplyr::arrange(dplyr::desc(.data$patient) ,
dplyr::desc(.data$Tumor_Sample_Barcode),
.data$Chromosome,
.data$Start_Position) %>%
as.data.table()
seg$Sample_ID <- paste(seg$patient, seg$Tumor_Sample_Barcode,sep = "&")
sampleids <- unique(seg$Sample_ID)
## reorder sample by sampleOrder
if(!is.null(sampleOrder)){
patient.setdiff <- setdiff(names(sampleOrder), unique(seg$patient))
if(length(patient.setdiff) > 0){
stop(paste0(patient.setdiff,
" cannot be found in your data. Please check sampleOrder!"))
}
slist <- lapply(unique(seg$patient) ,function(p){
o1 <- sampleOrder[[p]]
if(is.null(o1)){
return(sampleids[grepl(p, sampleids)])
}
o2 <- unique(seg[seg$patient == p]$Tumor_Sample_Barcode)
s1 <- paste(p, o1, sep = "&")
sample.setdiff <- setdiff(o1, o2)
if(length(sample.setdiff) > 0){
stop(paste0(paste(sample.setdiff, collapse = ","),
" cannot be found in ", p, ". Please check sampleOrder!"))
}else{
return(s1)
}
}) %>% unlist()
sampleids <- slist
}
patient.rect.hmin <- c(h)
patient.rect.hmax <- c()
sample_patient_list <- (seg %>% dplyr::distinct(.data$Sample_ID, .keep_all = TRUE))$patient
hlist1 <- lapply(seq_len(length(sampleids)), function(i){
p <- sample_patient_list[i]
h1 <- h + (sample.bar.height + sample.bar.height/10) * (i-1)
p_idx <- which(unique(seg$patient) == p)
h1 <- h1 + sample.bar.height*2/5 * (p_idx - 1)
return(h1)
}) %>% unlist()
seg_sample_list <- lapply(seq_len(length(sampleids)), function(i){
h1 <- hlist1[i]
sampleid <- sampleids[i]
dat <- seg[seg$Sample_ID == sampleid,]
dat$hmin <- h1
dat$hmax <- h1 + sample.bar.height
return(dat)
})
seg <- dplyr::bind_rows(seg_sample_list)
patient.rect.hmin <- lapply(unique(seg$patient), function(p){
return(min(seg[seg$patient == p,]$hmin))
}) %>% unlist()
patient.rect.hmax <- lapply(unique(seg$patient), function(p){
return(max(seg[seg$patient == p,]$hmax))
}) %>% unlist()
patient.rect.table <- data.frame(hmin = patient.rect.hmin,
hmax = patient.rect.hmax,
patient = as.character(unique(seg$patient)))
}else{
seg <- seg %>%
dplyr::arrange(.data$patient,
.data$Tumor_Sample_Barcode,
.data$Chromosome,
.data$Start_Position) %>%
as.data.table()
seg$Sample_ID <- paste(seg$patient,seg$Tumor_Sample_Barcode,sep = "&")
sampleids <- sort(unique(seg$Sample_ID))
## sort sampleid
p <- unique(seg$patient)
if(!is.null(sampleOrder)){
patient.setdiff <- setdiff(names(sampleOrder), p)
if(length(patient.setdiff) > 0){
stop(paste0(patient.setdiff,
" cannot be found in your data. Please check sampleOrder!"))
}
o1 <- sampleOrder[[p]]
o2 <- unique(seg[seg$patient == p]$Tumor_Sample_Barcode)
s1 <- paste(p, sampleOrder[[p]], sep = "&")
s2 <- paste(p, unique(seg[seg$patient == p]$Tumor_Sample_Barcode), sep = "&")
sample.setdiff <- setdiff(o1, o2)
if(length(sample.setdiff) > 0){
stop(paste0(paste(sample.setdiff, collapse = ","),
" cannot be found in ", p, ". Please check sampleOrder!"))
}else if(length(sample.setdiff) == 0 & length(s1)< length(s2)){
s3 <- setdiff(s2, s1)
sampleids <- sampleids[!sampleids %in% s3]
sampleids[grepl(p, sampleids)] <- s1
}else if(length(sample.setdiff) > 0){
sampleids[grepl(p, sampleids)] <- s1
}
}
seg$Sample_ID <- factor(seg$Sample_ID, levels = sampleids)
seg <- dplyr::arrange(seg, .data$Sample_ID)
hlist1 <- seq(h,
h + (sample.bar.height + sample.bar.height/10) * (length(sampleids)-1),
(sample.bar.height + sample.bar.height/10))
hlist2 <- hlist1 + sample.bar.height
slist <- lapply(seq_len(length(sampleids)), function(i){
h <- hlist1[i]
sampleid <- sampleids[i]
dat <- seg[seg$Sample_ID == sampleid,]
dat$hmin <- h
dat$hmax <- h + sample.bar.height
return(dat)
})
seg <- dplyr::bind_rows(slist)
# for(sampleid in sampleids){
# seg[seg$Sample_ID == sampleid,]$hmin <- h
# seg[seg$Sample_ID == sampleid,]$hmax <- h + sample.bar.height
# h <- h + sample.bar.height + sample.bar.height/10
# }
}
seg <- seg[seg$Sample_ID %in% sampleids]
min <- sort(unique(seg$hmin))
max <- sort(unique(seg$hmax))
CNADat <- seg[seg$Type != "Neutral",]
patient.type <- unique(CNADat$Type)
## set color for CNA type
type.all.levels <- c("Loss", "Deletion", "Gain", "Amplification")
type.all.colors <- c("#6baed6", "#084594", "#f4a582", "#d73027")
type.level <- type.all.levels[type.all.levels %in% patient.type]
type.color <- type.all.colors[type.all.levels %in% patient.type]
CNADat$Type <- factor(CNADat$Type, levels = type.level)
segmentTable <- data.table::data.table(y = c(min(min),max(max)), yend = c(min(min),max(max)))
seg.add <- 20000000
text.add <- -200000000
segmentTable$x <- -seg.add
segmentTable$xend <- max(chrTable$end)+seg.add
segmentTable <- rbind(segmentTable, list(min(min), max(max), -seg.add,-seg.add))
segmentTable <- rbind(segmentTable, list(min(min), max(max), max(chrTable$end)+seg.add,max(chrTable$end)+seg.add))
## get tumor sample barcode of patient
patient.tsbs <- unlist(lapply(sampleids,
function(x){return(strsplit(x,"&")[[1]][2])}))
## label on axis Y
Y.text.table <- data.table::data.table(Pos = min + (max-min)/2,
Tumor_Sample_Barcode = patient.tsbs)
## set background for the graph
backgroundTable <- data.table::data.table(
xmin = min(chrTable$start),
xmax = max(chrTable$end),
ymin = min,
ymax = max)
## set color for patient bar
allScales <- type.color
names(allScales) <- type.level
if("patient" %in% colnames(seg)&
length(unique(seg$patient)) > 1){
# set.seed(1234)
# patient_colors <- sample(colors(),length(patient.rect.table$patient),replace = FALSE)
# names(patient_colors) <- patient.rect.table$patient
qual_col_pals <- RColorBrewer::brewer.pal.info[RColorBrewer::brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(RColorBrewer::brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
patient_colors <- col_vector[seq_len(length(patient.rect.table$patient))]
names(patient_colors) <- patient.rect.table$patient
}
## cytoband table
# Cytoband_table <- CNADat %>%
# distinct(Cytoband, .keep_all = TRUE)
## convert chromosome name
chrTable$chr = gsub(pattern = '23', replacement = 'X', x = chrTable$chr, fixed = TRUE)
chrTable$chr = gsub(pattern = '24', replacement = 'Y', x = chrTable$chr, fixed = TRUE)
## initialize
xmin <- NULL
xmax <- NULL
ymin <- NULL
ymax <- NULL
Update_Start <- NULL
Update_End <- NULL
hmin <- NULL
hmax <- NULL
Type <- NULL
chr <- NULL
sample_num <- length(unique(CNADat$Sample_ID))
p <- ggplot()+
## background color
geom_rect(data = backgroundTable,
mapping = aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
fill = "#f0f0f0")+
## CNA regions
geom_rect(data = CNADat,
mapping = aes(xmin = Update_Start, xmax = Update_End, ymin = hmin, ymax = hmax, fill = Type))+
scale_fill_manual(values = type.color, name = "Type")+
## chr bar
geom_rect(data = chrTable,
mapping = aes(xmin = start, xmax = end, ymin = 0, ymax = chrom.bar.height),
fill = chr_bar_color, color = "black")+
geom_text(data = chrTable,
mapping = aes(x = start + (end - start)/2, y = chrom.bar.height/2, label = chr),
size = chrom.text.size,
color = chr_text_color)+
## vertical line
geom_segment(aes(x = chrTable$end[seq_len(nrow(chrTable)-1)],
xend = chrTable$end[seq_len(nrow(chrTable)-1)],
y = chrom.bar.height,
yend = max(backgroundTable$ymax)),
linetype = "dotted",
color = "#252525",
size = 0.7) +
theme(
axis.text.x = element_blank(),
# axis.text.y.left = element_text(size = sample.text.size,
# margin = margin(l = 0),
# colour = "black"),
axis.ticks = element_blank(),
# axis.line.y = element_line(colour = "darkblue", size = 1, linetype = "solid"),
panel.grid =element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
# legend.key.width = unit(1.1,'lines'),
# legend.box.margin = margin(l = 0),
# legend.title = element_text(size = legend.title.size),
# legend.text = element_text(size = legend.text.size,margin = margin(b = 3))
)+
# coord_fixed(ratio = 1) +
scale_x_continuous(expand = c(0,0))+
# scale_y_continuous(breaks = Y.text.table$Pos,
# labels = Y.text.table$Tumor_Sample_Barcode)+
#ggtitle("Copy number alteration profile")+
theme(plot.title = element_text(size = 13.5, face = "bold", hjust = 0.5, vjust = -2))
if(showRownames){
p <- p + scale_y_continuous(breaks = Y.text.table$Pos,
labels = Y.text.table$Tumor_Sample_Barcode) +
theme(
axis.text.y.left = element_text(size = sample.text.size,
margin = margin(l = 0),
colour = "black")
)
}else{
p <- p + theme(axis.text.y.left = element_blank())
}
if(showCytoband){
if(!"Cytoband_pos" %in% colnames(CNADat)){
stop("Cannot find information of cytobands. Please check your seg object.")
}
## cytoband
CNADat$Cytoband_pos <- as.numeric(CNADat$Start_Position) + chr_start_pos[paste0("chr",CNADat$Chromosome)]
Cytoband_table <- CNADat %>%
distinct(Cytoband, .keep_all = TRUE)
p <- p +
## blank bar
geom_rect(aes(xmin = 0,
xmax = max(backgroundTable$xmax),
ymin = max(backgroundTable$ymax),
ymax = max(backgroundTable$ymax) + sample.bar.height*sample_num/5),
fill = "white") +
ggrepel::geom_text_repel(data = Cytoband_table,
aes(x = Cytoband_pos,
y = max(backgroundTable$ymax),
label = Cytoband),
size = annot.text.size,
max.overlaps = 200,
angle = 90,
nudge_y = sample.bar.height*0.3*sample_num/8,
direction = "x",
vjust = 1)
}
if(showGene){
if(showGene & showCytoband){
stop("showGene and showCytoband cannot both be TRUE.")
}
if(!"Hugo_Symbol" %in% colnames(CNADat)){
stop("Cannot find gene information. Please provide correct 'txdb' object in readSegment.")
}
CNADat$gene_pos <- as.numeric(CNADat$Update_Start) + (as.numeric(CNADat$Update_End) - as.numeric(CNADat$Update_Start ))/2
CNADat$gene_id <- paste(CNADat$Hugo_Symbol)
gene_table <- CNADat %>%
distinct(gene_id, .keep_all = TRUE)
p <- p +
geom_rect(aes(xmin = 0,
xmax = max(backgroundTable$xmax),
ymin = max(backgroundTable$ymax),
ymax = max(backgroundTable$ymax) + sample.bar.height*sample_num/5),
fill = "white") +
ggrepel::geom_text_repel(data = gene_table,
aes(x = gene_pos,
y = max(backgroundTable$ymax) + sample.bar.height*sample_num/5,
label = Hugo_Symbol),
size = annot.text.size,
angle = 90,
# nudge_y = sample.bar.height*0.3*patient_num,
direction = "both",
vjust = 1)
}
## patient bar
if("patient" %in% colnames(seg)&
length(unique(seg$patient)) > 1){
p <- p +
geom_rect(data = patient.rect.table,
mapping = aes(xmax = -20000000, xmin = -40000000,ymin = hmin, ymax = hmax),
fill = patient_colors)
# print(CNADat)
## multi legend
patients <- unique(seg$patient)
## initialize
patient <- NULL
patient_legend <- (
ggplot()+
geom_rect(data = patient.rect.table,
mapping = aes(xmax = -20000000, xmin = -40000000,
ymin = hmin, ymax = hmax,fill = patient)) +
scale_fill_manual(breaks = rev(patients),
values = patient_colors,
name = "Patient") +
theme(legend.background = element_blank(),
legend.title = element_text(size = legend.title.size),
legend.text = element_text(size = legend.text.size,margin = margin(b = 3)))
)%>% ggplotGrob()
patient_legend <- patient_legend$grobs[[which(unlist(lapply(patient_legend$grobs, function(x) {x$name})) == "guide-box")]]
type_legend <- (
ggplot()+
geom_rect(data = CNADat,mapping = aes(xmin = Update_Start, xmax = Update_End, ymin = hmin, ymax = hmax, fill = Type))+
scale_fill_manual(values = type.color, name = "Type")+
theme(legend.background = element_blank(),
legend.title = element_text(size = legend.title.size),
legend.text = element_text(size = legend.text.size,margin = margin(b = 3)))
) %>% ggplotGrob()
type_legend <- type_legend$grobs[[which(unlist(lapply(type_legend$grobs, function(x) {x$name})) == "guide-box")]]
legends_column <-
plot_grid(
patient_legend + theme(legend.position = "none"),
type_legend,
patient_legend + theme(legend.position = "none"),
patient_legend,
patient_legend + theme(legend.position = "none"),
ncol = 1,
rel_heights = c(1,1,.1,1,1),
align = "v")
p <- plot_grid(p + theme(legend.position = "none"),
legends_column + theme(plot.margin = margin(r = 15)),
nrow = 1,
rel_widths = c(1,.2))
}
# if(length(CNAplot.list) == 1){
# return(CNAplot.list[[1]])
# }
return(p)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.