#' function to create an oncoprint plot
#'
#' @param A dataframe containing the alterations
#' @param keys list with the following elements: splicing, somatic,
#' germline, amp, del, upreg, downreg
#' @param sortGenes boolean whether or not to sort the genes, default TRUE
#' @importFrom ggplot2 ggplot labs scale_alpha_manual scale_fill_manual theme theme_minimal geom_tile element_text aes element_blank
#' @importFrom tidyr gather
#' @export
oncoprint <- function(A,
keys = list(somatic = "MUT", germline = "GERMLINE", amp = "AMP",
del = "HOMDEL", upreg = "UP", downreg = "DOWN",
splicing = "SPLICING"),
sortGenes = TRUE){
M <- long2wide(A)
M.sorted <- getSortedMatrix(M,keys,sortGenes)
mutmat <- M.sorted$mutmat
alterations <- M.sorted$alterations
genes <- M.sorted$genes
samples <- M.sorted$samples
# order alterations and samples based on matrix
alterations$gene <- factor(alterations$gene, levels=rev(rownames(mutmat)))
alterations$sample <- factor(alterations$sample, levels=colnames(mutmat))
# create background ie cases with no alteration found
background <- as.data.frame(matrix(0, ncol=length(samples), nrow=length(genes)))
colnames(background) <- samples
background$gene <- factor(genes, levels=rev(rownames(mutmat)))
background.m <- gather(background, sample, alteration, -gene)
background.m$sample <- factor(background.m$sample, levels=colnames(mutmat))
plot.params <- data.frame(alteration = c("amp","del","somatic","splicing",
"germline","upreg","downreg"),
size = c(2,2,1,1,1,2,2),
width = c(.9,.9,.9,.9,.9,.9,.9),
height = c(.9,.9,.4,.95,.4,.9,.9))
alterations <- merge(alterations,plot.params)
plot.fill <- c("amp" = "firebrick", "del" = "blue", "upreg" = NA, "downreg" = NA,
"splicing" = "forestgreen", "germline" = "purple", "somatic" = "#36454F")
plot.alpha <- c("amp" = 0.6, "del" = 0.6, "upreg" = 0.6, "downreg" = 0.6,
"splicing" = 0.6, "germline" = 1, "somatic" = 1)
ggplot() +
geom_tile(data = background.m, aes(x = sample, y = gene),
fill = "#DCDCDC", colour = "white", size = 1.1) +
geom_tile(data = alterations, aes(x = sample, y = gene, fill = alteration,
size = size, width = width, height = height,
alpha = alteration)) +
scale_fill_manual(values = plot.fill) +
scale_alpha_manual(values = plot.alpha) +
theme_minimal() +
labs(x = "Sample", y = "Gene") +
theme(axis.text.x = element_text(angle = 90, size = 9),
legend.position = "none",
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
}
#' @importFrom dplyr %>% group_by mutate select
#' @importFrom tidyr spread
long2wide <- function(alterations) {
colnames(alterations) <- c("sample", "gene", "alteration")
alterations <- alterations %>%
group_by(sample, gene) %>%
mutate(alterations = paste(alteration, collapse = ";")) %>%
select(sample, gene, alterations) %>%
unique
wide <- spread(alterations, sample, alterations) %>%
as.matrix
rownames(wide) <- wide[,"gene"]
wide <- wide[,-1]
return(wide)
}
#' Function to order the alteration matrix to evidence the mutual exclusion pattern.
#'
#' @param M mutation matrix
#' @param keys list with the following elements: splicing, somatic,
#' germline, amp, del, upreg, downreg
#' @param sortGenes boolean whether or not to sort the genes, default TRUE
#' @importFrom stats na.omit
#' @importFrom tidyr gather
getSortedMatrix <- function(M, keys=list(somatic="MUT", germline="GERMLINE", amp="AMP",
del="HOMDEL", upreg="UP", downreg="DOWN",
splicing="SPLICING"), sortGenes=TRUE){
# convert from wide to long format
Mdf <- as.data.frame(M, stringsAsFactors = F)
Mdf$sample <- rownames(Mdf)
all <- gather(Mdf, gene, alteration, -sample)
genes <- na.omit(unique(as.character(all$gene)))
samples <- na.omit(unique( as.character(all$sample) ))
# create data structure for each alteration in a list
alterations <- list()
for( a in names(keys) ){
df <- all[ grep(pattern = keys[[a]], all$alteration) ,]
if(nrow(df) > 0 ){
df$alteration <- a
alterations[[a]] <- df
}
}
alterations <- do.call("rbind",alterations)
## create numerical mutation matrix
mutmat <- as.data.frame(matrix(0, ncol=length(samples), nrow=length(genes)))
colnames(mutmat) <- samples
rownames(mutmat) <- genes
# from https://github.com/gideonite/WIP/blob/gh-pages/oncoprint/MemoSort.js
# // sorting order : amplification, deletion, mutation, splicing, mrna, rppa
# // mutation > 0
# // amp > del > 0
scoringMatrix <- c("amp" = 128, "del" = 64, "somatic" = 32, "splicing" = 25,
"germline" = 16, "up" = 8, "downreg" = 4)
mutmat <- incrementMatrix(M=mutmat, events=alterations, scoring=scoringMatrix)
mutmat <- memoSort(mutmat, sortGenes = sortGenes)
list(mutmat=mutmat,alterations=alterations,genes=genes,samples=samples)
}
#' Function to increment gene matrix
#' @param M data frame, colnames=samples, rownames=genes
#' @param events data frame. Needs columns sample and gene
#' @param scoring number by which to increment
incrementMatrix <- function(M, events, scoring){
for(k in 1:nrow(events)){
alteration <- events$alteration[k]
increment <- as.numeric(scoring[alteration])
gene <- as.character(events$gene[k])
sample <- as.character(events$sample[k])
idx.g <- which(rownames(M)==gene)
idx.s <- which(colnames(M)==sample)
if(!is.na(gene)){
M[idx.g,idx.s] <- M[idx.g,idx.s] + increment
}
}
M
}
#' Function to sort gene mutation matrix
#' from https://gist.github.com/dakl/5974ac1d78dac88c1c78
#'
#' @param mutmat matrix, genes as rows, samples as columns
#' @param sortGenes boolean wheather or not to sort genes (rows)
memoSort <- function(mutmat, sortGenes=TRUE) {
# order rows
if(sortGenes){
geneOrder <- sort(rowSums(mutmat), decreasing=TRUE, index.return=TRUE)$ix;
} else {
geneOrder <- 1:nrow(mutmat)
}
# order columns
scoreCol <- function(x) {
score <- 0;
for(i in 1:length(x)) {
if(x[i]) {
score <- score + 2^(20+length(x)-i);
break
}
}
score <- score + sum(x*(length(x):1))
return(score);
}
scores <- apply(mutmat[geneOrder, ], 2, scoreCol);
sampleOrder <- order(scores, decreasing = TRUE)# sort(scores, decreasing=TRUE, index.return=TRUE)$ix;
mutmat <- mutmat[geneOrder, sampleOrder]
firstRow <- mutmat[1,]
new.order <- c()
for(i in unique(as.numeric(firstRow))){
sub.mutmat <- mutmat[2:nrow(mutmat),firstRow==i]
if (is.vector(sub.mutmat))
new.order <- c(new.order, colnames(mutmat)[firstRow==i])
else if (is.data.frame(sub.mutmat) & nrow(sub.mutmat)==2)
new.order <- c(new.order, colnames(mutmat)[firstRow==i])
else {
sub.mutmat <- memoSort(sub.mutmat, sortGenes = FALSE)
new.order <- c(new.order, colnames(sub.mutmat))
}
}
mutmat <- mutmat[,new.order]
return(mutmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.