#' Plot the results of tabulateEpibed() as a quasi-lollipop plot.
#'
#' NOTE: If you run tabulateEpibed() in NOMe mode with include_empty_reads = TRUE,
#' then it is highly suggested you run plotEpibed() with show_all_points = TRUE also.
#' If not, it is possible that reads in the CG and GC plots will not line up, depending
#' on if there are empty reads in one plot, but not the other.
#'
#' @param mat Input matrix that comes out of tabulateEpibed()
#' @param plot_meth_avg Whether to also plot the average methylation state (default: FALSE)
#' @param show_readnames Whether to show the read names (default: TRUE)
#' @param show_positions Whether to show the genomic positions (default: TRUE)
#' @param show_all_points Whether to show all points (i.e., empty reads, unknown, and filtered sites) (default: FALSE)
#' @param show_legend Whether to include legend in plots (default: TRUE)
#' @param meth_color What color should the methylated states be (default: '#FF2400')
#' @param unmeth_color What color should the unmethylated states be (default: '#6495ED')
#' @param na_color What color should the NA values be (default: 'grey')
#' @param size What size the points should be drawn as (default: 6)
#' @param n_reads Maximum number of reads to plot, if force = FALSE then takes first n_reads (default: 20)
#' @param n_loci Maximum number of loci to plot, if force = FALSE then takes first n_loci (default: 10)
#' @param force Force plotting of all reads and loci in input table (default: FALSE)
#'
#' @return An epibed ggplot object or list of ggplot objects if plot_read_avg is TRUE
#'
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom cowplot plot_grid
#'
#' @export
#'
#' @examples
#'
#' epibed.nome <- system.file("extdata", "hct116.nome.epibed.gz",
#' package="biscuiteer")
#' epibed.nome.gr <- readEpibed(epibed = epibed.nome, is.nome = TRUE,
#' genome = "hg19", chr = "chr1")
#' epibed.tab.nome <- tabulateEpibed(epibed.nome.gr)
#' plotEpibed(epibed.tab.nome$gc_table)
#'
plotEpibed <- function(mat,
plot_meth_avg = FALSE,
show_readnames = TRUE,
show_positions = TRUE,
show_all_points = FALSE,
show_legend = TRUE,
meth_color = "#FF2400",
unmeth_color = "#6495ED",
na_color = "grey",
size = 6,
n_reads = 20,
n_loci = 10,
force = FALSE) {
# check if input is a matrix
if (is.list(mat) & is(mat[[1]], "matrix")) {
message("Input given is likely the output of tabulateEpibed().")
stop("Please select $cg_table, $gc_table, $vr_table, or $tab_cgvr to plot.")
}
if (!is.list(mat) & !is(mat, "matrix")) {
stop("Input needs to be a matrix generated by tabulateEpibed().")
}
if (all(is.na(mat))) {
stop("All data are NA. Try a different matrix.")
}
# Reduce number of reads plotted
if ((nrow(mat) > n_reads) && (isFALSE(force))) {
message("Removing ", nrow(mat)-n_reads, " reads. Keeping first ", n_reads, " reads.")
message("Rerun with increased `n_reads` option to plot more reads.")
mat <- mat[1:n_reads,]
}
# Reduce number of loci plotted
if ((ncol(mat) > n_loci) && (isFALSE(force))) {
message("Removing ", ncol(mat)-n_loci, " loci. Keeping first ", n_loci, " loci.")
message("Rerun with increased `n_loci` option to plot more loci.")
mat <- mat[, 1:n_loci]
}
mat_melt <- .makePlotData(mat, show_all_points)
ql_theme <- .setQlTheme(show_readnames, show_positions)
plt <- .methByReadPlot(mat_melt, ql_theme, show_legend, meth_color, unmeth_color, na_color, size)
if (plot_meth_avg) {
avg <- .plotMethAvg(mat, ql_theme, show_legend, meth_color, unmeth_color, size)
} else {
avg <- NA
}
return(
list(
epi = plt,
avg = avg
)
)
}
# helper to make the melted dataset for plotting
.makePlotData <- function(mat, show_all_points) {
cg <- c("M", "U", "N")
gc <- c("O", "S")
vr <- c("A", "C", "G", "T", "R", "Y")
possibles <- c(
cg, gc, vr,
apply(expand.grid(vr, cg), 1, paste, collapse=""),
apply(expand.grid(cg, vr), 1, paste, collapse=""),
apply(expand.grid(vr, cg, vr), 1, paste, collapse="")
)
# break if no known values exist in matrix
if (!any(mat %in% possibles)) {
message("Unsure what to do with input.")
stop("Please run tabulateEpibed() first to produce input to this function.")
}
# cast to a 'melted' data frame
mat_melt <- reshape2::melt(mat, id.vars = rownames(mat))
if (!show_all_points) {
mat_melt <- subset(mat_melt, !is.na(value))
}
mat_melt
}
# helper to set the plot theme
.setQlTheme <- function(show_readnames, show_positions) {
ql_theme <- theme_bw(12) +
theme(
axis.title = element_blank(),
legend.position = "none",
legend.title = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(color = "black")
)
if (!show_readnames) {
ql_theme <- ql_theme +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank()
)
}
if (!show_positions) {
ql_theme <- ql_theme +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
} else {
ql_theme <- ql_theme +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
}
ql_theme
}
# helper to plot epibed by reads/fragments
.methByReadPlot <- function(pd, ql_theme, show_legend, meth_color, unmeth_color, na_color, size) {
cg <- c("M", "U", "N")
gc <- c("O", "S")
vr <- c("A", "C", "G", "T", "R", "Y")
cg_vr_r <- apply(expand.grid(vr, cg), 1, paste, collapse="")
cg_vr_l <- apply(expand.grid(cg, vr), 1, paste, collapse="")
cg_vr_m <- apply(expand.grid(vr, cg, vr), 1, paste, collapse="")
# CGs with SNP at C position
cvr_cg <- subset(pd, value %in% cg_vr_r)
cvr_vr <- subset(pd, value %in% cg_vr_r)
cvr_cg$value <- gsub(paste(c("[", vr, "]"), collapse=""), "", cvr_cg$value)
cvr_vr$value <- gsub(paste(c("[", cg, "]"), collapse=""), "", cvr_vr$value)
# CGs with SNP at G position
cvl_cg <- subset(pd, value %in% cg_vr_l)
cvl_vr <- subset(pd, value %in% cg_vr_l)
cvl_cg$value <- gsub(paste(c("[", vr, "]"), collapse=""), "", cvl_cg$value)
cvl_vr$value <- gsub(paste(c("[", cg, "]"), collapse=""), "", cvl_vr$value)
# CGs with SNPs at C and G positions
cvm_cg <- subset(pd, value %in% cg_vr_m)
cvm_vr <- subset(pd, value %in% cg_vr_m)
cvm_cg$value <- gsub(paste(c("[", vr, "]"), collapse=""), "", cvm_cg$value)
cvm_vr$value <- gsub(paste(c("[", cg, "]"), collapse=""), ",", cvm_vr$value)
# CG/GC/SNP that don't collide with one another
plt <- ggplot(pd, aes(x = Var2, y = Var1)) +
geom_point(data = subset(pd, is.na(value)), aes(fill = value), size=size, pch=21, color="black") +
geom_point(data = subset(pd, value %in% cg), aes(fill = value), size=size, pch=21, color="black") +
geom_point(data = subset(pd, value %in% gc), aes(fill = value), size=size, pch=21, color="black") +
geom_point(data = subset(pd, value %in% vr), fill = "white", size=size+1, pch=21, color="black") +
geom_text(data = subset(pd, value %in% vr), aes(label = value), size=size-1, color="black") +
guides(color = "legend") +
ql_theme
# SNP in C of CG
plt <- plt +
geom_point(data = cvr_cg, aes(fill = value), size=size+1, pch=22, color="black") +
geom_text(data = cvr_vr, aes(label = value), size=size-1, color="black")
# SNP in G of CG
plt <- plt +
geom_point(data = cvl_cg, aes(fill = value), size=size+1, pch=23, color="black") +
geom_text(data = cvl_vr, aes(label = value), size=size-1, color="black")
# SNP in C and G of CG
plt <- plt +
geom_point(data = cvm_cg, aes(fill = value), size=size+1, pch=24, color="black") +
geom_text(data = cvm_vr, aes(label = value), size=size-1, color="black")
# Color methylation/accessibility
if (any(pd$value %in% gc)) {
plt <- plt +
scale_fill_manual(
values = c(O = meth_color, S = unmeth_color),
labels = c(O = "Open", S = "Closed"),
na.value = na_color,
name = "Accessibility"
)
} else {
plt <- plt +
scale_fill_manual(
values = c(M = meth_color, U = unmeth_color, N = na_color),
labels = c(M = "Methylated", U = "Unmethylated", N = "Unknown"),
na.value = na_color,
name = "Methylation"
)
}
# Add legend if desired
if (show_legend) {
# Check which values are found in data
extra_vals <- c()
if (any(pd$value %in% c("N"))) { extra_vals <- c(extra_vals, "N") }
if (length(cvr_cg$value) > 0) { extra_vals <- c(extra_vals, "C") }
if (length(cvl_cg$value) > 0) { extra_vals <- c(extra_vals, "G") }
if (length(cvm_cg$value) > 0) { extra_vals <- c(extra_vals, "CG") }
leg <- .createLegend(extra_vals, size, meth_color, unmeth_color, na_color)
out <- plot_grid(plt, leg, rel_widths = c(4, 1))
} else {
out <- plt
}
out
}
# helper to create legend for epiBED plot
# it was easier to manually create the figure than to deal with ggplot forming the legend auto-magically
.createLegend <- function(extra_vals, size, meth_color, unmeth_color, na_color) {
# Set up what points will be shown
# M, U, and Lone CpG/SNP will always be shown
xs <- rep(1,3)
ys <- c(3, 2, -1)
ds <- c("M", "U", "A")
ls <- c("Methylated", "Unmethylated", "Lone CpG/SNP")
# Add extra values if they exist
if ("N" %in% extra_vals) { # unknown
xs <- c(xs, 1)
ys <- c(ys, 1)
ds <- c(ds, "N")
ls <- c(ls, "Unknown")
}
if ("C" %in% extra_vals) { # CpG, SNP at C
xs <- c(xs, 1)
ys <- c(ys, -2)
ds <- c(ds, "C")
ls <- c(ls, "CpG, SNP at C")
}
if ("G" %in% extra_vals) { # CpG, SNP at G
y <- -3
if (!("C" %in% extra_vals)) { y <- y + 1 }
xs <- c(xs, 1)
ys <- c(ys, y)
ds <- c(ds, "G")
ls <- c(ls, "CpG, SNP at G")
}
if ("CG" %in% extra_vals) { # CpG, SNP at both
y <- -4
if (!("C" %in% extra_vals)) { y <- y + 1 }
if (!("G" %in% extra_vals)) { y <- y + 1 }
xs <- c(xs, 1)
ys <- c(ys, y)
ds <- c(ds, "CG")
ls <- c(ls, "CpG, SNP at both")
}
points <- data.frame(x=xs, y=ys, data=ds, labels=ls)
plt <- ggplot(points, aes(x=x, y=y)) +
geom_point(aes(fill=data, shape=data), size=size, color="black") +
geom_text(aes(label=labels), hjust=0, nudge_x=1, size=size-1, color="black") +
xlim(0, 10) +
ylim(-10, 10) +
scale_fill_manual(
values = c(
A = "black",
C = "black",
G = "black",
CG = "black",
M = meth_color,
U = unmeth_color,
N = na_color
)
) +
scale_shape_manual(values = c(A = 21, C = 22, G = 23, CG = 24, M = 21, N = 21, U = 21))
plt <- plt +
theme_bw() +
theme(
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
legend.position = "none"
)
plt
}
# helper to calculate avg methylation
.plotMethAvg <- function(mat, ql_theme, show_legend, meth_color, unmeth_color, size) {
vr <- c("A", "C", "G", "T", "R", "Y")
meth <- c("M", "O")
unme <- c("U", "S")
# check if input is a matrix
if (!is(mat, "matrix")) {
stop("To plot average methylation, a matrix is needed as input.")
}
# Remove any SNPs and replace missing/empty values with NA
mat <- gsub(paste(c("[", vr, "]"), collapse=""), "", mat)
mat <- gsub("N", "", mat)
mat <- replace(mat, mat == "", NA)
mat[mat %in% meth] <- 1
mat[mat %in% unme] <- 0
mat <- apply(mat, 2, as.numeric)
avg <- data.frame(avg_meth = colMeans(mat, na.rm=TRUE)) # SNP only columns end up as NaN
# Replace SNP only columns with NA so they have a spot retained in the figure but don't receive a circle
avg$avg_meth <- ifelse(is.nan(avg$avg_meth), NA, avg$avg_meth)
avg$position <- rownames(avg)
avg$y <- ifelse(is.na(avg$avg_meth), "SNP status", "Average methylation")
plt <- ggplot(avg, aes(x=position, y=y)) +
geom_point(aes(fill = avg_meth), size=size, pch=21, color="black", na.rm = TRUE) +
scale_fill_gradient(low=unmeth_color, high=meth_color, limits=c(0, 1)) +
scale_x_discrete(name="", limits=rownames(avg)) +
scale_y_discrete(limits=c("Average methylation")) +
guides(color="legend") +
ql_theme
if (show_legend) {
plt <- plt + theme(legend.position = "right")
}
plt
}
#' Calculate and plot a hierarchical clustering of the epibeds
#'
#' WARNING: This is a beta function and should not be used for publication level analyses!!!!
#'
#' @param mat Input matrix that comes out of tabulateEpibed()
#' @param stringdist_method stringdist::stringdist algorithm to use (default: "hamming")
#' @param hclust_method Clustering algorithm to use (default: "ward.D2")
#' @param plot Whether to plot the clustered epibeds (default: TRUE)
#'
#' @return A hierarchical cluster (hclust) object
#'
#' @import ggtree
#' @importFrom stringdist stringdist
#' @importFrom cowplot plot_grid
#'
#' @examples
#'
#' #epibed.nome <- system.file("extdata", "hct116.nome.epibed.gz",
#' # package="biscuiteer")
#' #epibed.nome.gr <- readEpibed(epibed = epibed.nome, is.nome = TRUE,
#' # genome = "hg19", chr = "chr1")
#' #epibed.tab.nome <- tabulateEpibed(epibed.nome.gr)
#' #epistateCaller(epibed.tab.nome)
#'
#epistateCaller <- function(mat,
# stringdist_method="hamming",
# hclust_method="ward.D2",
# plot = TRUE) {
# # check for both cg and gc tables
# if (is.list(mat)) {
# stopifnot(exists("cg_table", where = mat) & exists("gc_table", where = mat))
#
# mat.merge <- merge(mat$cg_table, mat$gc_table, by = 0)
# row.names(mat.merge) <- mat.merge[, 1]
# mat.merge <- mat.merge[-1]
# } else {
# mat.merge <- mat
# }
#
# mat.merge[is.na(mat.merge) |
# mat.merge == "A" | mat.merge == "T" |
# mat.merge == "G" | mat.merge == "C" |
# mat.merge == "U" | mat.merge == "S"] <- 0
# mat.merge[mat.merge == "M" | mat.merge == "O"] <- 1
#
# epistates <- apply(mat.merge, 1, paste0, collapse = "")
# hamming <- outer(epistates, epistates, stringdist, method = stringdist_method)
#
# hcluster <- hclust(as.dist(hamming), method = hclust_method)
# if (!plot) {
# return(hcluster)
# }
#
# if (!is.list(mat)) {
# matplotdata <- list(.makePlotData(mat, show_all_points = FALSE))
# } else {
# matplotdata <- lapply(
# mat, function(meth_table) { .makePlotData(meth_table, show_all_points = FALSE) }
# )
# }
#
# tree <- ggtree(as.dendrogram(hcluster), branch.length = "none")
#
# ql_theme <- .set_ql_theme(TRUE, TRUE)
#
# plots <- lapply(
# matplotdata,
# function(m) {
# m$Var1 <- factor(m$Var1, levels = rev(get_taxa_name(tree)))
# plt <- .epiClustPlot(m, ql_theme)
#
# clust_plot <- plot_grid(
# tree, NULL, plt,
# rel_widths = c(1, -0.05, 2), nrow = 1, align = 'h'
# )
# return(clust_plot)
# }
# )
#
# return(plots)
#}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.