#' @title GroupHeatSpot
#' @name GroupHeatSpot
#'
#' @importFrom GenomicRanges reduce
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom Rsamtools BamFile
#' @importFrom GenomicAlignments readGAlignments
#' @importFrom SummarizedExperiment colData
#' @importFrom parallel mclapply
#' @importFrom data.table as.data.table
#' @importFrom reshape2 melt
#' @import ggplot2
#' @importFrom ggbio autoplot
#' @import patchwork
#'
#' @param object an ICASDataSet
#' @param SJ a charactor of SJ
#' @param flank the distance of flanking extension
#' @param cells which cells are output, NULL for all cells
#' @param BamFiles a file list of bamfiles
#' @param BamPostfix the postfix of bam files, remove the postfix the base name of bam must consistent with cells name
#' @param logTrans whether to perform log1p for the depth
#' @param ignore.strand the strandedness of bam file
#' @param ignore.SEorPE ignore single end paired of bamfiles (if your bam files have single end and paired end at the same time, you need to ignore the SEorPE)
#' @param singleEnd singleEnd or paired end of bam
#' @param antisense strandedness for singleEnd
#' @param strandMode strandMode for readGAlignments
#' @param mapqFilter A non-negative integer(1) specifying the minimum mapping quality to include. BAM records with mapping qualities less than mapqFilter are discarded.
#' @param geneModul A TxDb object made from GTF used by BAM aligner or a GRangesList object
#' @param shrinkage the shrinkage for intron region (must be a positive >= 1). default 1 for no shrinkage
#' @param high.col the color for high expressed base
#' @param low.col the color for low expressed base
#' @param curvature the curvature for splice junction
#' @param reduce To collapse all gene structure annotation features or not
#' @param rel_heights The relative heights of each row in the grid (must be a vector with 4 numbers).
#' @param highlight.region the region to highlight
#' @param highlight.color the color of highlight.region
#' @param highlight.label the label of highlight.region
#' @param highlight.label.size the size of highlight region label
#' @param removeTxid logical value, indicating whether to remove the transcript of y axis lab
#' @param MinimumFractionForP The minimum fraction of scaled expression to calculate base pair waldtest P value
#' @param NT the cores for parallel
#'
#' @author Tang Chao
#' @export
GroupHeatSpot <- function(object,
SJ,
flank = 200,
cells = NULL,
BamFiles,
BamPostfix,
logTrans = FALSE,
ignore.strand = TRUE,
ignore.SEorPE = FALSE,
singleEnd = FALSE,
antisense = TRUE,
strandMode = 2,
mapqFilter = 0,
geneModul,
shrinkage = 1,
high.col = "#0000FF80",
low.col = "#FFFF0080",
curvature = -0.1,
reduce = FALSE,
rel_heights = c(1, 1, 1, 1),
highlight.region = NULL,
highlight.color = "#0000FF80",
highlight.label = NULL,
highlight.label.size = 4,
removeTxid = FALSE,
MinimumFractionForP = 0.05,
NT = 1,
...) {
if(!is(object, "ICASDataSet"))
stop("The object must be a ICASDataSet data")
if(length(SJ) != 1) {
stop("SJ must be one SJ")
}
if(!SJ %in% row.names(psi(object)))
stop("SJ must be in row.names of psi(object)")
if(is.null(cells)) {
cells <- row.names(colData(object))
} else {
cells <- cells[cells %in% row.names(colData(object))]
if(length(cells) == 0) {
stop("cells are not in row.names(colData(object))")
}
}
if(!all(file.exists(BamFiles))) {
stop("all BamFiles file exists are FALSE")
}
if(!identical(sort(cells), sort(gsub(BamPostfix, "", basename(BamFiles))))) {
stop("Bamfiles are not cover your cells or maybe your BamPostfix are wrong")
}
if(!is.logical(ignore.strand)) {
stop("ignore.strand must be logical value")
}
if(!is.logical(removeTxid)) {
stop("removeTxid must be logical value")
}
if(!is.logical(singleEnd)) {
stop("singleEnd must be logical value")
}
if(!is.logical(ignore.SEorPE)) {
stop("ignore.SEorPE must be logical value")
}
if(!is.logical(logTrans)) {
stop("logTrans must be logical value")
}
if(!is.logical(antisense)) {
stop("antisense must be logical value")
}
if(length(strandMode) != 1) {
stop("strandMode must be 1 or 2")
}
if(!is.element(strandMode, c(1, 2))) {
stop("strandMode must be 1 or 2")
}
if(!is.numeric(mapqFilter)) {
stop("mapqFilter must be a positive value")
}
if(mapqFilter < 0) {
stop("mapqFilter must be a positive value")
}
if(!(is(geneModul, "TxDb") | is(geneModul, "GRangesList"))) {
stop("geneModul must be a TxDb form GenomicFeatures or a GRangesList")
}
if(!is.numeric(flank)) {
stop("flank must be a positive value")
}
if(flank < 0) {
stop("flank must be a positive value")
}
if(!is.numeric(curvature)) {
stop("curvature must be numeric value")
}
if(!is.numeric(NT)) {
stop("NT must be a positive value")
}
if(!is.logical(reduce)) {
stop("reduce must be logical value")
}
if(length(rel_heights) != 4) {
stop("length of rel_heights must be 4")
}
if(any(!is.numeric(rel_heights))) {
stop("rel_heights must be 4 positive numeric values")
}
if(any(rel_heights < 0)) {
stop("rel_heights must be 4 positive numeric values")
}
if(NT <= 0) {
stop("NT must be a positive value")
}
NT <- as.integer(NT)
if(highlight.label.size <= 0) {
stop("highlight.label.size must be a positive value")
}
highlight.label.size <- as.integer(highlight.label.size)
if(!is.null(highlight.region)) {
tryCatch(hlgr <- as(highlight.region, "GRanges"), error = function(e) stop("the highlight.region character vector must contain strings of the form 'chr:start-end' or 'chr:start-end:strand'"))
if(any(width(hlgr) < 3)) {
width(hlgr[width(hlgr) < 3]) <- 3
}
}
rankcell <- row.names(colData(object)[order(colData(object)[[object@design]]), ])
rankcell <- rankcell[rankcell %in% cells]
## AS event phase
event <- SJEvent(object = object, SJ = SJ)
region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
BiocGenerics::end(region) <- BiocGenerics::end(region) + flank
## Fig4 gene structure
p4 <- plotgene(object = object,
SJ = SJ,
flank = flank,
geneModul = geneModul,
shrinkage = shrinkage,
curvature = curvature,
reduce = reduce,
highlight.region = highlight.region,
highlight.color = highlight.color,
highlight.label = highlight.label,
highlight.label.size = highlight.label.size,
returnData = TRUE,
removeTxid = removeTxid)
if(is.list(p4) & length(p4) == 2) {
plast <- p4[[1]]
} else {
plast <- p4
}
## BAM reads processing
event <- SJEvent(object = object, SJ = SJ)
region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
BiocGenerics::end(region) <- BiocGenerics::end(region) + flank
parallel::mclapply(BamFiles, function(x) {
if(ignore.SEorPE) {
suppressWarnings(as.numeric(ReadBam(BamFile = x,
region = region,
ignore.strand = TRUE,
singleEnd = TRUE,
antisense = antisense,
strandMode = strandMode,
mapqFilter = mapqFilter)))
} else {
suppressWarnings(as.numeric(ReadBam(BamFile = x,
region = region,
ignore.strand = ignore.strand,
singleEnd = singleEnd,
antisense = antisense,
strandMode = strandMode,
mapqFilter = mapqFilter,
...)))
}
}, mc.cores = NT) -> base_reads
## BAM reads normalization
if(logTrans) {
base_reads_norm <- lapply(base_reads, function(x) log1p(x)/log1p(max(x)))
} else {
base_reads_norm <- lapply(base_reads, function(x) x/max(x))
}
base_reads_norm <- do.call(rbind, base_reads_norm)
colnames(base_reads_norm) <- BiocGenerics::start(region):BiocGenerics::end(region)
row.names(base_reads_norm) <- gsub(BamPostfix, "", basename(BamFiles))
base_reads_norm <- base_reads_norm[rankcell, ]
if(is.list(p4) & length(p4) == 2) {
base_reads_norm <- base_reads_norm[, as.character(p4[[2]]$x)]
colnames(base_reads_norm) <- p4[[2]]$x2
}
## Fig1 Heat map
annoMat <- data.frame(cell = factor(rankcell, levels = rankcell),
group = SummarizedExperiment::colData(object)[rankcell, object@design],
pos = min(as.numeric(colnames(base_reads_norm))))
lapply(seq_len(min(c(round(flank * 0.2), round(ncol(base_reads_norm)*0.01)))), function(i) {
data.frame(cell = annoMat$cell, group = annoMat$group, pos = annoMat$pos + i)
}) -> annoMat2
annoMat2 <- do.call(rbind, annoMat2)
base_reads_norm <- data.table::as.data.table(reshape2::melt(base_reads_norm))
base_reads_norm[, Var1 := factor(Var1, levels = rankcell)]
Mat1 <- base_reads_norm
ggplot(Mat1, aes(x = Var2, y = as.numeric(Var1))) +
geom_tile(aes(fill = value), show.legend = FALSE) +
theme_classic() +
scale_fill_gradient(high = high.col, low = low.col) +
geom_point(data = annoMat2, aes(x = pos, y = as.numeric(cell), colour = group), shape = 15, size = 1) +
theme(legend.position = "none",
axis.line.x = element_blank(),
axis.line.y = element_line(size = 0.1, color = "white"),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 14),
axis.title = element_blank(),
axis.ticks = element_blank()) +
guides(colour = guide_legend(override.aes = list(size = 3))) +
scale_y_continuous(breaks = CumPos(table(annoMat$group)), labels = levels(annoMat$group))+
scale_x_continuous(expand = c(0, 0)) -> p1
## Fig2 density plot
base_reads_norm <- merge(annoMat[, c("cell", "group")], base_reads_norm, by.x = "cell", by.y = "Var1")
base_reads_norm <- data.table::as.data.table(base_reads_norm)
Mat2 <- base_reads_norm[, .(me = mean(value), v = sd(value)), by = list(group, Var2)]
ggplot(Mat2, aes(x = Var2, y = me)) +
geom_line(aes(colour = group)) +
geom_ribbon(aes(x = Var2, ymin = me - v, ymax = me + v, fill = group), alpha = 0.2) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
axis.text = element_text(size = 12),
axis.title.y = element_text(size = 16),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line.y = element_line(),
legend.position = "none") +
labs(y = "Scaled\nexpression") +
scale_x_continuous(expand = c(0, 0)) -> p2
## Fig3 P value
pos_rm <- base_reads_norm[, mean(value), by = c("Var2", "group")][, max(V1), by = "Var2"][V1 < MinimumFractionForP, Var2]
Mat3 <- na.omit(base_reads_norm[, PosReadTest(depth = value, group = group), by = Var2])
Mat3[Var2 %in% pos_rm, V1 := 1]
ggplot(Mat3, aes(Var2, y = -log10(V1))) +
geom_step() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
axis.text = element_text(size = 12),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 16),
axis.ticks.x = element_blank(),
axis.line.y = element_line(),
legend.position = "none") +
labs(y = "-log10(P)") +
scale_x_continuous(expand = c(0, 0)) +
geom_vline(xintercept = Mat3[V1 == 0, Var2], color = "white", size = 2) +
geom_hline(yintercept = -log10(0.01), lty = 2) -> p3
if(any(rel_heights == 0)) {
ps <- list(p1, p2, p3, plast)
ps <- ps[rel_heights != 0]
if(length(ps) == 1) {
print(ps[[1]])
} else {
if(length(ps) == 2) {
ps[[1]] / ps[[2]] + plot_layout(heights = rel_heights[rel_heights != 0])
} else {
ps[[1]] / ps[[2]] / ps[[3]] + plot_layout(heights = rel_heights[rel_heights != 0])
}
}
} else {
(p1 / p2 / p3 / plast) + plot_layout(heights = rel_heights)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.