#' @name runMarker
#' @title Run identification of unique biomarkers
#' @description This function aims to identify uniquely and significantly expressed (overexpressed or downexpressed) biomarkers for each subtype identified by multi-omics integrative clustering algorithms. Top markers will be chosen to generate a template so as to run nearest template prediction for subtype verification.
#' @param moic.res An object returned by `getMOIC()` with one specified algorithm or `get\%algorithm_name\%` or `getConsensusMOIC()` with a list of multiple algorithms.
#' @param dea.method A string value to indicate the algorithm for differential expression analysis. Allowed value contains c('deseq2', 'edger', 'limma').
#' @param dat.path A string value to indicate the path for saving the files of differential expression analysis.
#' @param res.path A string value to indicate the path for saving the results for identifying subtype-specific markers.
#' @param p.cutoff A numeric value to indicate the nominal p value for identifying significant markers; pvalue < 0.05 by default.
#' @param p.adj.cutoff A numeric value to indicate the adjusted p value for identifying significant markers; padj < 0.05 by default.
#' @param dirct A string value to indicate the direction of identifying significant marker. Allowed values contain c('up', 'down'); `up` means up-regulated marker, and `down` means down-regulated marker.
#' @param n.marker A integer value to indicate how many top markers sorted by log2fc should be identified for each subtype; 200 by default.
#' @param norm.expr A matrix of normalized expression data with rows for genes and columns for samples; FPKM or TPM without log2 transformation is recommended.
#' @param prefix A string value to indicate the prefix of differential expression file (use for searching files).
#' @param doplot A logic value to indicate if generating heatmap by using subtype-specific markers. TRUE by default.
#' @param annCol A data.frame storing annotation information for samples.
#' @param annColors A list of string vectors for colors matched with annCol.
#' @param clust.col A string vector storing colors for annotating each subtype at the top of heatmap.
#' @param halfwidth A numeric vector to assign marginal cutoff for truncating values in data; 3 by default.
#' @param centerFlag A logical vector to indicate if expression data should be centered; TRUE by default.
#' @param scaleFlag A logical vector to indicate if expression data should be scaled; TRUE by default.
#' @param color A string vector storing colors for heatmap.
#' @param fig.path A string value to indicate the output path for storing the marker heatmap.
#' @param fig.name A string value to indicate the name of the marker heatmap.
#' @param width A numeric value to indicate the width of output figure.
#' @param height A numeric value to indicate the height of output figure.
#' @param show_rownames A logic value to indicate if showing rownames (feature names) in heatmap; FALSE by default.
#' @param show_colnames A logic value to indicate if showing colnames (sample ID) in heatmap; FALSE by default.
#' @param ... Additional parameters pass to \link[ComplexHeatmap]{pheatmap}.
#' @return A figure of subtype-specific marker heatmap (.pdf) if \code{doPlot = TRUE} and a list with the following components:
#'
#' \code{unqlist} a string vector storing the unique marker across all subtypes.
#'
#' \code{templates} a data.frame storing the the template information for nearest template prediction, which is used for verification in external cohort.
#'
#' \code{dirct} a string value indicating the direction for identifying subtype-specific markers.
#'
#' \code{heatmap} a complexheatmap object.
#' @export
#' @importFrom grDevices pdf dev.off colorRampPalette
#' @importFrom ComplexHeatmap pheatmap draw
#' @references Gu Z, Eils R, Schlesner M (2016). Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics, 32(18):2847-2849.
#' @examples # There is no example and please refer to vignette.
runMarker <- function(moic.res = NULL,
dea.method = c("deseq2", "edger", "limma"),
prefix = NULL,
dat.path = getwd(),
res.path = getwd(),
p.cutoff = 0.05,
p.adj.cutoff = 0.05,
dirct = "up",
n.marker = 200,
doplot = TRUE,
norm.expr = NULL,
annCol = NULL,
annColors = NULL,
clust.col = c("#2EC4B6","#E71D36","#FF9F1C","#BDD5EA","#FFA5AB","#011627","#023E8A","#9D4EDD"),
halfwidth = 3,
centerFlag = TRUE,
scaleFlag = TRUE,
show_rownames = FALSE,
show_colnames = FALSE,
color = c("#5bc0eb", "black", "#ECE700"),
fig.path = getwd(),
fig.name = NULL,
width = 8,
height = 8,
...) {
n.moic <- length(unique(moic.res$clust.res$clust))
mo.method <- moic.res$mo.method
DEpattern <- paste(mo.method, "_", ifelse(is.null(prefix),"",paste0(prefix,"_")), dea.method, ".*._vs_Others.txt$", sep="")
DEfiles <- dir(dat.path,pattern = DEpattern)
if(length(DEfiles) == 0) {stop("no DEfiles!")}
if(length(DEfiles) != n.moic) {stop("not all the multi-omics clusters have DEfile!")}
if (!is.element(dirct, c("up", "down"))) {stop( "dirct type error! Allowed value contains c('up', 'down').") }
if(dirct == "up") { outlabel <- "unique_upexpr_marker.txt" }
if(dirct == "down") { outlabel <- "unique_downexpr_marker.txt" }
genelist <- c()
for (filek in DEfiles) {
DEres <- read.table(file.path(dat.path, filek), header=TRUE, row.names=NULL, sep="\t", quote="", stringsAsFactors=FALSE)
DEres <- DEres[!duplicated(DEres[, 1]),]
DEres <- DEres[!is.na(DEres[, 1]), ]
rownames(DEres) <- DEres[, 1]
DEres <- DEres[, -1]
#rownames(DEres) <- toupper(rownames(DEres))
if (dirct == "up") {
genelist <- c( genelist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc > 0, ]) )
}
if (dirct == "down") {
genelist <- c( genelist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc < 0, ]) )
}
}
unqlist <- setdiff(genelist,genelist[duplicated(genelist)])
marker <- list()
for (filek in DEfiles) {
DEres <- read.table(file.path(dat.path, filek), header=TRUE, row.names=NULL, sep="\t", quote="", stringsAsFactors=FALSE)
DEres <- DEres[!duplicated(DEres[, 1]),]
DEres <- DEres[!is.na(DEres[, 1]), ]
rownames(DEres) <- DEres[, 1]
DEres <- DEres[, -1]
#rownames(DEres) <- toupper(rownames(DEres))
if(dirct == "up") {
outk <- intersect( unqlist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc > 0, ]) )
outk <- DEres[outk,]
outk <- outk[order(outk$log2fc, decreasing = TRUE),]
if(nrow(outk) > n.marker) {
marker[[filek]] <- outk[1:n.marker,]
} else {
marker[[filek]] <- outk
}
marker$dirct <- "up"
}
if(dirct == "down") {
outk <- intersect( unqlist, rownames(DEres[!is.na(DEres$padj) & DEres$pvalue < p.cutoff & DEres$padj < p.adj.cutoff & !is.na(DEres$log2fc) & DEres$log2fc < 0, ]) )
outk <- DEres[outk,]
outk <- outk[order(outk$log2fc, decreasing = FALSE),]
if(nrow(outk) > n.marker) {
marker[[filek]] <- outk[1:n.marker,]
} else {
marker[[filek]] <- outk
}
marker$dirct <- "down"
}
# write file
write.table(outk, file=file.path(res.path, paste(gsub("_vs_Others.txt","", filek, fixed = TRUE), outlabel, sep = "_")), row.names = TRUE, col.names = NA, sep = "\t", quote = FALSE)
}
# generate templates for nearest template prediction
templates <- NULL
for (filek in DEfiles) {
tmp <- data.frame(probe = rownames(marker[[filek]]),
class = sub("_vs_Others.txt","",sub(".*.result.","",filek)),
dirct = marker$dirct,
stringsAsFactors = FALSE)
templates <- rbind.data.frame(templates, tmp, stringsAsFactors = FALSE)
}
write.table(templates, file=file.path(res.path, paste0(mo.method,"_",dea.method,"_",dirct,"regulated_marker_templates.txt")), row.names = FALSE, sep = "\t", quote = FALSE)
# generate heatmap with subtype-specific markers
if(doplot) {
if(is.null(norm.expr)) {
stop("please provide a matrix or data.frame of normalized expression data with rows for genes and columns for samples; FPKM or TPM without log2 transformation is recommended.")
}
# check data
comsam <- intersect(moic.res$clust.res$samID, colnames(norm.expr))
if(length(comsam) == nrow(moic.res$clust.res)) {
message("--all samples matched.")
} else {
message(paste0("--",(nrow(moic.res$clust.res)-length(comsam))," samples mismatched from current subtypes."))
}
moic.res$clust.res <- moic.res$clust.res[comsam, , drop = FALSE]
norm.expr <- norm.expr[,comsam]
if(is.null(fig.name)) {
outFig <- paste0("markerheatmap_using_",dirct,"regulated_genes.pdf")
} else {
outFig <- paste0(fig.name, "_using_", dirct,"regulated_genes.pdf")
}
sam.order <- moic.res$clust.res[order(moic.res$clust.res$clust, decreasing = FALSE), "samID"]
colvec <- clust.col[1:n.moic]
names(colvec) <- paste0("CS",1:n.moic)
if(!is.null(annCol) & !is.null(annColors)) {
annCol <- annCol[sam.order, , drop = FALSE]
annCol$Subtype <- paste0("CS",moic.res$clust.res[sam.order,"clust"])
annColors[["Subtype"]] <- colvec
} else {
annCol <- data.frame("Subtype" = paste0("CS",moic.res$clust.res[sam.order,"clust"]),
row.names = sam.order,
stringsAsFactors = FALSE)
annColors <- list("Subtype" = colvec)
}
if(max(norm.expr) < 25 | (max(norm.expr) >= 25 & min(norm.expr) < 0)) {
message("--expression profile seems to have been standardised (z-score or log transformation), no more action will be performed.")
gset <- norm.expr
}
if(max(norm.expr) >= 25 & min(norm.expr) >= 0){
message("--log2 transformation done for expression data.")
gset <- log2(norm.expr + 1)
}
standarize.fun <- function(indata=NULL, halfwidth=NULL, centerFlag=TRUE, scaleFlag=TRUE) {
outdata=t(scale(t(indata), center=centerFlag, scale=scaleFlag))
if (!is.null(halfwidth)) {
outdata[outdata>halfwidth]=halfwidth
outdata[outdata<(-halfwidth)]= -halfwidth
}
return(outdata)
}
plotdata <- standarize.fun(gset[intersect(templates$probe, rownames(gset)), sam.order],
halfwidth = halfwidth,
centerFlag = centerFlag,
scaleFlag = scaleFlag)
if(!is.null(annCol) & !is.null(annColors)) {
for (i in names(annColors)) {
if(is.function(annColors[[i]])) {
annColors[[i]] <- annColors[[i]](pretty(range(annCol[,i]),n = 64)) # transformat colorRamp2 function to color vector
}
}
}
hm <- ComplexHeatmap::pheatmap(mat = plotdata,
border_color = NA,
cluster_cols = FALSE,
cluster_rows = FALSE,
annotation_col = annCol,
annotation_colors = annColors,
legend_breaks = pretty(c(-halfwidth,halfwidth)),
legend_labels = pretty(c(-halfwidth,halfwidth)),
show_rownames = show_rownames,
show_colnames = show_colnames,
treeheight_col = 0,
treeheight_row = 0,
color = grDevices::colorRampPalette(color)(64),
...)
# save to pdf
pdf(file.path(fig.path, outFig), width = width, height = height)
draw(hm,annotation_legend_side = "left",heatmap_legend_side = "left")
invisible(dev.off())
# print to screen
draw(hm,annotation_legend_side = "left",heatmap_legend_side = "left")
return(list(unqlist = unqlist, templates = templates, dirct = dirct, heatmap = hm))
} else {
return(list(unqlist = unqlist, templates = templates, dirct = dirct))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.