#' Get some stats after reading in data
#'
#' @import magrittr
#'
#' @include MultiDataSet_extendedfunctions.R
#'
#' @param inputData IntLimObject output of ReadData()
#' @param palette choose an RColorBrewer palette ("Set1", "Set2", "Set3",
#' "Pastel1", "Pastel2", "Paired", etc.) or submit a vector of colors
#' @param viewer whether the plot should be displayed in the RStudio viewer (T) or
#' in Shiny/Knittr (F)
#' @return a highcharter object
#'
#' @examples
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' PlotDistributions(mydata)
#' @export
PlotDistributions <- function(inputData,viewer=T,
# palette = c("#C71585", "#00E5EE")) {
palette="Set1"){
. <- c()
if (length(palette) == 2) {
cols <- c(palette)
}
else if (length(palette) == 1) {
cols <- RColorBrewer::brewer.pal(3, palette)[1:2]
}
else {
stop("palette must either be an RColorBrewer palette or a vector of hex colors of size 2")
}
categ <- c("Genes","Metabolites")
mygene <- as.data.frame(Biobase::assayDataElement(inputData[["expression"]],'exprs'))
toplot <- suppressMessages(reshape2::melt(mygene))
df <- dplyr::data_frame(value = toplot$value, by = toplot$variable) %>% dplyr::group_by_("by") %>%
dplyr::do(data = grDevices::boxplot.stats(.$value))
# names(df$data) <- df$by
# df$color <- df$by
bxps <- purrr::map(df$data, "stats")
outs <- purrr::map2_df(seq(nrow(df)), df$data, function(x, y) {
if (length(y$out) > 0)
d <- dplyr::data_frame(x = x - 1, y = y$out)
else d <- dplyr::data_frame()
d
})
outs <- data.frame(outs, 'z' = colnames(mygene)[outs$x + 1])
z <- outs$z
# To try to get the gene names of outliers, would have to go back and get the gene names from original data frame and put htem in outs$color
boxplotOptions <- list(
fillColor = '#ffffff',
lineWidth = 2,
medianColor = '#000000',
medianWidth = 2,
stemColor = '#000000',
stemDashStyle = 'dot',
stemWidth = 1,
whiskerColor = '#000000',
whiskerLength = '20%',
whiskerWidth = 3)
g <- highcharter::highchart(width = 750, height = 750 ) %>%
highcharter::hc_title(text = "Gene Expression",
style = list(color = '#2E1717',
fontWeight = 'bold', fontSize = "20px")) %>%
highcharter::hc_plotOptions(
boxplot = boxplotOptions
) %>%
hc_add_series(data = bxps,name = "Gene Expression", type="boxplot",color=cols[1],showInLegend=FALSE) %>%
highcharter::hc_add_series(data=list_parse(outs),name = "Gene Expression",
type="scatter",color=cols[1],showInLegend=FALSE,
tooltip = list(headerFormat = "", pointFormat = "{point.z} <br/> {point.y}",
showInLegend = FALSE)) %>%
# name = str_trim(paste(list(...)$name, "outliers")),
# type = "scatter") #marker = list(...)) %>%
# tooltip = list(headerFormat = "<span>{point.key}</span><br/>")) %>%
# pointFormat = "<span style='color:{point.color}'></span> \nOutlier: <b>{point.y}</b><br/>")) %>%
highcharter::hc_yAxis(title = list(text = "log(expression)",
style = list(fontSize = "13px")),
labels = list(format = "{value}")) %>%
highcharter::hc_xAxis(labels="", categories = colnames(mygene)) %>%
highcharter::hc_tooltip(valueDecimals = 2) %>%
highcharter::hc_exporting(enabled = TRUE)
mymetab <- Biobase::assayDataElement(inputData[["metabolite"]],'metabData')
toplot <- suppressMessages(reshape2::melt(mymetab))
df <- dplyr::data_frame(value = toplot$value, by = toplot$variable) %>%
dplyr::group_by_("by") %>%
dplyr::do(data = grDevices::boxplot.stats(.$value))
bxps <- purrr::map(df$data, "stats")
outs <- purrr::map2_df(seq(nrow(df)), df$data, function(x, y) {
if (length(y$out) > 0)
d <- dplyr::data_frame(x = x - 1, y = y$out)
else d <- dplyr::data_frame()
d
})
outs <- data.frame(outs, 'z' = colnames(mymetab)[outs$x + 1])
z <- outs$z
m <- highcharter::highchart(width = 750, height = 750 ) %>%
highcharter::hc_title(text = "Metabolite Levels",
style = list(color = '#2E1717',
fontWeight = 'bold', fontSize = "20px")) %>%
highcharter::hc_plotOptions(
boxplot = boxplotOptions
) %>%
highcharter::hc_add_series(data = bxps,name = "Metabolite Levels",
type="boxplot",color=cols[2],showInLegend=FALSE) %>%
highcharter::hc_add_series(data=list_parse(outs),name = "Metabolite Levels",
type="scatter",color=cols[2],showInLegend=FALSE,tooltip = list(headerFormat = "", pointFormat = "{point.z} <br/> {point.y}",
showInLegend = FALSE)) %>%
highcharter::hc_yAxis(title = list(text = "log(abundances)",
style = list(fontSize = "13px")),
labels = list(format = "{value}")) %>%
highcharter::hc_xAxis(labels="", categories = colnames(mymetab)) %>%
highcharter::hc_tooltip(valueDecimals = 2) %>%
highcharter::hc_exporting(enabled = TRUE)
if (viewer == TRUE) {
p <-
htmltools::browsable(highcharter::hw_grid(g, m, ncol = 2, rowheight = 550))
}
else {
p <- highcharter::hw_grid(g, m)
}
return(p)
}
#' PCA plots of data for QC
#'
#' @import magrittr
#'
#' @include MultiDataSet_extendedfunctions.R
#'
#' @param inputData IntLimObject output of ReadData()
#' @param stype category to color-code by (can be more than two categories)
#' @param palette choose an RColorBrewer palette ("Set1", "Set2", "Set3",
#' "Pastel1", "Pastel2", "Paired", etc.) or submit a vector of colors
#' @param viewer whether the plot should be displayed in the RStudio viewer (T) or
#' in Shiny/Knittr (F)
#' @param common whether or not samples that are in common between the metabolite and gene expression datasets should be plotted (T/F); default is TRUE
#' @return a highcharter object
#'
#' @examples
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' PlotPCA(mydata,stype = "PBO_vs_Leukemia")
#' @export
PlotPCA <- function(inputData,viewer=T,stype=NULL,common=T,
palette = "Set1") {
categ <- c("Genes","Metabolites")
if(is.null(stype)) {
warning("The resulting PCA plot is not color-coded because you did not provide a category in 'stype'")
mytype <- NULL
} else if (length(intersect(colnames(Biobase::pData(inputData[["metabolite"]])),stype))!=1) {
stop(paste0("You provided ",stype, "as your stype variable but it does not exist in your data"))
} else {
mytype <- as.character(Biobase::pData(inputData[["metabolite"]])[,stype])
numcateg <- length(unique(mytype))
if(length(palette) >= 2) {
cols <- palette
} else {
if(numcateg == 1) {
if(length(palette)==1) {cols <- RColorBrewer::brewer.pal(3, palette)[1]
} else {stop("palette should be an RColorBrewer palette or a vector of colors")}
} else if (numcateg == 2) {
if(length(palette)==1) {cols <- RColorBrewer::brewer.pal(3, palette)[1:2]
} else {stop("palette should be an RColorBrewer palette or a vector of colors")}
} else if (numcateg > 2) {
if(length(palette)==1) {cols <- RColorBrewer::brewer.pal(numcateg, palette)
} else {stop("palette should be an RColorBrewer palette or a vector of colors")}
} else {stop("There are no values in your 'stype' column")}
}
}
if(common==T) {
if(is.null(stype)) {
incommon <- getCommon(inputData)
mygene <- incommon$gene
gpca <- stats::prcomp(t(mygene),center=T,scale=F)
mymetab <- incommon$metab
mpca <- stats::prcomp(t(mymetab),center=T,scale=F)
gtoplot=data.frame(x=gpca$x[,1],y=gpca$x[,2],z=rownames(gpca$x),color=rep("blue",nrow(gpca$x)))
mtoplot=data.frame(x=mpca$x[,1],y=mpca$x[,2],z=rownames(mpca$x),color=rep("blue",nrow(mpca$x)))
gds <- list_parse(gtoplot)
pg <- highcharter::highchart(width = 350, height = 350 )
pg <- pg %>% highcharter::hc_add_series(data=gds,type="scatter",
tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=FALSE)
mds <- list_parse(mtoplot)
pm <- highcharter::highchart(width = 350, height = 350)
pm <- pm %>% highcharter::hc_add_series(data=mds,type="scatter",
tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=FALSE)
} else {
incommon <- getCommon(inputData,stype)
mygene <- incommon$gene
mymetab <- incommon$metab
alltype <- incommon$p
uniqtypes <- unique(alltype)
mycols <- as.character(alltype)
for (i in 1:numcateg) {
mycols[which(alltype==uniqtypes[i])] <- cols[i]
}
gpca <- stats::prcomp(t(mygene),center=T,scale=F)
mpca <- stats::prcomp(t(mymetab),center=T,scale=F)
gtoplot=data.frame(x=gpca$x[,1],y=gpca$x[,2],z=rownames(gpca$x),label=alltype,color=mycols)
mtoplot=data.frame(x=mpca$x[,1],y=mpca$x[,2],z=rownames(mpca$x),label=alltype,color=mycols)
mds <- list_parse(mtoplot)
gds <- list_parse(gtoplot)
pg <- highcharter::highchart(width = 350, height = 350)
pm <- highcharter::highchart(width = 350, height = 350)
for (i in 1:length(uniqtypes)) {
mytype <- unique(alltype)[i]
gds <- list_parse(gtoplot[which(gtoplot$label==mytype),])
pg <- pg %>% highcharter::hc_add_series(data=gds,type="scatter",
name=mytype,color=cols[which(alltype==mytype)[1]],tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=TRUE)
mds <- list_parse(mtoplot[which(mtoplot$label==mytype),])
pm <- pm %>% highcharter::hc_add_series(data=mds,type="scatter",
name=mytype,color=cols[which(alltype==mytype)[1]],tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=TRUE)
}
}
} else { # common == F
mygene <- as.data.frame(Biobase::assayDataElement(inputData[["expression"]],'exprs'))
mymetab <- Biobase::assayDataElement(inputData[["metabolite"]],'metabData')
gpca <- stats::prcomp(t(mygene),center=T,scale=F)
mpca <- stats::prcomp(t(mymetab),center=T,scale=F)
# percvar=round((gpca$sdev)^2 / sum(gpca$sdev^2)*100,2)
if(!is.null(stype)) {
gtypes <- as.character(Biobase::pData(inputData[["expression"]])[,stype])
mtypes <- as.character(Biobase::pData(inputData[["metabolite"]])[,stype])
uniqtypes <- unique(c(mtypes,gtypes))
gcols <- as.character(gtypes)
mcols <- as.character(mtypes)
for (i in 1:numcateg) {
gcols[which(gtypes==uniqtypes[i])] <- cols[i]
mcols[which(mtypes==uniqtypes[i])] <- cols[i]
}
# Deal with missing values or ""
if(length(which(gtypes==""))>0) {
gcols[which(gtypes=="")]="grey"
gtypes[which(gtypes=="")]="NA"
}
if (length(which(mtypes==""))>0) {
mcols[which(mtypes=="")]="grey"
mtypes[which(mtypes=="")]="NA"
}
if(length(which(is.na(gtypes)))>0) {
gcols[which(is.na(gtypes))]="grey"
}
if(length(which(is.na(mtypes)))>0) {
mcols[which(is.na(mtypes))]="grey"
}
gtoplot=data.frame(x=gpca$x[,1],y=gpca$x[,2],z=rownames(gpca$x),label=gtypes,color=gcols)
mtoplot=data.frame(x=mpca$x[,1],y=mpca$x[,2],z=rownames(mpca$x),label=mtypes,color=mcols)
mds <- list_parse(mtoplot)
gds <- list_parse(gtoplot)
pg <- highcharter::highchart(width = 350, height = 350 )
for (i in 1:length(unique(gtypes))) {
mytype <- unique(gtypes)[i]
gds <- list_parse(gtoplot[which(gtoplot$label==mytype),])
pg <- pg %>% highcharter::hc_add_series(data=gds,type="scatter",
name=mytype,color=cols[which(gtypes==mytype)[1]],tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=TRUE)
}
pm <- highcharter::highchart(width = 350, height = 350 )
for (i in 1:length(unique(mtypes))) {
mytype <- unique(mtypes)[i]
mds <- list_parse(mtoplot[which(mtoplot$label==mytype),])
pm <- pm %>% highcharter::hc_add_series(data=mds,type="scatter",
name=mytype,color=cols[which(mtypes==mytype)[1]],tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=TRUE)
}
} else { #stype is null
gtoplot=data.frame(x=gpca$x[,1],y=gpca$x[,2],z=rownames(gpca$x),label="",color=rep("blue",nrow(gpca$x)))
mtoplot=data.frame(x=mpca$x[,1],y=mpca$x[,2],z=rownames(mpca$x),label="",color=rep("blue",nrow(mpca$x)))
gds <- list_parse(gtoplot)
pg <- highcharter::highchart(width = 350, height = 350 )
pg <- pg %>% highcharter::hc_add_series(data=gds,type="scatter",
tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=FALSE)
mds <- list_parse(mtoplot)
pm <- highcharter::highchart(width = 350, height = 350)
pm <- pm %>% highcharter::hc_add_series(data=mds,type="scatter",
tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=FALSE)
}
} # end common == F
mpercvar=round((mpca$sdev)^2 / sum(mpca$sdev^2)*100,2)
gpercvar=round((gpca$sdev)^2 / sum(gpca$sdev^2)*100,2)
pg <- pg %>% highcharter::hc_title(text="PCA of genes") %>%
highcharter::hc_xAxis(title=list(text=paste0("PC1:",round(gpercvar[1],1),"%"))) %>%
highcharter::hc_yAxis(title=list(text=paste0("PC2:",round(gpercvar[2],2),"%"))) %>%
hc_chart(zoomType = "xy")
# dataLabels= list(enabled = TRUE, format = "{point.label}"),
pm <- pm %>% highcharter::hc_title(text="PCA of metabolites") %>%
highcharter::hc_xAxis(title=list(text=paste0("PC1:",round(mpercvar[1],1),"%"))) %>%
highcharter::hc_yAxis(title=list(text=paste0("PC2:",round(mpercvar[2],2),"%"))) %>%
hc_chart(zoomType = "xy")
if (viewer == TRUE) {
p <-
htmltools::browsable(highcharter::hw_grid(pg, pm, ncol = 2, rowheight = 550))
} else {
p <- highcharter::hw_grid(pg, pm)
}
return(p)
}
#' Visualize the distribution of unadjusted p-values from linear models
#'
#' @include IntLimResults_extendedfunctions.R
#'
#' @param IntLimResults output of RunIntLim()
#' @param breaks the number of breaks to use in histogram (see hist() documentation for more details)
#'
#' @examples
#' \dontrun{
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' myres <- RunIntLim(mydata,stype="PBO_vs_Leukemia")
#' DistPvalues(myres)
#' }
#' @export
DistPvalues<- function(IntLimResults,breaks=100) {
hist(IntLimResults@interaction.pvalues,breaks=breaks,
main="Histogram of Interaction P-values")
}
#' Plot correlation heatmap
#'
#' @import magrittr
#' @import heatmaply
#'
#' @param inputResults IntLimResults object (output of ProcessResults())
#' @param top_pairs cutoff of the top pairs, sorted by adjusted p-values, to be plotted (plotting more than 1200 can take some time) (default: 1200)
#' @param viewer whether the plot should be displayed in the RStudio viewer (T) or
#' in Shiny/Knittr (F)
#' @param treecuts number of clusters (of gene-metabolite pairs) to cut the tree into for color-coding
#' @param palette choose an RColorBrewer palette ("Set1", "Set2", "Set3",
#' "Pastel1", "Pastel2", "Paired", etc.) or submit a vector of colors
#' @return a highcharter object
#'@param static allows user to decide whether heatmap is interactive or static
#'@param html.file allows user to specify file path to output heatmap onto (used for non-static heatmaply objects)
#'@param pdf.file allows user to specify file path to output heatmap onto (used for static heatmap.2 objects)
#' @examples
#' \dontrun{
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' myres <- RunIntLim(mydata,stype="PBO_vs_Leukemia")
#' myres <- ProcessResults(myres,mydata)
#' CorrHeatmap(myres)
#' }
#' @export
CorrHeatmap <- function(inputResults,viewer=T,top_pairs=1200,treecuts=2, palette = NULL, static = FALSE,
html.file=NULL, pdf.file=NULL) {
type <- cor <- c()
if(nrow(inputResults@filt.results)==0) {
stop("Make sure you run ProcessResults before making the heatmap")
}
allres <- inputResults@filt.results
if(nrow(allres)>top_pairs) {
allp <- inputResults@filt.results[,"FDRadjPval"]
allres <- allres[order(allp,decreasing=F)[1:top_pairs],]
}
toplot <- data.frame(name=paste(allres[,1],allres[,2],sep=" vs "),
allres[,3:4])
suppressMessages(
meltedtoplot <- tidyr::gather(
toplot,
type,cor,colnames(toplot)[2],colnames(toplot)[3]))
#all possible values of X (type) and Y (name)
theXAxis <- as.character(meltedtoplot[, "type"])
theYAxis <- as.character(meltedtoplot[, "name"])
#unique values of X and Y
theUniqueY <- as.character(unique(theYAxis))
theUniqueX <- as.character(unique(theXAxis))
# Substitute words with position on the meatrix
for (i in 1:length(theUniqueY)){
num <- which(theYAxis == theUniqueY[i])
theYAxis[num] <- i
}
for (i in 1:length(theUniqueX)) {
num <- which(theXAxis == theUniqueX[i])
theXAxis[num] <- i
}
# New package heatmaply here
type <- unique(meltedtoplot[,'type'])
num <- nrow(meltedtoplot[meltedtoplot[,'type'] == type[1],])
heat_data <- matrix(data = 0, nrow =num,ncol = 2)
row.names(heat_data) <- meltedtoplot[1:num,1]
colnames(heat_data) <- gsub("_cor","",c(type[1],type[2]))
heat_data[,1] <- meltedtoplot[1:num,3]
heat_data[,2] <- meltedtoplot[-1:-num,3]
if (is.null(palette)){
palette=grDevices::colorRampPalette(c("#D01C8B", "#F1B6DA", "#F7F7F7", "#B8E186", "#4DAC26")) (255)[255:1]
}
if(static == FALSE){
hm <- heatmaply::heatmaply(heat_data,main = "Correlation heatmap",
k_row = treecuts,#k_col = 2,
margins = c(80,5),
dendrogram = "row",
y_axis_font_size ="1px",
colors = palette,
key.title = 'Correlation \n differences',
file=html.file)
hm
if(!is.null(pdf.file)){
hmr <- heatmaply::heatmapr(heat_data,main = "Correlation heatmap",
k_row = treecuts,#k_col = 2,
margins = c(80,5),
dendrogram = "row",
y_axis_font_size ="1px",
colors = palette,
key.title = 'Correlation \n differences' )
#distance = stats::dist(heat_data)
#hcluster = stats::hclust(distance)
#dend1 = stats::as.dendrogram(hcluster)
#dend1 = dendextend::set(dend1, "branches_k_color", k = treecuts)
#dend1 = dendextend::set(dend1, "branches_lwd", c(1,treecuts))
#dend1 = dendextend::ladderize(dend1)
#row_dend <-dend1
row_dend = hmr$rows
grDevices::pdf(file=pdf.file, width=12, height=6.3)
gplots::heatmap.2(heat_data,main = "Correlation \n heatmap",
#k_row = treecuts,#k_col = 2,
#margins = c(80,5),
dendrogram = "row",
#y_axis_font_size ="1px",
col = palette,
density.info = 'none',
key.title = 'Correlation \n differences',
labRow = rep('',nrow(heat_data)),
cexCol = 0.05 + 0.25/log10(ncol(heat_data)),
trace = 'none', Rowv = row_dend)
grDevices::dev.off()
}
return(hm)
}else{
hmr <- heatmaply::heatmapr(heat_data,main = "Correlation heatmap",
k_row = treecuts,#k_col = 2,
margins = c(80,5),
dendrogram = "row",
y_axis_font_size ="1px",
colors = palette,
key.title = 'Correlation \n differences' )
#distance = stats::dist(heat_data)
#hcluster = stats::hclust(distance)
#dend1 = stats::as.dendrogram(hcluster)
#dend1 = dendextend::set(dend1, "branches_k_color", k = treecuts)
#dend1 = dendextend::set(dend1, "branches_lwd", c(1,treecuts))
#dend1 = dendextend::ladderize(dend1)
#row_dend <-dend1
row_dend = hmr$rows
gplots::heatmap.2(heat_data,main = "Correlation \n heatmap",
#k_row = treecuts,#k_col = 2,
#margins = c(80,5),
dendrogram = "row",
#y_axis_font_size ="1px",
col = palette,
density.info = 'none',
key.title = 'Correlation \n differences',
labRow = rep('',nrow(heat_data)),
cexCol = 0.05 + 0.25/log10(ncol(heat_data)),
trace = 'none', Rowv = row_dend)
if(!is.null(pdf.file)){
grDevices::pdf(file=pdf.file, width=12, height=6.3)
gplots::heatmap.2(heat_data,main = "Correlation \n heatmap",
#k_row = treecuts,#k_col = 2,
#margins = c(80,5),
dendrogram = "row",
#y_axis_font_size ="1px",
col = palette,
density.info = 'none',
key.title = 'Correlation \n differences',
labRow = rep('',nrow(heat_data)),
cexCol = 0.05 + 0.25/log10(ncol(heat_data)),
trace = 'none', Rowv = row_dend)
grDevices::dev.off()
}
}
if(!is.null(html.file) & static==TRUE){
hm.html.out <- heatmaply::heatmaply(heat_data,main = "Correlation heatmap",
k_row = treecuts,#k_col = 2,
margins = c(80,5),
dendrogram = "row",
y_axis_font_size ="1px",
colors = palette,
key.title = 'Correlation \n differences',
file=html.file)
}
}
#' Plot correlation heatmap in pdf
#'
#'
#' @param inputResults IntLimResults object (output of ProcessResults())
#' @param top_pairs cutoff of the top pairs, sorted by adjusted p-values, to be plotted (plotting more than 1200 can take some time) (default: 1200)
#' @param viewer whether the plot should be displayed in the RStudio viewer (T) or
#' in Shiny/Knittr (F)
#' @param treecuts number of clusters (of gene-metabolite pairs) to cut the tree into for color-coding
#' @param palette choose an RColorBrewer palette ("Set1", "Set2", "Set3",
#' "Pastel1", "Pastel2", "Paired", etc.) or submit a vector of colors
#' @return a highcharter object
#'
#' @export
CorrHeatmap_pdf <- function(inputResults,viewer=T,top_pairs=1200,treecuts=2, palette = NULL) {
type <- cor <- c()
if(nrow(inputResults@filt.results)==0) {
stop("Make sure you run ProcessResults before making the heatmap")
}
allres <- inputResults@filt.results
if(nrow(allres)>top_pairs) {
allp <- inputResults@filt.results[,"FDRadjPval"]
allres <- allres[order(allp,decreasing=F)[1:top_pairs],]
}
toplot <- data.frame(name=paste(allres[,1],allres[,2],sep=" vs "),
allres[,3:4])
suppressMessages(
meltedtoplot <- tidyr::gather(
toplot,
type,cor,colnames(toplot)[2],colnames(toplot)[3]))
#all possible values of X (type) and Y (name)
theXAxis <- as.character(meltedtoplot[, "type"])
theYAxis <- as.character(meltedtoplot[, "name"])
#unique values of X and Y
theUniqueY <- as.character(unique(theYAxis))
theUniqueX <- as.character(unique(theXAxis))
# Substitute words with position on the meatrix
for (i in 1:length(theUniqueY)){
num <- which(theYAxis == theUniqueY[i])
theYAxis[num] <- i
}
for (i in 1:length(theUniqueX)) {
num <- which(theXAxis == theUniqueX[i])
theXAxis[num] <- i
}
# New package heatmaply here
type <- unique(meltedtoplot[,'type'])
num <- nrow(meltedtoplot[meltedtoplot[,'type'] == type[1],])
heat_data <- matrix(data = 0, nrow =num,ncol = 2)
row.names(heat_data) <- meltedtoplot[1:num,1]
colnames(heat_data) <- gsub("_cor","",c(type[1],type[2]))
heat_data[,1] <- meltedtoplot[1:num,3]
heat_data[,2] <- meltedtoplot[-1:-num,3]
if (is.null(palette)){
palette=grDevices::colorRampPalette(c("#D01C8B", "#F1B6DA", "#F7F7F7", "#B8E186", "#4DAC26")) (255)[255:1]
}
hmr <- heatmaply::heatmapr(heat_data,main = "Correlation heatmap",
k_row = treecuts,#k_col = 2,
margins = c(80,5),
dendrogram = "row",
y_axis_font_size ="1px",
colors = palette,
key.title = 'Correlation \n differences')
#distance = stats::dist(heat_data)
#hcluster = stats::hclust(distance)
#dend1 = stats::as.dendrogram(hcluster)
#dend1 = dendextend::set(dend1, "branches_k_color", k = treecuts)
#dend1 = dendextend::set(dend1, "branches_lwd", c(1,treecuts))
#dend1 = dendextend::ladderize(dend1)
#row_dend <-dend1
row_dend = hmr$rows
gplots::heatmap.2(heat_data,main = "Correlation \n heatmap",
#k_row = treecuts,#k_col = 2,
#margins = c(80,5),
dendrogram = "row",
#y_axis_font_size ="1px",
col = palette,
density.info = 'none',
key.title = 'Correlation \n differences',
labRow = rep('',nrow(heat_data)),
cexCol = 0.05 + 0.25/log10(ncol(heat_data)),
trace = 'none', Rowv = row_dend)
}
#' scatter plot of gene-metabolite pairs (based on user selection)
#'
#' @import magrittr
#' @import highcharter
#'
#' @param inputData IntLimObject output of ReadData() or FilterData()
#' @param stype category to color-code by
##' @param palette choose an RColorBrewer palette ("Set1", "Set2", "Set3",
##' "Pastel1", "Pastel2", "Paired", etc.) or submit a vector of colors
#' @param geneName string of select geneName
#' @param viewer whether the plot should be displayed in the RStudio viewer (T) or
#' in Shiny/Knittr (F)
#' @param metabName string of select metabName
#' @return a highcharter object
#'
#' @examples
#' \dontrun{
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' PlotGMPair(mydata,stype="PBO_vs_Leukemia","DLG4","(p-Hydroxyphenyl)lactic acid")
#'
#' }
#' @export
PlotGMPair<- function(inputData,stype=NULL,geneName,metabName,palette = "Set1",
viewer=T) {
if(is.null(stype)) {
stop("Users must define stype which defines the categories to be compared (e.g. tumor vs non-tumor). This could be the same parameter that was used to run RunIntLim()")
}
if (length(palette) == 2) {
cols <- c(palette)
} else if (length(palette) == 1) {
cols <- RColorBrewer::brewer.pal(3, palette)[1:2]
} else {
stop("palette must either be an RColorBrewer palette or a vector of hex colors of size 2")
}
if (class(inputData) != "MultiDataSet") {
stop("input data is not a MultiDataSet class")
}
incommon <- IntLIM:::getCommon(inputData,stype)
if(is.null(stype)) {
stop("A category to colorcode by (e.g. stype) must be provided")
} else if (length(intersect(colnames(Biobase::pData(inputData[["metabolite"]])),stype))!=1) {
stop(paste0("You provided ",stype, "as your stype variable but it does not exist in your data"))
} else {
mytypes <- incommon$p
}
gene<-incommon$gene
if(length(which(rownames(gene)==geneName))>0) {
sGene<-gene[geneName,]
} else {
stop(paste0("The gene ",geneName," was not found in your data"))
}
metab<-incommon$metab
if(length(which(rownames(metab)==metabName))>0) {
sMetab<-as.numeric(metab[metabName,])
} else {
stop(paste0("The metabolite ",metabName," was not found in your data"))
}
if(length(unique(mytypes))!=2) {
stop(paste0("The group selected, '",stype,"', should only contain two different categories"))
}
mycols <- as.character(mytypes)
mycols[which(mytypes==unique(mytypes)[1])] <- cols[1]
mycols[which(mytypes==unique(mytypes)[2])] <- cols[2]
data<-data.frame(x=sGene,y=sMetab,z=colnames(gene),label=mytypes,color=mycols)
# Get points to draw the lines for each phenotype by hand
uniqtypes=as.character(unique(mytypes))
# Starting with phenotype 1, get min and max x values constrained to the values of y
# The reason we do this, is because the lines do not necessary need to go out to the max or min of x, particularly
# when slopes are really steep (abline does this automatically but not highcharter)
getLinePoints <- function(data,mytypes, uniqtypes, currenttype) {
y=data$y[which(data$label==uniqtypes[currenttype])]; x=data$x[which(data$label==uniqtypes[currenttype])]
min <- min(data$x[which(mytypes==uniqtypes[currenttype])])
max <- max(data$x[which(mytypes==uniqtypes[currenttype])])
m1<-stats::glm(y ~ x)
line1<-data.frame(x=c(max,min),
y=c(stats::predict(m1,data.frame(x=c(max,min)))))
return(data.frame(x=c(max,min), y=c(stats::predict(m1,data.frame(x=c(max,min))))))
}
line1 <- getLinePoints(data,mytypes,uniqtypes,currenttype=1)
line2 <- getLinePoints(data,mytypes, uniqtypes, currenttype=2)
ds <- highcharter::list_parse(data)
#cols=c("blue","pink")
hc <- highcharter::highchart(width = 350, height = 350 ) %>%
highcharter::hc_title(text=paste(geneName,' vs. ', metabName, sep = '')) %>%
highcharter::hc_xAxis(title=list(text=geneName)) %>%
highcharter::hc_yAxis(title=list(text=metabName)) %>%
hc_chart(zoomType = "xy") %>%
highcharter::hc_add_series(data=ds,type="scatter",#col=cols[1],
tooltip = list(headerFormat="",
pointFormat=paste("{point.label}","{point.z}")),
showInLegend=FALSE)
hc <- hc %>%
highcharter::hc_add_series(name = uniqtypes[1],
data=line1,type='line',#name=sprintf("regression line %s",type1),
color = cols[1],enableMouseTracking=FALSE,marker=FALSE) %>%
highcharter::hc_add_series(name = uniqtypes[2],
data=line2,type='line',#name=sprintf("regression line %s",type2),
color = cols[2],enableMouseTracking=FALSE,marker=FALSE)
hc
}
#' 'volcano' plot (difference in correlations vs p-values)
#' of all gene-metabolite pairs
#'
#' @param inputResults IntLimResults object with model results (output of RunIntLim())
#' @param inputData MultiDataSet object (output of ReadData()) with gene expression,
#' @param nrpoints number of points to be plotted in lowest density areas (see 'smoothScatter' documentation for more detail)
#' @param pvalcutoff cutoff of FDR-adjusted p-value for filtering (default 0.05)
#' @param diffcorr cutoff of differences in correlations for filtering (default 0.5)
#' @return a smoothScatter plot
#'
#' @examples
#' \dontrun{
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' myres <- RunIntLim(mydata,stype="PBO_vs_Leukemia")
#' pvalCorrVolcano(inputResults=myres,inputData=mydata)
#' }
#' @export
pvalCorrVolcano <- function(inputResults, inputData,nrpoints=10000,diffcorr=0.5,pvalcutoff=0.05){
if (class(inputData) != "MultiDataSet") {
stop("input data is not a MultiDataSet class")
}
if(class(inputResults) != "IntLimResults") {
stop("input data is not a IntLim class")
}
volc.results <- IntLIM::ProcessResults(inputResults, inputData, diffcorr = 0, pvalcutoff = 1)
volc.table <- volc.results@filt.results
Corrdiff <- volc.table[,4] - volc.table[,3]
pval <- -log10(volc.table$FDRadjPval)
graphics::smoothScatter(x = Corrdiff, pval, xlab = 'Difference in Correlation between Phenotypes',
ylab = '-log10(FDR-adjusted p-value)', nrpoints=nrpoints,
main = 'Volcano Plot')
graphics::abline(h=-log10(pvalcutoff),lty=2,col="blue")
graphics::abline(v=c(diffcorr,-diffcorr),lty=2,col="blue")
}
#' histogram of gene-metabolite pairs
#' depending upon metabolite or gene
#'
#' @param inputResults IntLimResults object with model results (output of RunIntLim() and ProcessResults())
#' @param type 'metabolite' or 'gene'. 'metabolite' set as default
#' @param breaks Number of breaks selected for histogram
#'
#' @examples
#' \dontrun{
#' dir <- system.file("extdata", package="IntLIM", mustWork=TRUE)
#' csvfile <- file.path(dir, "NCItestinput.csv")
#' mydata <- ReadData(csvfile,metabid='id',geneid='id')
#' myres <- RunIntLim(mydata,stype="PBO_vs_Leukemia")
#' myres <- ProcessResults(inputResults=myres,inputData=mydata)
#' HistogramGMPairs(inputResults=myres)
#' }
#' @export
HistogramGMPairs <- function(inputResults, type = 'metabolite', breaks = 50){
x <- inputResults@filt.results
if(is.null(x)){
stop('Please run ProcessResults() before inputting into HistogramGMPairs')
}
if (type == 'metabolite'){
metab.pairs <- data.frame(table(x$metab))
metab.pairs.number <- as.vector(metab.pairs$Freq)
hist(metab.pairs.number, breaks = breaks, main = "Number of gene-metabolite pairs based on metabolite", xlab = 'Gene-metabolite pairs based on metabolite')
}else if (type == 'gene'){
gene.pairs <- data.frame(table(x$gene))
gene.pairs.number <- as.vector(gene.pairs$Freq)
hist(gene.pairs.number, main = "Number of gene-metabolite pairs based on gene", breaks = breaks, xlab = 'Gene-metabolite pairs based on gene')
}else{
stop("Only two valid types: gene or metabolite. Invalid type entered")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.