#-------------------------------------------------
#' Draw PCA
#'
#' This function generates PCA plots based on data
#'
#' @param eset an ExpressionSet object with omcis data
#' @param type Type of data to be processed, or name for the Omics set.
#' @param outputpath output file path for plots
#'
#' @return a pca plot
#'
#' @examples
#' drawPCA(eset)
#'
#' @import ggplot2
#' @import Biobase
#' @export
drawPCA <- function(eset, x_axis="PC1", y_axis="PC2", type, outputpath=output_plots_path,
outputfile=output_files_path, show_sample_names=TRUE, .species=species) {
# perform PC analysis
data=t(exprs(eset))
if(any(dim(data)==0))
return(NULL)
PC_data <- stats::prcomp(data)
percent_variance <- summary(PC_data)$importance["Proportion of Variance",]*100
# Make colors for groups
make_colors<-TRUE;
if ("ColorsHex" %in% colnames(pData(eset)) ){ if(checkColor(pData(eset)[,"ColorsHex"])){
colors_dots <- c(unique(as.character(pData(eset)[,"ColorsHex"])))
names(colors_dots) <- unique(pData(eset)[,"Group"])
make_colors<-FALSE;
} }
# Make PCA plot
basic_theme <- theme(legend.position="none", axis.text=element_blank(), axis.line=element_blank(), axis.ticks=element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border=element_blank(),
axis.title=element_text(size=8) )
pca_graph <- ggplot(data=as.data.frame(PC_data$x), aes(x=PC_data$x[,x_axis], y=PC_data$x[,y_axis]))+
{ if( "Group2" %in% colnames(pData(eset)) ) { geom_point(aes(shape=pData(eset)$Group2,colour=pData(eset)$Group)); }} +
{ if( "Group2" %in% colnames(pData(eset)) ) { scale_shape_manual(values=1:length(unique(pData(eset)$Group2))) } else { geom_point(aes(colour=pData(eset)$Group)); }} +
theme_bw() + theme(legend.title=element_blank()) +
{ if (!make_colors) scale_color_manual(values=colors_dots); } +
labs(title=paste("PCA: \n ",type,sep=""),
x=paste(x_axis, sprintf(" (%2.0f%%)", percent_variance[x_axis]), sep=""),
y=paste(y_axis, sprintf(" (%2.0f%%)", percent_variance[y_axis]), sep=""));
gg_dist_1 <- ggplot(data=as.data.frame(PC_data$x), aes(x=PC_data$x[,x_axis], fill=pData(eset)$Group)) +
geom_density(alpha=0.4, size=0.2) + ylab(paste(x_axis, "Density", sep="\n") ) + theme_bw() + basic_theme
gg_dist_2 <- ggplot(data=as.data.frame(PC_data$x), aes(x=PC_data$x[,y_axis], fill=pData(eset)$Group)) +
geom_density(alpha=0.4, size=0.2) + ylab(paste(y_axis, "Density", sep="\n") ) + theme_bw() + basic_theme
try({
pca_graph2 <- ggplot(data=as.data.frame(PC_data$x), aes(x=PC_data$x[,y_axis], y=PC_data$x[,"PC3"]))+
{ if( "Group2" %in% colnames(pData(eset)) ) { geom_point(aes(shape=pData(eset)$Group2,colour=pData(eset)$Group)); }} +
{ if( "Group2" %in% colnames(pData(eset)) ) { scale_shape_manual(values=1:length(unique(pData(eset)$Group2))) } else { geom_point(aes(colour=pData(eset)$Group)); }} +
theme_bw() + theme(legend.title=element_blank()) +
{ if (!make_colors) scale_color_manual(values=colors_dots); } +
labs(title=paste("PCA: \n ",type,sep=""),
x=paste(y_axis, sprintf(" (%2.0f%%)", percent_variance[y_axis]), sep=""),
y=paste("PC3", sprintf(" (%2.0f%%)", percent_variance["PC3"]), sep=""));
gg_dist_3 <- ggplot(data=as.data.frame(PC_data$x), aes(x=PC_data$x[,"PC3"], fill=pData(eset)$Group)) +
geom_density(alpha=0.4, size=0.2) + ylab(paste("PC3", "Density", sep="\n") ) + theme_bw() + basic_theme
})
# Identify most variable hits for loadings graph
num_hits = min(12,nrow(PC_data$rotation))
sig_hits <- c( rownames(PC_data$rotation[order(PC_data$rotation[,x_axis], decreasing=T)[1:num_hits],]),
rownames(PC_data$rotation[order(PC_data$rotation[,x_axis], decreasing=F)[1:num_hits],]),
rownames(PC_data$rotation[order(PC_data$rotation[,y_axis], decreasing=T)[1:num_hits],]),
rownames(PC_data$rotation[order(PC_data$rotation[,y_axis], decreasing=F)[1:num_hits],]) )
sig_data <- data.frame(PC_data$rotation[sig_hits,])
suppressWarnings({
pca_loading_graph <- ggplot(data=as.data.frame(PC_data$rotation), aes(x=(PC_data$rotation[,x_axis]), y=PC_data$rotation[,y_axis])) +
geom_point(colour="grey", size=0.7) +
ggrepel::geom_text_repel(data=sig_data, aes(x=sig_data[,x_axis], y=sig_data[,y_axis]),label=rownames(sig_data), colour="black", size=2) +
theme_bw() + theme(legend.title=element_blank()) +
labs(x=paste(x_axis, sep=""), y=paste(y_axis, sep=""),
title=paste("PC Factor Loadings \n",type, sep=""))
# Graph output
output_filename <- file.path(outputpath, paste("PCAplots_",type,".pdf",sep=""))
pdf(output_filename, width=3.5, height=3.5)
print(pca_graph+theme(legend.position="none") )
print(pca_graph+theme(legend.position="none")+ggrepel::geom_text_repel(aes(x=PC_data$x[,x_axis], y=PC_data$x[,y_axis], label=rownames(PC_data$x)), colour="black", size=3) )
gridExtra::grid.arrange(g_legend(pca_graph) )
try({
print(pca_graph2+theme(legend.position="none") )
print(pca_graph2+theme(legend.position="none")+ggrepel::geom_text_repel(aes(x=PC_data$x[,"PC2"], y=PC_data$x[,"PC3"], label=rownames(PC_data$x)), colour="black", size=3) )
})
print(pca_loading_graph)
plot(PC_data, type = 'l', main=paste("PCs vs. Variance",sep=""))
})
piece1 <- gg_dist_1 + theme(axis.title.x=element_blank(), plot.margin=unit(c(0.5, -0.3, 0, 0.6), "cm") )
piece2 <- gg_dist_2 + theme(axis.title.y=element_blank(), plot.margin=unit(c(-0.3, 0.2, 0.35, 0), "cm") ) + coord_flip()
piece3 <- pca_graph+theme(legend.position="none",plot.title=element_blank(), plot.margin=unit(c(0,0,0.5,0.5), "cm") )
print(cowplot::plot_grid( cowplot::plot_grid( piece1, piece3, ncol=1, rel_heights=c(1,4)),
cowplot::plot_grid(NULL, piece2, ncol=1, rel_heights=c(1,4)),
ncol=2, rel_widths=c(4,1)) )
piece1 <- gg_dist_2 + theme(axis.title.x=element_blank(), plot.margin=unit(c(0.5, -0.3, 0, 0.6), "cm") )
piece2 <- gg_dist_3 + theme(axis.title.y=element_blank(), plot.margin=unit(c(-0.3, 0.2, 0.35, 0), "cm") ) + coord_flip()
piece3 <- pca_graph2+theme(legend.position="none",plot.title=element_blank(), plot.margin=unit(c(0,0,0.5,0.5), "cm") )
print(cowplot::plot_grid( cowplot::plot_grid( piece1, piece3, ncol=1, rel_heights=c(1,4)),
cowplot::plot_grid(NULL, piece2, ncol=1, rel_heights=c(1,4)),
ncol=2, rel_widths=c(4,1)) )
dev.off()
# Save analysis
write.table(data.frame("Feature"=rownames(PC_data$rotation),PC_data$rotation),
file=file.path(outputfile, paste("PCA_loadings_", type,".txt", sep="")), quote=F, sep="\t", row.names = F )
# Run enrichment based on top 3 PCs
if( .species!="Other" & ("Gene" %in% colnames(fData(eset))) ) { try({
gsea_working_dir <- "PCA_Loading_GSEA"
gsea_working_path <- file.path(outputfile, gsea_working_dir)
if( dir.exists(gsea_working_path) == FALSE ) { dir.create(gsea_working_path) }
dest_gmt_file <- fetchGMT(outputfile, .species)
analysis_names <- c(paste(type, "PC1", sep="_"), paste(type, "PC2", sep="_"), paste(type, "PC3", sep="_") )
for (i in 1:length(analysis_names)){
analysis_name <- analysis_names[[i]]
ranked_vector <- PC_data$rotation[,i]
names(ranked_vector) <- fData(eset)[,"Gene"]
pathways_gmt <- fgsea::gmtPathways(dest_gmt_file)
suppressWarnings({
fgsea_results <- fgsea::fgsea(pathways=pathways_gmt, stats=ranked_vector, minSize=15, maxSize=500, nperm=1000)
})
fgsea_out <- fgsea_results[,c(1,1,2,3)]
colnames(fgsea_out) <- c("Term", "Description", "p.Val", "FDR")
fgsea_out[,"Phenotype"] <- sign(fgsea_results[,"NES"])
fgsea_out[,"Genes"] <- apply(fgsea_results[,"leadingEdge"], 1, function(x) paste(as.character(unlist(x["leadingEdge"])), collapse=","))
fgsea_out[,"Genes"] <- apply(fgsea_out, 1, function(x) gsub(" ", "", x["Genes"]))
fgsea_out[,"NES"] <- (fgsea_results[,"NES"])
fgsea_out[,"ES"] <- (fgsea_results[,"ES"])
fgsea_out[,"Gene_Hits"] <- apply(fgsea_results, 1, function(x) length(unlist(x["leadingEdge"])) )
fgsea_out[,"Gene_Total"] <- fgsea_results[,"size"]
fgsea_out <- fgsea_out[order(fgsea_out[,"p.Val"], decreasing=F)]
output_filename <- file.path(gsea_working_path, paste("fgsea_", analysis_name, "_loadings.txt", sep=""))
write.table(fgsea_out, file=output_filename, sep="\t", row.names=FALSE, quote=FALSE);
}
}, silent=T) }
return(pca_graph)
}
#-------------------------------------------------
#' Draw UMAP
#'
#' This function generates UMAP plots
#'
#' @param eset an ExpressionSet object with omcis data
#' @param type Type of data to be processed, or name for the Omics set.
#' @param outputpath output file path for plots
#'
#' @return a plot
#'
#' @examples
#' drawUMAP(eset)
#'
#' @import ggplot2
#' @import Biobase
#' @export
drawUMAP <- function(eset, type, outputpath=output_plots_path) {
data <- t(exprs(eset))
data.labels <- pData(eset)$Group
use_size <- 15
if(length(data.labels)<15){
use_size <- ( length(data.labels)/2 )
}
data.umap <- uwot::umap(data, n_neighbors= use_size )
# Make colors for groups
make_colors<-TRUE;
if ("ColorsHex" %in% colnames(pData(eset)) ){ if(checkColor(pData(eset)[,"ColorsHex"])){
colors_dots <- c(unique(as.character(pData(eset)[,"ColorsHex"])))
names(colors_dots) <- unique(pData(eset)[,"Group"])
make_colors<-FALSE;
} }
# Make plot
umap_graph <- ggplot(data=as.data.frame(data.umap), aes(x=data.umap[,1], y=data.umap[,2]))+
{ if( "Group2" %in% colnames(pData(eset)) ) { geom_point(aes(shape=pData(eset)$Group2,colour=pData(eset)$Group)); } } +
{ if( "Group2" %in% colnames(pData(eset)) ) { scale_shape_manual(values=1:length(unique(pData(eset)$Group2))) } else { geom_point(aes(colour=pData(eset)$Group)); }} +
theme_bw() + theme(legend.title=element_blank()) +
{ if (!make_colors) scale_color_manual(values=colors_dots); } +
labs(title=paste("UMAP: \n ",type,sep="") ) + xlab("UMAP1") + ylab("UMPA2") ;
# Graph output
output_filename <- file.path(outputpath, paste("UMAPplot_",type,".pdf",sep=""))
pdf(output_filename, width=3.5, height=3.5)
print(umap_graph+theme(legend.position = "none"))
gridExtra::grid.arrange(g_legend(umap_graph) )
dev.off()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.