#' Sample PCA plot for RNASeq
#' @description PCA plots of samples for DE analysis
#'
#' @param expressionMatrix Data frame with variance-stabilized expression data (in DESeq would be obtained by counts(dds, normalized = TRUE))
#' @param numberOfPCs Integer value for number of principal components
#' @param clusters User has the option of choosing how many clusters they want (k Means)
#' @param autoClustering Boolean. If TRUE the function will determine the optimal number of k means
#' @param colorBy User has "sample" and "cluster" options to determine which colors the plot
#'
#' @return List with 4 plots, `variancePlot` has cumulative and individual importance of each PC, `pcaGRid` is a grid of PCA plots, `pca3dPlot` has 3D PCA, and `clusteringPlot` is is the plot generated by eclust on the first 2 PCs
#' @author Felipe Flores
#' @import tidyverse GGally plotly
#'
#' @export
samplePCA <- function(
expressionMatrix,
numberOfPCs = 2,
clusters = 1,
autoClustering = FALSE,
colorBy = "sample")
{
returnList <- list()
expressionMatrix <- expressionMatrix[which(apply(expressionMatrix, 1, var)!=0), ]
pca <- stats::prcomp(t(expressionMatrix),center=TRUE,scale=TRUE)
pcaImportanceDF <- pca %>%
broom::tidy(matrix="pcs") %>%
dplyr::rename(.,Individual=percent,Cumulative=cumulative) %>%
tidyr::gather(key,value,Individual,Cumulative)
importancePlot <- pcaImportanceDF %>%
ggplot2::ggplot(aes(x=PC,y=value,group=key,color=key))+
theme_bw()+
geom_line()+
geom_point()+
labs(title="Importance of PCs",y="Percent of Variance explained")+
theme(plot.title = element_text(hjust=0.5), legend.title = element_blank())
pcaDF <- as_data_frame(pca$x,rownames = "Sample")
pca3dPlot <- pcaDF %>%
plotly::plot_ly(x = ~PC1, y = ~PC2, z = ~PC3, text = ~Sample, mode="marker+text")
# Code has not changed from the first couple plots up to here
# Case 1: If either the user specifies a number of clusters or automatic clustering, then
# this part of code is triggered
if (clusters != 1 || autoClustering) {
# Using factoextra's eclust to find clusters
pdf(NULL)
# This is just to suppress the graphical output for the moment, but we wanna save this plot later
eclustering <- factoextra::eclust(
t(expressionMatrix),
FUNcluster = "kmeans",
k=clusters,
k.max=min(10,nrow(t(expressionMatrix))-1)
)
dev.off()
# We customize the clustering plot from eclust a little bit:
clusteringPlot <- eclustering$clust_plot +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))
returnList <- list(clusteringPlot = clusteringPlot)
# Join the dataframe with the clustering information as a tibble
pcaDF <- pcaDF %>%
dplyr::right_join(dplyr::data_frame(
Sample=names(eclustering$cluster),
Cluster=LETTERS[eclustering$cluster]
))
pca3dPlot <- pca3dPlot %>%
plotly::add_trace(data = pcaDF, color = ~Cluster, colors = "Paired", showlegend = FALSE)
# Notice I converted the clusters to letters instead of numbers.
# ggpairs can deal with this categorical data much better.
# Factors could also have been used but meh /shrug
# Generating the grid plot with clusters for colors and samples for shapes
# This part is added because ggpairs allows up to 6 different shapes
# Maybe in the future they'll implement a feature to increment them
# Any more than that and the data points will disappear!
# In such a case, we flip the shape and cluster parameters
if (ncol(expressionMatrix)>6){
graphingParameters <- list(mapping=aes(shape=Cluster,col=Sample))
} else{
# graphingParameters <- list(mapping=aes(col=Cluster,shape=Sample))
switch (colorBy,
"cluster" = {graphingParameters <- list(mapping=aes(col = Cluster, shape = Sample))},
"sample" = {graphingParameters <- list(mapping=aes(col = Sample, shape = Cluster))}
)
}
}
else { # Case 2: User wants no clustering
graphingParameters <- list(mapping=aes(col=Sample))
}
graphingParameters <- c(
graphingParameters, # aes() parameters from the nested if's
list(
data=pcaDF,
columns=2:(numberOfPCs+1),
lower=list(continuous=GGally::wrap("points",size=3)),
upper=NULL,
legend = c(2,1)
)
)
pcaPlot <- do.call(GGally::ggpairs,graphingParameters) # Generate the plot with these parameters
# Add a few details:
pcaPlot <- pcaPlot + theme_bw() +
labs(title="Principal Components")+
theme(plot.title=element_text(hjust=0.5))
# And return them to the user!
returnList <-
c(returnList,
list(
importancePlot = importancePlot,
pcaPlot = pcaPlot,
pca3dPlot = pca3dPlot
)
)
return(returnList)
} # end of function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.