#' Perform robust PCA (Hubert PCA)
#'
#' This function performs the variant of robust PCA developed by Mia Hubert et al. on a
#' matrix of counts
#'
#' @param data Log-transformed gene expression counts, rows = samples; columns = features (genes)
#' @return A list containing the output of PcaHubert, the (default) PC scores, and the PC loadings
do.robpca <- function(data, ncp=10,...){
pca <- rrcov::PcaHubert(data, kmax = ncp, k=ncp)
scores <- rrcov::getScores(pca)
rpca.loadings <- as.data.frame(rrcov::getLoadings(pca))
eigenvalues = rrcov::getEigenvalues(pca)
return(list(scores=scores, loadings=rpca.loadings, eig=eigenvalues))
}
CalcModifiedPCscores <- function(object, loadings, num.sig.genes=30){
ncp <- ncol(loadings)
dims <- colnames(loadings)
pc.sigs <- as.data.frame(do.call(cbind, lapply(dims, function(dim) {
dim.order <- order(loadings[, dim], decreasing = T)
genes.pos <- rownames(loadings)[dim.order[1:num.sig.genes]]
#print(genes.pos)
dim.order <- order(loadings[, dim], decreasing = F)
genes.neg <- rownames(loadings)[dim.order[num.sig.genes:1]]
# generate signature value of top PC +/- correlated genes
# normalize every gene
# Renames colums to PCx.pos, PCx.neg, and PCx for the xth PC
data.pos <- t(as.matrix(object@data[genes.pos, ]))
data.neg <- t(as.matrix(object@data[genes.neg, ]))
data.pos <- data.pos / apply(data.pos, 2, max)
data.neg <- data.neg / apply(data.neg, 2, max)
cast.sig <- as.data.frame(cbind(rowSums(data.pos),
rowSums(data.neg)))
cast.sig$pc.score <- cast.sig[, 1] - cast.sig[, 2]
colnames(cast.sig) <- c(paste0(dim,'.pos'), paste0(dim,'.neg'), dim)
return(as.matrix(cast.sig))
}
)))
pc.sigs <- pc.sigs[object@cell.names, ]
pc.sigs <- as.matrix(pc.sigs[, grepl("PC[0-9]+$", colnames(pc.sigs))])
return(pc.sigs)
}
#' Perform robust PCA (Hubert PCA) with modified PC scores on a Seurat object
#'
#' \code{scRobustPCA} performs the variant of robust PCA developed by Mia Hubert
#' et al. on the gene expression matrix "data" in a Seurat object. Set
#' reduction.type='rpca' in other Seurat functions to use the rPCA results, for
#' example to calculate clusters with FindClusters.
#'
#' @param object Seurat object
#' @param npcs Number of principal components to calculate
#' @param pc.genes Genes as input to PC. If \code{pc.genes==NULL}, the var.genes
#' slot is used. If var.genes is empty and \code{pc.genes==NULL}, then all
#' genes are used.
#' @param use.modified.pcscores If \code{FALSE}, then the raw pc score output
#' from robust PCA is used. If \code{TRUE}, then pc scores are replaced with
#' the sum of the top 30 genes by positive loading minus the sum of the top 30
#' genes by negative loading. Each gene is scaled to a max of 1 and min of 0.
#' @return A Seurat object with the 'rpca' field filled.
#'
#' @examples
#'
#' @export
RunRobPCA <- function(object, npcs=10, pc.genes=NULL, use.modified.pcscores=TRUE){
print('Running rPCA')
if(is.null(pc.genes)){
pc.genes = object@var.genes
} else{
print("Variable genes not found. Using all genes in dataset.")
pc.genes = rownames(object@data)
}
mat.for.pca <- t(as.matrix(object@data[pc.genes, ]))
output <- do.robpca(mat.for.pca, ncp = npcs)
if(use.modified.pcscores){
print('Calculating modified PCs')
pc.sigs <- CalcModifiedPCscores(object = object, loadings = output$loadings, num.sig.genes = 30)
} else{
pc.sigs <- output$scores
}
reduction.name = 'rpca'
gene.loadings = as.matrix(output$loadings)
cell.embeddings = pc.sigs
reduction.key = 'PC'
sdev=numeric(0)
pca.obj <- new(Class = "dim.reduction", gene.loadings = gene.loadings,
cell.embeddings = cell.embeddings, sdev = sdev, key = reduction.key)
eval(expr = parse(text = paste0("object@dr$", reduction.name,
"<- pca.obj")))
parameters.to.store <- as.list(environment(), all = TRUE)[names(formals("RunPCA"))]
object <- Seurat:::SetCalcParams(object = object, calculation = "RunPCA",
... = parameters.to.store)
if (is.null(object@calc.params$RunPCA$pc.genes)) {
object@calc.params$RunPCA$pc.genes <- pc.genes
}
#
print('Adding modified PCs to Seurat object')
object <- Seurat::SetDimReduction(object, 'rpca', 'cell.embeddings', new.data = pc.sigs)
print('Adding rPCA loadings to Seurat object')
object <- Seurat::SetDimReduction(object, 'rpca', 'gene.loadings', new.data = as.matrix(output[[2]]))
object <- Seurat::SetDimReduction(object, 'rpca', 'key', new.data = 'PC')
return(object)
}
#' Plot the results of rPCA with cells colored by their identity class.
#'
#' @param object Seurat object
#' @param ... Additional parameters to DimPlot; for example, which dimensions to
#' plot.
#' @export
RPCAPlot <- function(object,...){
object <- Seurat::SetDimReduction(object, 'pca','cell.embeddings',
new.data = Seurat::GetDimReduction(object=object, reduction.type = 'rpca',slot = 'cell.embeddings'))
Seurat::PCAPlot(object = object)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.