#' compute_factor_genes
#'
#' Compute a list of genes by divergence between two datasets in a liger object.
#' This is done by computing the difference between dataset specific factor loadings.
#' @param liger_obj A fully processed liger object
#' @import ggrepel
#' @import gridExtra
#' @import ggpubr
#' @keywords iNMF
#' @export
#' @examples
#' factor.genes <- compute_factor_genes(a.NucSeq)
compute_factor_genes <- function(liger_obj){
# initialize empty df to store output
factor_df <- data.frame(gene = "", diff = 0, factor_num = 0)
# take difference between dataset specific factor loadings
diff.V <- liger_obj@V[[1]] - liger_obj@V[[2]]
# for each factor, populate the output df with diff.V info
for(i in 1:dim(diff.V)[1]){
df = data.frame(gene=liger_obj@var.genes, diff=diff.V[i,])
df$factor_num <- i
factor_df <- rbind(df, factor_df)
}
factor_df$dataset <- ifelse(factor_df$diff > 0.0, "Control", "Case")
return(factor_df)
}
#' compute_shared_genes
#'
#' Compute a list of genes shared two datasets in a liger object.
#' This is done by computing the difference between dataset specific factor loadings.
#' @param liger_obj A fully processed liger obj
#' @keywords iNMF
#' @export
#' @examples
#' shared.genes <- compute_shared_genes(a.NucSeq)
compute_shared_genes <- function(liger_obj){
# initialize empty df to store output
shared <- data.frame(gene = "", shared = 0, factor_num = 0)
# for each factor, populate the output df with diff.V info
for(i in 1:dim(liger_obj@W)[1]){
df = data.frame(gene=liger_obj@var.genes, shared=liger_obj@W[i,])
df$factor_num <- i
shared <- rbind(df, shared)
}
return(shared)
}
#' plot_factor_genes
#'
#' Plot a list of genes by divergence between two datasets in a liger object.
#' This is done by computing the difference between dataset specific factor loadings.
#' @param liger_obj A fully processed liger object
#' @import ggrepel
#' @import Seurat
#' @import ggpubr
#' @keywords iNMF
#' @export
#' @examples
#' factor.genes <- compute_factor_genes(a.NucSeq)
plot_factor_genes <- function(factor.genes, shared.genes, marker.genes, label.markers=T, n.label=5, max.shared.factors=2){
plot_list <- list()
shared_plot_list <- list()
for(i in 1:max(factor.genes$factor_num)){
print(i)
# get top marker genes in this factor for control and case:
factor.markers.control <- subset(marker.genes[[1]], factor_num==i)
factor.markers.case <- subset(marker.genes[[3]], factor_num==i)
# filter by max.shared.factors for each marker gene:
fator.markers.control <- subset(factor.markers.control, gene %in% names(marker.genes[[4]][marker.genes[[4]] <= max.shared.factors]))
fator.markers.case <- subset(factor.markers.case, gene %in% names(marker.genes[[5]][marker.genes[[5]] <= max.shared.factors]))
# get genes for this factor only. Order based on diff.
factor.diff <- subset(factor.genes, factor_num==i & abs(diff) > 0)
factor.diff <- factor.diff[order(as.numeric(factor.diff$diff), decreasing=T),]
rownames(factor.diff) <- 1:length(rownames(factor.diff))
# get shared genes for this factor only, order based on factor loadings:
factor.shared <- subset(shared.genes, factor_num==i & abs(shared) > 0)
factor.shared <- factor.shared[order(as.numeric(factor.shared$shared), decreasing=T),]
rownames(factor.shared) <- 1:length(rownames(factor.shared))
factor.shared.genes <- as.character(head(factor.shared$gene,n.label*2))
# do we label marker genes or the top factor genes?
if(label.markers){
control.genes <- as.character(head(factor.markers.control$gene,n.label))
case.genes <- as.character(head(factor.markers.case$gene,n.label))
}
else{
control.genes <- as.character(head(factor.diff$gene,n.label))
case.genes <- as.character(tail(factor.diff$gene,n.label))
}
labs <- c(control.genes, case.genes)
# plotting factor genes
p1 <- ggplot(factor.diff, aes(x=seq(1, length(factor.diff$diff)), y=diff)) +
geom_point() +
geom_hline(yintercept=0, linetype="dashed", color="red") +
geom_label_repel(
aes(label = ifelse(factor.diff$gene %in% labs, as.character(gene), '')),
size=3,
box.padding = unit(0.5, "lines"),
point.padding = unit(0.5, "lines"),
fill=ifelse(factor.diff$dataset == "Case", rgb(232,125,114, maxColorValue=255), rgb(85,188,194, maxColorValue=255)), # case vs control
color="white", segment.color="black"
)+
theme_bw() +
xlab("genes") +
ylab(bquote(~V[control] - ~V[case])) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ggtitle(paste("Factor",i,"condition specific genes"))
p2 <- ggplot(factor.shared, aes(x=seq(1, length(factor.shared$shared)), y=shared)) +
geom_point() +
geom_hline(yintercept=0, linetype="dashed", color="red") +
geom_label_repel(
aes(label = ifelse(factor.shared$gene %in% factor.shared.genes, as.character(gene), '')),
size=3,
fill="darkgreen", color="white", segment.color="black"
)+
theme_bw() +
xlab("genes") +
ylab('W') +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank()) +
ggtitle(paste("Factor",i,"shared genes"))
plot_list[[i]] <- CombinePlots(list(p1,p2), ncol=2)
}
return(plot_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.