#' Get de score
#'
#' @param de.df
#' @param top.genes
#' @param upper
#'
#' @return
#' @export
#'
#' @examples
get_de_score <- function(de.df, top.genes, upper=20)
{
gene.score = -log10(de.df[top.genes,"padj"])
gene.score[gene.score > upper] = upper
gene.score = sum(gene.score)
return(gene.score)
}
#' Get de genes sym
#'
#' @param de.genes
#'
#' @return
#' @export
#'
#' @examples
get_de_genes_sym <- function(de.genes)
{
de.genes.sym = de.genes
pairs= names(de.genes)
pairs = as.data.frame(do.call("rbind",strsplit(pairs,"_")))
row.names(pairs) = names(de.genes)
pairs$tmp = paste0(pairs[,2],"_",pairs[,1])
de.genes.sym[pairs$tmp] = de.genes[row.names(pairs)]
for(i in 1:nrow(pairs)){
de.genes.sym[[pairs[i,"tmp"]]]$up.genes = de.genes[[row.names(pairs)[i]]]$down.genes
de.genes.sym[[pairs[i,"tmp"]]]$down.genes = de.genes[[row.names(pairs)[i]]]$up.genes
}
return(de.genes.sym)
}
#' Get de pair
#'
#' @param de.genes
#' @param cl1
#' @param cl2
#'
#' @return
#' @export
#'
#' @examples
get_de_pair<- function(de.genes, cl1, cl2)
{
pair = paste0(cl1, "_", cl2)
if(pair %in% names(de.genes)){
return(de.genes[[pair]])
}
else{
pair = paste0(cl2, "_", cl1)
de = de.genes[[pair]]
tmp = de$up.genes
de$up.genes = de$down.genes
de$down.genes = tmp
return(de)
}
}
#' Find doublet
#'
#' @param cl.df
#' @param cl.sim
#' @param cl.good
#' @param de.genes
#'
#' @return
#' @export
#'
#' @examples
find_doublet <- function(cl.df, cl.sim, cl.good, de.genes=NULL)
{
diag(cl.sim)=0
if(is.null(de.genes)){
stop("Need to specify de.genes")
}
nn = setNames(cl.good[apply(cl.sim[, cl.good],1, function(x){
which.max(x)
})],row.names(cl.sim))
nn.df= data.frame(cl_label = cl.df[names(nn), "cluster_label"],cl.gene.counts = cl.df[names(nn), "gene.counts"],
nn.cl=nn, nn.cl_label = cl.df[as.character(nn),"cluster_label"],nn.cl.gene.counts = cl.df[as.character(nn), "gene.counts"],stringsAsFactors=FALSE)
nn.df$pair = paste0(names(nn), "_",nn)
nn.df$pair.sim = get_pair_matrix(cl.sim, row.names(nn.df), nn.df$nn.cl)
nn.df$up.genes=0
nn.df$up.genes.score = 0
nn.df$max.olap.cl = NA
nn.df$max.olap.genes1 = 0
nn.df$max.olap.score1 = 0
nn.df$max.olap.ratio1 = 0
nn.df$max.olap.genes2 = 0
nn.df$max.olap.score2 = 0
nn.df$max.olap.ratio2 = 0
for(i in 1:nrow(nn.df)){
cl1= row.names(nn.df)[i]
cl2=as.character(nn.df$nn.cl[i])
de = get_de_pair(de.genes, cl1, cl2)
up.genes = head(names(de$up.genes), 50)
nn.df$up.genes[i] = length(up.genes)
nn.df$up.genes.score[i] = sum(pmin(-log10(de$up.genes[up.genes]),20))
up.genes.olap =sapply( setdiff(cl.good, c(cl1,cl2)), function(k){
p = paste0(k,"_",cl2)
tmp.de = get_de_pair(de.genes, k, cl2)
olap.genes= intersect(names(tmp.de$up.genes), up.genes)
olap.num = length(olap.genes)
olap.score= sum(pmin(-log10(de$up.genes[olap.genes]), 20))
c(olap.num, olap.score)
})
cl3 = names(which.max(up.genes.olap[1,]))
nn.df$max.olap.cl[i] = cl3
nn.df$max.olap.genes1[i] = up.genes.olap[1,cl3]
nn.df$max.olap.score1[i] = up.genes.olap[2,cl3]
nn.df$max.olap.ratio1[i] = up.genes.olap[2,cl3]/nn.df$up.genes.score[i]
de1 = get_de_pair(de.genes, cl1, cl3)
up.genes = head(names(de1$up.genes), 50)
up.gene.score= sum(pmin(-log10(de1$up.genes[up.genes]),20))
de2 = get_de_pair(de.genes, cl2, cl3)
olap.genes = intersect(names(de2$up.genes), up.genes)
nn.df$max.olap.genes2[i] = length(olap.genes)
nn.df$max.olap.score2[i] = sum(pmin(-log10(de1$up.genes[olap.genes]),20))
nn.df$max.olap.ratio2[i] = nn.df$max.olap.score2[i]/ up.gene.score
}
nn.df$max.olap.cl_label = cl.df[as.character(nn.df$max.olap.cl),"cluster_label"]
nn.df$olap.cl.sim = get_pair_matrix(cl.sim, nn.df$max.olap.cl, nn.df$nn.cl)
return(nn.df)
}
#' Plot doublet
#'
#' @param norm.dat
#' @param cl
#' @param doublet.df
#' @param de.genes
#' @param all.col
#'
#' @return
#' @export
#'
#' @examples
plot_doublet <- function(norm.dat, cl, doublet.df, de.genes, all.col)
{
for(i in 1:nrow(doublet.df)){
x = as.character(doublet.df[i, "cl"])
y = as.character(doublet.df[i, "cl1"])
z = as.character(doublet.df[i, "cl2"])
tmp.cl = cl[cl %in% c(x, y, z)]
tmp.cl = setNames(factor(as.character(tmp.cl), c(x,y,z)), names(tmp.cl))
tmp=display_cl(tmp.cl, norm.dat, prefix=paste0("doublet.",paste(levels(tmp.cl), collapse="_")), col=all.col, max.cl.size=100, de.genes=de.genes)
}
}
check_triplet <- function(de.genes, cl1,cl2,cl3)
{
de = get_de_pair(de.genes, cl1, cl2)
up.genes.score = head(de$up.genes, 50)
down.genes.score = head(de$down.genes,50)
up.genes.score[up.genes.score > 20] = 20
down.genes.score[down.genes.score > 20] = 20
up.genes = names(up.genes.score)
down.genes = names(down.genes.score)
up.genes.score=sum(up.genes.score)
down.genes.score = sum(down.genes.score)
tmp1.de = get_de_pair(de.genes, cl1, cl3)
tmp2.de = get_de_pair(de.genes, cl3, cl2)
if(tmp1.de$num < 20 | tmp2.de$num < 20 ){
return(NULL)
}
olap.up.genes1 = intersect(names(tmp2.de$up.genes), up.genes)
olap.up.num1 = length(olap.up.genes1)
olap.up.score1 = get_de_truncate_score_sum(de$up.genes[olap.up.genes1])
olap.up.ratio1 = olap.up.score1 / up.genes.score
olap.down.genes1 = intersect(names(tmp1.de$down.genes), down.genes)
olap.down.num1 = length(olap.down.genes1)
olap.down.score1 = get_de_truncate_score_sum(de$down.genes[olap.down.genes1])
olap.down.ratio1 = olap.down.score1 / down.genes.score
up.genes2 = head(names(tmp1.de$up.genes), 50)
up.genes.score2 = get_de_truncate_score_sum(tmp1.de$up.genes[up.genes2])
olap.up.genes2 = intersect(names(up.genes2),de$up.genes)
olap.up.num2 = length(olap.up.genes2)
olap.up.score2 = get_de_truncate_score_sum(tmp1.de$up.genes[olap.up.genes2])
olap.up.ratio2 = olap.up.score2 /up.genes.score2
down.genes2 = head(names(tmp2.de$down.genes), 50)
down.genes.score2 = get_de_truncate_score_sum(tmp2.de$down.genes[down.genes2])
olap.down.genes2 = intersect(down.genes2,names(de$down.genes))
olap.down.num2 = length(olap.down.genes2)
olap.down.score2 = get_de_truncate_score_sum(tmp2.de$down.genes[olap.down.genes2])
olap.down.ratio2 = olap.down.score2 /down.genes.score2
result = list(
cl1=cl1,
cl2=cl2,
up.num = length(up.genes),
down.num = length(down.genes),
olap.num=c(olap.up.num1, olap.down.num1, olap.up.num2, olap.down.num2),
olap.ratio = c(olap.up.ratio1, olap.down.ratio1, olap.up.ratio2, olap.down.ratio2),
olap.score = c(olap.up.score1, olap.down.score1, olap.up.score2, olap.down.score2)
)
result$score = sum(result$olap.score) / sum(c(up.genes.score, down.genes.score, up.genes.score2, down.genes.score2))
return(result)
}
find_doublet_all <- function(de.genes, mc.cores=5, min.genes=100)
{
require(parallel)
if(is.null(de.genes)){
stop("Need to specify de.genes")
}
pairs.df = get_pairs(names(de.genes))
cl = union(pairs.df[,1],pairs.df[,2])
result.list= parallel::pvec(sample(row.names(pairs.df)), function(pairs){
result.list= sapply(pairs, function(p){
de = de.genes[[p]]
if(length(de)==0){
return(NULL)
}
if(de$up.num < min.genes | de$down.num < min.genes){
return(NULL)
}
cl1 = pairs.df[p,1]
cl2 = pairs.df[p,2]
results = sapply(setdiff(as.character(cl),c(cl1,cl2)), function(cl3){
result = check_triplet(de.genes, cl1,cl2,cl3)
return(result)
},simplify=F)
if(is.null(results) | length(results)==0){
return(NULL)
}
test.score=unlist(sapply(results, function(x)x$score))
tmp = names(which.max(test.score))
result = results[[tmp]]
result$cl = tmp
return(result)
},simplify=F)
},mc.cores=mc.cores)
result.list = result.list[!sapply(result.list, is.null)]
if(is.null(result.list)| length(result.list)==0){
return(NULL)
}
cl = sapply(result.list, function(x)x$cl)
cl1 = sapply(result.list, function(x)x$cl1)
cl2 = sapply(result.list, function(x)x$cl2)
up.num = sapply(result.list, function(x)x$up.num)
down.num = sapply(result.list, function(x)x$down.num)
olap.num.df = t(sapply(result.list, function(x)x$olap.num))
colnames(olap.num.df) = paste0("olap.num.",c("up.1","down.1","up.2","down.2"))
olap.ratio.df = t(sapply(result.list, function(x)x$olap.ratio))
colnames(olap.ratio.df) = paste0("olap.ratio.",c("up.1","down.1","up.2","down.2"))
score = sapply(result.list, function(x)x$score)
result.df = data.frame(cl=cl, cl1=cl1, cl2=cl2, score=score, up.num=up.num, down.num=down.num, as.data.frame(olap.num.df), as.data.frame(olap.ratio.df))
return(result.df)
}
find_low_quality <- function(de.genes=NULL, de.score.mat=NULL,low.th = 2)
{
library(dplyr)
if(is.null(de.score.mat)){
de.score.mat <- get_de_matrix(de.genes,
directed = TRUE,
field = "num")
}
de.score.mat.df = as.data.frame(as.table(de.score.mat))
de.score.mat.df = de.score.mat.df %>% filter(Freq < low.th & Var1!=Var2)
colnames(de.score.mat.df) = c("cl.low","cl", "up.genes")
return(de.score.mat.df)
}
#' Plot cl low
#'
#' @param norm.dat
#' @param cl
#' @param low.df
#' @param de.genes
#' @param all.col
#'
#' @return
#' @export
#'
#' @examples
plot_low_qc<- function(norm.dat, cl, low.df, de.genes, all.col)
{
for(i in 1:nrow(low.df)){
tmp.cl = droplevels(cl[cl %in% as.character(unlist(low.df[i,1:2]))])
tmp=display_cl(tmp.cl, norm.dat, prefix=paste(levels(tmp.cl), collapse="_"), col=all.col, max.cl.size=100, de.genes=de.genes)
}
}
find_doublet_by_marker <- function(cl.means,markers=list(Neuron=c("Slc32a1","Slc17a7","Slc17a6"),Olig=c("Sox10","Opalin"),Astro="Aqp4",Endo="Ly6c1",VLMC="Slc6a13",Peri="Kcnj8",SMC="Acta2",Micro="C1qc"),th=3.5)
{
library(matrixStats)
cl.val=sapply(markers, function(x){
colMaxs(cl.means[x,,drop=F])
})
doublet = rowSums(cl.val > th) > 1
return(t(cl.means[unlist(markers),doublet]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.