validatePerCellTypeParams <- function(con.obj, groups, sample.groups, ref.level, cluster.sep.chr) {
if (!requireNamespace("DESeq2", quietly = TRUE)) {
stop("You have to install DESeq2 package to use differential expression")
}
if ( class(con.obj) != 'Conos') stop('con.obj must be a conos object')
if ( is.null(groups) ) stop('groups must be specified');
if ( is.null(sample.groups) ) stop('sample.groups must be specified')
if ( class(sample.groups) != 'list' ) stop('sample.groups must be a list');
if ( length(sample.groups) != 2 ) stop('sample.groups must be of length 2');
if ( ! all(unlist(lapply(sample.groups, function(x) class(x) == 'character'))) )
stop('sample.groups must be a list of character vectors');
if ( ! all(unlist(lapply(sample.groups, function(x) length(x) > 0))) )
stop('sample.groups entries must be on length greater or equal to 1')
if ( ! all(unlist(lapply(sample.groups, function(x) {all(x %in% names(con.obj$samples))}))) )
stop('sample.groups entries must be names of samples in the conos object')
if ( is.null(ref.level) ) stop('reference level is not defined')
## todo: check samplegrousp are named
if(is.null(names(sample.groups))) stop('sample.groups must be named')
if(class(groups) != 'factor') stop('groups must be a factor')
if(any(grepl(cluster.sep.chr, names(con.obj$samples),fixed=TRUE)))
stop('cluster.sep.chr must not be part of any sample name')
if(any(grepl(cluster.sep.chr,levels(groups),fixed=TRUE)))
stop('cluster.sep.chr must not be part of any cluster name')
}
validateBetweenCellTypeParams <- function(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr) {
if (!requireNamespace("DESeq2", quietly = TRUE)) {
stop("You have to install DESeq2 package to use differential expression")
}
if (class(con.obj) != 'Conos') stop('con.obj must be a conos object')
if (is.null(groups) ) stop('groups must be specified');
if (is.null(sample.groups) ) stop('sample.groups must be specified')
if (class(sample.groups) != 'list' ) stop('sample.groups must be a list');
#if ( length(sample.groups) != 2 ) stop('sample.groups must be of length 2');
if (!all(unlist(lapply(sample.groups, function(x) class(x) == 'character'))) )
stop('sample.groups must be a list of character vectors');
if (!all(unlist(lapply(sample.groups, function(x) length(x) > 0))) )
stop('sample.groups entries must be on length greater or equal to 1')
if (!all(unlist(lapply(sample.groups, function(x) {all(x %in% names(con.obj$samples))}))) )
stop('sample.groups entries must be names of samples in the conos object')
if (is.null(refgroup)) stop('reference group is not defined')
if (is.null(altgroup)) stop('altgroup is not defined')
## todo: check samplegrousp are named
if(is.null(names(sample.groups))) stop('sample.groups must be named')
if(class(groups) != 'factor') stop('groups must be a factor')
if(any(grepl(cluster.sep.chr, names(con.obj$samples),fixed=TRUE)))
stop('cluster.sep.chr must not be part of any sample name')
if(any(grepl(cluster.sep.chr,levels(groups),fixed=TRUE)))
stop('cluster.sep.chr must not be part of any cluster name')
}
rawMatricesWithCommonGenes <- function(con.obj, sample.groups=NULL) {
samples <- con.obj$samples
if (!is.null(sample.groups)) {
samples <- samples[unlist(sample.groups)]
}
## Generate an aggregated matrix
raw.mats <- lapply(samples, getRawCountMatrix, transposed=T)
common.genes <- Reduce(intersect,lapply(raw.mats, colnames))
return(lapply(raw.mats, function(x) {x[,common.genes]}))
}
collapseCellsByType <- function(cm, groups, min.cell.count=10) {
groups <- as.factor(groups);
cl <- factor(groups[match(rownames(cm),names(groups))],levels=levels(groups));
tc <- colSumByFactor(cm,cl);
tc <- tc[-1,,drop=FALSE] # omit NA cells
tc[table(cl)>=min.cell.count,,drop=FALSE]
}
adjustMatrixRownames <- function(name, cm, cluster.sep.chr) {rownames(cm) <- paste0(name, cluster.sep.chr, rownames(cm)); return(cm)}
rbindDEMatrices <- function(mats, cluster.sep.chr) {
mats <- lapply(names(mats), function(n) {
rownames(mats[[n]]) <- paste0(n, cluster.sep.chr, rownames(mats[[n]]));
return(mats[[n]])
})
return(t(do.call(rbind, mats)))
}
strpart <- function (x, split, n, fixed = FALSE) {
sapply(strsplit(as.character(x), split, fixed = fixed), "[", n)
}
is.error <- function (x) {
inherits(x, c("try-error", "error"))
}
#' Do differential expression for each cell type in a conos object between the specified subsets of apps
#' @param con.obj conos object
#' @param groups factor specifying cell types
#' @param sample.groups a list of two character vector specifying the app groups to compare
#' @param cooks.cutoff cooksCutoff for DESeq2
#' @param independent.filtering independentFiltering for DESeq2
#' @param n.cores number of cores
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @param return.details return detals
#' @export getPerCellTypeDE
getPerCellTypeDE <- function(con.obj, groups=NULL, sample.groups=NULL, cooks.cutoff = FALSE, ref.level = NULL, min.cell.count = 10,
independent.filtering = FALSE, n.cores=1, cluster.sep.chr = '<!!>',return.details=TRUE) {
validatePerCellTypeParams(con.obj, groups, sample.groups, ref.level, cluster.sep.chr)
## Generate a summary dataset collapsing the cells of the same type in each sample
## and merging everything in one matrix
aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count) %>%
rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
gc()
## For every cell type get differential expression results
de.res <- papply(sn(levels(groups)), function(l) {
tryCatch({
## Get count matrix
cm <- aggr2[,strpart(colnames(aggr2),cluster.sep.chr,2,fixed=TRUE) == l]
## Generate metadata
meta <- data.frame(
sample.id= colnames(cm),
group= as.factor(unlist(lapply(colnames(cm), function(y) {
y <- strpart(y,cluster.sep.chr,1,fixed=TRUE)
names(sample.groups)[unlist(lapply(sample.groups,function(x) any(x %in% y)))]
})))
)
if (!ref.level %in% levels(meta$group))
stop('The reference level is absent in this comparison')
meta$group <- relevel(meta$group, ref=ref.level)
if (length(unique(as.character(meta$group))) < 2)
stop('The cluster is not present in both conditions')
dds1 <- DESeq2::DESeqDataSetFromMatrix(cm,meta,design=~group)
dds1 <- DESeq2::DESeq(dds1)
res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
res1 <- as.data.frame(res1)
res1 <- res1[order(res1$padj,decreasing = FALSE),]
##
if(return.details) {
list(res=res1, cm=cm, sample.groups=sample.groups)
} else {
res1
}
}, error=function(err) {warning("Error for level ", l, ": ", err$message); return(NA)})
}, n.cores=n.cores)
de.res
}
#' Do differential expression for each cell type in a conos object between the specified subsets of apps
#' applying the specified correction vector
#' @param con.obj conos object
#' @param groups factor specifying cell types
#' @param sample.groups a named list of two character vectors specifying the app groups to compare
#' @param cooks.cutoff cooksCutoff for DESeq2
#' @param independent.filtering independentFiltering for DESeq2
#' @param n.cores number of cores
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @param correction correction vector obtained from getCorrectionVector
#' @export getPerCellTypeDECorrected
getPerCellTypeDECorrected <- function(con.obj, groups=NULL, sample.groups=NULL, cooks.cutoff = FALSE,
independent.filtering = FALSE, n.cores=1,cluster.sep.chr = '<!!>',
correction=NULL, return.details=TRUE, ref.level=NULL) {
validatePerCellTypeParams(con.obj, groups, sample.groups, ref.level, cluster.sep.chr)
## Generate a summary dataset collapsing the cells of the same type in each sample
## and merging everything in one matrix
aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
lapply(function(x) Matrix.utils::aggregate.Matrix(x, groups[intersect(names(groups), rownames(x))])) %>%
rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
gc()
## For every cell type get differential expression results
de.res <- papply(sn(levels(groups)), function(l) {
try({
## Get count matrix
cm <- aggr2[,strpart(colnames(aggr2),cluster.sep.chr,2,fixed=TRUE) == l]
## Generate metadata
meta <- data.frame(
sample.id= colnames(cm),
group= as.factor(unlist(lapply(colnames(cm), function(y) {
y <- strpart(y,cluster.sep.chr,1,fixed=TRUE)
names(sample.groups)[unlist(lapply(sample.groups,function(x) any(x %in% y)))]
})))
)
meta$group <- relevel(meta$group, ref=ref.level)
if (length(unique(as.character(meta$group))) < 2)
stop('The cluster is not present in both conditions')
dds1 <- DESeq2::DESeqDataSetFromMatrix(cm,meta,design=~group)
dds1 <- DESeq2::estimateSizeFactors(dds1)
sf <- DESeq2::sizeFactors(dds1)
if(!(all(rownames(cm) %in% names(correction)) & all(names(correction) %in% rownames(cm))))
stop('incompatible matrices')
nf.tmp <- matrix(rep(sf, nrow(cm)),nrow=nrow(cm),byrow=TRUE)
rownames(nf.tmp) <- rownames(cm);
colnames(nf.tmp) <- colnames(cm)
gene.scale.factors <- 2^(correction[rownames(nf.tmp)])
baselevel <- levels(SummarizedExperiment::colData(dds1)$group)[1]
x <- do.call(cbind, lapply(SummarizedExperiment::colData(dds1)$group, function(x) {
if (x == baselevel) {
rep(1, length(gene.scale.factors))
} else {
gene.scale.factors
}
}))
rownames(x) <- rownames(nf.tmp);
colnames(x) <- colnames(nf.tmp)
nf.tmp <- nf.tmp * x
x2 <- plyr::aaply(nf.tmp, 1, function(x) {x / exp(mean(log(x)))})
DESeq2::normalizationFactors(dds1) <- x2
dds1 <- DESeq2::DESeq(dds1)
res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
res1 <- as.data.frame(res1)
res1 <- res1[order(res1$padj,decreasing = FALSE),]
if (return.details) {
list(res=res1,cm=cm,sample.groups=sample.groups)
} else {
res1
}
})
}, n.cores=n.cores)
return(de.res)
}
#' Save differential expression as CSV table
#' @param de.results output of differential expression results, corrected or uncorrected
#' @param saveprefix prefix for output file
#' @param data.frame for gene metadata
#' @export saveDEasCSV
saveDEasCSV <- function(de.results=NULL,saveprefix=NULL,gene.metadata=NULL) {
if(is.null(de.results)) stop('de.results has not been specified')
if(is.null(saveprefix)) stop('saveprefix has not bee specified')
## find errors
n.error <- sum(unlist(lapply(de.results,is.error)))
if(n.error > 0) {
cat("Warning: ", n.error, " of ", length(de.results), ' results have returned an error; ignoring...\n')
}
de.results <- de.results[!unlist(lapply(de.results,is.error))]
##
x <- lapply(namedNames(de.results), function(ncc) {
res.celltype <- de.results[[ncc]]
res.table <- as.data.frame(res.celltype$res)
## append gene names
res.table$gene <- rownames(res.table)
## append singificance
res.table$significant <- res.table$padj < 0.05
res.table$log2FoldChange[is.na(res.table$log2FoldChange)] <- 0
## Append Z scores and rowid
res.table$Z <- qnorm(1 - (res.table$pval/2))
res.table$Z[is.na(res.table$Z)] <- 0
res.table$Za <- qnorm(1 - (res.table$padj/2))
res.table$Za[is.na(res.table$Za)] <- 0
res.table$Z <- res.table$Z * sign(res.table$log2FoldChange)
res.table$Za <- res.table$Za * sign(res.table$log2FoldChange)
if(!is.null(gene.metadata)) {
## match order to metadata table
mo <- match(as.character(gene.metadata$geneid),as.character(res.table$gene))
## drop gene id column
keep.cols <- colnames(gene.metadata)[colnames(gene.metadata) != 'geneid']
names(keep.cols) <- keep.cols
res.table <- cbind(res.table, gene.metadata[mo,keep.cols,drop=FALSE])
}
file <- paste0(saveprefix,make.names(ncc),'.csv')
write.table(x=res.table,file=file)
res.table
})
invisible(x)
}
#' Save differential expression results as JSON
#' @param de.results differential expression results
#' @param saveprefix prefix for the differential expression output
#' @param gene.metadata data.frame with gene metadata
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @export saveDEasJSON
saveDEasJSON <- function(de.results = NULL, saveprefix = NULL, gene.metadata = NULL, cluster.sep.chr='<!!>') {
## ### DEVEL
## de.results <- all.percl.TvsW
## saveprefix <- 'json/'
## rm(de.results, saveprefix)
## ##
## Check input
if(is.null(de.results)) stop('de.results have not been specified')
if(is.null(saveprefix)) stop('saveprefix has not been specified')
## Find de instances that didn't work (usually because cell type is absent from one or more sample types)
n.error <- sum(unlist(lapply(de.results, is.error)))
if(n.error > 0) {
cat("Warning: ", n.error,' of ', length(de.results) ,' results have returned an error; ignoring...\n')
}
## get the de results that worked
de.results <- de.results[!unlist(lapply(de.results, is.error))]
## Generate structure and save JSON
lapply(namedNames(de.results), function(ncc) {
res.celltype <- de.results[[ncc]]
## Get results table as df
res.table <- as.data.frame(res.celltype$res)
## append gene names
res.table$gene <- rownames(res.table)
## append singificance
res.table$significant <- res.table$padj < 0.05
res.table$log2FoldChange[is.na(res.table$log2FoldChange)] <- 0
## Append Z scores and rowid
res.table$Z <- qnorm(1 - (res.table$pval/2))
res.table$Z[is.na(res.table$Z)] <- 0
res.table$Za <- qnorm(1 - (res.table$padj/2))
res.table$Za[is.na(res.table$Za)] <- 0
res.table$Z <- res.table$Z * sign(res.table$log2FoldChange)
res.table$Za <- res.table$Za * sign(res.table$log2FoldChange)
res.table$rowid <- 1:nrow(res.table)
if (!is.null(gene.metadata)) {
## match order to metadata table
mo <- match(as.character(gene.metadata$geneid),as.character(res.table$gene))
## drop gene id column
keep.cols <- colnames(gene.metadata)[colnames(gene.metadata) != 'geneid']
names(keep.cols) <- keep.cols
}
res.table <- cbind(res.table, gene.metadata[mo,keep.cols,drop=FALSE])
## get names of all the genes
all.genes <- rownames(res.table)
## Get the count matrix
cm <-res.celltype$cm
## remove the cell type suffix
colnames(cm) <- strpart(colnames(cm),cluster.sep.chr,1,fixed=TRUE)
## ilev entry (submatrices of cps)
ilev <- lapply(res.celltype$sample.groups, function(sg) {
## In certain cases columns may be missing,skip
sg <- sg[sg %in% colnames(cm)]
## keep only cols of interest
cm.tmp <- cm[,sg]
## convert to matrix
cm.tmp <- as.matrix(cm.tmp)
rownames(cm.tmp) <- rownames(cm)
## calculate cpm
cpm <- sweep(cm.tmp, 2, apply(cm.tmp,2, sum), FUN='/')
cpm <- log10(cpm * 1e6 + 1)
##
snames1 <- colnames(cpm)
## Put genes in order
cpm <- cpm[all.genes,]
colnames(cpm) <- NULL;
rownames(cpm) <- NULL;
## return
list(snames=snames1, val=as.matrix(cpm))
})
## snames entry (samplenames)
snames <- names(res.celltype$sample.groups)
## convert to json
tojson <- list(
res = res.table,
genes = all.genes,
ilev = ilev,
snames = snames
)
y <- jsonlite::toJSON(tojson)
## File to save to
file <- paste0(saveprefix,make.names(ncc),'.json')
## create the json file
write(y,file)
NULL
})
invisible(NULL)
}
#' Compare two cell types across the entire panel
#' @param con.obj conos object
#' @param groups factor describing cell grouping
#' @param sample.groups a named list of two character vectors specifying the app groups to compare
#' @param cooks.cutoff cooksCutoff parameter for DESeq2
#' @param refgroup cell type to compare to be used as reference
#' @param altgroup cell type to compare to
#' @param min.cell.count minimum number of cells per celltype/sample combination to keep
#' @param independent.filtering independentFiltering parameter for DESeq2
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @param return.details logical, return detailed results
#' @param only.paired only keep samples that that both cell types above the min.cell.count threshold
#' @export getBetweenCellTypeDE
getBetweenCellTypeDE <- function(con.obj, sample.groups = NULL, groups=NULL, cooks.cutoff = FALSE, refgroup = NULL, altgroup = NULL, min.cell.count = 10,
independent.filtering = FALSE, cluster.sep.chr = '<!!>',return.details=TRUE, only.paired=TRUE) {
# TODO: do we really need sample.groups here? They are used in the corrected version for some unknown reason.
validateBetweenCellTypeParams(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr)
## Get the samples from the panel to use in this comparison
aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count) %>%
rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
gc()
aggr2.meta <- generateDEMatrixMetadata(aggr2, refgroup, altgroup, cluster.sep.chr)
## Get the samples that have both cell types only
if (only.paired) {
complete.obs <- names(which(apply(reshape2::acast(aggr2.meta, library ~ celltype),1,function(x){sum(is.na(x))}) == 0, useNames = TRUE))
aggr2.meta <- aggr2.meta[aggr2.meta$library %in% complete.obs,]
}
## Select the desired samples only
aggr2.meta$celltype <- relevel(aggr2.meta$celltype, ref = refgroup)
aggr2 <- aggr2[,rownames(aggr2.meta)]
## Generate DESeq2 comparison
dds1 <- DESeq2::DESeqDataSetFromMatrix(aggr2, aggr2.meta, design = ~ library + celltype)
dds1 <- DESeq2::DESeq(dds1)
res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
res1 <- res1[order(res1$padj,decreasing = FALSE),]
## Return
if(return.details) {
list(res=res1, cm=aggr2, meta = aggr2.meta, refgroup = refgroup, altgroup = altgroup, sample.groups=sample.groups)
} else {
res1
}
}
generateDEMatrixMetadata <- function(mtx, refgroup, altgroup, cluster.sep.chr) {
meta <- data.frame(
row.names = colnames(mtx),
sample=colnames(mtx),
library=strpart(colnames(mtx),cluster.sep.chr,1,fixed=T),
celltype = strpart(colnames(mtx),cluster.sep.chr,2,fixed=T)
)
return(subset(meta, celltype %in% c(refgroup, altgroup)))
}
#' Compare two cell types across the entire panel
#' @param con.obj conos object
#' @param sample.groups a named list of two character vectors specifying the app groups to compare
#' @param groups factor describing cell grouping
#' @param cooks.cutoff cooksCutoff parameter for DESeq2
#' @param refgroup cell type to compare to be used as reference
#' @param altgroup cell type to compare to
#' @param min.cell.count minimum number of cells per celltype/sample combination to keep
#' @param independent.filtering independentFiltering parameter for DESeq2
#' @param cluster.sep.chr character string of length 1 specifying a delimiter to separate cluster and app names
#' @param return.details logical, return detailed results
#' @param only.paired only keep samples that that both cell types above the min.cell.count threshold
#' @param correction fold change corrections per genes
#' @param ref.level reference level on the basis of which the correction was calculated
#' @export getBetweenCellTypeCorrectedDE
getBetweenCellTypeCorrectedDE <- function(con.obj, sample.groups = NULL, groups=NULL, cooks.cutoff = FALSE, refgroup = NULL, altgroup = NULL, min.cell.count = 10,
independent.filtering = FALSE, cluster.sep.chr = '<!!>',return.details=TRUE, only.paired=TRUE, correction = NULL, ref.level=NULL) {
validateBetweenCellTypeParams(con.obj, groups, sample.groups, refgroup, altgroup, cluster.sep.chr)
## Get the samples from the panel to use in this comparison
aggr2 <- rawMatricesWithCommonGenes(con.obj, sample.groups) %>%
lapply(collapseCellsByType, groups=groups, min.cell.count=min.cell.count) %>%
rbindDEMatrices(cluster.sep.chr=cluster.sep.chr)
gc()
aggr2.meta <- generateDEMatrixMetadata(aggr2, refgroup, altgroup, cluster.sep.chr=cluster.sep.chr)
## Get the samples that have both cell types only
if (only.paired) {
complete.obs <- reshape2::acast(aggr2.meta, library ~ celltype, value.var = 'celltype', fun.aggregate = length) %>%
apply(1, function(x) sum(is.na(x)) == 0) %>% which(useNames = TRUE) %>% names()
aggr2.meta <- aggr2.meta[aggr2.meta$library %in% complete.obs,]
}
## Select the desired samples only
aggr2.meta$celltype <- relevel(aggr2.meta$celltype, ref = refgroup)
aggr2 <- aggr2[,rownames(aggr2.meta)]
tmp1 <- reshape2::melt(sample.groups)
colnames(tmp1) <- c('sample','group')
aggr2.meta$group <- factor(tmp1$group[match(as.character(aggr2.meta$library), tmp1$sample)])
aggr2.meta$group <- relevel(aggr2.meta$group, ref = ref.level)
rm(tmp1)
## Generate DESeq2 comparison
dds1 <- DESeq2::DESeqDataSetFromMatrix(aggr2, aggr2.meta, design = ~ celltype)
## Apply the correction based on sample type
dds1 <- DESeq2::estimateSizeFactors(dds1)
sf <- DESeq2::sizeFactors(dds1)
if(!(all(rownames(aggr2) %in% names(correction)) & all(names(correction) %in% rownames(aggr2))))
stop('incompatible matrices')
nf.tmp <- matrix(rep(sf, nrow(aggr2)),nrow=nrow(aggr2),byrow=TRUE)
rownames(nf.tmp) <- rownames(aggr2);
colnames(nf.tmp) <- colnames(aggr2)
gene.scale.factors <- 2^(correction[rownames(nf.tmp)])
baselevel <- levels(SummarizedExperiment::colData(dds1)$group)[1]
x <- do.call(cbind, lapply(SummarizedExperiment::colData(dds1)$group, function(x) {
if (x == baselevel) {
rep(1, length(gene.scale.factors))
} else {
gene.scale.factors
}
}))
rownames(x) <- rownames(nf.tmp);
colnames(x) <- colnames(nf.tmp)
nf.tmp <- nf.tmp * x
x2 <- plyr::aaply(nf.tmp, 1, function(x) {x / exp(mean(log(x)))})
DESeq2::normalizationFactors(dds1) <- x2
##
dds1 <- DESeq2::DESeq(dds1)
res1 <- DESeq2::results(dds1, cooksCutoff = cooks.cutoff, independentFiltering = independent.filtering)
res1 <- res1[order(res1$padj,decreasing = FALSE),]
## Return
if(return.details) {
list(res=res1, cm=aggr2, meta = aggr2.meta, refgroup = refgroup, altgroup = altgroup, sample.groups=sample.groups)
} else {
res1
}
}
## Marker genes
#' Takes data.frames with info about DE genes for single cell type and many samples and
#' returns data.frame with aggregated info for this cell type
aggregateDEMarkersAcrossDatasets <- function(marker.dfs, z.threshold, upregulated.only) {
if (length(marker.dfs) == 0)
return(data.frame())
z.scores.per.dataset <- lapply(marker.dfs, function(df) setNames(df$Z, rownames(df)))
m.vals.per.dataset <- lapply(marker.dfs, function(df) setNames(df$M, rownames(df)))
gene.union <- lapply(z.scores.per.dataset, names) %>% Reduce(union, .)
z.scores <- sapply(z.scores.per.dataset, `[`, gene.union) %>% rowMeans(na.rm=T)
m.vals <- sapply(m.vals.per.dataset, `[`, gene.union) %>% rowMeans(na.rm=T)
ro <- order(z.scores,decreasing=T)
pvals <- dnorm(z.scores)
res <- data.frame(Gene=names(z.scores), M=m.vals, Z=z.scores, PValue=pvals, PAdj=p.adjust(pvals))[ro,]
z.filter <- if (upregulated.only) res$Z else abs(res$Z)
return(res[z.filter > z.threshold,])
}
getDifferentialGenesP2 <- function(p2.samples, groups, z.threshold=3.0, upregulated.only=F, verbose=T, n.cores=1) {
lapply.func <- if (verbose) function(...) pbapply::pblapply(..., cl=n.cores) else function(...) papply(..., n.cores=n.cores)
groups %<>% as.character() %>% setNames(names(groups))
if (verbose) cat("Estimating marker genes per sample\n")
markers.per.sample <- lapply.func(p2.samples, function(p2) {
if (length(intersect(rownames(p2$counts), names(groups))) < 3) {
list()
} else {
if (packageVersion("pagoda2") >= "0.1.1") {
p2$getDifferentialGenes(groups=groups, z.threshold=0, append.specificity.metrics=F, append.auc=F)
} else {
p2$getDifferentialGenes(groups=groups, z.threshold=0)
}
}
})
if (verbose) cat("Aggregating marker genes\n")
markers.per.type <- unique(groups) %>% setNames(., .) %>%
lapply(function(id) lapply(markers.per.sample, `[[`, id) %>% .[!sapply(., is.null)]) %>%
lapply.func(aggregateDEMarkersAcrossDatasets, z.threshold=z.threshold, upregulated.only=upregulated.only)
return(markers.per.type)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.