#' @title importArboreto
#' @description Import output from Arboreto/GRNBoost (https://arboreto.readthedocs.io)
#' @param fileName File name of the link/adjacency list
#' @param reorder Whether to sort the links by decreasing weight/importance
#' @param normalizeImportance Whether to add an \code{ImportanceNorm} column:
#' the importance of each link divided by the sum of the importances of all the
#' links with the same target (rounded to 3 significant digits).
#' When TRUE, this is the column used for sorting.
#' @param lapplyFun lapply-like function applied to the per-target tables when
#' normalizing (default: \code{BiocParallel::bplapply}; use \code{lapply} to run sequentially)
#' @return data.frame with the links. The column names of the input file are
#' capitalized (e.g. "target" becomes "Target").
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @examples
#' GRNBoost_linkList <- importArboreto("adjacencies.tsv")
#' @import data.table
#' @export
importArboreto <- function(fileName,
                           reorder=TRUE,
                           normalizeImportance=TRUE,
                           lapplyFun=BiocParallel::bplapply)
{
  arboreto_linkList <- data.table::fread(fileName, stringsAsFactors=FALSE)
  colnames(arboreto_linkList) <- R.utils::capitalize(colnames(arboreto_linkList))

  # Fail early (and clearly) if the columns needed below are not in the file
  requiredCols <- character(0)
  if(reorder) requiredCols <- "Importance"
  if(normalizeImportance) requiredCols <- c("Target", "Importance")
  missingCols <- setdiff(requiredCols, colnames(arboreto_linkList))
  if(length(missingCols) > 0)
  {
    stop("Column(s) not found in '", fileName, "': ",
         paste(missingCols, collapse=", "), call.=FALSE)
  }

  colToOrder <- "Importance"
  if(normalizeImportance)
  {
    message("Loaded ", nrow(arboreto_linkList)," links. Normalizing importance...")
    # One table per target, so that each importance is scaled by the total of its target
    arboreto_linkList <- split(arboreto_linkList, by="Target")
    arboreto_linkList <- lapplyFun(arboreto_linkList, function(x) {
      x$ImportanceNorm <- signif(x$Importance/sum(x$Importance), 3)
      x
    })
    arboreto_linkList <- data.table::rbindlist(arboreto_linkList)
    colToOrder <- "ImportanceNorm"
  }

  if(reorder) arboreto_linkList <- data.table::setorderv(arboreto_linkList, cols=colToOrder, order=-1)
  arboreto_linkList <- as.data.frame(arboreto_linkList)

  # Test each column: is.factor() on the vector of column modes was always FALSE
  if(any(vapply(arboreto_linkList, is.factor, logical(1)))) warning("Some values were loaded as factors. Convert them to character or numeric to avoid problems later on.")
  arboreto_linkList
}
#' @title importAUCfromText (deprecated)
#' @description Import AUCell matrix from text format, as generated by pySCENIC
#' Deprecated: This function is no longer needed. The results can be loaded directly from the .loom file with get_regulons_AUC()
#' @param fileName File name of the AUC scores. The first column is read as the
#' identifiers of the rows of the file; with \code{transpose=TRUE} they become the column names (cells) of the result.
#' @param rows Type of data stored as rows (only for informative purposes) Default: "regulons"
#' @param columns Type of data stored as columns (only for informative purposes) Default: "cells"
#' @param transpose Whether to transpose the input matrix
#' @param newNames Optional named vector to rename the rows of the resulting matrix
#' (names: current row names; values: new row names)
#' @return \code{aucellResults} object containing the matrix as the \code{AUC} assay
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @examples
#' regulonAUC <- importAUCfromText("aucMatrix.tsv")
#' @import data.table AUCell
#' @export
importAUCfromText <- function(fileName, rows="regulons", columns="cells", transpose=TRUE, newNames=NULL)
{
  # Read the file only once: the first column holds the row identifiers, the rest the scores
  aucTable <- data.table::fread(fileName)
  rowIds <- as.character(aucTable[[1]])
  aucMatrix <- as.matrix(aucTable[, -1, with=FALSE])

  if(transpose)
  {
    aucMatrix <- t(aucMatrix)
    colnames(aucMatrix) <- rowIds
  }else{
    # Not transposed: the identifiers of the first column label the rows
    # (they were previously assigned as column names of a data.table)
    rownames(aucMatrix) <- rowIds
  }

  # Row names without an entry in newNames become NA
  if(!is.null(newNames)) rownames(aucMatrix) <- unname(newNames[rownames(aucMatrix)])
  names(dimnames(aucMatrix)) <- c(rows, columns)

  methods::new("aucellResults", SummarizedExperiment::SummarizedExperiment(assays=list(AUC=aucMatrix)))
}
#' @title importModulesGmt (deprecated)
#' @description Imports TF co-expression modules from a .gmt file, as generated by pySCENIC
#' Deprecated: This function is no longer needed. The results can be loaded directly from the .loom file with get_regulons()
#' @param fileName File name of the co-expression modules (.gmt)
#' @param scenicOptions Settings object used to obtain the output file name
#' (\code{getIntName(scenicOptions, "tfModules_asDF")}). Required to save the modules as data.frame.
#' @param saveAsDf saveAsDf=TRUE to save as data.frame to continue the pipeline in R (with runSCENIC_2_createRegulons).
#' If \code{scenicOptions} is NULL nothing is saved and a warning is issued.
#' @param verbose Whether to show progress messages
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @return Returns the co-expression modules as list (invisible), and saves them as data.frame if requested (file name: getIntName(scenicOptions, "tfModules_asDF"))
#' @examples
#' pyScenicDir <- "."
#' tfModules <- importModulesGmt(fileName=file.path(pyScenicDir, "modules.gmt"), saveAsDf=FALSE)
#' @export
importModulesGmt <- function(fileName, scenicOptions=NULL, saveAsDf=TRUE, verbose=TRUE)
{
  # Each line of the file is read as: <set name> TAB <description> TAB <gene> TAB <gene> ...
  gmtLines <- readLines(fileName)
  gmtLines <- gmtLines[nzchar(gmtLines)] # ignore blank lines
  gmtFields <- strsplit(gmtLines, "\t", fixed=TRUE)

  tfNames <- vapply(gmtFields, function(x) x[1], character(1))
  tfNames <- gsub("Regulon for ", "", tfNames, fixed=TRUE)

  # Negative index: lines without genes give an empty module (3:length(x) would count backwards)
  tfModules <- lapply(gmtFields, function(x) x[-(1:2)])

  # Number the modules of each TF in order of appearance.
  # (Counting with table() sorts the TFs, which mislabels modules that are not grouped by TF)
  modIndex <- integer(0)
  if(length(tfNames) > 0) modIndex <- stats::ave(seq_along(tfNames), tfNames, FUN=seq_along)
  methodNames <- sprintf("mod%d", modIndex)
  names(tfModules) <- sprintf("%s_%s", tfNames, methodNames)

  if(verbose)
  {
    mlen <- lengths(tfModules)
    if(length(mlen) > 0)
    {
      message("Imported ", length(tfModules), " TF co-expression modules (of ", min(mlen), " - ", max(mlen), " genes).")
    }else{
      message("Imported 0 TF co-expression modules.")
    }
  }

  if(saveAsDf)
  {
    if(is.null(scenicOptions))
    {
      # The output file name is taken from scenicOptions: without it there is nowhere to save
      warning("saveAsDf=TRUE but 'scenicOptions' was not provided: the modules were not saved as data.frame.", call.=FALSE)
    }else{
      fileName_asDF <- getIntName(scenicOptions, "tfModules_asDF")

      # One row per TF-target pair. TF and method are taken from the parsed values
      # (instead of splitting the module name by "_") so TF names containing "_" are kept intact.
      nGenes <- lengths(tfModules)
      targets <- as.character(unlist(tfModules, use.names=FALSE))
      tfModules_asDF <- data.frame(Target=targets,
                                   TF=rep(tfNames, nGenes),
                                   method=rep(methodNames, nGenes),
                                   corr=rep(NA_character_, length(targets)),
                                   stringsAsFactors=FALSE)
      saveRDS(tfModules_asDF, file=fileName_asDF)
      if(verbose) {
        message("Saved as data.frame:")
        print(head(tfModules_asDF))
      }
    }
  }
  invisible(tfModules)
}
# tmp <- data.table::fread(file.path(pyScenicDir, "adjacencies.tsv")) # only importance, not split by coexmodules
# coexmods <- GSEABase::getGmt(file.path(pyScenicDir, "modules.gmt"));coexmods <- geneIds(coexmods) # error: duplicate gene names
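# NOTE: the two lines below are residue from the web page this file was copied from; they are not part of the R source.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.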