R/aux_importsFromPython.R

Defines functions importModulesGmt importAUCfromText importArboreto

Documented in importArboreto importAUCfromText importModulesGmt

#' @title importArboreto
#' @description Import output from Arboreto/GRNBoost (https://arboreto.readthedocs.io)
#' @param fileName File name of the link/adjacency list
#' @param reorder Whether to sort the links by decreasing weight/importance
#' @param normalizeImportance Whether to add the column \code{ImportanceNorm}: 
#' the importance of each link divided by the sum of the importances of all the links 
#' pointing to the same target (rounded to 3 significant digits). 
#' If \code{TRUE}, this is the column used to sort the links.
#' @param lapplyFun lapply-like function used to normalize the links of each target 
#' (only used if \code{normalizeImportance=TRUE}). Default: \code{BiocParallel::bplapply}; 
#' use \code{lapply} to avoid the dependency on BiocParallel.
#' @return data.frame with the links. The column names are those of the file, capitalized 
#' (plus \code{ImportanceNorm} if requested).
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @examples 
#' GRNBoost_linkList <- importArboreto("adjacencies.tsv")
#' @import data.table
#' @export 
importArboreto <- function(fileName, 
                           reorder=TRUE, 
                           normalizeImportance=TRUE,
                           lapplyFun=BiocParallel::bplapply)
{
  # The default is only evaluated if the normalization is requested;
  # call with lapplyFun=lapply if BiocParallel is not available.
  
  arboreto_linkList <- data.table::fread(fileName, stringsAsFactors=FALSE)
  colnames(arboreto_linkList) <- R.utils::capitalize(colnames(arboreto_linkList))
  colToOrder <- "Importance"
  
  if(normalizeImportance)
  {
    message("Loaded ", nrow(arboreto_linkList)," links. Normalizing importance...")
    # One table per target gene, so that the importances are scaled within each target
    arboreto_linkList <- split(arboreto_linkList, by="Target")
    arboreto_linkList <- lapplyFun(arboreto_linkList, function(x) {
      x$ImportanceNorm <- signif(x$Importance/sum(x$Importance), 3)
      x
    })
    arboreto_linkList <- data.table::rbindlist(arboreto_linkList)
    colToOrder <- "ImportanceNorm"
  }
  
  if(reorder) arboreto_linkList <- data.table::setorderv(arboreto_linkList, cols=colToOrder, order=-1)
  
  arboreto_linkList <- as.data.frame(arboreto_linkList)
  
  # Check each column individually (is.factor on the vector of modes is always FALSE)
  hasFactors <- any(vapply(arboreto_linkList, is.factor, logical(1)))
  if(hasFactors) warning("Some values were loaded as factors. Convert them to character or numeric to avoid problems later on.")
  
  arboreto_linkList
}

#' @title importAUCfromText (deprecated)
#' @description Import AUCell matrix from text format, as generated by pySCENIC
#' Deprecated: This function is no longer needed. The results can be loaded directly from the .loom file with get_regulons_AUC()
#' @param fileName File name of the AUC scores. The first column of the file contains the IDs of the rows, 
#' the remaining columns contain the scores.
#' @param rows Type of data stored as rows (only for informative purposes) Default: "regulons"
#' @param columns Type of data stored as columns (only for informative purposes) Default: "cells"
#' @param transpose Whether to transpose the input matrix. 
#' If \code{TRUE} (default), the IDs in the first column of the file become the column names of the result 
#' (i.e. the file has cells in rows and gene-sets in columns). 
#' If \code{FALSE}, the file is kept as is and the IDs in the first column are used as row names.
#' @param newNames Optional named character vector to rename the rows (gene-sets) of the result: 
#' the names are the current row names, the values are the new ones. Default: NULL (names are not modified)
#' @return \code{aucellResults} object containing the AUC matrix (gene-sets in rows, cells in columns)
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @examples
#' regulonAUC <- importAUCfromText("aucMatrix.tsv")
#' @import data.table AUCell
#' @export
importAUCfromText <- function(fileName, rows="regulons", columns="cells", transpose=TRUE, newNames=NULL)
{
  # Read the file only once: the first column holds the IDs, the rest are the scores
  aucTable <- data.table::fread(fileName)
  rowIds <- as.character(aucTable[[1]])
  aucMatrix <- as.matrix(aucTable[, -1, with=FALSE])
  
  if(transpose) 
  {
    # IDs of the file rows become the columns of the result
    aucMatrix <- t(aucMatrix)
    colnames(aucMatrix) <- rowIds
  } else {
    # File already in the final orientation: the IDs stay as row names
    rownames(aucMatrix) <- rowIds
  }
  
  if(!is.null(newNames)) rownames(aucMatrix) <- unname(newNames[rownames(aucMatrix)])

  names(dimnames(aucMatrix)) <- c(rows, columns)

  new("aucellResults", SummarizedExperiment::SummarizedExperiment(assays=list(AUC=aucMatrix)))
}


#' @title importModulesGmt (deprecated)
#' @description Imports TF co-expression from .gmt file, as generated by pySCENIC
#' Deprecated: This function is no longer needed. The results can be loaded directly from the .loom file with get_regulons()
#' @param fileName File name of the co-expression modules (.gmt)
#' @param scenicOptions Object with the SCENIC settings. Required to save the modules as data.frame 
#' (it provides the output file name). Default: NULL
#' @param saveAsDf saveAsDf=TRUE to save as data.frame to continue the pipeline in R (with runSCENIC_2_createRegulons). 
#' If \code{scenicOptions} is not provided, a warning is issued and nothing is saved.
#' @param verbose Whether to show a summary of the imported modules
#' @seealso pySCENIC (https://pyscenic.readthedocs.io)
#' @return Returns the co-expression modules as list (invisible), and saves them as data.frame if requested (file name: getIntName(scenicOptions, "tfModules_asDF")). 
#' The modules are named \code{<TF>_mod<i>}, numbering the modules of each TF in order of appearance in the file.
#' @examples
#' pyScenicDir <- "."
#' tfModules <- importModulesGmt(fileName=file.path(pyScenicDir,  "modules.gmt"), saveAsDf=FALSE)
#' @export
importModulesGmt <- function(fileName, scenicOptions=NULL, saveAsDf=TRUE, verbose=TRUE)
{
  if(!is.null(scenicOptions))
  {
    fileName_asDF <- getIntName(scenicOptions, "tfModules_asDF")
  } else if(saveAsDf) {
    # The output file name is only known through scenicOptions
    warning("'scenicOptions' is required to save the modules as data.frame. The modules will be returned, but not saved.", call.=FALSE)
    saveAsDf <- FALSE
  }
  
  # .gmt format: <name> TAB <description> TAB <gene1> TAB <gene2> ...
  gmtLines <- readLines(fileName)
  gmtLines <- gmtLines[nzchar(trimws(gmtLines))] # ignore blank lines
  gmtFields <- strsplit(gmtLines, "\t", fixed=TRUE)
  
  tfNames <- vapply(gmtFields, function(x) x[1], character(1))
  tfNames <- gsub("Regulon for ", "", tfNames, fixed=TRUE)
  
  # Number the modules of each TF in order of appearance 
  # (does not require the file to be sorted/grouped by TF)
  if(length(tfNames) > 0)
  {
    modIndex <- ave(seq_along(tfNames), tfNames, FUN=seq_along)
    modMethod <- paste0("mod", modIndex)
    modNames <- paste0(tfNames, "_", modMethod)
  } else {
    modMethod <- character(0)
    modNames <- character(0)
  }
  
  # Genes = everything after name & description (empty if the line has no genes)
  tfModules <- setNames(lapply(gmtFields, function(x) x[-(1:2)]), modNames)
  
  if(verbose) 
  {
    mlen <- lengths(tfModules)
    if(length(mlen) > 0)
    {
      message("Imported ", length(tfModules), " TF co-expression modules (of ", min(mlen), " - ", max(mlen), " genes).")
    } else {
      message("Imported 0 TF co-expression modules.")
    }
  }
  
  if(saveAsDf)
  {
    # Long format: one row per TF-target pair. All columns are character (corr is not available)
    nGenes <- lengths(tfModules)
    tfModules_asDF <- data.frame(Target=as.character(unlist(tfModules, use.names=FALSE)),
                                 TF=rep(tfNames, nGenes),
                                 method=rep(modMethod, nGenes),
                                 corr=rep(NA_character_, sum(nGenes)),
                                 stringsAsFactors=FALSE)
    saveRDS(tfModules_asDF, file=fileName_asDF)
    
    if(verbose) {
      message("Saved as data.frame:")
      print(head(tfModules_asDF))
    }
  }
  invisible(tfModules)
}
# tmp <- data.table::fread(file.path(pyScenicDir,  "adjacencies.tsv")) # only importance, not split by coexmodules
# coexmods <- GSEABase::getGmt(file.path(pyScenicDir,  "modules.gmt"));coexmods <- geneIds(coexmods) # error: duplicate gene names
aertslab/SCENIC documentation built on April 7, 2024, 10 a.m.