R/src_RowOrderExtractor.R

Defines functions RowOrderExtractor

Documented in RowOrderExtractor

#' Extract row order from a hclust object stored in a ComplexHeatmap object as produced by \code{\link{PlotHtmp}}
#' 
#' This function (in a chronological order) comes right after running \code{\link{PlotHtmp}}.
#' The function will take the returned output of \code{\link{PlotHtmp}} plus either a GRanges object or a list of 
#' gene of region names and will then extract the rows that belong to each cluster as defined in \code{\link{PlotHtmp}}.
#' 
#' @param HtmpObject the output of \code{\link{PlotHtmp}}
#' @param ReferenceData a GRanges object (when using ranged data) or a vector with names in the same order 
#' as used for \code{\link{PlotHtmp}} from which the rows will be extracted according clustering from \code{\link{PlotHtmp}}
#' @param Type either "ranged" or "unranged", must be ranged when using GRanges input
#' @param ToDisk whether to write the ranges or names of each cluster to disk
#' @param OutputDir output directory for ToDisk
#' @param OutputName name prefix for ToDisk
#' @param WithDate logica, whether to prefix files written to disk with date
#' @param RangedIs1Based Whether GRanges is 1-based so subtract 1 from start prior to writing to disk as BED
#' 
#' @details During \code{\link{PlotHtmp}} there is an option to split the rows into clusters according to hclust output.
#' In order to get the elements per cluster one could use \code{\link{cutree}} but since we want to ensure that the output order
#' of the elements is 100% identical to the order that \code{\link{PlotHtmp}} used for plotting we extract the order directly from
#' the returned heatmap object.
#' 
#' @author Alexander Toenges
#' 
#' @examples 
#' Gr <- GRanges(seqnames = rep("chr1", 2000), ranges = IRanges(start = seq(1,1000), end = rep(1001, 2000)))
#' cts <- sapply(seq(1,10), function(x) rnorm(1000,10))
#' rownames(cts) <- paste0("gene", seq(1,1000))
#' Htmp <- PlotHtmp(InputData = cts, Htmp.hclustRow = hclust(dist(cts)), Htmp.nclusters = 3, Htmp.return = TRUE)
#' Extracted.unranged <- RowOrderExtractor(HtmpObject = Htmp, ReferenceData = rownames(cts), Type = "unranged")
#' Extracted.ranged <- RowOrderExtractor(HtmpObject = Htmp, ReferenceData = Gr, Type = "ranged")
#' 
#' @export
RowOrderExtractor <- function(HtmpObject,
                              ReferenceData, 
                              Type,
                              ToDisk = FALSE,
                              OutputDir = "./",
                              OutputName = "",
                              WithDate = FALSE,
                              RangedIs1Based = TRUE)
{
  
  ##############################################################################################################
  
  GetDate  <- function(){ gsub("^20", "", format(Sys.Date(), "%Y%m%d")) }
  
  if(Type != "ranged" & Type != "unranged") stop("Type must be <ranged> or <unranged>")

  ##############################################################################################################
  
  ## get row order
  rod <- row_order(HtmpObject)

  ##############################################################################################################
  
  if(WithDate) {
    Datum <- paste0(GetDate(), "_")
  } else Datum <- ""
  
  ##############################################################################################################
  
  ## GRanges input
  if(Type == "ranged"){
    
    if(class(ReferenceData) != "GRanges"){
     stop("Type is set to ranged but ReferenceData is not GRanges class")
    }
    
    if(length(ReferenceData) == 0) stop("ReferenceData does not contain any ranges")
    
    OUTPUT <- GRangesList(
    lapply(1:length(rod), function(rod2){
        
        tmp.GR <- ReferenceData[rod[[rod2]],]
        
        if(ToDisk){
          if(RangedIs1Based){
            tmp.df <- data.frame(tmp.GR)[,c(1,2,3)]
            tmp.df$start <- tmp.df$start - 1
          } else tmp.df <- data.frame(tmp.GR)[,c(1,2,3)]
            
          write.table(x = tmp.df,
                      file = paste0(OutputDir, "/", Datum, OutputName, "_cluster", i, ".bed"),
                      sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
        }
        return(tmp.GR)
      })
    )
    return(OUTPUT)
  }
  
  ##############################################################################################################
  
  ## simple list input:
  if(Type == "unranged"){
    
    OUTPUT <-
    lapply(1:length(rod), function(K){
      
      tmp.out <- ReferenceData[rod[[rod2]]]
      
      if(ToDisk){
        write.table(x = tmp.out,
                    file = paste0(OutputDir, "/", Datum, OutputName, "_cluster", i, ".txt"),
                    sep = "\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
      }
      return(tmp.out)
    })
    return(OUTPUT)
    
  }
  
  ##############################################################################################################
  
}
ATpoint/misterplotR documentation built on Feb. 15, 2020, 12:17 a.m.