R/insulation.heatmap.R

#' Plot insulation-heatmaps.
#'
#' insulations in, plot out
#'
#' @param insulationList A named list of results from `genome.wide.insulation()`.
#' @param bed A bed-df with TAD-calls. Only the first three columns are important.
#' @param getOrder Instead of a plot, get the order of bed
#' @param zlim zlims c(min,max) for heatmap
#' @param flank Amount of flanking bp.
#' @param profileZlim zlims c(min,max) for profile
#' @param profileFunct Funciton to make profile-plots (mean, median, sum, absSum)
#' @param profileCols Vector of colors for profile-plots
#' @param sortWidth Percentage of columns to sort on (aligned on center)
#' @param brewerPal Colorbrewer-palette to use
#' @param brewerDit -1 flips the palette.
#' @param focus Sort on which sample?
#' @param verbose Chatty?
#' @return A plot
#' @export
insulation.heatmap <- function(insulationList, bed, getOrder=F, focus = 1, sortWidth = 10, profileFunct = mean, flank=500e3, zlim = c(-1,1), verbose = F , brewerPal = "Greys", brewerDir = -1, profileCols = NULL, profileZlim = NULL){ # ooit een gg-switch!
  # too small a canvas will lead to misaligned plots!
  require(cowplot)
  require(ggplot2)
  require(reshape2)
  # Get matrixes
  sampleList <- list()
  for(i in 1:length(insulationList)){
    insdDat <- insulationList[[i]]
    insName <-  names(insulationList)[i]
    results <- align.insulation( insdDat, bed, flank.length=flank , verbose = verbose)
    sampleList[[i]] <- results
    names(sampleList)[i] <- insName
  }
  sampleNames <- names(insulationList)

  perSample = NULL

  # sorting on focus
  foundOrder = NULL

  if(focus > length(insulationList)){
    stop("Focus is outside of the number of samples!")
  } else {
    focusYoungGrasshopper <- sampleList[[focus]]

    # which of columns to sort on?
    perSample = ncol(focusYoungGrasshopper)
    nSortCol <-  ((perSample)/100)*sortWidth
    halfnSortCol <- round(nSortCol/2)
    midpoint <- ((ncol(focusYoungGrasshopper)-1)/2)+1
    sortCols <- (midpoint-halfnSortCol):(midpoint+halfnSortCol)

    rowSums <- apply(as.matrix(focusYoungGrasshopper)[, sortCols],1, mean)
    foundOrder <- base::order(rowSums,decreasing = T) # in/de is flipped for ggplot
  }

  # order individual samples
  for(i in 1:length(sampleList)){
    sampleList[[i]] <- sampleList[[i]][foundOrder,]
  }

  if(verbose){message("Melting...")}
  dfList <- NULL
  for(i in 1:length(sampleList)){
    melted <- reshape2::melt(t(sampleList[[i]]))
    melted$Var1 <- as.numeric(as.factor(melted$Var1))
    melted$Var2 <- as.numeric(as.factor(melted$Var2))
    melted$sample <- names(sampleList)[i]
    dfList[[i]] <- melted
  }
  df <- rbindlist(dfList)
  df$sample <- factor(df$sample, levels = sampleNames)

  # plot profiles
  groupedDF <- group_by(df, sample, Var1)
  profDF <- summarize(groupedDF, val = profileFunct(value))

  if(is.null(profileCols)){
    profileCols <- rep('black', length(insulationList))
  }

  PRFLS <- NULL
  if(is.null(profileZlim)){

    profileZlim = range(profDF$val) + c(-0.05, 0.05) * diff(range(profDF$val))

    PRFLS <- ggplot(profDF, aes(Var1, val, col = sample)) +
      geom_line()  + RHWtheme() +
      scale_x_continuous(expand=c(0,0), breaks = c( (perSample/2) + 1 ), labels = "^") +
      scale_y_continuous(expand=c(0,0), limits = profileZlim ) +
      labs( x = '', y = '') + guides(col = F) +
      scale_colour_manual(values = profileCols)+
      theme(aspect.ratio = 1) +
      facet_grid(. ~ sample ) +theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))+
      theme(axis.title.x=element_blank(),  axis.ticks.x=element_blank())
  } else {
    PRFLS <- ggplot(profDF, aes(Var1, val, col = sample)) +
      geom_line() + RHWtheme() +
      scale_x_continuous(expand=c(0,0), breaks = c( (perSample/2) + 1 ), labels = "^") +
      scale_y_continuous(expand=c(0,0), limits = profileZlim ) +
      labs( x = '', y = '') + guides(col = F) +
      scale_colour_manual(values = profileCols) +
      theme(aspect.ratio = 1) +
      facet_grid(. ~ sample ) + theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
      theme(axis.title.x=element_blank(),  axis.ticks.x=element_blank())
  }

  # set zlims
  if(verbose){message("Setting zlims...")}
  df[,3][df[,3] < zlim[1]] <- zlim[1]
  df[,3][df[,3] > zlim[2]] <- zlim[2]

  # plot heatmaps

  HTMPS <- ggplot(df, aes(Var1, Var2, fill = value)) +
    geom_raster(interpolate = T) + facet_grid(. ~ sample) + RHWtheme() +
    scale_x_continuous(expand=c(0,0), breaks = c( (perSample/2) + 1 ), labels = "^") +
    scale_y_continuous(expand=c(0,0), breaks = NULL) +
    labs( x = '', y = '') + guides(fill = F) + scale_fill_distiller(type = 'seq', palette = brewerPal, direction = brewerDir )+
    theme(strip.text = element_blank() ,
          strip.background = element_blank(),
          plot.margin = unit( c(0,0,0,0) , units = "lines" ))

  if(getOrder == F){
    plot_grid(PRFLS, HTMPS, align = "v",ncol = 1, rel_heights = c(1/4, 2/4))
  } else {
    return(foundOrder)
  }



}

align.insulation.chrom <- function( ins.data, bed, flank = 10 ){
  n <- findInterval(bed[,2]-1, ins.data[,2])
  bool <- n > flank & n + flank < nrow(ins.data)
  boolF <- which(bool == F)
  boolD <- diff(boolF)
  
  rows2addFront = which(boolD > 1)
  rows2addBack = length(boolF)-rows2addFront
  
  n <- n[n > flank & n + flank < nrow(ins.data)]
  
  #create a vector for the flanking sequence
  add <- -flank:flank
  #repeat it for every element in n
  add.long <- rep(add, length(n))
  #repeat every element in n for the length of add
  n.long <- rep(n, each=length(add))
  
  #add the selected value and the flanking sequences
  i.sel <- n.long + add.long
  
  #put the selected elements in matrix
  align.mat <- matrix(ins.data[i.sel,4], nrow=length(n), ncol=length(add), byrow=T)
  
  # add zeroes for first boundaries too close
 if(length(rows2addFront) > 0){
    align.mat = rbind(matrix(rep(0, ncol(align.mat) * rows2addFront), 
                              ncol =  ncol(align.mat)),
                       align.mat)
  }

  if(length(rows2addBack) > 0){
    align.mat = rbind(align.mat,
                      matrix(rep(0, ncol(align.mat) * rows2addBack), 
                             ncol =  ncol(align.mat)))
  }

  return(align.mat)
}

align.insulation <- function( ins.data, bed, flank.length = 200e3 , verbose = F){
  #elaborate way of selecting the resolution of the Hi-C matrix
  res <- as.numeric(names(tail(sort(table(ins.data[,3]-ins.data[,2])),1)))
  flank = flank.length/res

  chrom.vec <- unique(ins.data[,1])
  align.mat <- NULL
  for( chr in chrom.vec){
    if(verbose){message("Analyzing ", chr, "\\r")}

    sub.mat <- align.insulation.chrom( ins.data[ins.data[,1]==chr,], bed[bed[,1]==chr,], flank = flank )
    align.mat <- rbind(align.mat, sub.mat)
  }
  align.mat
}
robinweide/RHWlib documentation built on May 7, 2019, 8:03 a.m.