R/plots.R

Defines functions plotHeatmap subsetProfile winsorizeVector winsorizeMatrix plotRegion.ChIPprofile

Documented in plotRegion.ChIPprofile

#' Plot regions
#'
#' A function to plot regions
#' 
#' @usage
#' \S4method{plotRegion}{ChIPprofile}(object,
#' gts,sampleData,groupData,summariseBy,
#' colourBy,lineBy,groupBy,
#' plotregion,outliers,freeScale)
#'
#'
#' @docType methods
#' @name plotRegion
#' @rdname plotRegion
#' @aliases plotRegion plotRegion,ChIPprofile-method
#' 
#' @author Thomas Carroll
#'
#' @param object A ChIPprofile object 
#' @param gts A list of character vectors or GRangesList
#' @param plotregion region to plot. For combined plots with style "region", may be "start" or "end" to show full resolution of plot of edges.
#' @param groupData Dataframe of metadata for groups
#' @param sampleData Dataframe of metadata for sample
#' @param summariseBy Column names from GRanges elementmetadata. Formula or character vector of column names to use
#' to collapse genomic ranges to summarised profiles.
#' summariseBy can not be used injustion with groups specified by gts argument.
#' @param colourBy Character vector or formula of either column names from colData(object) containing
#' sample metadata or character vector "group" to colour by groups in gts
#' @param lineBy Character vector or formula of either column names from colData(object) containing
#' sample metadata or character vector "group" to set line type by groups in gts
#' @param groupBy Character vector or formula of either column names from colData(object) containing
#' sample metadata or character "group" to colour by groups in gts
#' @param outliers A numeric vector of length 1 containing proportion from limits to windsorise.]
#' @param freeScale TRUE or FALSE to set whether ggplot 2 facets have their own scales. 
#' Useful for comparing multiple samples of differing depths without normalisation. Default is FALSE.
#' @return  A gg object from ggplot2
#' @examples
#' data(chipExampleBig)
#' plotRegion(chipExampleBig[[2]])
plotRegion.ChIPprofile <- function(object,gts=NULL,sampleData=NULL,groupData=NULL,summariseBy=NULL,colourBy=NULL,lineBy=NULL,groupBy=NULL,plotregion="full",outliers=NULL,freeScale=FALSE)
{
  
## This function can work using two main grouping logics
## When gts is supplied, groupBy, colourBy, lineBy work only with columns
## from the sample metadata found in colData(object).
## If the gts argument is specified then summariseBy must be single character vector
##  specifies which metadata column which column to be used for grouping
##
## When summariseBy is specified as formula or character vector longer  
##  than one then maintain as/coerced into a formula and gts is ignored.
## groupBy, colourBy, lineBy must be parts of formula as assessed by terms()  
## 
## gts may be named list of character vectors referring to rownames(object) or a granges
## When 
  #app <- lapply(gsets,function(x){colMeans(assays(object)[[1]][rowRanges(object)$name %in% x,])})
  nOfWindows <- object@params$nOfWindows
  
  ## When running with gts option
  ## SummariseBy can now only be used to select column for gts to be match to
  ## and colourBy, lineBy and groupBy will refer to sample metadata or be "group"
  
  if(!is.null(gts)){
    
    profileList <- list()
    
    ## Start cycling through assays
    for(p in 1:length(assays(object))){
    
    ## extract profile matrix
      profileTemp <- assays(object)[[p]]
    
    ## profiles are subset using subsetProfile.
    ## this allows for subsets by lists of character vector 
    ## or granges lists.colmeans is then run on these subsets
    ## Alternatively windsoring (see method) of subsets then colmeans.
      
      if(!is.null(outliers)){
        profileTempList <- lapply(gts,function(x)
          colMeans(winsorizeMatrix(subsetProfile(profileTemp,x,rowRanges(object),summariseBy),outliers,1-outliers),na.rm=T)
          )         
      }else{
        profileTempList <- lapply(gts,function(x)colMeans(subsetProfile(profileTemp,x,rowRanges(object),summariseBy),na.rm=T)) 
      }
    
    ## Create melted data frame for ggplot and attach index
    
      profileMatTemp <- melt(as.data.frame(do.call(cbind,profileTempList)))
    
    ## and attach index for different styles of plots
    if(object@params$style=="region" & plotregion=="full"){
        axisIndex=c(seq(1,(object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)),
                    (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+seq(1,object@params$nOfWindows)*100,
                    (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+
                      seq(1,(object@params$distanceInRegionEnd+object@params$distanceOutRegionEnd+1)))
      }
      if(object@params$style=="point"){
        axisIndex=c(seq(1,(object@params$distanceUp+object@params$distanceDown+1)))
      }
      if(object@params$style=="percentOfRegion"){
        axisIndex=c(seq(1,((nOfWindows*((object@params$distanceAround)/100))*2)+nOfWindows))
      } 
    
    ## Add Sample name, group name and index to dataframe
      profileFrame <-data.frame("xIndex"=axisIndex,Group=profileMatTemp[,1],Sample=basename(unlist(metadata(object)["names"]))[p],Score=profileMatTemp[,2])
      
      profileList[[p]] <- profileFrame
    }  
    
    ## Join profiles from each sample and fix column names
    meltedProfileFrame <- do.call(rbind,profileList)
    colnames(meltedProfileFrame) <- c("xIndex","Group","Sample","Score")

  }else if(is.null(gts) & !is.null(summariseBy)){
      profileList <- list()
      
      ## Start cycling through assays
      for(p in 1:length(assays(object))){
            
            ## extract profile matrix
          profileTemp <- assays(object)[[p]]
            
          mcols(rowRanges(object))$summariseCol <- apply(as.data.frame(mcols(rowRanges(object))[,summariseBy,drop=FALSE]) , 1 , paste , collapse = "-" )
          gts <- as.list(unique(mcols(rowRanges(object))$summariseCol))
          summariseBy <- "summariseCol"
          names(gts) <- unlist(gts)
          if(!is.null(outliers)){
            profileTempList <- lapply(gts,function(x)
              colMeans(winsorizeMatrix(subsetProfile(profileTemp,x,rowRanges(object),summariseBy),outliers,1-outliers),na.rm=T)
            )         
          }else{
            profileTempList <- lapply(gts,function(x)colMeans(subsetProfile(profileTemp,x,rowRanges(object),summariseBy),na.rm=T)) 
          }
      
    
    ## Create melted data frame for ggplot and attach index
    
    profileMatTemp <- melt(as.data.frame(do.call(cbind,profileTempList)))
    if(object@params$style=="region" & plotregion=="full"){
      axisIndex=c(seq(1,(object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)),
                  (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+seq(1,object@params$nOfWindows)*100,
                  (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+
                    seq(1,(object@params$distanceInRegionEnd+object@params$distanceOutRegionEnd+1)))
    }
    if(object@params$style=="point"){
      axisIndex=c(seq(1,(object@params$distanceUp+object@params$distanceDown+1)))
    }
    if(object@params$style=="percentOfRegion"){
      axisIndex=c(seq(1,((nOfWindows*((object@params$distanceAround)/100))*2)+nOfWindows))
    } 
    
    ## Add Sample name, group name and index to dataframe
    profileFrame <-data.frame("xIndex"=axisIndex,Group=profileMatTemp[,1],Sample=basename(unlist(metadata(object)["names"]))[p],Score=profileMatTemp[,2])
    
    profileList[[p]] <- profileFrame
  }  
  
  ## Join profiles from each sample and fix column names
  meltedProfileFrame <- do.call(rbind,profileList)
  colnames(meltedProfileFrame) <- c("xIndex","Group","Sample","Score")
  
  }else{
    
    ## When no summariseBy or gts supplied colmeans or
    ## windsorised colmeans of whole profile matrix
    
    if(!is.null(outliers)){
      profileList <- lapply(c(assays(object)),function(x)
        colMeans(winsorizeMatrix(x,outliers,1-outliers),na.rm=T)
      )         
    }else{    
      profileList <- lapply(c(assays(object)),colMeans,na.rm=T)
    }
    
    ## Join multiple assays/samples
    
    profileFrame <- do.call(cbind,profileList)
    colnames(profileFrame) <- basename(unlist(metadata(object)["names"]))

    ## Attach index for different styles of plots
    
    if(object@params$style=="region"){    
      axisIndex=c(seq(1,(object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)),
                  (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+seq(1,object@params$nOfWindows)*100,
                  (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+
                    seq(1,(object@params$distanceInRegionEnd+object@params$distanceOutRegionEnd+1)))
    }
    if(object@params$style=="point"){
      axisIndex=c(seq(1,(object@params$distanceUp+object@params$distanceDown+1)))
    }
    if(object@params$style=="percentOfRegion"){
      axisIndex=c(seq(1,((object@params$nOfWindows*((object@params$distanceAround)/100))*2)+object@params$nOfWindows))
    }     
    rownames(profileFrame) <- axisIndex
    meltedProfileFrame <- melt(profileFrame)
    colnames(meltedProfileFrame) <- c("xIndex","Sample","Score")
  }
  #profileList <- lapply(c(assays(object)),function(y)lapply(gsets,function(x){colMeans(y[rowRanges(object)$name %in% x,])}))
  
  ## Create geom_path plot
  if(!is.null(gts) & !is.null(groupData)){
    meltedProfileFrame <- merge(meltedProfileFrame,groupData,by.x=2,by.y=1,all.x=TRUE,all.y=FALSE,sort=FALSE)
  }
  if(!is.null(sampleData)){
    sampleNameCol <- which(colnames(meltedProfileFrame) %in% "Sample")
    meltedProfileFrame <- merge(meltedProfileFrame,sampleData,by.x=sampleNameCol,by.y=1,all.x=TRUE,all.y=FALSE,sort=FALSE)
  }
  
  P <- ggplot(meltedProfileFrame,
              aes_string(x="xIndex",y="Score"))+geom_path(alpha = 1,size=1.3)+xlim(0,max(axisIndex))+ylab("Score")+theme(axis.title.y=element_text(angle=0))
  
  ## Add scales depending on style and region being plotted
  
  if(object@params$style=="region" & plotregion =="full"){
    P <- P + scale_x_continuous(breaks=c(1,object@params$distanceOutRegionStart+1,
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1),
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(25/100)*(object@params$nOfWindows*100),
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(75/100)*(object@params$nOfWindows*100),
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100),
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+object@params$distanceInRegionEnd+1,
                                       (object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+(object@params$distanceInRegionEnd+object@params$distanceOutRegionEnd+1)),
                              labels=c(paste0("Start-",object@params$distanceOutRegionStart),
                                       "Start",
                                       paste0("Start+",object@params$distanceInRegionStart),
                                       "25%",
                                       "75%",
                                       paste0("End-",object@params$distanceInRegionEnd),
                                       "End",
                                       paste0("End+",object@params$distanceOutRegionEnd)
                                       )
                                )+
      theme(axis.text.x  = element_text(angle=45, vjust=0.5, size=12))
  }
  if(object@params$style=="point"){
    P <- P + scale_x_continuous(breaks=c(1,object@params$distanceUp+1,object@params$distanceUp+1+object@params$distanceDown),
                              labels=c(paste0("Centre-",object@params$distanceUp),"Centre",paste0("Centre+",object@params$distanceDown)))+
      theme(axis.text.x  = element_text(angle=45, vjust=0.5, size=12))
  }
  if(object@params$style=="percentOfRegion"){
    P <- P + scale_x_continuous(breaks=c(1,(nOfWindows*((object@params$distanceAround)/100)),
                                            (nOfWindows*((object@params$distanceAround)/100))+nOfWindows,
                                             2*(nOfWindows*((object@params$distanceAround)/100))+nOfWindows),
                                labels=c(paste0("Start-",(nOfWindows*((object@params$distanceAround)/100)),"%"),
                                                "Start",
                                                "End",
                                                paste0("End+",(nOfWindows*((object@params$distanceAround)/100)),"%")
                                                ))+
      theme(axis.text.x  = element_text(angle=45, vjust=0.5, size=12))
  }  
  if(object@params$style=="region" & plotregion =="start"){
    P <- P + scale_x_continuous(breaks=c(1,object@params$distanceOutRegionStart+1,object@params$distanceInRegionStart+1+object@params$distanceOutRegionStart),
                              labels=c(paste0("Start-",object@params$distanceOutRegionStart),"Centre",paste0("Start-",object@params$distanceInRegionStart)),
                              limits=c(1,object@params$distanceInRegionStart+1+object@params$distanceOutRegionStart))+
      theme(axis.text.x  = element_text(angle=45, vjust=0.5, size=12))    
  }
  if(object@params$style=="region" & plotregion =="end"){
    P <- P + scale_x_continuous(breaks=(object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+c(1,object@params$distanceInRegionEnd+1,object@params$distanceInRegionEnd+1+object@params$distanceOutRegionEnd),
                              labels=c(paste0("Start-",object@params$distanceInRegionEnd),"Centre",paste0("Start-",object@params$distanceOutRegionEnd)),
                              limits=(object@params$distanceOutRegionStart+object@params$distanceInRegionStart+1)+(object@params$nOfWindows*100)+c(1,object@params$distanceInRegionEnd+1+object@params$distanceOutRegionEnd))+
                                theme(axis.text.x  = element_text(angle=45, vjust=0.5, size=12))    
  } 
  
  

  if(!is.null(gts)){
    if(is.null(groupBy) & is.null(colourBy) & is.null(lineBy)){
      groupBy <- "Group"
    }
    #P <- P+aes(group="group")+aes_string(colour=colourBy,linetype=lineBy)   
  }
  if(is.null(groupBy) & is.null(colourBy) & is.null(lineBy)){
    groupBy <- "Sample"
  }

  
  if(!is.null(groupBy)){
    
    facet <- facet_wrap(
      formula(paste("~",paste(groupBy,collapse="+")))
    )
    P <- P + facet
    
  }
  
  if(freeScale){
    P$facet$params$free$y <- TRUE 
  }
  P <- P+aes_string(colour=colourBy,linetype=lineBy)

  return(P)
}

winsorizeMatrix <- function(mat,limitlow,limithigh){
  apply(mat,2,function(x)winsorizeVector(x,limitlow,limithigh))
}
winsorizeVector <- function(vect,limitlow,limithigh){
  qs <- quantile(vect,c(limitlow,limithigh),na.rm=TRUE)
  vect[vect < qs[1]] <- qs[1]  
  vect[vect > qs[2]] <- qs[2]  
  vect
}
setGeneric("plotRegion", function(object="ChIPprofile",gts=NULL,sampleData=NULL,groupData=NULL,summariseBy=NULL,colourBy=NULL,lineBy=NULL,groupBy=NULL,plotregion="character",outliers=NULL,freeScale=FALSE) standardGeneric("plotRegion"))

#' @rdname plotRegion
#' @export
setMethod("plotRegion", signature(object="ChIPprofile"), plotRegion.ChIPprofile)

subsetProfile <- function(profile,group,granges,summariseColumn){
  if(class(group) == "character"){
    if(is.null(summariseColumn)){
      return(profile[rownames(profile) %in% group,])
    }else{
      return(profile[mcols(granges)[,summariseColumn] %in% group,])
    }
  }
  if(class(group) == "GRanges"){         
    return(profile[granges %over% group,])
  }
}


plotHeatmap <- function(profile,bins=100,col=heat.colors(100),
                        rowScale=FALSE,logScale=TRUE,orderPosition=NULL,orderBy="maxAtPosition",
                        breaks=NULL,maxValue=NULL,...){
  # profile=csc41
  # bins=100
  # col=heat.colors(100)
  # rowScale=TRUE
  # orderPosition=NULL
  # orderBy="maxAtPosition"
  matt <- assay(profile)
  dwdw <- apply(matt,1,function(x)any(is.na(x)))
  matt <- matt[!dwdw,]
  if(is.null(orderPosition)){
    if(is.null(bins)){
      orderPosition <- unique(c(floor(bins/2),ceiling(bins/2)))
    }else{
      orderPosition <- unique(c(floor(bins/2),ceiling(bins/2)))
    }
  }
  # if(rowScale == TRUE){
  #   cols <- colorRampPalette(brewer.pal(9,"Blues"),bias=1)(100)
  # }else{
  #   cols <- colorRampPalette(brewer.pal(9,"Blues"),bias=10)(100)
  # }
  
  if(!is.null(bins)){
    binsize <- floor(ncol(assay(profile))/bins)
    binremainner <- ncol(matt)%%bins
    mat <- matrix(nrow=nrow(matt),ncol=bins)
    firstIndex <- 0
    endIndex <- floor(binsize/2)+binsize
    mat[,1] <- rowMeans(matt[,firstIndex:endIndex])
    for(i in 2:(bins-1)){
      firstIndex <- endIndex+1
      endIndex <- endIndex+binsize
      mat[,i] <- rowMeans(matt[,firstIndex:endIndex])
    }
    
    mat[,i+1] <- rowMeans(matt[,endIndex:ncol(matt)])
    matt <- mat
  }else{
    message("No binning of matrix done")
  }
  if(orderBy=="maxAtPosition"){
    if(length(orderPosition) == 1){
      matt <- matt[order(matt[,orderPosition],decreasing=TRUE),]
    }else{
      matt <- matt[order(rowMeans(matt[,min(orderPosition):max(orderPosition)]),decreasing=TRUE),]
    } 
  }else if(orderBy=="none"){
    matt <- matt
  }else{
    matt <- matt[match(rownames(matt),orderBy),]
    
  }
  if(logScale){
    matt[matt == 0] <- min(matt[matt !=0])
    matt <- log2(matt)
  }
  if(rowScale==TRUE){
    matt <- t(scale(t(matt),center=TRUE,scale=TRUE))
  }

    

  layout(matrix(data=c(1,2), nrow=1, ncol=2),
         widths=c(4,1), heights=c(1,1))
  matttoPlot <- matt[rev(1:nrow(matt)),]
  if(is.null(breaks)){
    if(!is.null(maxValue)){
      breaks <- seq(min(matt),maxValue,length.out = length(col)+1)
      image(t(matttoPlot),useRaster=TRUE,
            xaxt='n',yaxt="n",col=col,breaks=breaks,...) 
    }else{
    image(t(matttoPlot),useRaster=TRUE,
          xaxt='n',yaxt="n",col=col,...)
    }
  }else{
    image(t(matttoPlot),useRaster=TRUE,
          xaxt='n',yaxt="n",col=col,breaks=breaks,...) 
  }
  par(mar = c(3,2.5,2.5,2))
  image(1, 1:length(col),
        matrix(data=seq(min(matt,na.rm = T),max(matt,na.rm=T),
                        length=length(col)),ncol=length(col),
               nrow=1),
        xlab="",ylab="",xaxt="n",yaxt="n",las=2,col=col)
  return(matt)
}

Try the soGGi package in your browser

Any scripts or data that you put into this service are public.

soGGi documentation built on Nov. 8, 2020, 5:24 p.m.