R/add_reference.R

Defines functions add_reference

#' Adds category lines over the manhattan plot.
#' 
#' @param mhPlot A Manhattan plot grob as generated by the mh function
#' @param y A data frame with annotation data
#' @param z A data frame with best hits data
#' @param snp SNP column in data frames
#' @param chr Chromosome column in data frames
#' @param pb SNP position column in data frames
#' @param categories vector of the columns in y indicating the markers category
#' @param categoryColors list of the colors to use for the categories (ignored if NA)
#' @param flanking the flanking size in Mbp (3 by default)
#' @param build What build to use for plotting ('b37' or 'b38', default is 'b37')
#' 
#' @return A manhattan plot with categories (gg object)
#' 
#' @import ggplot2
#' @import ggtable
#' 
#' @export

#devtools::use_package("ggplot2", "Suggests")
#devtools::use_package("gtable", "Suggests")
#devtools::use_package("ggrepel", "Suggests")

add_reference <- function(mhGrob, y, z, snp='SNP', chr='CHR', bp='BP', categories, categoryColors = NA, 
                          flanking = 3, build = 'b37') {
  
  
  # Build specific variables
  
  chromosomeLength38 <- c(248956422, 242193529, 198295559, 190214555, 181538259, 170805979, 159345973, 145138636, 138394717, 133797422, 135086622, 133275309, 114364328, 107043718, 101991189, 90338345, 83257441, 80373285, 58617616, 64444167, 46709983, 50818468) # bp length from Ensembl GRCh38.p10
  chromosomeLength37 <- c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663, 146364022, 141213431, 135534747,	135006516, 133851895, 115169878, 107349540, 102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566) # bp length from Ensembl GRCh37.p13
  if (build == 'b37') {
    chromosomeLength <- chromosomeLength37
  } else if (build == 'b38') {
    chromosomeLength <- chromosomeLength38
  } else {
    stop("Build not supported")
  }
  genomeLength <- sum(chromosomeLength)
    
  
  # Create data frame for the annotation plot values
  
  annotationDataFrame <- data.frame(snp = y[[snp]], chr = y[[chr]], bp = y[[bp]], stringsAsFactors = F)
  
  for (category in categories) {
    annotationDataFrame[, category] <- factor(y[[category]])
  }
  
  
  # Create data frame for the best hits plot values
  
  bestHitsDataFrame <- data.frame(chr = z[[chr]], bp = z[[bp]], id = z[[name]], stringsAsFactors = F)
  
  
  # Get start and end positions
  
  annotationDataFrame$xStart <- 0 # The start of every annotation marker
  annotationDataFrame$xEnd <- 0 # The end of every annotation marker
  bestHitsDataFrame$x <- 0 # The position of every gene
  chrStart <- c() # The start of every second chromosome
  chrEnd <- c() # The end of every second chromosome
  
  xOffset <- 0
  start <- T
  
  for (chromosomeNumber in 1:22) {
    
    bpTemp <- annotationDataFrame$bp[annotationDataFrame$chr == chromosomeNumber]
    xTemp <- bpTemp + xOffset
    startTemp <- xTemp - flanking * 1000000
    annotationDataFrame$xStart[annotationDataFrame$chr == chromosomeNumber] <- ifelse(startTemp < 0, 0, startTemp)
    endTemp <- xTemp + flanking * 1000000
    annotationDataFrame$xEnd[annotationDataFrame$chr == chromosomeNumber] <- ifelse(endTemp > genomeLength, genomeLength, endTemp)
    
    bestHitsDataFrame$x[bestHitsDataFrame$chr == chromosomeNumber] <- bestHitsDataFrame$bp[bestHitsDataFrame$chr == chromosomeNumber] + xOffset
    
    xOffset <- xOffset + chromosomeLength[chromosomeNumber]
    
    if (start) {
      
      chrStart <- c(chrStart, xOffset)
      
    } else {
      
      chrEnd <- c(chrEnd, xOffset)
      
    }
    
    start <- !start
    
  }
  
  # Get mh grob aand cut it in two
  
  mhTop <- mhGrob[1:5, ]
  mhBottom <- mhGrob[6:nrow(mhGrob), ]
  
  resultGrob <- mhTop
  
  
  # Make best hit plot
  
  bestHitPlot <- ggplot()
  bestHitPlot <- bestHitPlot + geom_vline(aes(xintercept = bestHitsDataFrame$x), col = "black", linetype = "dotted")
  bestHitPlot <- bestHitPlot + geom_rect(aes(xmin = chrStart, xmax = chrEnd, ymin = 0, ymax = 1), alpha = 0.2)
  bestHitPlot <- bestHitPlot + geom_text_repel(data = bestHitsDataFrame, aes(x = x, y = 0.5, label = id), nudge_y = 0, point.padding = NA)
  bestHitPlot <- bestHitPlot + scale_y_continuous(expand = c(0, 0))
  bestHitPlot <- bestHitPlot + scale_x_continuous(expand = c(0.01, 0), limits = c(0, genomeLength))
  bestHitPlot <- bestHitPlot + theme(legend.position = 'none',
                                     axis.line = element_blank(),
                                     axis.title = element_blank(),
                                     axis.text = element_blank(),
                                     axis.ticks = element_blank(),
                                     panel.background = element_rect(fill = NA, colour = "grey50"),
                                     panel.grid = element_blank(),
                                     panel.spacing = unit(0, 'mm'))
  
  
  # Append to the result
  
  plotGrob <- ggplotGrob(bestHitPlot)
  plotGrob <- plotGrob[6, ]
  plotGrob <- gtable_add_cols(plotGrob, gtable_width(resultGrob[, 5]), pos = 5)
  plotGrob <- gtable_add_cols(plotGrob, gtable_width(resultGrob[, 6]), pos = 6)
  resultGrob <- rbind(resultGrob, plotGrob, size = "first")
  
  # Make category pots and append to result
  
  for (i in length(categories):1) {
    
    
    # Get category
    
    category <- categories[i]
    categoriesTemp <- levels(annotationDataFrame[, category])
    
    
    # Get color
    
    if (length(categoryColors) > 1 || !is.na(categoryColors)) {
      
      categoryColorsTemp <- categoryColors[[i]]
      
    } else {
      
      categoryColorsTemp <- c(NA, scales::hue_pal()(length(categoriesTemp) - 1))
      
    }
    
    
    # Remove empty values
    
    plotDataFrame <- annotationDataFrame[! annotationDataFrame[, category] %in% categoriesTemp[is.na(categoryColorsTemp)], ]
    plotDataFrame$category <- plotDataFrame[, category]
    
    categoryColorsTemp <- categoryColorsTemp[!is.na(categoryColorsTemp)]
    
    
    # Sort according to the category
    
    plotDataFrame <- plotDataFrame[order(plotDataFrame$category), ]
    
    
    # Create plot object
    
    categoryPlot <- ggplot()
    
    # Add background rectangle for every second chromosome
    
    categoryPlot <- categoryPlot + geom_rect(aes(xmin = chrStart, xmax = chrEnd, ymin = 0, ymax = 1), alpha = 0.2)
    
    # Add rectangle for every marker
    
    categoryPlot <- categoryPlot + geom_rect(data = plotDataFrame, aes(xmin = xStart, xmax = xEnd, ymin = 0.1, ymax = 0.9, fill = category), alpha = 0.5)
    
    # Set layout
    
    categoryPlot <- categoryPlot + scale_fill_manual(values = categoryColorsTemp)
    categoryPlot <- categoryPlot + scale_y_continuous(position = "right", expand = c(0, 0), breaks = c(0.5), labels = c(category))
    categoryPlot <- categoryPlot + scale_x_continuous(expand = c(0.01, 0), limits = c(0, genomeLength))
    categoryPlot <- categoryPlot + geom_vline(aes(xintercept = bestHitsDataFrame$x), col = "black", linetype = "dotted")
    categoryPlot <- categoryPlot + theme(legend.position = 'none',
                                         axis.line = element_blank(),
                                         axis.title = element_blank(),
                                         axis.text.x = element_blank(),
                                         axis.ticks = element_blank(),
                                         panel.background = element_rect(fill = NA, colour = "grey50"),
                                         panel.grid = element_blank(),
                                         panel.spacing = unit(0, 'mm'))
    
    # Get grob
    
    plotGrob <- ggplotGrob(categoryPlot)
    plotGrob <- plotGrob[6, ]
    
    
    # Fix the number of columns
    
    plotGrob <- gtable_add_cols(plotGrob, gtable_width(resultGrob[, 5]), pos = 5)
    plotGrob <- gtable_add_cols(plotGrob, gtable_width(resultGrob[, 6]), pos = 6)
    
    
    # Add to result
    
    resultGrob <- rbind(resultGrob, plotGrob, size = "first")
    
  }
  
  
  # Add bottom and format height of respective plots
  
  resultGrob <- rbind(resultGrob, mhBottom, size = "first")
  resultGrob$heights[6:(6+length(categories)+1)] <- unit(c(rep(1, length(categories)+1), 20), "null")
  
  # Return the grob
  
  return(resultGrob)
}
SelectionPredisposed/qqman2 documentation built on May 21, 2019, 1:41 a.m.