R/plotting.R

Defines functions plotMutationSpectrum geom_h5vc mismatchPlot .processDataForMismatchPlot

Documented in geom_h5vc mismatchPlot plotMutationSpectrum

.processDataForMismatchPlot <- function( data, sampledata, samples, range, position, windowsize, plotReference, refHeight, tickSpacing ){
  if(is.null(range)){
    if(!is.null(position)){
      range <- c( position - windowsize, position + windowsize )  
    }else{
      stopifnot("h5dapplyInfo" %in% names(data) )
      range <- c( data$h5dapplyInfo$Blockstart, data$h5dapplyInfo$Blockend)
    }
  }
  for( ds in names(data) ){ #strip dimnames to ensure functionality downstream - you have to make sure your data is in the correct format yourself
    dimnames(data[[ds]]) <- NULL
  }
  sampleColumns = sampledata$Column[ match(samples, sampledata$Sample) ]
  stopifnot(length(sampleColumns)>0, !any(is.na(sampleColumns)))
  #stopifnot(is.numeric(windowsize), length(windowsize)==1,
  #          is.numeric(position),   length(position)==1)
  stopifnot( "Counts" %in% names(data), "Coverages" %in% names(data), "Deletions" %in% names(data) )
  if(plotReference & "Reference" %in% names(data)){
    samples = c(samples, "Reference")
  }
  dimnames(data$Coverages)[[3]] <- seq(range[1],range[2])
  cvg = melt( data$Coverages[ sampleColumns,,,drop=FALSE ] )
  colnames(cvg) = c("Sample", "Strand", "Position", "Coverage")
  cvg$Sample = factor( samples[ cvg$Sample ], levels = samples )
  cvg$Coverage[cvg$Strand == 2] <- -cvg$Coverage[cvg$Strand == 2]
  dimnames(data$Counts)[[4]] <- seq(range[1],range[2])
  counts = melt( data$Counts[ ,sampleColumns,,,drop=FALSE ] )
  colnames(counts) <- c("Base", "Sample", "Strand", "Position", "Count")
  dimnames(data$Deletions)[[3]] <- seq(range[1],range[2])
  dels = melt( data$Deletions[ sampleColumns,,,drop=FALSE ] )
  colnames(dels) = c("Sample","Strand","Position","Deletions")
  dels$Deletions[dels$Strand == 2] <- -dels$Deletions[dels$Strand == 2]
  dels$Coverage = cvg$Coverage
  dels$Sample = factor( samples[ dels$Sample ], levels=samples )
  dels$Type = "Del"
  counts$Sample = factor( samples[ counts$Sample ], levels=samples )
  for( i in 1:4 ){
    counts$Count[ counts$Base == i ] = counts$Count[ counts$Base == i ] + counts$Count[ counts$Base == i + 8 ]
  }
  counts = counts[ counts$Base %in% 1:8, ]
  plot_bases = c( "A_lq", "C_lq", "G_lq", "T_lq", "A", "C", "G", "T" )
  counts$Base = plot_bases[counts$Base]
  counts$Base = factor( counts$Base, levels = c("A", "C", "G", "T", "A_lq", "C_lq", "G_lq", "T_lq") )
  counts$Bottom = 0
  tmp = rep(0, range[2]-range[1] + 1 )
  for( sample in unique( counts$Sample ) ){
    for( strand in 1:2 ){
      for( b in levels(counts$Base) ){
        ss = counts$Count[counts$Strand == strand & counts$Base == b & counts$Sample == sample]
        counts$Count[ counts$Strand == strand & counts$Base == b & counts$Sample == sample ] = counts$Count[ counts$Strand == strand & counts$Base == b & counts$Sample == sample ] + tmp
        counts$Bottom[ counts$Strand == strand & counts$Base == b & counts$Sample == sample ] = tmp
        tmp = tmp + ss
      }
      tmp = rep(0, range[2]-range[1] + 1 )
    }
  }
  counts$Count[counts$Strand == 2] <- -1*counts$Count[counts$Strand == 2]
  counts$Bottom[counts$Strand == 2] <- -1*counts$Bottom[counts$Strand == 2]
  #Reference Stuff
  if( "Reference" %in% names(data) & plotReference){
    ref <- data.frame( "Base" = bases[data$Reference+1], "Sample" = "Ref", "Strand" = "+", "Position" = seq( range[1], range[2]), "Count" = refHeight, "Bottom" = 0 )
    counts <- rbind(counts, ref)
  }
  cvg$Range <- paste(range, collapse = " - ")
  counts$Range <- paste(range, collapse = " - ")
  dels$Range <- paste(range, collapse = " - ")
  return(list(
    "Coverage"  = cvg,
    "Counts"    = counts,
    "Deletions" = dels
    ))
}

mismatchPlot <- function( data, sampledata, samples=sampledata$Sample, windowsize = NULL, position = NULL, range = NULL, plotReference = TRUE, refHeight=8, printReference = TRUE, printRefSize = 2, tickSpacing = c(10,10) ){
  plotList <- list()
  if(!plotReference & printReference){
    stop("Can't print reference without plotting it, please set 'plotReference' to 'TRUE'.")
  }
  if(!("Coverages" %in% names(data))){
    tmpList <- lapply( data, function(dat) .processDataForMismatchPlot(dat, sampledata, samples, range = NULL, position = position, windowsize = windowsize, plotReference = plotReference, refHeight = refHeight, tickSpacing = tickSpacing) )
    plotList$Coverage <- do.call(rbind, lapply( tmpList, function(x) x$Coverage))
    plotList$Counts <- do.call(rbind, lapply( tmpList, function(x) x$Counts))
    plotList$Deletions <- do.call(rbind, lapply( tmpList, function(x) x$Deletions))
  }else{
    plotList <- .processDataForMismatchPlot(data, sampledata, samples, range, position = position, windowsize = windowsize, plotReference = plotReference, refHeight = refHeight, tickSpacing = tickSpacing) 
  }  
  mmp <- ggplot( plotList$Coverage ) +
    geom_rect( data = plotList$Coverage, aes_string( xmin = 'Position', xmax = 'Position + 1', ymax = 'Coverage', ymin = '0' ), fill = "#C5C5C5" ) +
    geom_rect( data = plotList$Counts, aes_string( xmin = 'Position', xmax = 'Position + 1', ymin = 'Bottom', ymax = 'Count', fill = 'Base' ) ) +
    geom_rect( data = plotList$Deletions, aes_string( xmin = 'Position', xmax = 'Position + 1', ymin = 'Coverage', ymax = 'Coverage + Deletions', fill = 'Type' ) ) +
    scale_fill_manual( values = c("A" = "#15D515", "C" = "#D51515", "G" = "#1515D5", "T" = "#D5D515", "Del" = "purple", "A_lq"="#75D575", "C_lq"="#D57575", "G_lq"="#7575D5", "T_lq"="#D5D575") ) +
    geom_hline( aes( yintercept = 0 ), colour = "#555555" ) +
    scale_x_continuous(
      expand = c(0,0)
      ) +
    facet_grid( Sample ~ Range, scales="free", space="free", margins=FALSE ) +
    theme(axis.text.x = element_text(angle=45, vjust=0.5, size = 12), legend.position = "top") + expand_limits(y=c(0,refHeight)) +
    scale_y_continuous(breaks = seq(ceiling(min(plotList$Coverage$Coverage) / tickSpacing[2])*tickSpacing[2],floor(max(plotList$Coverage$Coverage)/tickSpacing[2])*tickSpacing[2],tickSpacing[2])) +
    ylab("Read Depth") + theme_bw() + theme(strip.background = element_rect(fill="white"), strip.text.y = element_text(angle = 0))
  if(printReference){
    mmp <- mmp + geom_text( data = subset( plotList$Counts, Sample == "Ref"), aes_string(x = 'Position + 0.5', y = 'refHeight/2', label = 'Base'), family = "mono", position = position_dodge(0.9), size = printRefSize, hjust = 0.5)
  }
  if(!is.null(position)){
    mmp + geom_vline( aes( xintercept = c(position) ), colour = "#555555" ) + geom_vline( aes( xintercept = c(position + 1) ), colour = "#555555" )
  }else{
    mmp
  }
}

geom_h5vc <- function( data, sampledata, samples=sampledata$Sample, windowsize, position, dataset, ... ){
  sampleColumns = sampledata$Column[ match(samples, sampledata$Sample) ]
  stopifnot(length(sampleColumns)>0, !any(is.na(sampleColumns)))
  stopifnot(is.numeric(windowsize), length(windowsize)==1,
            is.numeric(position),   length(position)==1)
  dat = melt( data[[dataset]][ sampleColumns,,,drop=FALSE ] ) 
  colnames(dat) = c("Sample", "Strand", "Position", "Value")
  dat$Sample = samples[ dat$Sample ]
  dat$Value[dat$Strand == 2] <- -dat$Value[dat$Strand == 2]
  dat$Position = dat$Position - windowsize - 1
  ret = geom_rect( data = dat, aes_string( xmin = 'Position', xmax = 'Position + 1', ymax = 'Value', ymin = '0' ), ...)
  return(ret)
}

plotMutationSpectrum <- function( ms, plotCounts = TRUE ){
  ms$MutationContext <- paste( ms$Prefix, ms$Suffix, sep = "." )
  ms <- subset(ms, altAllele != "-")
  if( plotCounts ){
    p <- ggplot( ms, aes_string( x = "MutationContext", fill = "MutationType") ) + 
      geom_bar(width = 0.5) + 
      facet_grid( Sample ~ MutationType ) +
      scale_y_discrete(name="Number of Events") +
      scale_x_discrete(name="Mutation Context") +
      scale_fill_manual( values = c( "C>A" = "#1515A5", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "#15A515", "T>G" = "#F5C5C5" ) ) +
      theme(
        axis.text = element_text( size = 12 ),
        axis.title = element_text( size = 16 ),
        axis.text.x = element_text( angle = 90, hjust = 0 ),
        strip.text = element_text(size = 14),
        legend.position="none"
      )
  }else{
    plot_df <- table(ms[,c("MutationType","MutationContext","Sample")])
    for( sample in unique( tmp$Sample ) ){
      plot_df[,,sample] <- plot_df[,,sample] / sum( plot_df[,,sample] ) * 100
    }
    plot_df <- melt(plot_df)
    colnames(plot_df)[4] <- "Percentage"
    p <- ggplot( plot_df, aes_string( x = "MutationContext", fill = "MutationType", y = "Percentage") ) +
      geom_bar(width = 0.5, stat = "identity") +
      facet_grid( Sample ~ MutationType ) +
      scale_y_discrete(name="Percentage of Mutations", seq(0,100, 25)) +
      scale_x_discrete(name="Mutation Context") +
      scale_fill_manual( values = c( "C>A" = "#1515A5", "C>G" = "black", "C>T" = "red", "T>A" = "grey", "T>C" = "#15A515", "T>G" = "#F5C5C5" ) ) +
      theme(
        axis.text = element_text( size = 12 ),
        axis.title = element_text( size = 16 ),
        axis.text.x = element_text( angle = 90, hjust = 0 ),
        strip.text = element_text(size = 14),
        legend.position="none"
      )
  }
  return(p)
}

Try the h5vc package in your browser

Any scripts or data that you put into this service are public.

h5vc documentation built on Nov. 8, 2020, 4:56 p.m.