R/plot_distributions.r

Defines functions plot_distributions

Documented in plot_distributions

#' Plot 1D distributions of variants in cases and controls
#' Plots histogram, density and/or stripchart
#'
#' @author Adam Waring - adam.waring@@msdtc.ox.ac.uk
#'
#' @param case_residues vector of case-variant amino acid residue positions
#' @param control_residues vector of control-variant amino acid residue positions
#' @param type type of plots to plot
#'
#' @keywords RVAT, distribution, cluster, genetics, case-control, gene
#'
#' @details The function takes a vector of case and control missense-variant residue positions and plots their linear positions for comparison.
#' If multiple plot types are selected (as is default) then they are stacked on top of each other in one plot.
#'
#' @export
#' @examples
#' # The essential inputs are case_residues and control_residues
#'
#' nresidues = 1000 # length of the protein
#' probs = rexp(nresidues)^2
#' case_probs = probs * rep(c(1, 3, 1), c(200, 200, 600))
#' control_probs = probs * rep(c(2, 1, 2), c(200, 200, 600))
#'
#' case_residues = sample(1:nresidues, 100, rep=T, case_probs)
#' control_residues = sample(1:nresidues, 100, rep=T, control_probs)
#'
#' plot_distributions(case_residues, control_residues)


#cbpal = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
plot_distributions = function(case_residues, control_residues, type = c("histogram", "density", "stripchart")){

  DF = data.frame(ppos=c(case_residues, control_residues), cohort=c(rep("Cases", length(case_residues)), rep("Controls", length(control_residues))))
  colors <- c("#D55E00", "#009E73")
  plots = list()

  THEME = list(labs(x = "", y = ""), theme_bw(15), theme(panel.grid.minor = element_blank(),
                                                         axis.text.y = element_blank(),
                                                         axis.ticks.y = element_blank(),
                                                         panel.border = element_blank(),
                                                         axis.line = element_line()))

  if("histogram" %in% type){
    plots[["hist"]] = ggplot(DF, aes(x = ppos, fill=cohort)) + geom_histogram(alpha=0.4, bins = 15, aes(y=0.5*..density..), position="dodge") +
      scale_fill_manual(values=colors, name = "Cohort") + THEME
  }

  if("density" %in% type){
    plots[["dens"]] = ggplot(DF, aes(x = ppos, fill=cohort)) + geom_density(alpha=0.4, position="identity") +
      scale_fill_manual(values=colors, name = "Cohort") + THEME
  }

  if("stripchart" %in% type){
    plots[["strip"]] = ggplot(DF, aes(x = ppos, color=cohort, y = cohort)) + geom_jitter(alpha=0.4, height=0.2, size=5) +
      scale_color_manual(values=colors, name = "Cohort") + THEME
  }

  g1 = ggarrange(plotlist=plots, nrow = length(plots), legend = "right", common.legend = T)
  annotate_figure(g1, bottom=text_grob("Variant residue position in linear protein sequence", size=20))


}
adamwaring/POSBURDEN documentation built on Feb. 21, 2020, 11:21 a.m.