R/plots.R

Defines functions density_plot bar_plot getMinimumDistances

Documented in bar_plot density_plot

#' density_plot
#'
#' Density (line) plot of multiple samples
#'
#' Intended for use with GC content and gene length data
#'
#' @param plotting_data list of numerical vectors
#' @param main title
#' @param log whether to log2 transform the data [default: false]
#' @param dataset_names vector of names of the datasets in the plotting_data list, by default these are
#' taken from the names of the components in the list
#'
#' @return produces a density plot.
#' @examples
#' density_plot(some_data)
density_plot <- function(plotting_data, main = "", xlab = "", log = FALSE,
                         dataset_names = names(plotting_data), legend = TRUE,
                         legend_cex = 1, col = NULL, legend_pos = "topright") {

  if (!is.list(plotting_data)) stop("plotting_data must be a list")

  # need at least 2 values to plot density
  if (
    is.null(plotting_data) |
    length(plotting_data) < 1 |
    length(unlist(plotting_data)) < 2
  ) stop(
    "data to plot must contain at least one dataset with a minimum of 2 values"
  )

  ifelse(is.null(dataset_names), legend_text <- "", legend_text <- dataset_names)
  ifelse(is.null(col), colours <- 1:length(plotting_data), colours <- col)

  plotting_data <- na.omit(plotting_data)

  if(log == TRUE) plotting_data <- lapply(plotting_data, log2)

  x_ranges <- sapply(plotting_data, simplify = TRUE, function(x) range(density(x)$x))
  x_range  <- range(unlist(x_ranges))

  y_maxes <- sapply(plotting_data, simplify = TRUE, function(x) max(density(x)$y))
  y_range <- c(0,max(unlist(y_maxes)))

  plot(
    density(plotting_data[[1]], na.rm = T),
    main = main,
    xlim = x_range,
    ylim = y_range,
    lwd = 2,
    xlab = xlab,
    col = colours[1]
  )

  for (i in 2:length(plotting_data)) {
    lines(density(plotting_data[[i]]),  col = colours[i], lwd = 2)
  }

  if (legend == TRUE) {
    legend(legend_pos, legend = legend_text, fill = colours, bty = "n", cex = legend_cex)
  }
}


#' bar_plot
#'
#' barplot to compare 2 samples
#'
#' Intended for use with chromosome and (biotype - not sure if we're using this here) data.
#' Uses the base function barplot and passes extra parameters to it.
#'
#' @param plotting_data dataframe or vector containing numerical data
#' @param main title
#' @param dataset_names vector of names of the datasets in the plotting_data list,
#' by default these are taken from the column names of the dataframe.
#' @return produces a barplot and returns values generated by base function barplot.
#' @examples
#' bar_plot(chromosomes)
#' bar_plot(chromosomes, cex_names = 0.8, col = topo.colors(2), ylab = "% of genes", xlab = "chromosome")

bar_plot <- function(plotting_data, main = "", xlab = "",  ylab = "%", las = 1,
                     dataset_names = colnames(plotting_data), order_numerically = FALSE,
                     ordered_categories = NULL, plot_differences = FALSE,
                     cex_y_axis = par("cex.axis"), cex_names = par("cex.axis"),
                     col = NULL, legend = TRUE, legend_cex = 1) {

  if (!is.numeric(plotting_data)) stop("plotting_data must be a numeric vector or dataframe")

  if (!(
    is.vector(plotting_data) |
    is.data.frame(plotting_data) |
    is.matrix(plotting_data)
  )) {
    stop("plotting_data must be a vector or a dataframe")
  }

  if (is.null(plotting_data)) stop("some data values must be supplied")

  ifelse(is.null(dataset_names), legend_text <- "", legend_text <- dataset_names)

  if (plot_differences == FALSE) {

    ifelse(is.null(col), colours <- 1:ncol(plotting_data), colours <- col)

    barplot(
      t(plotting_data),
      beside = T,
      main = main,
      xlab = xlab,
      ylab = ylab,
      las = las,
      col = colours,
      cex.names = cex_names,
      cex.axis = cex_y_axis
    )

    if (legend == TRUE) {
      legend_colours <- colours
      legend(
        "topright",
        legend = legend_text,
        fill = legend_colours,
        bty = "n",
        cex = legend_cex
      )
    }

  } else if (plot_differences == TRUE & ncol(plotting_data) != 2) {

    stop("To plot differences, the data must contain 2 columns")

  } else {

    diff_data <- plotting_data[,2] - plotting_data[,1]

    if (length(legend_text) == 1) {
      label = "proportion difference between the 2 datasets"
      warning("To label the y axis more informatively, include column names for the data")
    } else {
      label <- paste(legend_text[2], " - ", legend_text[1], " proportion", sep = "")
    }
    barplot(diff_data, main = main, xlab = xlab, ylab = label, las = las)
  }
}


#=================================================
# function to get minimum distances between genes
#=================================================
getMinimumDistances <- function(query.location.data) {

  # get the number of genes per chromosome
  chr.counts <- table(query.location.data[, "chromosome"])

  # remove the chromosomes where there's only 1 gene
  query.location.data <- query.location.data[query.location.data[, "chromosome"] %in% names(chr.counts)[chr.counts > 1], ]

  centrepoints <- query.location.data[, "start"] + (query.location.data[, "end"] - query.location.data[, "start"])/2

  # get the distance between a gene and its closest neighbour
  min.distances <- tapply(centrepoints, INDEX = query.location.data[, "chromosome"], FUN = function(x) {

    if (length(x) < 2) return(NA) else {

      my.dist <- as.matrix(dist(x, upper = T))
      # we don't want the diagonals to be 0
      diag(my.dist) <- NA

      apply(my.dist, 1, min, na.rm = T)
    }
  })
  return(min.distances)
}
laurabiggins/GOcategoryStats documentation built on Oct. 27, 2019, 11:36 a.m.