#' density_plot
#'
#' Density (line) plot of multiple samples
#'
#' Intended for use with GC content and gene length data
#'
#' @param plotting_data list of numerical vectors
#' @param main title
#' @param log whether to log2 transform the data [default: false]
#' @param dataset_names vector of names of the datasets in the plotting_data list, by default these are
#' taken from the names of the components in the list
#'
#' @return produces a density plot.
#' @examples
#' density_plot(some_data)
density_plot <- function(plotting_data, main = "", xlab = "", log = FALSE,
dataset_names = names(plotting_data), legend = TRUE,
legend_cex = 1, col = NULL, legend_pos = "topright") {
if (!is.list(plotting_data)) stop("plotting_data must be a list")
# need at least 2 values to plot density
if (
is.null(plotting_data) |
length(plotting_data) < 1 |
length(unlist(plotting_data)) < 2
) stop(
"data to plot must contain at least one dataset with a minimum of 2 values"
)
ifelse(is.null(dataset_names), legend_text <- "", legend_text <- dataset_names)
ifelse(is.null(col), colours <- 1:length(plotting_data), colours <- col)
plotting_data <- na.omit(plotting_data)
if(log == TRUE) plotting_data <- lapply(plotting_data, log2)
x_ranges <- sapply(plotting_data, simplify = TRUE, function(x) range(density(x)$x))
x_range <- range(unlist(x_ranges))
y_maxes <- sapply(plotting_data, simplify = TRUE, function(x) max(density(x)$y))
y_range <- c(0,max(unlist(y_maxes)))
plot(
density(plotting_data[[1]], na.rm = T),
main = main,
xlim = x_range,
ylim = y_range,
lwd = 2,
xlab = xlab,
col = colours[1]
)
for (i in 2:length(plotting_data)) {
lines(density(plotting_data[[i]]), col = colours[i], lwd = 2)
}
if (legend == TRUE) {
legend(legend_pos, legend = legend_text, fill = colours, bty = "n", cex = legend_cex)
}
}
#' bar_plot
#'
#' barplot to compare 2 samples
#'
#' Intended for use with chromosome and (biotype - not sure if we're using this here) data.
#' Uses the base function barplot and passes extra parameters to it.
#'
#' @param plotting_data dataframe or vector containing numerical data
#' @param main title
#' @param dataset_names vector of names of the datasets in the plotting_data list,
#' by default these are taken from the column names of the dataframe.
#' @return produces a barplot and returns values generated by base function barplot.
#' @examples
#' bar_plot(chromosomes)
#' bar_plot(chromosomes, cex_names = 0.8, col = topo.colors(2), ylab = "% of genes", xlab = "chromosome")
bar_plot <- function(plotting_data, main = "", xlab = "", ylab = "%", las = 1,
dataset_names = colnames(plotting_data), order_numerically = FALSE,
ordered_categories = NULL, plot_differences = FALSE,
cex_y_axis = par("cex.axis"), cex_names = par("cex.axis"),
col = NULL, legend = TRUE, legend_cex = 1) {
if (!is.numeric(plotting_data)) stop("plotting_data must be a numeric vector or dataframe")
if (!(
is.vector(plotting_data) |
is.data.frame(plotting_data) |
is.matrix(plotting_data)
)) {
stop("plotting_data must be a vector or a dataframe")
}
if (is.null(plotting_data)) stop("some data values must be supplied")
ifelse(is.null(dataset_names), legend_text <- "", legend_text <- dataset_names)
if (plot_differences == FALSE) {
ifelse(is.null(col), colours <- 1:ncol(plotting_data), colours <- col)
barplot(
t(plotting_data),
beside = T,
main = main,
xlab = xlab,
ylab = ylab,
las = las,
col = colours,
cex.names = cex_names,
cex.axis = cex_y_axis
)
if (legend == TRUE) {
legend_colours <- colours
legend(
"topright",
legend = legend_text,
fill = legend_colours,
bty = "n",
cex = legend_cex
)
}
} else if (plot_differences == TRUE & ncol(plotting_data) != 2) {
stop("To plot differences, the data must contain 2 columns")
} else {
diff_data <- plotting_data[,2] - plotting_data[,1]
if (length(legend_text) == 1) {
label = "proportion difference between the 2 datasets"
warning("To label the y axis more informatively, include column names for the data")
} else {
label <- paste(legend_text[2], " - ", legend_text[1], " proportion", sep = "")
}
barplot(diff_data, main = main, xlab = xlab, ylab = label, las = las)
}
}
#=================================================
# function to get minimum distances between genes
#=================================================
getMinimumDistances <- function(query.location.data) {
# get the number of genes per chromosome
chr.counts <- table(query.location.data[, "chromosome"])
# remove the chromosomes where there's only 1 gene
query.location.data <- query.location.data[query.location.data[, "chromosome"] %in% names(chr.counts)[chr.counts > 1], ]
centrepoints <- query.location.data[, "start"] + (query.location.data[, "end"] - query.location.data[, "start"])/2
# get the distance between a gene and its closest neighbour
min.distances <- tapply(centrepoints, INDEX = query.location.data[, "chromosome"], FUN = function(x) {
if (length(x) < 2) return(NA) else {
my.dist <- as.matrix(dist(x, upper = T))
# we don't want the diagonals to be 0
diag(my.dist) <- NA
apply(my.dist, 1, min, na.rm = T)
}
})
return(min.distances)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.