#' @title Plot the distribution of overall NGS density at specific regions from deepTools matrices.
#'
#' @description Computes the score of each element in a list of regions and generates violins plots with percentiles and the mean (optional) for each sample/region. It uses as input a score matrix computed by deeptools's computeMatrix function or by \link{computeMatrix.deeptools} and \link{density.matrix} functions from this package.
#'
#' @param matrix.file A single string indicating a full path to a matrix.gz file generated by \code{deepTools/computeMatrix} or by \link{computeMatrix.deeptools}, or a list generated by the function \link{read.computeMatrix.file} or \link{density.matrix}.
#' @param missing.data.as.zero Logical value to define whether to treat missing data as 0. If set as \code{FALSE} missing data will be converted to \code{NA} and will be excluded from the computations of the signal. If \code{NULL}, the behavior stored in the matrix file is used. By default \code{NULL}.
#' @param sample.names Sample names could be defined by a string vector. If set as \code{NULL} sample names will be obtained automatically from the matrix file. By default \code{NULL}. \cr Example: \code{c("sample1", "sample2", "sample3")}
#' @param region.names Region names could be defined by a string vector. If set as \code{NULL} region names will be obtained automatically from the matrix file. By default \code{NULL}. \cr Example: \code{c("regionA", "regionB")}
#' @param signal.type String indicating the signal to be computed and plotted/compared. Available parameters are "mean", "median" and "sum". By default "mean".
#' @param error.type String indicating the type of error to be computed and that will be available in the output data.table. Available parameters are "sem" and "sd", standard error of the mean and standard deviation respectively. By default "sem".
#' @param subset.range A numeric vector indicating the range to which restrict the analyses (eg. c(-150, 250)). In the case of "scale-region" mode, the range is represented by (-upstream | 0 | body_length | body_length+downstream).By default \code{NULL}: the whole region is considered.
#'
#' @param inverted.comparisons Logical value to indicate whether to invert the order of the pair-comparisons. By default \code{FALSE}.
#' @param stat.method A single string defining the method to use for the statistical comparisons. By default \code{"wilcox.test"}. Available options: "t.test" "wilcox.test".
#' @param stat.paired Logical value to define if the statistical comparisons should be performed paired. By default \code{TRUE}. Notice that to allow a paired comparison the number of data should be the same in the two groups compared, so in the most of the cases non applicable to the comparisons between two regions. Used only in \code{"t.test"} and \code{"wilcox.test"} methods.
#' @param stat.p.levels A list containing the p-values levels/thresholds in the following format (default): \code{list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))}. In other words, we use the following convention for symbols indicating statistical significance:
#' \itemize{
#' \item \code{ns}: p > 0.05
#' \item \code{*} p <= 0.05
#' \item \code{**} p <= 0.01
#' \item \code{***} p <= 0.001
#' \item \code{****} p <= 0.0001
#' }
#'
#' @param area.line.width Numeric value to define width of the line connecting the points in the area.plots. By default \code{0.5}.
#' @param area.fill.area Logical value to indicate whether to fill the area under the line in the area.plot. By default \code{TRUE}.
#' @param area.plot.zero.line Logical value to define whether to plot a dashed gray vertical line in correspondence of the 0 of each area.plot. By default \code{TRUE}.
#' @param area.y.identical.auto Logical value to define whether use the same Y-axis range for all the area.plots automatically depending on their values. By default \code{TRUE}.
#' @param area.y.ticks.interval A number indicating the interval/bin spacing two ticks on the Y-axis of area.plots. By default \code{NULL}: ticks are assigned automatically.
#' @param area.y.digits Numeric value defining the number of digits to use for the Y-axis values of area.plots. By default \code{1} (eg. 1.5).
#'
#' @param correlation.log2 Logical value to define whether the correlation.plots should show the log2 value of the score. By default \code{TRUE}.
#' @param correlation.plot.correlation Logical value to indicate whether to plot the correlation curve on the correlation.plot. By default \code{TRUE}.
#' @param correlation.correlation.method Atomic string describing the method to use to compute the regression curve, eg. "lm", "glm", "gam", "loess", "rlm". By default \code{'lm'}.
#' @param correlation.show.equation Logical value to indicate whether to display the regression equation and R-squared on the correlation.plot. By default \code{TRUE}.
#' @param correlation.correlation.line.width Numeric value to define correlation line width for all correlation.plots. By default \code{0.75}.
#' @param correlation.correlation.line.color String (or R color value) to define the correlation line color for all correlation.plots. By default \code{"purple"}.
#' @param correlation.correlation.line.type A numeric or character value to define the correlation line type. Both numeric and string codes are accepted. By default \code{"solid"}.
#' @param correlation.correlation.line.SE Logical value to indicate whether to plot the standard error (SE) of the correlation curve in the correlation.plot. By default \code{TRUE}.
#' @param correlation.correlation.formula Atomic string indicating the formula to use to compute the correlation curve. By default \code{"y ~ x"}.
#' @param correlation.add.rug Logical value to indicate whether to add a rug representation (1-d plot) of the data to the correlation.plot. By default \code{TRUE}.
#' @param correlation.x.identical.auto Logical value to define whether use the same X-axis range for all the correlation.plots automatically depending on their values. By default \code{TRUE}.
#' @param correlation.y.identical.auto Logical value to define whether use the same Y-axis range for all the correlation.plots automatically depending on their values. By default \code{TRUE}.
#' @param correlation.x.ticks.interval A number indicating the interval/bin spacing two ticks on the X-axis of correlation.plots. By default \code{NULL}: ticks are assigned automatically.
#' @param correlation.y.ticks.interval A number indicating the interval/bin spacing two ticks on the Y-axis of correlation.plots. By default \code{NULL}: ticks are assigned automatically.
#' @param correlation.x.digits Numeric value defining the number of digits to use for the X-axis values of correlation.plots. By default \code{1} (eg. 1.5).
#' @param correlation.y.digits Numeric value defining the number of digits to use for the Y-axis values of correlation.plots. By default \code{1} (eg. 1.5).
#'
#' @param points.size A numeric value defining the size of the points in both area and correlation plot. By default \code{0.5}.
#' @param transparency A numeric value to define the fraction of transparency of the fill area in the area.plot and the SE in the correlation plot (0 = transparent, 1 = full). By default \code{0.25}.
#' @param axis.line.width Numeric value to define the axes and ticks line width for all plots. By default \code{0.5}.
#' @param text.size Numeric value to define the size of the text for the labels of all the plots. By default 12.
#' @param legend.position Any ggplot supported value for the legend position (eg. "none, "top", "bottom", "left", "right", c(fraction.x, fraction.y)). By default \code{c(0.2, 0.85)}.
#' @param colors Vector of 3 elements to define the points and area colors ('Sample1', 'Sample2' and, 'No difference' values respectively). If only one value is provided it will be applied to all the samples. If the number of values is less than 3, the default color set will be used. All supported R.colors values are accepted. By default \code{c("Sample1" = "#F8766D", "Sample2" = "#00A5CF", "No difference" = "#00BA38")}.
#'
#' @param n.row.multiplot Numeric value to define the number of rows in the final multiplot.
#' @param by.row Logical value to define whether the plots should be arranged by row. By default \code{TRUE}.
#'
#' @return The function returns a list containing:
#' \itemize{
#' \item \code{data.table} with the computed values with all groups and all samples;
#' \item \code{metadata} table with the information obtained from the matrix_file.gz;
#' \item \code{comparison.table.list} with a list of tables for each group with a table per each comparison containing the original data and the compared values (differences);
#' \item \code{comparison.statistics.table} with a table with all the statistical comparisons;
#' \item \code{area.plot.byGroup.list} with a list per group with a all the area.plots of each comparison;
#' \item \code{correlation.plot.byGroup.list} with a list per group with a all the correlation.plots of each comparison;
#' \item \code{area.multiplot.list} with an area.multiplot per each group;
#' \item \code{correlation.multiplot.list} with an correlation.multiplot per each group.
#' }
#'
#'
#' @details To know more about the deepTools's function \code{computeMatrix} see the package manual at the following link: \cr \url{https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html}.
#'
#' @export plot.density.differences
#'
# @import dplyr
# @import ggplot2
# @import ggpmisc stat_poly_eq
# @importFrom data.table fread
# @importFrom ggpubr compare_means
# @importFrom stringr str_split
# @importFrom robustbase colMedians
# @importFrom matrixStats colSds
# @importFrom purrr reduce
# @importFrom cowplot plot_grid
# @importFrom tools toTitleCase
plot.density.differences = function(
  matrix.file,
  missing.data.as.zero = NULL,
  sample.names = NULL,
  region.names = NULL,
  signal.type = "mean", # type of signal computation: one of "mean", "median", "sum"
  error.type = "sem",
  subset.range = NULL,
  # statistics
  inverted.comparisons = FALSE,
  stat.method = "wilcox.test",
  stat.paired = TRUE,
  stat.p.levels = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
                       symbols = c("****", "***", "**", "*", "ns")),
  # area.plot parameters
  area.line.width = 0.5,
  area.fill.area = TRUE,
  area.plot.zero.line = TRUE,
  area.y.identical.auto = TRUE,
  area.y.ticks.interval = NULL,
  area.y.digits = 1,
  # correlation.plot parameters
  correlation.log2 = TRUE,
  correlation.plot.correlation = TRUE,
  correlation.correlation.method = "lm",
  correlation.show.equation = TRUE,
  correlation.correlation.line.width = 0.75,
  correlation.correlation.line.color = "purple",
  correlation.correlation.line.type = 1,
  correlation.correlation.line.SE = TRUE,
  correlation.correlation.formula = "y ~ x",
  correlation.add.rug = TRUE,
  correlation.x.identical.auto = TRUE,
  correlation.y.identical.auto = TRUE,
  correlation.x.ticks.interval = NULL,
  correlation.y.ticks.interval = NULL,
  correlation.x.digits = 1,
  correlation.y.digits = 1,
  # Other plot parameters
  points.size = 0.5,
  transparency = 0.25,
  axis.line.width = 0.5,
  text.size = 12,
  legend.position = c(0.2, 0.85),
  colors = c("Sample1" = "#F8766D",
             "Sample2" = "#00A5CF",
             "No difference" = "#00BA38"),
  # Multiplot options
  n.row.multiplot = 1,
  by.row = TRUE) {

  ############################################################################
  ## ------------------------ BEGIN OF THE FUNCTION ----------------------- ##
  ############################################################################

  # Load the libraries whose functions are used unqualified below
  # (all other packages are called via '::')
  require(dplyr)
  require(ggplot2)

  # Check if Rseb is up-to-date
  Rseb::actualize(update = FALSE, verbose = FALSE)

  ############################################################################
  ### Parameter validation (the function returns a warning on invalid input) #
  ############################################################################

  ### Check that the signal method is allowed
  # BUGFIX: corrected 'siganl.type' typo in the warning message
  if (!is.character(signal.type) || length(signal.type) != 1 ||
      !(signal.type %in% c("mean", "median", "sum"))) {
    return(warning("The 'signal.type' parameter must be one among [mean, median, sum]."))
  }

  ### Check colors: must be 1 or 3 valid R colors
  # (Sample1, Sample2, No difference, in this order)
  if (FALSE %in% Rseb::is.color(colors)) {
    return(warning("The 'colors' parameter must be a single string or a 3-elements vector of strings including R-supported colors."))
  }
  if (length(colors) == 1) {colors = rep(colors, 3)}
  if (length(colors) == 2) {
    return(warning("The colors vector contains only two elements, three are required: Sample1, Sample2, No difference in this order."))
  }
  if (length(colors) > 3) {
    colors = colors[1:3]
    message("Only first 3 colors will be kept for Sample1, Sample2 and No difference data respectively.")
  }

  ### Check error parameters
  # BUGFIX: the original tested length(error.type == 1), i.e. the length of a
  # comparison result (always 1), so a multi-element 'error.type' slipped through.
  if (length(error.type) == 1) {
    if (!(error.type %in% c("sem", "sd", "none"))) {
      return(warning("The 'error.type' must be one among 'sem', 'sd' or 'none'."))
    }
  } else {
    return(warning("The 'error.type' must be a single string among 'sem' or 'sd'."))
  }

  ### Check stat.method
  if (!is.character(stat.method) || length(stat.method) != 1 ||
      !(stat.method %in% c("t.test", "wilcox.test"))) {
    return(warning("The 'stat.method' parameter must be one among 't.test' and 'wilcox.test'"))
  }

  ############################################################################
  # Import/read the matrix.gz file (path) or unpack an already-read list
  ############################################################################
  if (is.character(matrix.file) && length(matrix.file) == 1) {
    m = Rseb::read.computeMatrix.file(matrix.file)
    metadata = m$metadata
    matrix.data = m$matrix.data
  } else if (inherits(matrix.file, "list") && length(matrix.file) == 3 &&
             # BUGFIX: the original used unique(names(...) == c(...)), which can
             # yield a length-2 logical vector and crash the if(); all() is scalar.
             all(names(matrix.file) == c("metadata", "matrix.data", "original.file.path"))) {
    metadata = matrix.file$metadata
    matrix.data = matrix.file$matrix.data
  } else {
    return(warning("The 'matrix.file' must be a single string indicating a full path to a matrix.gz file generated by deepTools/computeMatrix or by Rseb::computeMatrix.deeptools(), or a list generated by the function Rseb::read.computeMatrix.file."))
  }

  # Helper: retrieve a comma-separated metadata field as a character vector
  get.meta = function(param) {
    as.vector(stringr::str_split(string = dplyr::filter(.data = metadata, parameters == param)$values,
                                 pattern = ",")[[1]])
  }

  # Generate metadata variables
  sample_start_column = as.numeric(get.meta("sample_boundaries"))
  sample_names = get.meta("sample_labels")
  if (length(sample_names) < 2) {return(warning("At least two samples are required to compute comparisons."))}
  group_start_row = as.numeric(get.meta("group_boundaries"))
  group_names = get.meta("group_labels")

  ############################################################################
  # Number of annotation columns preceding the score matrix
  # ('chr', 'start', 'end', 'strand', etc.)
  number.info.columns = ncol(matrix.data) - sample_start_column[length(sample_start_column)]

  # Replace NaN values with 0 if missing.data.as.zero = TRUE, or NA otherwise.
  # When the user leaves missing.data.as.zero = NULL, the behavior recorded in
  # the matrix file itself is used.
  missing.data.as.zero.automatic = as.logical(toupper(dplyr::filter(.data = metadata, parameters == "missing_data_as_zero")$values))
  if (is.null(missing.data.as.zero)) {
    missing.data.as.zero = missing.data.as.zero.automatic
  } else if (is.logical(missing.data.as.zero)) {
    if (missing.data.as.zero != missing.data.as.zero.automatic) {
      message(paste("The parameter 'missing.data.as.zero' set in this function as ", as.character(missing.data.as.zero),
                    ", differs from the automatic obtained from the matrix file which is set as ", as.character(missing.data.as.zero.automatic),
                    ".\nMissing data will be treated as set in this function: 'missing.data.as.zero = ", as.character(missing.data.as.zero), "'.", sep = ""))
    }
  } else {
    return(warning("The parameter 'missing.data.as.zero' must be a logical value or NULL."))
  }

  # S3 method so that is.nan() dispatches on data.frames
  # (base is.nan works only on atomic vectors/matrices)
  is.nan.data.frame = function(data.frame) do.call(cbind, lapply(data.frame, is.nan))
  matrix.data[is.nan(matrix.data)] = ifelse(test = missing.data.as.zero,
                                            yes = 0,
                                            no = NA)
  message(ifelse(test = missing.data.as.zero,
                 yes = "Missing data have been converted to 0 since the parameter 'missing.data.as.zero' is TRUE.",
                 no = "Missing data have been converted to NA since the parameter 'missing.data.as.zero' is FALSE. \nNA values will be excluded from the statistical computations."))

  ############################################################################
  # Build one table per sample: annotation columns + that sample's score bins
  ############################################################################
  samples.table.list = list()

  # Override sample and group names with the custom ones, when valid
  if (!is.null(sample.names)) {
    if (length(unique(sample.names)) == length(sample_names)) {
      sample_names = sample.names
    } else {message("Default sample.names have been used because custom values are not unique and/or incomplete")}
  }
  if (!is.null(region.names)) {
    if (length(unique(region.names)) == length(group_names)) {
      group_names = region.names
    } else {message("Default region.names have been used because custom values are not unique and/or incomplete")}
  }

  for (i in seq_along(sample_names)) {
    # Column limits of the current sample inside the score matrix,
    # keeping the leading annotation columns
    start.col = sample_start_column[i] + number.info.columns + 1
    end.col = sample_start_column[i + 1] + number.info.columns

    samples.table.list[[i]] =
      matrix.data %>%
      dplyr::select(c(1:number.info.columns, start.col:end.col)) %>%
      # add a column with the group (region set) names; group_start_row holds
      # the cumulative row boundaries of each group in the matrix
      mutate(group = rep(group_names,
                         times = c(group_start_row[-1] - group_start_row[-length(group_start_row)])))
  }
  names(samples.table.list) = sample_names

  ############################################################################
  # Per-region summary statistics (mean/median/sum/sd/sem) for each sample
  ############################################################################
  sample.stat.table.list = list()

  for (s in seq_along(sample_names)) {
    group.stats.table.list = list()

    # x-axis values: distance from the reference point, in bp
    upstream = as.numeric(get.meta("upstream"))[s]
    downstream = as.numeric(get.meta("downstream"))[s]
    body = as.numeric(get.meta("body"))[s]
    bin = as.numeric(get.meta("bin_size"))[s]

    distance.range = seq(from = -upstream, to = (body + downstream), by = bin)
    # drop the 0 so the number of labels matches the number of bins
    distance.range = distance.range[distance.range != 0]

    # Compute the stats per group, in order to then merge the group tables
    for (g in seq_along(group_names)) {
      group.table =
        samples.table.list[[s]] %>%
        filter(group == group_names[g]) %>%
        dplyr::select(-c(1:number.info.columns), -group)

      # Restrict the analysis to the requested distance window, if any
      colnames(group.table) = distance.range
      if (!is.null(subset.range)) {
        subset.distance = distance.range[distance.range >= range(subset.range)[1]]
        subset.distance = subset.distance[subset.distance <= range(subset.range)[2]]
        group.table = group.table[, (colnames(group.table) %in% as.character(subset.distance))]
      }

      group.mean = rowMeans(group.table, na.rm = TRUE)
      group.median = robustbase::rowMedians(as.matrix(group.table), na.rm = TRUE)
      group.sum = rowSums(group.table, na.rm = TRUE)
      group.sd = matrixStats::rowSds(as.matrix(group.table), na.rm = TRUE)
      # NOTE(review): the SEM denominator is the total number of bins, not the
      # number of non-NA bins per row — confirm this is intended with NA data
      group.sem = group.sd / sqrt(ncol(group.table))

      group.stats.table.list[[g]] =
        data.frame(group = group_names[g],
                   mean.by.region = group.mean,
                   median.by.region = group.median,
                   sum.by.region = group.sum,
                   sd.by.region = group.sd,
                   sem.by.region = group.sem)
    } # 'g' for end

    sample.stat.table.list[[s]] =
      purrr::reduce(.x = group.stats.table.list, .f = rbind) %>%
      mutate(sample = sample_names[s])
  } # 's' for end

  # Merge all the tables in a single one and prepend the region coordinates
  full.stat.table = purrr::reduce(.x = sample.stat.table.list, .f = rbind)
  full.stat.table = Rseb::move.df.col(data.frame = full.stat.table, move.command = "sample first")

  # The 6 BED-like columns recycle across samples: rows keep the matrix order
  # within each sample block
  regions.position = matrix.data[, 1:6]
  names(regions.position) = c("chr", "start", "end", "name", "score", "strand")
  full.stat.table = cbind(regions.position, full.stat.table)

  ############################################################################
  # Define all the pairwise sample combinations
  ############################################################################
  sample_pairs = data.frame(t(combn(unique(full.stat.table$sample), m = 2, simplify = TRUE)), stringsAsFactors = FALSE)
  names(sample_pairs) = c("S1", "S2")

  # Invert the comparisons if 'inverted.comparisons' == TRUE
  if (inverted.comparisons == TRUE) {
    sample_pairs = data.frame(S1 = sample_pairs$S2,
                              S2 = sample_pairs$S1,
                              stringsAsFactors = FALSE)
  }

  ############################################################################
  # Generation of the comparison tables and per-group statistics
  ############################################################################
  comparison.by.group.tables = list()
  stat_summary = list()

  for (i in seq_along(group_names)) {
    group_table = dplyr::filter(.data = full.stat.table, group == group_names[i])

    ## Signal to compare, as selected by 'signal.type'
    if (signal.type == "mean") {score.to.compare = group_table$mean.by.region}
    if (signal.type == "median") {score.to.compare = group_table$median.by.region}
    if (signal.type == "sum") {score.to.compare = group_table$sum.by.region}

    ## Statistical comparison between samples within the current group
    stat_summary[[i]] =
      ggpubr::compare_means(formula = score ~ sample,
                            data = data.frame(sample = group_table$sample, score = score.to.compare),
                            symnum.args = stat.p.levels,
                            method = stat.method,
                            paired = stat.paired) %>%
      dplyr::mutate(paired = stat.paired,
                    .y. = signal.type)
    names(stat_summary[[i]])[1:3] = c("signal.type", "sample1", "sample2")
    names(stat_summary)[i] = group_names[i]

    # Generate tables by sample pairs for the current group
    group_list_pairs = list()
    for (k in seq_len(nrow(sample_pairs))) {
      # Column 7 is 'sample'; columns 9:13 are the five *.by.region stats
      sample1_tb = dplyr::filter(.data = group_table, sample == sample_pairs[k, 1])
      names(sample1_tb)[7] = "sample1"
      names(sample1_tb)[9:13] = paste(names(sample1_tb)[9:13], "sample1", sep = "_")

      sample2_tb = dplyr::filter(.data = group_table, sample == sample_pairs[k, 2])
      names(sample2_tb)[7] = "sample2"
      names(sample2_tb)[9:13] = paste(names(sample2_tb)[9:13], "sample2", sep = "_")

      # Differences (sample1 - sample2) and their decreasing ranking
      group_list_pairs[[k]] =
        cbind(sample1_tb, sample2_tb[, -c(1:6, 8)]) %>%
        dplyr::mutate(difference.mean = mean.by.region_sample1 - mean.by.region_sample2,
                      difference.median = median.by.region_sample1 - median.by.region_sample2,
                      difference.sum = sum.by.region_sample1 - sum.by.region_sample2) %>%
        dplyr::mutate(rank.mean = rank(-difference.mean, ties.method = "first"),
                      rank.median = rank(-difference.median, ties.method = "first"),
                      rank.sum = rank(-difference.sum, ties.method = "first"))

      group_list_pairs[[k]] = Rseb::move.df.col(data.frame = group_list_pairs[[k]],
                                                move.command = "sample2 after sample1; group before sample1")
      names(group_list_pairs)[k] = paste(sample_pairs[k, 1], sample_pairs[k, 2], sep = " - ")
    } # 'k' for end (sample pairs)

    # Add the list of comparisons for the current group to the main list
    comparison.by.group.tables[[i]] = group_list_pairs
    names(comparison.by.group.tables)[i] = group_names[i]
  } # 'i' for end (groups)

  ############################################################################
  # Generation of the plots
  ############################################################################
  area.groups = list()
  scatter.groups = list()
  area.multiplot.list = list()
  scatter.multiplot.list = list()

  ## Common subtitle describing the analyzed range.
  ## NOTE(review): distance.range here holds the value from the LAST sample of
  ## the metadata loop above — confirm all samples share the same geometry.
  range_used = if (is.null(subset.range)) {
    paste("range: [",
          range(distance.range)[1], ";",
          range(distance.range)[2], "] bp", sep = "")
  } else {
    paste("range: [", range(subset.range)[1], ";",
          range(subset.range)[2], "] bp", sep = "")
  }

  for (g in seq_along(comparison.by.group.tables)) {
    area.groups[[g]] = list()
    scatter.groups[[g]] = list()
    names(area.groups)[g] = names(comparison.by.group.tables)[g]
    names(scatter.groups)[g] = names(comparison.by.group.tables)[g]

    for (d in seq_along(comparison.by.group.tables[[g]])) {
      # Build a temp table for the plot keeping only the columns that contain
      # the required signal type (score_sample1/2, difference, rank)
      data.tb = comparison.by.group.tables[[g]][[d]][, which(grepl(pattern = signal.type, x = names(comparison.by.group.tables[[g]][[d]])))]
      names(data.tb) = c("score_sample1", "score_sample2", "difference", "rank")

      ## Definition of the 0-position: the rank at which the difference
      ## changes sign (mean rank of the exact zeros when present)
      equal.to.zero = dplyr::filter(.data = data.tb, difference == 0)
      if (nrow(equal.to.zero) >= 1) {
        zero.position = round(mean(equal.to.zero$rank))
      } else {
        positive.values = dplyr::filter(data.tb, difference >= 0)
        if (nrow(positive.values) == 0) {
          zero.position = min((data.tb %>% dplyr::filter(data.tb$difference == max(data.tb$difference[data.tb$difference <= 0])))$rank)
        } else {
          zero.position = max((data.tb %>% dplyr::filter(data.tb$difference == min(data.tb$difference[data.tb$difference >= 0])))$rank)
        }
      }

      ## Center the rank on the 0 position
      data.tb$rank = zero.position - data.tb$rank

      ## Define the factor levels for the color category
      ## (enriched-in-sample1 / enriched-in-sample2 / no difference)
      if (inverted.comparisons == TRUE) {
        levels = c(unique(comparison.by.group.tables[[g]][[d]]$sample2),
                   unique(comparison.by.group.tables[[g]][[d]]$sample1),
                   "No difference")
      } else {
        levels = c(unique(comparison.by.group.tables[[g]][[d]]$sample1),
                   unique(comparison.by.group.tables[[g]][[d]]$sample2),
                   "No difference")
      }

      data.tb = dplyr::mutate(.data = data.tb,
                              Enrichment = factor(ifelse(test = data.tb$difference > 0,
                                                         yes = unique(comparison.by.group.tables[[g]][[d]]$sample1),
                                                         no = ifelse(test = data.tb$difference < 0,
                                                                     yes = unique(comparison.by.group.tables[[g]][[d]]$sample2),
                                                                     no = "No difference")),
                                                  levels = levels,
                                                  ordered = TRUE))

      # Assign colors to the corresponding sample (swapped when comparisons
      # are inverted so each sample keeps its color)
      sample.colors = colors
      if (inverted.comparisons == TRUE) {
        sample.colors[1] = colors[2]
        sample.colors[2] = colors[1]
      }
      names(sample.colors) = c(unique(comparison.by.group.tables[[g]][[d]]$sample1),
                               unique(comparison.by.group.tables[[g]][[d]]$sample2),
                               "No difference")

      ####### AREA PLOT: ranked difference profile
      area.groups[[g]][[d]] =
        ggplot(data = data.tb,
               aes(x = rank,
                   y = difference,
                   color = Enrichment,
                   fill = Enrichment)) +
        geom_line(size = area.line.width) +
        geom_point(size = points.size) +
        scale_color_manual(values = sample.colors, drop = FALSE) +
        scale_fill_manual(values = sample.colors, drop = FALSE) +
        ylab(paste(signal.type, "difference\n", range_used)) +
        xlab("Rank") +
        ggtitle(paste(names(comparison.by.group.tables)[g], names(comparison.by.group.tables[[g]])[d], sep = ": ")) +
        geom_hline(yintercept = 0, color = "#000000", size = axis.line.width) +
        theme_classic() +
        theme(text = element_text(size = text.size),
              axis.text = element_text(color = "#000000"),
              axis.title = element_text(color = "#000000"),
              axis.line = element_line(color = "#000000", size = axis.line.width),
              axis.ticks = element_line(color = "#000000", size = axis.line.width),
              title = element_text(color = "#000000"),
              legend.title = element_text(color = "#000000"),
              legend.text = element_text(color = "#000000"),
              legend.position = legend.position,
              legend.background = element_blank())

      if (area.fill.area == TRUE) {
        area.groups[[g]][[d]] = area.groups[[g]][[d]] + geom_area(alpha = transparency)
      }
      if (area.plot.zero.line == TRUE) {
        area.groups[[g]][[d]] = area.groups[[g]][[d]] + geom_vline(xintercept = 0,
                                                                   size = axis.line.width,
                                                                   color = "gray50",
                                                                   linetype = 2)
      }

      # Add the ticks of the extremities to the automatic breaks
      ticks = c(min(data.tb$rank), ggplot_build(area.groups[[g]][[d]])$layout$panel_params[[1]]$x$breaks, max(data.tb$rank))
      area.groups[[g]][[d]] = area.groups[[g]][[d]] + scale_x_continuous(breaks = ticks)

      ####### SCATTER/CORRELATION PLOT
      ### Convert the scores to log2(x + 1) if required
      if (correlation.log2 == TRUE) {
        data.tb$score_sample1 = log2(data.tb$score_sample1 + 1)
        data.tb$score_sample2 = log2(data.tb$score_sample2 + 1)
        scatter.x.label = bquote(atop(paste("log"["2"] * "(", .(signal.type), " density signal + 1)"),
                                      .(unique(comparison.by.group.tables[[g]][[d]]$sample2))))
        scatter.y.label = bquote(atop(.(unique(comparison.by.group.tables[[g]][[d]]$sample1)),
                                      paste("log"["2"] * "(", .(signal.type), " density signal + 1)")))
      } else {
        scatter.x.label = paste(signal.type, unique(comparison.by.group.tables[[g]][[d]]$sample2), "\n", range_used)
        scatter.y.label = paste(signal.type, unique(comparison.by.group.tables[[g]][[d]]$sample1), "\n", range_used)
      }

      scatter.groups[[g]][[d]] =
        ggplot(data = data.tb,
               aes(x = score_sample2,
                   y = score_sample1,
                   color = Enrichment,
                   fill = Enrichment)) +
        geom_point(size = points.size) +
        scale_color_manual(values = sample.colors, drop = FALSE) +
        scale_fill_manual(values = sample.colors, drop = FALSE) +
        xlab(scatter.x.label) +
        ylab(scatter.y.label) +
        ggtitle(label = names(comparison.by.group.tables)[g], subtitle = range_used) +
        # identity line (y = x) as visual reference
        geom_abline(slope = 1, intercept = 0, color = "gray50", linetype = 2, size = axis.line.width) +
        theme_classic() +
        theme(text = element_text(size = text.size),
              axis.text = element_text(color = "#000000"),
              axis.title = element_text(color = "#000000"),
              axis.line = element_line(color = "#000000", size = axis.line.width),
              axis.ticks = element_line(color = "#000000", size = axis.line.width),
              title = element_text(color = "#000000"),
              legend.title = element_text(color = "#000000"),
              legend.text = element_text(color = "#000000"),
              legend.position = legend.position,
              legend.background = element_blank())

      ## Add points rug if required
      if (correlation.add.rug == TRUE) {
        scatter.groups[[g]][[d]] = scatter.groups[[g]][[d]] + geom_rug()
      }

      ## Add correlation line if required
      if (correlation.plot.correlation == TRUE) {
        scatter.groups[[g]][[d]] =
          scatter.groups[[g]][[d]] +
          geom_smooth(data = data.tb,
                      aes(x = score_sample2,
                          y = score_sample1),
                      formula = correlation.correlation.formula,
                      method = correlation.correlation.method,
                      size = correlation.correlation.line.width,
                      linetype = correlation.correlation.line.type,
                      color = correlation.correlation.line.color,
                      fill = correlation.correlation.line.color,
                      se = correlation.correlation.line.SE,
                      alpha = transparency,
                      fullrange = TRUE,
                      inherit.aes = FALSE)
      }

      ## Add correlation equation and R2 if required
      if (correlation.show.equation == TRUE) {
        scatter.groups[[g]][[d]] =
          scatter.groups[[g]][[d]] +
          ggpmisc::stat_poly_eq(data = data.tb,
                                aes(x = score_sample2,
                                    y = score_sample1,
                                    label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
                                formula = correlation.correlation.formula,
                                parse = TRUE,
                                inherit.aes = FALSE,
                                size = rel(text.size * 0.3))
      }

      # Give a name to the two plots
      names(area.groups[[g]])[d] = paste(unique(comparison.by.group.tables[[g]][[d]]$sample1), unique(comparison.by.group.tables[[g]][[d]]$sample2), sep = " - ")
      names(scatter.groups[[g]])[d] = paste(unique(comparison.by.group.tables[[g]][[d]]$sample1), unique(comparison.by.group.tables[[g]][[d]]$sample2), sep = " vs ")
    } # END 'd' loop for comparisons in the current group

    # Uniform the axes across the plots of the group, if required
    if (area.y.identical.auto == TRUE) {
      area.groups[[g]] = Rseb::uniform.y.axis(plot.list = area.groups[[g]],
                                              ticks.each = area.y.ticks.interval,
                                              digits = area.y.digits)
    }
    if (correlation.x.identical.auto == TRUE) {
      scatter.groups[[g]] = Rseb::uniform.x.axis(plot.list = scatter.groups[[g]],
                                                 ticks.each = correlation.x.ticks.interval,
                                                 digits = correlation.x.digits)
    }
    if (correlation.y.identical.auto == TRUE) {
      scatter.groups[[g]] = Rseb::uniform.y.axis(plot.list = scatter.groups[[g]],
                                                 ticks.each = correlation.y.ticks.interval,
                                                 digits = correlation.y.digits)
    }

    # Generation of the multiplots for the current group
    if (length(comparison.by.group.tables[[g]]) == 1) {
      area.multiplot.list[[g]] = cowplot::plot_grid(plotlist = list(area.groups[[g]]), nrow = n.row.multiplot, byrow = by.row, align = "hv")
      scatter.multiplot.list[[g]] = cowplot::plot_grid(plotlist = list(scatter.groups[[g]]), nrow = n.row.multiplot, byrow = by.row, align = "hv")
    } else {
      area.multiplot.list[[g]] = cowplot::plot_grid(plotlist = area.groups[[g]], nrow = n.row.multiplot, byrow = by.row, align = "hv")
      scatter.multiplot.list[[g]] = cowplot::plot_grid(plotlist = scatter.groups[[g]], nrow = n.row.multiplot, byrow = by.row, align = "hv")
    }
    names(area.multiplot.list)[g] = names(comparison.by.group.tables)[g]
    names(scatter.multiplot.list)[g] = names(comparison.by.group.tables)[g]
  } # END 'g' loop for current group

  ############################################################################
  # Build the output list (see @return in the roxygen documentation)
  ############################################################################
  return(list(data.table = full.stat.table,
              metadata = metadata,
              comparison.table.list = comparison.by.group.tables,
              # flatten the per-group stat tables into one, with a leading
              # 'region.group' column
              comparison.statistics.table = purrr::reduce(.x = purrr::pmap(.l = list(tb = stat_summary, names = names(stat_summary)),
                                                                           .f = function(tb, names) {Rseb::move.df.col(dplyr::mutate(.data = tb, region.group = names), "region.group first")}),
                                                          .f = rbind),
              area.plot.byGroup.list = area.groups,
              correlation.plot.byGroup.list = scatter.groups,
              area.multiplot.list = area.multiplot.list,
              correlation.multiplot.list = scatter.multiplot.list
              )
         )
} # End function
################################################################################
#----------------------------- END OF FUNCTION --------------------------------#
################################################################################
################################################################################
#----------------------------- END OF FUNCTION --------------------------------#
################################################################################
# NOTE: removed website-embedding boilerplate text ("Add the following code to
# your website...") that was accidentally pasted here and is not valid R code.