#' Function for Finger print plot on Gene set analysis
#'
#' The aim of this function is to create a visual representation of gene set enrichment results from differential gene expression analysis.
#'
#' @param res_dgsaseq an object of \code{dgsa_seq} function in \code{dearseq} package.
#' @param datalog2FC a data.frame with genes in rownames and if (time == NULL) 1 column with Fold Change else several columns are allowed but colnames must contain time's string characters.
#' @param genesets a string character "BTM" or "Chaussabel" to use genesets included or an other gmt object. Default is \code{Chaussabel}.
#' @param category a string character or vector. Only needed if \code{(genesets == "BTM")} : "immune", "signaling", "TF targets", "biological process", "molecular function", "location", "all".
#' @param time a string character to filter on time. This time must be present on \code{datalog2FC}'s colnames. Default is \code{NULL}.
#' @param alpha a numeric corresponding to type 1 error threshold. Default value is \code{0.05}.
#' @param title a string character for plot's title. Default is \code{NULL}.
#' @param max_size a numeric corresponding to the maximum size of point. Default is \code{8}.
#' @param size_y_lab labsize for non-interactive plot, can be useful to set to 0 to combine several plots
#' @param order_by_pvals a logical indicating to order genesets per pvalue left to right. Default is \code{FALSE}.
#' @param interactive.plot a logical indicating whether the plot will be interactive, recommended if order_by_pvals is \code{TRUE}. Default is \code{FALSE}.
#'
#' @return a \code{ggplot} object
#'
#' @importFrom stats setNames
#' @importFrom ggiraph geom_point_interactive opts_tooltip girafe
#' @importFrom scales rescale
#' @importFrom reshape2 melt
#' @importFrom tibble rownames_to_column column_to_rownames
#' @importFrom stringr str_replace
#'
#' @author Mélanie Huchon & Quentin Laval
#'
#' @export
#'
#' @examples
#'
Fingerprint <- function(res_dgsaseq,
datalog2FC,
genesets = 'Chaussabel',
category = 'immune',
time = NULL,
alpha = 0.05,
title = NULL,
max_size = 8,
size_y_lab = 9,
order_by_pvals = FALSE,
interactive.plot = FALSE){
stopifnot("Please select Chaussabel / BTM or put a personnal geneset"=
is.character(genesets) & genesets %in% c("Chaussabel", "BTM") |
is.list(genesets))
if (all(!genesets %in% c("Chaussabel", "BTM"))) {
stopifnot(
"Your personal geneset must contain the following list names: genesets, geneset.names, geneset.descriptions and Cluster" =
all(c("genesets", "geneset.names", "geneset.descriptions", "Cluster") %in% names(genesets))
)
}
stopifnot("'order_by_pvals' must be TRUE or FALSE"=order_by_pvals %in% c(TRUE, FALSE))
stopifnot("'interactive.plot' must be TRUE or FALSE"=interactive.plot %in% c(TRUE, FALSE))
if (all(is.character(genesets) & genesets == "Chaussabel")) {
load(file = "data/gmtChaussabel_sistmr.RData")
gmt <- gmtChaussabel
res_geneset <- table_GS(res_dgsaseq = res_dgsaseq,
time = time,
data_log2FC = datalog2FC,
geneset_modules = gmt)
# Remove TBD modules
id_TBD <- which(gmt$geneset.descriptions == 'TBD')
gmt$geneset.descriptions <- gmt$geneset.descriptions[- id_TBD]
gmt$geneset.names <- gmt$geneset.names[- id_TBD]
if (!identical(which(res_geneset$description == 'TBD'), integer(0))){
res_geneset <- res_geneset[-which(res_geneset$description == 'TBD'),]
}
} else if (all(is.character(genesets) & genesets == "BTM")) {
load(file = "data/gmtBTM_sistmr.RData")
gmt <- gmtBTM
res_geneset <- table_GS(res_dgsaseq = res_dgsaseq,
time = time,
data_log2FC = datalog2FC,
geneset_modules = gmt)
BTM_categories <- c("immune", "signaling", "TF targets", "biological process",
"molecular function", "location", "all")
if (!all(category %in% BTM_categories)) {
stop(paste("Please select one or more categories : ",
paste(BTM_categories, collapse = ", ")))
}
# Filter gmt according to category
if (length(which(category == "all")) > 0) {
sub_mods <- BTM_categories[-7]
} else {
sub_mods <- category
match_category <- which(gmt$Cluster %in% category)
gmt <- lapply(setNames(names(gmt), names(gmt)), function(b)
gmt[[b]] <- gmt[[b]][match_category])
}
# Subset modules via category
res_geneset <- res_geneset[which(res_geneset$aggregate %in% sub_mods),]
} else {
gmt <- genesets
res_geneset <- table_GS(res_dgsaseq = res_dgsaseq,
time = time,
data_log2FC = datalog2FC,
geneset_modules = gmt)
if (!is.null(category)){
match_category <- which(gmt$Cluster %in% category)
gmt <- lapply(setNames(names(gmt), names(gmt)), function(b)
gmt[[b]] <- gmt[[b]][match_category])
# Subset modules via category
res_geneset <- res_geneset[which(res_geneset$aggregate %in% category),]
} else {
# Remove TBD modules
id_TBD <- which(gmt$geneset.descriptions == 'TBD')
gmt$geneset.descriptions <- gmt$geneset.descriptions[- id_TBD]
gmt$geneset.names <- gmt$geneset.names[- id_TBD]
if (!identical(which(res_geneset$description == 'TBD'), integer(0))){
res_geneset <- res_geneset[-which(res_geneset$description == 'TBD'),]
}
}
}
# Name of functionnal clusters and right rows names
cluster_names <- unique(gmt$geneset.descriptions)
# Prepare data
missing_modules <- which(is.na(match(gmt$geneset.names,
rownames(res_geneset))))
if (!identical(missing_modules, integer(0))) {
res_geneset <- rbind(res_geneset,
data.frame(gene_set = missing_modules,
adjusted_pvalues = NA,
description = gmt$geneset.descriptions[missing_modules],
median_log2FC = NA,
aggregate = res_geneset$aggregate[1],
row.names = gmt$geneset.names[missing_modules]))
}
res_geneset$gene_set <- rownames(res_geneset)
Group_plot <- res_geneset
# Create a variable "Stat" which combine the pvalue and FC informations
Group_plot$Stat <- Group_plot$adjusted_pvalues
Group_plot <- Group_plot[, "Stat", drop = FALSE]
Group_plot <- Group_plot[as.character(rownames(res_geneset)),
1, drop = FALSE]
Group_plot <- as.data.frame(Group_plot)
Group_plot[is.na(Group_plot) == TRUE] <- 1
# Create new grid with all filtered cluster
grid_mat <- matrix(nrow = length(cluster_names),
ncol = max(table(res_geneset$description)))
rownames(grid_mat) = sort(cluster_names)
colnames(grid_mat) = seq_len(max(table(res_geneset$description)))
# Extract all x and y coordinates and counter creation for y coord
num_desc <- as.numeric(factor(gmt$geneset.descriptions,
levels = sort(cluster_names),
labels = sort(cluster_names)))
counter <- integer(length(cluster_names))
gmt$Cluster_position <- 0
for (p in seq_along(gmt$geneset.descriptions)) {
val <- num_desc[p]
counter[val] <- counter[val] + 1
gmt$Cluster_position[p] <- paste(val, counter[val], sep = ".")
}
match_id <- match(rownames(res_geneset), gmt$geneset.names)
res_geneset$Cluster_position <- gmt$Cluster_position[match_id]
# Order Cluster_position by adj_pvalues rank in each cluster
if (order_by_pvals) {
res_geneset <- res_geneset %>%
rownames_to_column(var = "rownames") %>% # Convert rownames to a column
group_by(description) %>%
mutate(rank = rank(adjusted_pvalues, ties.method = "first")) %>%
ungroup() %>%
rowwise() %>%
mutate(
Cluster_position = str_replace(
Cluster_position,
"(?<=\\.)[0-9]+",
as.character(rank)
)
) %>%
ungroup() %>%
select(-rank) %>%
column_to_rownames(var = "rownames") # Restore rownames
}
x_coords <- res_geneset[rownames(Group_plot), "description"]
y_coords <- sapply(strsplit(res_geneset[rownames(Group_plot),
"Cluster_position"],
"\\."), function(x) x[[2]])
# Extract assign values
values <- Group_plot$Stat
# Convert x and y coordinates into matrix indices
x_indices <- match(x_coords, rownames(grid_mat))
y_indices <- as.numeric(y_coords)
# Update grid_mat with the corresponding values
grid_mat[cbind(x_indices, y_indices)] <- values
melted_df <- melt(grid_mat, id.var = "row.names")
colnames(melted_df) <- c("Aggregate", "Sub_aggregate", "AdjPval")
melted_df$AdjPval <- abs(melted_df$AdjPval)
res_geneset$Cluster_join <- paste0(res_geneset$description,
".",
lapply(strsplit(res_geneset$Cluster_position,"\\."),
function(x) x[[2]]) )
res_geneset$gene_setID <- res_geneset$gene_set
melted_df <- left_join(melted_df %>%
mutate(Cluster_join = paste0(Aggregate, ".", Sub_aggregate)),
res_geneset[, c("Cluster_join", "gene_setID")],
by = "Cluster_join") %>%
select(-Cluster_join)
melted_df <- left_join(melted_df,
res_geneset[, c("gene_set", "adjusted_pvalues",
"median_log2FC")],
join_by(gene_setID == gene_set,
AdjPval == adjusted_pvalues))
# Tile fill color
color <- rep("white", nrow(melted_df))
color[is.na(melted_df$AdjPval)] <- "#E6E6E6"
recurrence <- length(cluster_names)
# To create an empty white tile when an existing module isn't DE
# "#E6E6E6" equals to na.value for the plot
gray_id <- which(color == "#E6E6E6")
# Shifted indices by recurrence
white_id <- gray_id + recurrence
# Make sure offset indices are within the limits of the vector
valid_id <- white_id <= length(color)
# Apply changes only on valid_id
color[gray_id[valid_id]] <- ifelse(color[white_id[valid_id]] == "white",
"white",
color[gray_id[valid_id]])
melted_df$AdjPval[gray_id[valid_id]] <- ifelse(color[white_id[valid_id]] == "white",
1,
melted_df$AdjPval[gray_id[valid_id]])
melted_df$signif <- melted_df$AdjPval <= alpha
if (interactive.plot) {
# Remove tooltip for NA
id <- which(is.na(melted_df$gene_setID))
melted_df$gene_setID[id] <- ""
melted_df$tooltip_text <- paste(
melted_df$gene_setID,
sprintf("FC: %.2f", 2^melted_df$median_log2FC),
sprintf("FDR: %.2e", melted_df$AdjPval),
sep = "\n"
)
plot <- ggplot(melted_df, aes(Aggregate, as.factor(Sub_aggregate))) +
geom_tile(color = "grey85", linewidth = 0.2, fill = color) +
geom_point_interactive(aes(size = AdjPval, colour = median_log2FC, shape = signif,
tooltip = gene_setID), show.legend = TRUE) +
scale_size(name = "FDR",
breaks = c(0.001, 0.01, 0.05, 0.1, 0.2, 1),
range = c(max_size, 0),
transform = "sqrt",
labels = c(0.001, 0.01, 0.05, 0.1, 0.2, 1),
limits = c(min(min(melted_df$AdjPval, na.rm = TRUE), 10^-3), 1)) +
scale_shape_manual(values = c("TRUE" = 15, "FALSE" = 19), na.translate = FALSE, name = "Significant FDR") +
guides(size = guide_legend(reverse=F, override.aes = list(shape = c(rep(15, 3),
rep(19,3)))),
shape = "none") +
scale_color_gradientn(colours = c("#002C63", "darkblue", "blue", "white", "red", "darkred", "#660000"),
values = rescale(x = log2(c(1/3, 2/3, 1, 1.5, 3))),
limits = c(-log2(3.1), log2(3.1)),
breaks = log2(c(1/3, 1/2, 2/3, 1, 1.5, 2, 3)),
labels = c("1/3", "1/2", "2/3", "1", "1.5", "2", "3"),
name = "Fold-Change") +
ylab("") +
xlab("") +
labs(x = "", y = "", title = title) +
theme_light() +
theme(panel.grid.minor = element_line(colour = "black",
linewidth = 0.32)) +
coord_flip() +
scale_x_discrete(limits = rev(levels(melted_df$Aggregate))) +
theme(panel.border = element_rect(color = "black", linewidth = 0.18),
axis.text.x = element_blank(),
axis.text.y = element_text(colour = "black", size = 6,
angle = 0, hjust = 1,
vjust = 0.5, face = "plain"),
title = element_text(colour = "black" , size = 6),
legend.text = element_text(colour = "black", size = 6))
plot <- girafe(ggobj = plot, options = list(
opts_tooltip(offx = 40,
offy = 40,
css = "font-size: 45px;")),
pointsize = 12)
} else {
plot <- ggplot(melted_df, aes(Aggregate, as.factor(Sub_aggregate))) +
geom_tile(color = "grey85", linewidth = 0.2, fill = color) +
geom_point(aes(size = AdjPval, colour = median_log2FC, shape = signif), show.legend = TRUE) +
scale_size(name = "FDR",
breaks = c(0.001, 0.01, 0.05, 0.1, 0.2, 1),
range = c(max_size, 0),
transform = "sqrt",
labels = c(0.001, 0.01, 0.05, 0.1, 0.2, 1),
limits = c(min(min(melted_df$AdjPval, na.rm = TRUE), 10^-3), 1)) +
scale_shape_manual(values = c("TRUE" = 15, "FALSE" = 19), na.translate = FALSE, name = "Significant FDR") +
guides(size = guide_legend(reverse=F, override.aes = list(shape = c(rep(15, 3),
rep(19,3)))),
shape = "none") +
scale_color_gradientn(colours = c("#002C63", "darkblue", "blue", "white", "red", "darkred", "#660000"),
values = rescale(x = log2(c(1/3, 2/3, 1, 1.5, 3))),
limits = c(-log2(3.1), log2(3.1)),
breaks = log2(c(1/3, 1/2, 2/3, 1, 1.5, 2, 3)),
labels = c("1/3", "1/2", "2/3", "1", "1.5", "2", "3"),
name = "Fold-Change") +
ylab("") +
xlab("") +
labs(x = "", y = "", title = title) +
theme_light() +
theme(panel.grid.minor = element_line(colour = "black",
linewidth = 0.32)) +
coord_flip() +
scale_x_discrete(limits = rev(levels(melted_df$Aggregate))) +
theme(panel.border = element_rect(color = "black", linewidth = 0.18),
axis.text.x = element_blank(),
axis.text.y = element_text(colour = "black" , size = size_y_lab,
angle = 0, hjust = 1,
vjust = 0.5, face = "plain"))
}
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.