#' Collect gene IDs from a table and make them readable.
#'
#' @param table Gene table from (initially) clusterProfiler.
#' @param mappings Table of mapped gene IDs.
#' @param new String used to disambiguate mappings when it is not provided by the table.
#' @param primary_key Column name to use when extracting IDs.
gather_cp_genes <- function(table, mappings, new = "ORF", primary_key = 1) {
strings <- table[["geneID"]]
if (nrow(table) == 0) {
return(NULL)
}
separate <- strsplit(x = strings, split = "/")
rownames(mappings) <- make.names(mappings[[primary_key]], unique = TRUE)
## make.names() may prefix the rownames with 'X' if entrezids are numeric...
rownames(mappings) <- gsub(x = rownames(mappings), pattern = "^X", replacement = "")
## Taken from https://stackoverflow.com/questions/23420331
a <- as.relistable(separate)
u <- glue("{unlist(a)}")
if (! new %in% colnames(mappings)) {
new <- colnames(mappings)[2]
}
u <- mappings[u, new]
mapped <- relist(u, a)
return(mapped)
}
#' Given a set of goseq data from simple_goseq(),
#' make a list of genes represented in each ontology.
#'
#' This function uses the GO2ALLEG data structure to reverse map ontology
#' categories to a list of genes represented. It therefore assumes that the
#' GO2ALLEG.rda data structure has been deposited in pwd(). This in turn
#' may be generated by clusterProfilers buildGOmap() function if it doesn't
#' exist. For some species it may also be auto-generated.
#' With little work this can be made much more generic, and it probably should.
#'
#' @param result List of results as generated by simple_*().
#' @param ontology Ontology to search (MF/BP/CC).
#' @param column Which column to use for extracting ontologies?
#' @param pval Maximum accepted pvalue to include in the list of categories to
#' cross reference.
#' @param include_all Include all genes in the ontology search?
#' @param ... Extra options without a purpose just yet.
#' @return Data frame of categories/genes.
#' @seealso [simple_goseq()]
#' @examples
#' \dontrun{
#' data <- simple_goseq(sig_genes = limma_output, lengths = annotation_df, goids = goids_df)
#' genes_in_cats <- gather_genes(data, ont='BP')
#' }
#' @export
gather_ontology_genes <- function(result, ontology = NULL,
column = "over_represented_pvalue",
pval = 0.1, include_all = FALSE, ...) {
arglist <- list(...)
ontology <- toupper(ontology)
categories <- NULL
## I should reorganize the results from goseq
## But until then, just put a silly test here.
table_list <- NULL
if (is.null(result[["tables"]])) {
table_list <- result
} else {
table_list <- result[["tables"]]
}
if (is.null(ontology)) {
retlist <- list()
message("No ontology provided, performing all.")
for (type in c("MF", "BP", "CC")) {
retlist[[type]] <- gather_ontology_genes(
result, ontology = type, column = column,
pval = pval, include_all = include_all,
...)
}
return(retlist)
} else if (ontology == "MF") {
categories <- table_list[["mf_subset"]]
} else if (ontology == "BP") {
categories <- table_list[["bp_subset"]]
} else if (ontology == "CC") {
categories <- table_list[["cc_subset"]]
} else {
retlist <- list()
message("No ontology provided, performing all.")
for (type in c("MF", "BP", "CC")) {
retlist[[type]] <- gather_ontology_genes(
result, column = column, ontology = type, pval = pval,
include_all = include_all,
...)
}
return(retlist)
}
input <- result[["input"]]
filtered_categories <- categories[categories[[column]] <= pval, ]
rownames(filtered_categories) <- filtered_categories[[1]]
cats <- rownames(filtered_categories)
go_db <- result[["go_db"]]
genes_per_ont <- function(cat) {
colnames(go_db) <- c("ID", "GO")
## Only keep the set of entries which are filled in.
go_db <- go_db[complete.cases(go_db), ]
## Pull all rows which are of our category.
found_idx <- go_db[["GO"]] == cat
## Then extract those rows from the full set of go mappings
foundlings <- go_db[found_idx, ]
## Finally, pull those gene IDs
all_entries <- unique(foundlings[["ID"]])
## Extract the limma logFC for all genes.
all_names <- toString(all_entries)
## Now find the 'significant' genes in this set.
sig_limma <- sig_deseq <- sig_edger <- sig_basic <- sig_other <- c("")
if ("character" %in% class(input)) {
entries_in_sig <- input %in% all_entries
sig_names <- input[entries_in_sig]
} else if ("data.frame" %in% class(input)) {
entries_in_sig <- input[rownames(input) %in% all_entries, ]
sig_names <- toString(as.character(rownames(entries_in_sig)))
if (!is.null(entries_in_sig[["limma_logfc"]])) {
sig_limma <- toString(as.character(entries_in_sig[["limma_logfc"]]))
}
if (!is.null(entries_in_sig[["deseq_logfc"]])) {
sig_deseq <- toString(as.character(entries_in_sig[["deseq_logfc"]]))
}
if (!is.null(entries_in_sig[["edger_logfc"]])) {
sig_edger <- toString(as.character(entries_in_sig[["edger_logfc"]]))
}
if (!is.null(entries_in_sig[["basic_logfc"]])) {
sig_basic <- toString(as.character(entries_in_sig[["basic_logfc"]]))
}
if (!is.null(entries_in_sig[["logFC"]])) {
sig_other <- toString(as.character(entries_in_sig[["logFC"]]))
}
} else {
stop("This currently only understands vectors and dataframes.")
}
## And get their names.
## names <- toString(as.character(rownames(entries_in_input)))
retlist <- list("all" = all_names,
"sig" = sig_names,
"limma_sigfc" = sig_limma,
"edger_sigfc" = sig_edger,
"deseq_sigfc" = sig_deseq,
"basic_sigfc" = sig_basic,
"other_sigfc" = sig_other)
return(retlist)
}
gene_list <- lapply(cats, genes_per_ont)
names(gene_list) <- cats
gene_df <- data.table::rbindlist(gene_list)
gene_df <- as.data.frame(gene_df)
rownames(gene_df) <- cats
return(gene_df)
}
#' Make a pretty table of clusterprofiler data in excel.
#'
#' It is my intention to make a function like this for each ontology tool in my
#' repetoire
#'
#' @param cp_result A set of results from simple_clusterprofiler().
#' @param excel An excel file to which to write some pretty results.
#' @param add_trees Include topgoish ontology trees?
#' @param order_by What column to order the data by?
#' @param pval Choose a cutoff for reporting by p-value.
#' @param add_plots Include some pvalue plots in the excel output?
#' @param height Height of included plots.
#' @param width and their width.
#' @param decreasing which direction?
#' @param primary_key Use this annotation column to keep track of annotation IDs.
#' @param ... Extra arguments are passed to arglist.
#' @return The result from openxlsx in a prettyified xlsx file.
#' @seealso [openxlsx]
#' @export
write_cp_data <- function(cp_result, excel = "excel/clusterprofiler.xlsx",
add_trees = TRUE, order_by = "qvalue", pval = 0.1, add_plots = TRUE,
height = 15, width = 10, decreasing = FALSE, primary_key = 1,
...) {
arglist <- list(...)
image_list <- c()
if (!is.null(arglist[["table_style"]])) {
table_style <- arglist[["table_style"]]
}
excel_basename <- "."
wb <- NULL
if ("character" %in% class(excel)) {
## This this is a filename
xlsx <- init_xlsx(excel)
wb <- xlsx[["wb"]]
excel_basename <- xlsx[["basename"]]
} else {
wb <- excel
}
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
pval_column <- "limma_adjp"
if (!is.null(arglist[["pval_column"]])) {
pval_column <- arglist[["pval_column"]]
}
if (!is.null(wb)) {
message("Writing a sheet containing the legend.")
legend <- data.frame(rbind(
c("Ontology", "Molecular Function, Biological Process, or Cellular Component."),
c("Category", "Gene ontology Identifier."),
c("Term", "Short definition of the category."),
c("Over p-value", "Estimate of cp over-representation in the category."),
c("Q-value", "False discovery rate correction of the p-value."),
c("DE genes in cat", "What genes provided are in this specific category?"),
c("All genes in cat", "The full set of annotations included in this category."),
c("Num. de", "The number of genes in column 'F'."),
c("Num. in cat", "The number of genes in column 'G'.")
))
colnames(legend) <- c("column name", "column definition")
xls_result <- write_xlsx(wb, data = legend, sheet = "legend", rownames = FALSE,
title = "Columns used in the following tables.")
summary_row <- nrow(legend) + 5
summary_df <- data.frame(rbind(
c("Queried BP ontologies", nrow(cp_result[["enrich_go"]][["BP_all"]])),
c("Significant BP ontologies", nrow(cp_result[["enrich_go"]][["BP_sig"]])),
c("Queried MF ontologies", nrow(cp_result[["enrich_go"]][["MF_all"]])),
c("Significant MF ontologies", nrow(cp_result[["enrich_go"]][["MF_sig"]])),
c("Queried CC ontologies", nrow(cp_result[["enrich_go"]][["CC_all"]])),
c("Significant CC ontologies", nrow(cp_result[["enrich_go"]][["CC_sig"]]))))
colnames(summary_df) <- c("Ontology type", "Number found")
xls_result <- write_xlsx(wb, data = summary_df, sheet = "legend", rownames = FALSE,
title = "Summary of the cp search.", start_row = 1, start_col = 4)
} ## End making sure that an excel is desired.
## Pull out the relevant portions of the cp data
## For this I am using the same (arbitrary) rules as in gather_ontology_genes()
skip_mf <- 0
cp_mf <- cp_result[["enrich_go"]][["MF_sig"]]
if (nrow(cp_mf) > 0) {
cp_mf <- cp_mf[cp_mf[["pvalue"]] <= pval, ]
cp_mf_genes <- gather_cp_genes(cp_result[["enrich_go"]][["MF_sig"]],
cp_result[["all_mappings"]],
primary_key = primary_key)
cp_mf[["named_genes"]] <- cp_mf_genes
mf_idx <- order(cp_mf[[order_by]], decreasing = decreasing)
cp_mf <- cp_mf[mf_idx, ]
cp_mf[["Ontology"]] <- "MF"
} else {
skip_mf <- 1
}
skip_bp <- 0
cp_bp <- cp_result[["enrich_go"]][["BP_sig"]]
if (nrow(cp_bp) > 0) {
cp_bp <- cp_bp[cp_bp[["pvalue"]] <= pval, ]
cp_bp_genes <- gather_cp_genes(cp_result[["enrich_go"]][["BP_sig"]],
cp_result[["all_mappings"]],
primary_key = primary_key)
cp_bp[["named_genes"]] <- cp_bp_genes
bp_idx <- order(cp_bp[[order_by]], decreasing = decreasing)
cp_bp <- cp_bp[bp_idx, ]
cp_bp[["Ontology"]] <- "BP"
} else {
skip_bp <- 1
}
skip_cc <- 1
cp_cc <- cp_result[["enrich_go"]][["CC_sig"]]
if (nrow(cp_cc) > 0) {
cp_cc <- cp_cc[cp_cc[["pvalue"]] <= pval, ]
cp_cc_genes <- gather_cp_genes(cp_result[["enrich_go"]][["CC_sig"]],
cp_result[["all_mappings"]],
primary_key = primary_key)
cp_cc[["named_genes"]] <- cp_cc_genes
cc_idx <- order(cp_cc[[order_by]], decreasing = decreasing)
cp_cc <- cp_cc[cc_idx, ]
cp_cc[["Ontology"]] <- "CC"
} else {
skip_cc <- 1
}
kept_columns <- c("ID", "Ontology", "Description", "GeneRatio", "BgRatio", "pvalue",
"p.adjust", "qvalue", "Count", "geneID", "named_genes")
new_columns <- c("ID", "Ontology", "Description", "Ratio", "BgRatio", "P value",
"Adjusted P", "Q value", "Count", "gene ID", "Named ID")
if (!skip_mf) {
cp_mf <- cp_mf[, kept_columns]
colnames(cp_mf) <- new_columns
}
if (!skip_bp) {
cp_bp <- cp_bp[, kept_columns]
colnames(cp_bp) <- new_columns
}
if (!skip_cc) {
cp_cc <- cp_cc[, kept_columns]
colnames(cp_cc) <- new_columns
}
cp_kegg <- cp_result[["kegg_data"]][["kegg_sig"]]
skip_kegg <- FALSE
if (is.null(cp_kegg)) {
skip_kegg <- TRUE
} else if (nrow(cp_kegg) == 0) {
skip_kegg <- TRUE
}
if (isFALSE(skip_kegg)) {
kegg_idx <- order(cp_kegg[[order_by]], decreasing = decreasing)
cp_kegg <- cp_kegg[kegg_idx, ]
}
cp_david <- cp_result[["david_data"]]
skip_david <- FALSE
if (is.null(cp_david)) {
skip_david <- TRUE
} else if (nrow(cp_david) == 0) {
skip_david <- TRUE
}
if (isFALSE(skip_david)) {
david_idx <- order(cp_kegg[[order_by]], decreasing = decreasing)
cp_david <- cp_david[david_idx, ]
}
if (isFALSE(skip_bp)) {
new_row <- 1
message("Writing the BP data.")
sheet <- "BP"
dfwrite <- write_xlsx(data = cp_bp, wb = wb, sheet = sheet,
title = "BP REsults from cp.", start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- cp_result[["plots"]][["ego_sig_bp"]]
plot_try <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(cp_bp) + 2, start_row = new_row,
plotname = "bp_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
b_plot <- cp_result[["plots"]][["tree_sig_bp"]]
if (!is.null(b_plot)) {
plot_try <- xlsx_insert_png(b_plot, wb = wb, sheet = sheet, width = 12, height = 12,
start_col = ncol(cp_bp) + 2, start_row = 80, res = 210,
plotname = "bp_trees", savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(cp_bp) + 2
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto"),
silent = TRUE)
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 6:7, widths = 30),
silent = TRUE)
}
if (isFALSE(skip_mf)) {
new_row <- 1
message("Writing the MF data.")
sheet <- "MF"
dfwrite <- write_xlsx(data = cp_mf, wb = wb, sheet = sheet, title = "MF Results from cp.",
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- cp_result[["plots"]][["ego_sig_mf"]]
plot_try <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(cp_mf) + 2, start_row = new_row,
plotname = "mf_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
b_plot <- cp_result[["plots"]][["tree_sig_mf"]]
if (!is.null(b_plot)) {
plot_try <- xlsx_insert_png(b_plot, wb = wb, sheet = sheet, width = 12, height = 12,
start_col = ncol(cp_mf) + 2, start_row = 80, res = 210,
plotname = "mf_trees", savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(cp_mf) + 2
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto"),
silent = TRUE)
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 6:7, widths = 30),
silent = TRUE)
}
if (isFALSE(skip_cc)) {
new_row <- 1
message("Writing the CC data.")
sheet <- "CC"
dfwrite <- write_xlsx(data = cp_cc, wb = wb, sheet = sheet,
title = "CC Results from cp.",
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- cp_result[["plots"]][["ego_sig_cc"]]
plot_try <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(cp_cc) + 2, start_row = new_row,
plotname = "cc_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
b_plot <- cp_result[["plots"]][["tree_sig_cc"]]
if (!is.null(b_plot)) {
plot_try <- xlsx_insert_png(b_plot, wb = wb, sheet = sheet, width = 12, height = 12,
start_col = ncol(cp_cc) + 2, start_row = 80, res = 210,
plotname = "cc_trees", savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_list <- c(image_list, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(cp_cc) + 2
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto"),
silent = TRUE)
width_set <- try(openxlsx::setColWidths(wb, sheet = sheet, cols = 6:7, widths = 30),
silent = TRUE)
}
if (isFALSE(skip_kegg)) {
new_row <- 1
message("Writing the KEGG data.")
sheet <- "KEGG"
dfwrite <- write_xlsx(data = cp_kegg, wb = wb, sheet = sheet,
title = "KEGG Results from cp.",
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
}
if (isFALSE(skip_david)) {
new_row <- 1
message("Writing the DAVID data.")
sheet <- "DAVID"
dfwrite <- write_xlsx(data = cp_david, wb = wb, sheet = sheet,
title = "DAVID Results from cp.",
start_row = new_row)
}
res <- openxlsx::saveWorkbook(wb, excel, overwrite = TRUE)
message("Finished writing excel file.")
for (img in image_list) {
removed <- file.remove(img)
}
class(res) <- "written_clusterprofiler"
return(res)
}
#' Make a pretty table of goseq data in excel.
#'
#' It is my intention to make a function like this for each ontology tool in my repetoire
#'
#' @param goseq_result A set of results from simple_goseq().
#' @param excel An excel file to which to write some pretty results.
#' @param wb Workbook object to write to.
#' @param add_trees Include topgoish ontology trees?
#' @param gather_genes Make a table of the genes in each category? (This may be slow)
#' @param order_by What column to order the data by?
#' @param pval Choose a cutoff for reporting by p-value.
#' @param add_plots Include some pvalue plots in the excel output?
#' @param height Height of included plots.
#' @param width and their width.
#' @param decreasing In forward or reverse order?
#' @param ... Extra arguments are passed to arglist.
#' @return The result from openxlsx in a prettyified xlsx file.
#' @seealso [openxlsx] [simple_goseq()]
#' @export
write_goseq_data <- function(goseq_result, excel = "excel/goseq.xlsx", wb = NULL,
add_trees = TRUE, gather_genes = TRUE, order_by = "qvalue", pval = 0.1,
add_plots = TRUE, height = 15, width = 10, decreasing = FALSE, ...) {
arglist <- list(...)
image_files <- c()
if (!is.null(arglist[["table_style"]])) {
table_style <- arglist[["table_style"]]
}
excel_basename <- NULL
if (is.null(wb)) {
xlsx <- init_xlsx(excel)
wb <- xlsx[["wb"]]
excel_basename <<- xlsx[["basename"]]
}
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
pval_column <- "limma_adjp"
if (!is.null(arglist[["pval_column"]])) {
pval_column <- arglist[["pval_column"]]
}
if (!is.null(wb)) {
message("Writing a sheet containing the legend.")
legend <- data.frame(rbind(
c("Ontology", "Molecular Function, Biological Process, or Cellular Component."),
c("Category", "Gene ontology Identifier."),
c("Term", "Short definition of the category."),
c("Over p-value", "Estimate of goseq over-representation in the row's category."),
c("Q-value", "False discovery rate correction of the p-value."),
c("DE genes in cat", "What genes provided are in this specific category?"),
c("All genes in cat", "The full set of gene annotations included in this category."),
c("Num. de", "The number of genes in column 'F'."),
c("Num. in cat", "The number of genes in column 'G'.")
))
colnames(legend) <- c("column name", "column definition")
xls_result <- write_xlsx(wb, data = legend, sheet = "legend", rownames = FALSE,
title = "Columns used in the following tables.")
summary_row <- nrow(legend) + 5
summary_df <- data.frame(rbind(
c("Queried BP ontologies", nrow(goseq_result[["bp_subset"]])),
c("Significant BP ontologies", nrow(goseq_result[["bp_interesting"]])),
c("Queried MF ontologies", nrow(goseq_result[["mf_subset"]])),
c("Significant MF ontologies", nrow(goseq_result[["mf_interesting"]])),
c("Queried CC ontologies", nrow(goseq_result[["cc_subset"]])),
c("Significant CC ontologies", nrow(goseq_result[["cc_interesting"]]))))
colnames(summary_df) <- c("Ontology type", "Number found")
xls_result <- write_xlsx(wb, data = summary_df, sheet = "legend", rownames = FALSE,
title = "Summary of the goseq search.", start_row = 1, start_col = 4)
if (isTRUE(add_plots)) {
printme <- "Histogram of observed ontology (adjusted) p-values by goseq."
xl_result <- openxlsx::writeData(wb, "legend", x = printme,
startRow = summary_row - 1, startCol = 1)
plot_try <- xlsx_insert_png(goseq_result[["pvalue_histogram"]], wb = wb, sheet = "legend",
start_col = 1, start_row = summary_row, plotname = "p_histogram",
savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
plot_try <- xlsx_insert_png(goseq_result[["pwf_plot"]], wb = wb, sheet = "legend",
start_col = 8, start_row = summary_row, plotname = "pwf_plot",
savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
} ## End making sure that an excel is desired.
trees <- NULL
if (isTRUE(add_trees)) {
trees <- try(goseq_trees(goseq_result, pval_column = pval_column), silent = TRUE)
if (class(trees)[1] == "try-error") {
trees <- NULL
}
}
table_list <- list()
for (ont in c("BP", "MF", "CC")) {
subset_name <- glue("{tolower(ont)}_subset")
categories <- goseq_result[[subset_name]]
## Pull out the relevant portions of the goseq data
## For this I am using the same (arbitrary) rules as in gather_ontology_genes()
keeper_idx <- categories[["over_represented_pvalue"]] <= pval
categories <- categories[keeper_idx, ]
if (sum(keeper_idx) == 0) {
message("No data survived to be written for the ", ont, " ontology.")
next
}
if (isTRUE(gather_genes)) {
genes_per_category <- gather_ontology_genes(goseq_result, ontology = ont,
column = "over_represented_pvalue",
pval = pval)
categories <- merge(categories, genes_per_category, by = "row.names")
rownames(categories) <- categories[["Row.names"]]
categories <- categories[, -1]
order_idx <- order(categories[[order_by]], decreasing = decreasing)
categories <- categories[order_idx, ]
kept_columns <- c("ontology", "category", "term", "over_represented_pvalue",
"qvalue", "sig", "all", "numDEInCat", "numInCat",
"limma_sigfc", "deseq_sigfc", "edger_sigfc")
categories <- categories[, kept_columns]
better_column_names <- c("Ontology", "Category", "Term", "Over p-value", "Q-value",
"DE genes in cat", "All genes in cat", "Num. DE", "Num. in cat.",
"FC from limma", "FC from DESeq", "FC from edgeR")
colnames(categories) <- better_column_names
}
## Now write the data.
new_row <- 1
message("Writing the ", ont, " data.")
## Added the suppresswarnings in case a category has too many genes to be written without complaint.
written <- suppressWarnings(write_xlsx(data = categories, wb = wb, sheet = ont,
title = glue("{ont} Results from goseq.")))
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
plot_name <- glue("{tolower(ont)}p_plot_over")
a_plot <- goseq_result[["pvalue_plots"]][[plot_name]]
plot_try <- xlsx_insert_png(a_plot, wb = wb, sheet = ont, width = width, height = height,
start_col = ncol(categories) + 2, start_row = new_row,
plotname = plot_name, savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
tree_name <- glue("{ont}_over")
if (!is.null(trees[[tree_name]])) {
plot_try <- xlsx_insert_png(trees[[tree_name]], wb = wb, sheet = ont, width = 12, height = 12,
start_col = ncol(categories) + 2, start_row = 80, res = 210,
plotname = tree_name, savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(categories) + 2
widths <- openxlsx::setColWidths(wb, sheet = ont, cols = 2:9, widths = "auto")
widths <- openxlsx::setColWidths(wb, sheet = ont, cols = 6:7, widths = 30)
table_list[[ont]] <- categories
} ## End of the for loop
res <- openxlsx::saveWorkbook(wb, excel, overwrite = TRUE)
message("Finished writing excel file.")
for (img in image_files) {
removed <- file.remove(img)
}
class(res) <- "written_goseq"
return(res)
}
#' Make a pretty table of gostats data in excel.
#'
#' It is my intention to make a function like this for each ontology tool in my repetoire
#'
#' @param gostats_result A set of results from simple_gostats().
#' @param excel An excel file to which to write some pretty results.
#' @param add_trees Include topgoish ontology trees?
#' @param order_by Which column to order the data by?
#' @param pval Choose a cutoff for reporting by p-value.
#' @param add_plots Include some pvalue plots in the excel output?
#' @param height Height of included plots.
#' @param width and their width.
#' @param decreasing Which order?
#' @param ... Extra arguments are passed to arglist.
#' @return The result from openxlsx in a prettyified xlsx file.
#' @seealso [openxlsx] [simple_gostats()]
#' @export
write_gostats_data <- function(gostats_result, excel = "excel/gostats.xlsx",
add_trees = TRUE, order_by = "qvalue", pval = 0.1, add_plots = TRUE,
height = 15, width = 10, decreasing = FALSE, ...) {
arglist <- list(...)
image_files <- c()
if (!is.null(arglist[["table_style"]])) {
table_style <- arglist[["table_style"]]
}
excel_basename <- "gostats_excel"
wb <- NULL
if ("character" %in% class(excel)) {
xlsx <- init_xlsx(excel)
wb <- xlsx[["wb"]]
excel_basename <- xlsx[["basename"]]
} else {
wb <- excel
}
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
pval_column <- "limma_adjp"
if (!is.null(arglist[["pval_column"]])) {
pval_column <- arglist[["pval_column"]]
}
if (!is.null(wb)) {
message("Writing a sheet containing the legend.")
legend <- data.frame(rbind(
c("Ontology", "Molecular Function, Biological Process, or Cellular Component."),
c("Category", "Gene ontology Identifier."),
c("Term", "Short definition of the category."),
c("Over p-value", "Estimate of gostats over-representation in the row's category."),
c("Q-value", "False discovery rate correction of the p-value."),
c("DE genes in cat", "What genes provided are in this specific category?"),
c("All genes in cat", "The full set of gene annotations included in this category."),
c("Num. de", "The number of genes in column 'F'."),
c("Num. in cat", "The number of genes in column 'G'.")
))
colnames(legend) <- c("column name", "column definition")
xls_result <- write_xlsx(wb, data = legend, sheet = "legend", rownames = FALSE,
title = "Columns used in the following tables.")
summary_row <- nrow(legend) + 5
summary_df <- data.frame(rbind(
c("Queried BP ontologies", nrow(gostats_result[["tables"]][["bp_subset"]])),
c("Significant BP ontologies", nrow(gostats_result[["tables"]][["bp_interesting"]])),
c("Queried MF ontologies", nrow(gostats_result[["tables"]][["mf_subset"]])),
c("Significant MF ontologies", nrow(gostats_result[["tables"]][["mf_interesting"]])),
c("Queried CC ontologies", nrow(gostats_result[["tables"]][["cc_subset"]])),
c("Significant CC ontologies", nrow(gostats_result[["tables"]][["cc_interesting"]]))))
colnames(summary_df) <- c("Ontology type", "Number found")
xls_result <- write_xlsx(wb, data = summary_df, sheet = "legend", rownames = FALSE,
title = "Summary of the gostats search.", start_row = 1, start_col = 4)
if (isTRUE(add_plots)) {
printme <- "Histogram of observed ontology (adjusted) p-values by gostats."
xl_result <- openxlsx::writeData(wb, "legend", x = printme,
startRow = summary_row - 1, startCol = 1)
plot_try <- xlsx_insert_png(gostats_result[["pvalue_histogram"]], wb = wb, sheet = "legend",
start_col = 1, start_row = summary_row, plotname = "p_histogram",
savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
} ## End making sure that an excel is desired.
trees <- NULL
if (isTRUE(add_trees)) {
trees <- sm(try(gostats_trees(gostats_result, pval_column = pval_column), silent = TRUE))
if (class(trees[1]) == "try-error") {
trees <- NULL
}
}
table_list <- list()
for (ont in c("BP", "MF", "CC")) {
subset_name <- glue("{tolower(ont)}_subset")
categories <- gostats_result[["tables"]][[subset_name]]
## Pull out the relevant portions of the gostats data
## For this I am using the same (arbitrary) rules as in gather_ontology_genes()
keeper_idx <- categories[["Pvalue"]] <= pval
categories <- categories[keeper_idx, ]
genes_per_category <- gather_ontology_genes(gostats_result, ontology = ont,
column = "Pvalue", pval = pval)
rownames(categories) <- categories[[1]]
categories <- merge(categories, genes_per_category, by = "row.names")
rownames(categories) <- categories[["Row.names"]]
categories[["ontology"]] <- ont
order_idx <- order(categories[[order_by]], decreasing = decreasing)
categories <- categories[order_idx, ]
kept_columns <- c("ontology", "Row.names", "Term", "Pvalue", "qvalue",
"Count", "Size", "OddsRatio", "ExpCount",
"limma_sigfc", "deseq_sigfc", "edger_sigfc")
categories <- categories[, kept_columns]
better_column_names <- c("Ontology", "Category", "Term", "Over p-value", "Q-value",
"DE genes in cat", "All genes in cat", "Odds Ratio", "Exp. Count",
"FC from limma", "FC from DESeq", "FC from edgeR")
colnames(categories) <- better_column_names
## Now write the data
new_row <- 1
message("Writing the ", ont, " data.")
dfwrite <- write_xlsx(data = categories, wb = wb, sheet = ont, title = glue("{ont} Results from gostats."))
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
plot_name <- glue("{tolower(ont)}p_plot_over")
a_plot <- gostats_result[["pvalue_plots"]][[plot_name]]
plot_try <- xlsx_insert_png(a_plot, wb = wb, sheet = ont, width = width, height = height,
start_col = ncol(categories) + 2, start_row = new_row,
plotname = plot_name, savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
tree_name <- glue("{ont}_over")
if (!is.null(trees[[tree_name]])) {
plot_try <- xlsx_insert_png(trees[[tree_name]], wb = wb, sheet = ont, width = 12, height = 12,
start_col = ncol(categories) + 2, start_row = 80, res = 210,
plotname = tree_name, savedir = excel_basename)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(categories) + 2
openxlsx::setColWidths(wb, sheet = ont, cols = 2:9, widths = "auto")
openxlsx::setColWidths(wb, sheet = ont, cols = 6:7, widths = 30)
table_list[[ont]] <- categories
} ## End of the for loop
res <- openxlsx::saveWorkbook(wb, excel, overwrite = TRUE)
message("Finished writing excel file.")
for (img in image_files) {
removed <- file.remove(img)
}
class(res) <- "written_gostats"
return(res)
}
#' Take the result from extract_significant_genes() and perform ontology searches.
#'
#' It can be annoying/confusing to extract individual sets of 'significant' genes from a
#' differential expression analysis. This function should make that process easier.
#'
#' @param significant_result Result from extract_siggenes()
#' @param excel_prefix How to start the output filenames?
#' @param excel_suffix How to end the excel filenames?
#' @param search_by Use the definition of 'significant' from which program?
#' @param type Which specific ontology search to use?
#' @param ... Arguments passed to the various simple_ontology() function.
#' @return A list of the up/down results of the ontology searches.
#' @seealso [openxlsx] [simple_goseq()] [simple_clusterprofiler()] [simple_topgo()]
#' [simple_gprofiler()] [simple_topgo()] [simple_gostats()]
sig_ontologies <- function(significant_result,
excel_prefix = "excel/sig_ontologies",
search_by = "deseq",
excel_suffix = ".xlsx", type = "gprofiler",
...) {
up_lst <- significant_result[[search_by]][["ups"]]
down_lst <- significant_result[[search_by]][["downs"]]
name_lst <- names(up_lst)
up_ret <- list()
down_ret <- list()
for (c in 1:length(name_lst)) {
name <- name_lst[[c]]
up_name <- glue("{excel_prefix}_up_{name}_{search_by}_{type}{excel_suffix}")
down_name <- glue("{excel_prefix}_down_{name}_{search_by}_{type}{excel_suffix}")
up_table <- up_lst[[c]]
down_table <- down_lst[[c]]
chosen_column <- glue("{search_by}_logfc")
switchret <- switch(
type,
"goseq" = {
up_ret[[name]] <- try(simple_goseq(
up_table, excel = up_name, ...))
down_ret[[name]] <- try(simple_goseq(
down_table, excel = down_name, ...))
},
"cp" = {
up_ret[[name]] <- try(simple_clusterprofiler(
up_table, excel = up_name, ...))
down_ret[[name]] <- try(simple_clusterprofiler(
down_table, excel = down_name, ...))
},
"topgo" = {
up_ret[[name]] <- try(simple_topgo(
up_table, excel = up_name, ...))
down_ret[[name]] <- try(simple_topgo(
down_table, excel = down_name, ...))
},
"gostats" = {
up_ret[[name]] <- try(simple_gostats(
up_table, excel = up_name, ...))
down_ret[[name]] <- try(simple_gostats(
down_table, excel = down_name, ...))
},
"gprofiler" = {
up_ret[[name]] <- try(simple_gprofiler(
up_table, first_col = chosen_column, excel = up_name, ...))
down_ret[[name]] <- try(simple_gprofiler(
down_table, first_col = chosen_column, excel = down_name, ...))
},
{
message("It appears you did not choose a type. Doing nothing.")
return(NULL)
}
)
}
retlist <- list(
"ups" = up_ret,
"downs" = down_ret
)
return(retlist)
}
#' Write some excel results from a gprofiler search.
#'
#' Gprofiler is pretty awesome. This function will attempt to write its results
#' to an excel file.
#'
#' @param gprofiler_result The result from simple_gprofiler().
#' @param wb Optional workbook object, if you wish to append to an existing workbook.
#' @param excel Excel file to which to write.
#' @param order_by Which column to order the data by?
#' @param add_plots Add some pvalue plots?
#' @param height Height of included plots?
#' @param width And their width.
#' @param decreasing Which order?
#' @param ... More options, not currently used I think.
#' @return A prettyified table in an xlsx document.
#' @seealso [openxlsx] [simple_gprofiler()]
#' @export
write_gprofiler_data <- function(gprofiler_result, wb = NULL,
excel = "excel/gprofiler_result.xlsx",
order_by = "recall", add_plots = TRUE, height = 15,
width = 10, decreasing = FALSE, ...) {
## FIXME: This function is dumb, split out the logic and simplify it.
arglist <- list(...)
image_files <- c()
excel_basename <- NULL
if (is.null(wb)) {
xlsx <- init_xlsx(excel)
wb <- xlsx[["wb"]]
excel_basename <<- xlsx[["basename"]]
}
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT",
textDecoration = "bold", border = "Bottom", fontSize = "30")
bp_data <- mf_data <- cc_data <- kegg_data <- NULL
tf_data <- react_data <- mi_data <- hp_data <- corum_data <- NULL
do_go <- TRUE
if (is.null(gprofiler_result[["GO"]])) {
do_go <- FALSE
}
if (nrow(gprofiler_result[["GO"]]) == 0) {
do_go <- FALSE
}
if (isTRUE(do_go)) {
new_row <- 1
sheet <- "GO_BP"
go_data <- gprofiler_result[["GO"]]
bp_data <- go_data[go_data[, "source"] == "GO:BP", ]
bp_order <- order(bp_data[[order_by]], decreasing = decreasing)
bp_data <- bp_data[bp_order, ]
mf_data <- go_data[go_data[, "source"] == "GO:MF", ]
mf_order <- order(mf_data[[order_by]], decreasing = decreasing)
mf_data <- mf_data[mf_order, ]
cc_data <- go_data[go_data[, "source"] == "GO:CC", ]
cc_order <- order(cc_data[[order_by]], decreasing = decreasing)
cc_data <- cc_data[cc_order, ]
if (nrow(bp_data) > 0) {
dfwrite <- write_xlsx(data = bp_data, wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."), start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["BP"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(bp_data) + 2, start_row = new_row,
plotname = "bp_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
new_row <- new_row + nrow(bp_data) + 2
}
new_row <- 1
sheet <- "GO_MF"
if (nrow(mf_data) > 0) {
dfwrite <- write_xlsx(data = mf_data, wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."), start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["MF"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(mf_data) + 2, start_row = new_row,
plotname = "mf_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
new_row <- new_row + nrow(mf_data) + 2
}
new_row <- 1
sheet <- "GO_CC"
if (nrow(cc_data) > 0) {
dfwrite <- write_xlsx(data = cc_data, wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["CC"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(cc_data) + 2, start_row = new_row,
plotname = "cc_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(cc_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
}
} ## End checking if go data is null
do_kegg <- TRUE
if (is.null(gprofiler_result[["KEGG"]])) {
do_kegg <- FALSE
}
if (nrow(gprofiler_result[["KEGG"]]) == 0) {
do_kegg <- FALSE
}
if (isTRUE(do_kegg)) {
new_row <- 1
sheet <- "KEGG"
kegg_data <- gprofiler_result[["KEGG"]]
kegg_order <- order(kegg_data[[order_by]], decreasing = decreasing)
kegg_data <- kegg_data[kegg_order, ]
dfwrite <- write_xlsx(data = kegg_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["kegg_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(kegg_data) + 2, start_row = new_row,
plotname = "kegg_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
new_row <- new_row + nrow(kegg_data) + 2
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking KEGG data
do_tf <- TRUE
if (is.null(gprofiler_result[["TF"]])) {
do_tf <- FALSE
}
if (nrow(gprofiler_result[["TF"]]) == 0) {
do_tf <- FALSE
}
if (isTRUE(do_tf)) {
new_row <- 1
sheet <- "tf"
tf_data <- gprofiler_result[["TF"]]
tf_order <- order(tf_data[[order_by]], decreasing = decreasing)
tf_data <- tf_data[tf_order, ]
dfwrite <- write_xlsx(data = tf_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["tf_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(tf_data) + 2, start_row = new_row,
plotname = "tf_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(tf_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking tf data
do_reactome <- TRUE
if (is.null(gprofiler_result[["REAC"]])) {
do_reactome <- FALSE
}
if (nrow(gprofiler_result[["REAC"]]) == 0) {
do_reactome <- FALSE
}
if (isTRUE(do_reactome)) {
new_row <- 1
sheet <- "reactome"
react_data <- gprofiler_result[["REAC"]]
react_order <- order(react_data[[order_by]], decreasing = decreasing)
react_data <- react_data[react_order, ]
dfwrite <- write_xlsx(data = react_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["reactome_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(react_data) + 2, start_row = new_row,
plotname = "react_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(react_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking reactome data
do_mi <- TRUE
if (is.null(gprofiler_result[["MIRNA"]])) {
do_mi <- FALSE
}
if (nrow(gprofiler_result[["MIRNA"]]) == 0) {
do_mi <- FALSE
}
if (isTRUE(do_mi)) {
new_row <- 1
sheet <- "mirna"
mi_data <- gprofiler_result[["MIRNA"]]
mi_order <- order(mi_data[[order_by]], decreasing = decreasing)
mi_data <- mi_data[mi_order, ]
dfwrite <- write_xlsx(data = mi_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["mi_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(mi_data) + 2, start_row = new_row,
plotname = "mi_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(mi_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking mirna data
do_hp <- TRUE
if (is.null(gprofiler_result[["HP"]])) {
do_hp <- FALSE
}
if (nrow(gprofiler_result[["HP"]]) == 0) {
do_hp <- FALSE
}
if (isTRUE(do_hp)) {
new_row <- 1
sheet <- "hp"
hp_data <- gprofiler_result[["HP"]]
hp_order <- order(hp_data[[order_by]], decreasing = decreasing)
hp_data <- hp_data[hp_order, ]
dfwrite <- write_xlsx(data = hp_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["hp_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(hp_data) + 2, start_row = new_row,
plotname = "hp_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(hp_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking HP data
do_hpa <- TRUE
if (is.null(gprofiler_result[["HPA"]])) {
do_hpa <- FALSE
}
if (nrow(gprofiler_result[["HPA"]]) == 0) {
do_hpa <- FALSE
}
if (isTRUE(do_hpa)) {
new_row <- 1
sheet <- "hpa"
hpa_data <- gprofiler_result[["HPA"]]
hpa_order <- order(hpa_data[[order_by]], decreasing = decreasing)
hpa_data <- hpa_data[hpa_order, ]
dfwrite <- write_xlsx(data = hpa_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["hpa_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(hp_data) + 2, start_row = new_row,
plotname = "hpa_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(hp_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking HPA data
do_corum <- TRUE
if (is.null(gprofiler_result[["CORUM"]])) {
do_corum <- FALSE
}
if (nrow(gprofiler_result[["CORUM"]]) == 0) {
do_corum <- FALSE
}
if (isTRUE(do_corum)) {
new_row <- 1
sheet <- "corum"
corum_data <- gprofiler_result[["CORUM"]]
corum_order <- order(corum_data[[order_by]], decreasing = decreasing)
corum_data <- corum_data[corum_order, ]
dfwrite <- write_xlsx(data = corum_data, wb = wb, sheet = sheet,
title = glue("Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- gprofiler_result[["pvalue_plots"]][["corum_plot_over"]]
plot_try <- xlsx_insert_png(
a_plot, wb = wb, sheet = sheet, width = width, height = height,
start_col = ncol(corum_data) + 2, start_row = new_row,
plotname = "corum_plot", savedir = excel_basename, doWeights = FALSE)
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
new_row <- new_row + nrow(corum_data) + 2
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
} ## End checking corum data
excel_ret <- NULL
if (!is.null(excel)) {
excel_ret <- try(openxlsx::saveWorkbook(wb, excel, overwrite = TRUE))
}
mesg("Finished writing excel file.")
for (img in image_files) {
removed <- file.remove(img)
}
class(excel_ret) <- "written_gprofiler"
return(excel_ret)
}
#' Make a pretty table of topgo data in excel.
#'
#' It is my intention to make a function like this for each ontology tool in my
#' repetoire
#'
#' @param topgo_result A set of results from simple_topgo().
#' @param excel An excel file to which to write some pretty results.
#' @param wb Workbook object to write to.
#' @param order_by Which column to order the results by?
#' @param pval Choose a cutoff for reporting by p-value.
#' @param add_plots Include some pvalue plots in the excel output?
#' @param height Height of included plots.
#' @param width and their width.
#' @param decreasing In forward or reverse order?
#' @param ... Extra arguments are passed to arglist.
#' @return The result from openxlsx in a prettyified xlsx file.
#' @seealso [openxlsx] [simple_topgo()]
#' @export
write_topgo_data <- function(topgo_result, excel = "excel/topgo.xlsx", wb = NULL,
order_by = "fisher", decreasing = FALSE,
pval = 0.1, add_plots = TRUE, height = 15, width = 10,
...) {
arglist <- list(...)
image_files <- c()
excel_basename <- NULL
if (is.null(wb)) {
xlsx <- init_xlsx(excel)
wb <- xlsx[["wb"]]
excel_basename <- xlsx[["basename"]]
}
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
pval_column <- "limma_adjp"
if (!is.null(arglist[["pval_column"]])) {
pval_column <- arglist[["pval_column"]]
}
table_list <- topgo_result[["tables"]]
result_list <- topgo_result[["results"]]
trees <- NULL
if (!is.null(wb)) {
message("Writing a sheet containing the legend.")
legend <- data.frame(rbind(
c("Ontology", "Molecular Function, Biological Process, or Cellular Component."),
c("Category", "Gene ontology Identifier."),
c("Term", "Short definition of the category."),
c("Over p-value", "Estimate of topgo over-representation in the row's category."),
c("Q-value", "False discovery rate correction of the p-value."),
c("DE genes in cat", "What genes provided are in this specific category?"),
c("All genes in cat", "The full set of gene annotations included in this category."),
c("Num. de", "The number of genes in column 'F'."),
c("Num. in cat", "The number of genes in column 'G'.")
))
colnames(legend) <- c("column name", "column definition")
xls_result <- write_xlsx(wb, data = legend, sheet = "legend", rownames = FALSE,
title = "Columns used in the following tables.")
summary_row <- nrow(legend) + 5
summary_df <- data.frame(rbind(
c("Queried BP ontologies", nrow(table_list[["bp"]])),
c("Significant BP ontologies", nrow(table_list[["bp_interesting"]])),
c("Queried MF ontologies", nrow(table_list[["mf"]])),
c("Significant MF ontologies", nrow(table_list[["mf_interesting"]])),
c("Queried CC ontologies", nrow(table_list[["cc"]])),
c("Significant CC ontologies", nrow(table_list[["cc_interesting"]]))))
colnames(summary_df) <- c("Ontology type", "Number found")
xls_result <- write_xlsx(wb, data = summary_df, sheet = "legend", rownames = FALSE,
title = "Summary of the topgo search.", start_row = 1, start_col = 4)
if (isTRUE(add_plots)) {
printme <- "Histogram of observed ontology p-values by topgo."
xl_result <- openxlsx::writeData(wb, "legend", x = printme,
startRow = summary_row - 1, startCol = 1)
plot_try <- sm(xlsx_insert_png(topgo_result[["pvalue_histograms"]][["fisher"]],
wb = wb, sheet = "legend", start_col = 1,
start_row = summary_row, plotname = "fisher_histogram",
savedir = excel_basename))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
plot_try <- sm(xlsx_insert_png(topgo_result[["pvalue_histograms"]][["KS"]],
wb = wb, sheet = "legend", start_col = 11,
start_row = summary_row, plotname = "ks_histogram",
savedir = excel_basename))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
plot_try <- sm(xlsx_insert_png(topgo_result[["pvalue_histograms"]][["EL"]],
wb = wb, sheet = "legend", start_col = 21,
start_row = summary_row, plotname = "el_histogram",
savedir = excel_basename))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
plot_try <- sm(xlsx_insert_png(topgo_result[["pvalue_histograms"]][["weight"]],
wb = wb, sheet = "legend", start_col = 1,
start_row = summary_row + 31, plotname = "weight_histogram",
savedir = excel_basename))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
trees <- try(topgo_trees(topgo_result, score_limit = pval), silent = TRUE)
if (class(trees)[1] == "try-error") {
trees <- NULL
}
}
} ## End making sure that an excel is desired.
## Pull out the relevant portions of the topgo data
## For this I am using the same (arbitrary) rules as in gather_ontology_genes()
for (ont in c("BP", "MF", "CC")) {
table_name <- glue("{tolower(ont)}_subset")
categories <- table_list[[table_name]]
categories <- categories[categories[[order_by]] <= pval, ]
genes_per_category <- gather_ontology_genes(topgo_result, ontology = ont,
pval = pval, column = order_by)
categories <- merge(categories, genes_per_category, by = "row.names")
rownames(categories) <- categories[["Row.names"]]
categories <- categories[, -1] ## Drop Row.names column
order_idx <- order(categories[[order_by]], decreasing = decreasing)
categories <- categories[order_idx, ]
categories[["ontology"]] <- ont
kept_columns <- c("ontology", "GO.ID", "Term", "Annotated",
"Significant", "Expected", "fisher", "KS", "EL", "weight", "qvalue",
"all", "sig", "limma_sigfc", "edger_sigfc", "deseq_sigfc")
kept_columns_idx <- kept_columns %in% colnames(categories)
kept_columns <- kept_columns[kept_columns_idx]
categories <- categories[, kept_columns]
better_column_names <- c(
"Ontology", "Category", "Term", "Annotated genes in cat.",
"Significant genes in cat.", "Expected genes in cat.", "Fisher score",
"KS score", "EL score", "weighted score", "qvalue",
"All genes in cat.", "DE genes in cat.",
"FC from limma", "FC from DESeq", "FC from edgeR")
better_column_names <- better_column_names[kept_columns_idx]
colnames(categories) <- better_column_names
## Now write the data.
message("Writing the ", ont, " data.")
new_row <- 1
dfwrite <- write_xlsx(data = categories, wb = wb, sheet = ont,
title = glue("{ont} Results from topgo."),
start_row = new_row)
p_plot_name <- glue("{tolower(ont)}p_plot_over")
tree_plot_name <- glue("{ont}_over")
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- topgo_result[["pvalue_plots"]][[p_plot_name]]
if (!is.null(a_plot)) {
plot_try <- sm(xlsx_insert_png(
a_plot, wb = wb, sheet = ont, width = width, height = height,
start_col = ncol(categories) + 2, start_row = new_row,
plotname = p_plot_name, savedir = excel_basename, doWeights = FALSE))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
a_plot <- trees[[tree_plot_name]]
if (!is.null(a_plot)) {
plot_try <- sm(xlsx_insert_png(
trees[[tree_plot_name]], wb = wb, sheet = ont, width = 12, height = 12,
start_col = ncol(categories) + 2, start_row = 80, res = 210,
plotname = tree_plot_name, savedir = excel_basename))
if (! "try-error" %in% class(plot_try)) {
image_files <- c(image_files, plot_try[["filename"]])
}
}
}
new_row <- new_row + nrow(categories) + 2 ## I think not needed.
openxlsx::setColWidths(wb, sheet = ont, cols = 2:9, widths = "auto")
openxlsx::setColWidths(wb, sheet = ont, cols = 6:7, widths = 30)
} ## End of the for loop MF/BP/CC
res <- openxlsx::saveWorkbook(wb, excel, overwrite = TRUE)
message("Finished writing excel file.")
for (img in image_files) {
if (file.exists(img)) {
removed <- file.remove(img)
} else {
mesg("The file ", img,
" does not exist, there is likely a missing image in the xlsx document.")
}
}
class(res) <- "written_topgo"
return(res)
}
#' Write gene ontology tables for data subsets
#'
#' Given a set of ontology results, this attempts to write them to an excel
#' workbook in a consistent and relatively easy-to-read fashion.
#'
#' @param kept_ontology A result from subset_ontology_search()
#' @param outfile Workbook to which to write.
#' @param dated Append the year-month-day-hour to the workbook.
#' @param n How many ontology categories to write for each search
#' @param overwritefile Overwrite an existing workbook?
#' @param add_plots Add the various p-value plots to the end of each sheet?
#' @param ... some extra parameters
#' @return a set of excel sheet/coordinates
#' @seealso [openxlsx]
#' @examples
#' \dontrun{
#' all_contrasts <- all_pairwise(expt, model_batch = TRUE)
#' keepers <- list(bob = ('numerator','denominator'))
#' kept <- combine_de_tables(all_contrasts, keepers = keepers)
#' changed <- extract_significant_genes(kept)
#' kept_ontologies <- subset_ontology_search(changed, lengths = gene_lengths,
#' goids = goids, gff = gff, gff_type='gene')
#' go_writer <- write_subset_ontologies(kept_ontologies)
#' }
#' @export
write_subset_ontologies <- function(kept_ontology, outfile = "excel/subset_go", dated = TRUE,
n = NULL, overwritefile = TRUE,
add_plots = TRUE, ...) {
arglist <- list(...)
image_files <- c()
if (!is.null(arglist[["table_style"]])) {
table_style <- arglist[["table_style"]]
}
xlsx <- init_xlsx(outfile)
wb <- xlsx[["wb"]]
excel_basename <- xlsx[["basename"]]
suffix <- ".xlsx"
outfile <- gsub(pattern = "\\.xlsx", replacement = "", x = outfile, perl = TRUE)
outfile <- gsub(pattern = "\\.xls", replacement = "", x = outfile, perl = TRUE)
types_list <- c("up_goseq", "down_goseq", "up_cluster", "down_cluster",
"up_topgo", "down_topgo", "up_gostats", "down_gostats",
"up_gprofiler", "down_gprofiler")
## names_list doesn't exist at this point, I losted it
## It is buried not very deep in kept_ontology I think
names_list <- names(kept_ontology[["up_goseq"]])
count <- 0
for (name in names_list) {
count <- count + 1
up_filename <- glue("{outfile}_up-{name}")
down_filename <- glue("{outfile}_down-{name}")
if (isTRUE(dated)) {
timestamp <- format(Sys.time(), "%Y%m%d%H")
up_filename <- glue("{up_filename}-{timestamp}{suffix}")
down_filename <- glue("{down_filename}-{timestamp}{suffix}")
} else {
up_filename <- glue("{up_filename}{suffix}")
down_filename <- glue("{down_filename}{suffix}")
}
onts <- c("bp", "mf", "cc")
up_stuff <- list()
down_stuff <- list()
for (ont in onts) {
ONT <- toupper(ont)
## The goseq columns are probably wrong because I dropped one, remember that.
varname <- glue("{ont}_subset")
if (!identical(list(), kept_ontology[["up_goseq"]])) {
goseq_up <- kept_ontology[["up_goseq"]][[count]]
goseq_up_ont <- goseq_up[[varname]]
if (!is.null(n)) {
goseq_up_ont <- head(goseq_up_ont, n = n)
}
goseq_up_ont <- goseq_up_ont[, c(7, 1, 6, 2, 4, 5, 8)]
colnames(goseq_up_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
goseq_down <- kept_ontology[["down_goseq"]][[count]]
goseq_down_ont <- goseq_down[[varname]]
if (!is.null(n)) {
goseq_down_ont <- head(goseq_down_ont, n = n)
}
goseq_down_ont <- goseq_down_ont[, c(7, 1, 6, 2, 4, 5, 8)]
colnames(goseq_down_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
element_name <- glue("goseq_{ont}")
up_stuff[[element_name]] <- goseq_up_ont
down_stuff[[element_name]] <- goseq_down_ont
}
if (!identical(list(), kept_ontology[["up_cluster"]])) {
varname <- glue("{ont}_all")
cluster_up <- kept_ontology[["up_cluster"]][[count]]
cluster_up_ont <- as.data.frame(cluster_up[[varname]]@result)
if (!is.null(n)) {
cluster_up_ont <- head(cluster_up_ont, n = n)
}
cluster_up_ont[["geneID"]] <- gsub(x = cluster_up_ont[["geneID"]],
pattern = "/", replacement = " ")
cluster_up_ont[["ontology"]] <- ONT
cluster_up_ont <- cluster_up_ont[, c(10, 1, 2, 5, 3, 4, 6, 7, 9, 8)]
colnames(cluster_up_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Gene ratio", "BG ratio", "Adj. p-value", "Q-value",
"Count", "Genes")
cluster_down <- kept_ontology[["down_cluster"]][[count]]
cluster_down_ont <- as.data.frame(cluster_down[[varname]]@result)
if (!is.null(n)) {
cluster_down_ont <- head(cluster_down_ont, n = n)
}
cluster_down_ont[["geneID"]] <- gsub(x = cluster_down_ont[["geneID"]],
pattern = "/", replacement = " ")
cluster_down_ont[["ontology"]] <- ONT
cluster_down_ont <- cluster_down_ont[, c(10, 1, 2, 5, 3, 4, 6, 7, 9, 8)]
colnames(cluster_down_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Gene ratio", "BG ratio", "Adj. p-value", "Q-value",
"Count", "Genes")
element_name <- glue("cluster_{ont}")
up_stuff[[element_name]] <- cluster_up_ont
down_stuff[[element_name]] <- cluster_down_ont
}
if (!identical(list(), kept_ontology[["up_topgo"]])) {
varname <- glue("{ont}_interesting")
topgo_up <- kept_ontology[["up_topgo"]][[count]]
topgo_up_ont <- topgo_up[["tables"]][[varname]]
if (!is.null(n)) {
topgo_up_ont <- head(topgo_up_ont, n = n)
}
topgo_up_ont <- topgo_up_ont[, c(2, 1, 11, 6, 7, 8, 9, 10, 4, 3, 5)]
colnames(topgo_up_ont) <- c("Ontology", "Category", "Term", "Fisher p-value",
"Q-value", "KS score", "EL score", "Weight score",
"Num. DE", "Num. in cat.", "Exp. in cat.")
topgo_down <- kept_ontology[["down_topgo"]][[count]]
topgo_down_ont <- topgo_down[["tables"]][[varname]]
if (!is.null(n)) {
topgo_down_ont <- head(topgo_down_ont, n = n)
}
topgo_down_ont <- topgo_down_ont[, c(2, 1, 11, 6, 7, 8, 9, 10, 4, 3, 5)]
colnames(topgo_down_ont) <- c("Ontology", "Category", "Term", "Fisher p-value",
"Q-value", "KS score", "EL score", "Weight score",
"Num. DE", "Num. in cat.", "Exp. in cat.")
element_name <- glue("topgo_{ont}")
up_stuff[[element_name]] <- topgo_up_ont
down_stuff[[element_name]] <- topgo_down_ont
}
if (!identical(list(), kept_ontology[["up_gostats"]])) {
varname <- glue("{ont}_over_all")
gostats_up <- kept_ontology[["up_gostats"]][[count]]
gostats_up_ont <- gostats_up[[varname]]
if (!is.null(n)) {
gostats_up_ont <- head(gostats_up_ont, n = n)
}
gostats_up_ont[["t"]] <- gsub(x = gostats_up_ont[["Term"]],
pattern = ".*\">(.*)</a>", replacement = "\\1")
gostats_up_ont[["Term"]] <- gsub(x = gostats_up_ont[["Term"]],
pattern = "<a href=\"(.*)\">.*", replacement = "\\1")
gostats_up_ont[["ont"]] <- ONT
gostats_up_ont <- gostats_up_ont[, c(10, 1, 9, 2, 5, 6, 3, 4, 8, 7)]
colnames(gostats_up_ont) <- c("Ontology", "Category", "Term", "Fisher p-value",
"Num. DE", "Num. in cat.", "Odds ratio", "Exp. in cat.",
"Q-value", "Link")
gostats_down <- kept_ontology[["down_gostats"]][[count]]
gostats_down_ont <- gostats_down[[varname]]
if (!is.null(n)) {
gostats_down_ont <- head(gostats_down_ont, n = n)
}
gostats_down_ont[["t"]] <- gsub(x = gostats_down_ont[["Term"]],
pattern = ".*\">(.*)</a>", replacement = "\\1")
gostats_down_ont[["Term"]] <- gsub(x = gostats_down_ont[["Term"]],
pattern = "<a href=\"(.*)\">.*", replacement = "\\1")
gostats_down_ont[["ont"]] <- ONT
gostats_down_ont <- gostats_down_ont[, c(10, 1, 9, 2, 5, 6, 3, 4, 8, 7)]
colnames(gostats_down_ont) <- c("Ontology", "Category", "Term", "Fisher p-value",
"Num. DE", "Num. in cat.", "Odds ratio", "Exp. in cat.",
"Q-value", "Link")
element_name <- glue("gostats_{ont}")
up_stuff[[element_name]] <- gostats_up_ont
down_stuff[[element_name]] <- gostats_down_ont
}
if (!identical(list(), kept_ontology[["up_gprofiler"]])) {
gprofiler_up <- kept_ontology[["up_gprofiler"]][[count]]
gprofiler_up_ont <- gprofiler_up[[varname]]
if (!is.null(n)) {
gprofiler_up_ont <- head(gprofiler_up_ont, n = n)
}
gprofiler_up_ont <- gprofiler_up_ont[, c(7, 1, 6, 2, 4, 5, 8)]
colnames(gprofiler_up_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
gprofiler_down <- kept_ontology[["down_gprofiler"]][[count]]
gprofiler_down_ont <- gprofiler_down[[varname]]
if (!is.null(n)) {
gprofiler_down_ont <- head(gprofiler_down_ont, n = n)
}
gprofiler_down_ont <- gprofiler_down_ont[, c(7, 1, 6, 2, 4, 5, 8)]
colnames(gprofiler_down_ont) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
element_name <- glue("gprofiler_{ont}")
up_stuff[[element_name]] <- gprofiler_up_ont
down_stuff[[element_name]] <- gprofiler_down_ont
}
} ## End MF/BP/CC loop
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
## This stanza will be repeated so I am just incrementing the new_row
## Write goseq data
new_row <- 1
sheet <- "goseq"
## Write goseq BP data
if (!is.null(up_stuff[["goseq_bp"]])) {
dfwrite <- write_xlsx(data = up_stuff[["goseq_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_goseq"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["goseq_bp"]]) + 2,
start_row = new_row, plotname = "goseq_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["goseq_bp"]]) + 2
}
## write goseq MF data
if (!is.null(up_stuff[["goseq_mf"]])) {
dfwrite <- write_xlsx(data = up_stuff[["goseq_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_goseq"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["goseq_mf"]]) + 2,
start_row = new_row, plotname = "goseq_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["goseq_mf"]]) + 2
}
## write goseq CC data
if (!is.null(up_stuff[["goseq_cc"]])) {
dfwrite <- write_xlsx(data = up_stuff[["goseq_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_goseq"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["goseq_cc"]]) + 2,
start_row = new_row, plotname = "goseq_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
}
## Move to cluster profiler
new_row <- 1
sheet <- "clusterProfiler"
## Write clusterprofiler BP data
if (!is.null(up_stuff[["cluster_bp"]])) {
dfwrite <- write_xlsx(data = up_stuff[["cluster_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_cluster"]][[name]][["pvalue_plots"]][["bp_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["cluster_bp"]]) + 2,
start_row = new_row, plotname = "cluster_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["cluster_bp"]]) + 2
}
## Write clusterprofiler MF data
if (!is.null(up_stuff[["cluster_mf"]])) {
dfwrite <- write_xlsx(data = up_stuff[["cluster_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_cluster"]][[name]][["pvalue_plots"]][["mf_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["cluster_mf"]]) + 2,
start_row = new_row, plotname = "cluster_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["cluster_mf"]]) + 2
}
## Write clusterprofiler CC data
if (!is.null(up_stuff[["cluster_cc"]])) {
dfwrite <- write_xlsx(data = up_stuff[["cluster_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_cluster"]][[name]][["pvalue_plots"]][["cc_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["cluster_cc"]]) + 2,
start_row = new_row, plotname = "cluster_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
## Move to topgo
new_row <- 1
sheet <- "topgo"
## Write topgo BP results
if (!is.null(up_stuff[["topgo_bp"]])) {
dfwrite <- write_xlsx(data = up_stuff[["topgo_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_topgo"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["topgo_bp"]]) + 2,
start_row = new_row, plotname = "topgo_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["topgo_bp"]]) + 2
}
## write topgo MF results
if (!is.null(up_stuff[["topgo_mf"]])) {
dfwrite <- write_xlsx(data = up_stuff[["topgo_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_topgo"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["topgo_mf"]]) + 2,
start_row = new_row, plotname = "topgo_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["topgo_mf"]]) + 2
}
## and cc
if (!is.null(up_stuff[["topgo_cc"]])) {
dfwrite <- write_xlsx(data = up_stuff[["topgo_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_topgo"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["topgo_cc"]]) + 2,
start_row = new_row, plotname = "topgo_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:11, widths = "auto")
}
## move to gostats
new_row <- 1
sheet <- "gostats"
## Write gostats BP stuff
if (!is.null(up_stuff[["gostats_bp"]])) {
dfwrite <- write_xlsx(data = up_stuff[["gostats_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
links <- up_stuff[["gostats_bp"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- up_stuff[["gostats_bp"]][["Category"]]
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gostats"]][[name]][["pvalue_plots"]][["bp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gostats_bp"]]) + 2,
start_row = new_row, plotname = "gostats_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["gostats_bp"]]) + 2
}
## and mf
if (!is.null(up_stuff[["gostats_mf"]])) {
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
message("The previous line was a warning about overwriting existing data because of a link.")
new_row <- new_row + nrow(up_stuff[["gostats_bp"]]) + 2
dfwrite <- write_xlsx(data = up_stuff[["gostats_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
links <- up_stuff[["gostats_mf"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- up_stuff[["gostats_mf"]][["Category"]]
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gostats"]][[name]][["pvalue_plots"]][["mf_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gostats_mf"]]) + 2,
start_row = new_row, plotname = "gostats_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["gostats_mf"]]) + 2
}
## and cc
if (!is.null(up_stuff[["gostats_cc"]])) {
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
new_row <- new_row + nrow(up_stuff[["gostats_mf"]]) + 2
dfwrite <- write_xlsx(data = up_stuff[["gostats_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
links <- up_stuff[["gostats_cc"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- up_stuff[["gostats_cc"]][["Category"]]
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gostats"]][[name]][["pvalue_plots"]][["cc_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gostats_cc"]]) + 2,
start_row = new_row, plotname = "gostats_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
}
## Write gprofiler data
new_row <- 1
sheet <- "gprofiler"
## Write gprofiler BP data
if (!is.null(up_stuff[["gprofiler_bp"]])) {
dfwrite <- write_xlsx(data = up_stuff[["gprofiler_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gprofiler"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gprofiler_bp"]]) + 2,
start_row = new_row, plotname = "gprofiler_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["gprofiler_bp"]]) + 2
}
## write gprofiler MF data
if (!is.null(up_stuff[["gprofiler_mf"]])) {
dfwrite <- write_xlsx(data = up_stuff[["gprofiler_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gprofiler"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gprofiler_mf"]]) + 2,
start_row = new_row, plotname = "gprofiler_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(up_stuff[["gprofiler_mf"]]) + 2
}
## write gprofiler CC data
if (!is.null(up_stuff[["gprofiler_cc"]])) {
dfwrite <- write_xlsx(data = up_stuff[["gprofiler_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["up_gprofiler"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(up_stuff[["gprofiler_cc"]]) + 2,
start_row = new_row, plotname = "gprofiler_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
}
## Now the down data.
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
res <- openxlsx::saveWorkbook(wb, up_filename, overwrite = TRUE)
wb <- openxlsx::createWorkbook(creator = "atb")
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT", textDecoration = "bold",
border = "Bottom", fontSize = "30")
## This stanza will be repeated so I am just incrementing the new_row
## Starting with goseq
new_row <- 1
sheet <- "goseq"
## Goseq down BP data
if (!is.null(down_stuff[["goseq_bp"]])) {
dfwrite <- write_xlsx(data = down_stuff[["goseq_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_goseq"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["goseq_bp"]]) + 2,
start_row = new_row, plotname = "goseq_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["goseq_bp"]]) + 2
}
## Goseq down MF data
if (!is.null(down_stuff[["goseq_mf"]])) {
dfwrite <- write_xlsx(data = down_stuff[["goseq_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_goseq"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["goseq_mf"]]) + 2,
start_row = new_row, plotname = "goseq_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["goseq_mf"]]) + 2
}
## Goseq down CC data
if (!is.null(down_stuff[["goseq_cc"]])) {
dfwrite <- write_xlsx(data = down_stuff[["goseq_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_goseq"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["goseq_cc"]]) + 2,
start_row = new_row, plotname = "goseq_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
## Now clusterprofiler data
new_row <- 1
sheet <- "clusterProfiler"
## cp down bp
if (!is.null(down_stuff[["cluster_bp"]])) {
dfwrite <- write_xlsx(data = down_stuff[["cluster_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_cluster"]][[name]][["pvalue_plots"]][["bp_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["cluster_bp"]]) + 2,
start_row = new_row, plotname = "cluster_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["cluster_bp"]]) + 2
}
## cp down mf
if (!is.null(down_stuff[["cluster_mf"]])) {
dfwrite <- write_xlsx(data = down_stuff[["cluster_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_cluster"]][[name]][["pvalue_plots"]][["mf_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["cluster_mf"]]) + 2,
start_row = new_row, plotname = "cluster_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["cluster_mf"]]) + 2
}
## cp down cc
if (!is.null(down_stuff[["cluster_cc"]])) {
dfwrite <- write_xlsx(data = down_stuff[["cluster_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_cluster"]][[name]][["cc_all_barplot"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["cluster_cc"]]) + 2,
start_row = new_row, plotname = "cluster_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
}
## Move to topgo
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
new_row <- 1
sheet <- "topgo"
## tp down bp
if (!is.null(down_stuff[["topgo_bp"]])) {
dfwrite <- write_xlsx(data = down_stuff[["topgo_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_topgo"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["topgo_bp"]]) + 2,
start_row = new_row, plotname = "topgo_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["topgo_bp"]]) + 2
}
## tp down mf
if (!is.null(down_stuff[["topgo_mf"]])) {
dfwrite <- write_xlsx(data = down_stuff[["topgo_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_topgo"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["topgo_mf"]]) + 2,
start_row = new_row, plotname = "topgo_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["topgo_mf"]]) + 2
}
## tp down cc
if (!is.null(down_stuff[["topgo_cc"]])) {
dfwrite <- write_xlsx(data = down_stuff[["topgo_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_topgo"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["topgo_cc"]]) + 2,
start_row = new_row, plotname = "topgo_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
}
## Move to gostats
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:11, widths = "auto")
new_row <- 1
sheet <- "gostats"
## gs down bp
if (!is.null(down_stuff[["gostats_bp"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gostats_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gostats"]][[name]][["pvalue_plots"]][["bp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gostats_bp"]]) + 2,
start_row = new_row, plotname = "gotats_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
links <- down_stuff[["gostats_bp"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- down_stuff[["gostats_bp"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
new_row <- new_row + nrow(down_stuff[["gostats_bp"]]) + 2
}
## gs down mf
if (!is.null(down_stuff[["gostats_mf"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gostats_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gostats"]][[name]][["pvalue_plots"]][["mf_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gostats_mf"]]) + 2,
start_row = new_row, plotname = "gostats_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
links <- down_stuff[["gostats_mf"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- down_stuff[["gostats_mf"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
new_row <- new_row + nrow(down_stuff[["gostats_mf"]]) + 2
}
## gs down cc
if (!is.null(down_stuff[["gostats_cc"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gostats_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gostats"]][[name]][["pvalue_plots"]][["cc_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gostats_cc"]]) + 2,
start_row = new_row, plotname = "gostats_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
links <- down_stuff[["gostats_cc"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- down_stuff[["gostats_cc"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
## Write gprofiler data
new_row <- 1
sheet <- "gprofiler"
## Write gprofiler BP data
if (!is.null(down_stuff[["gprofiler_bp"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gprofiler_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
## I want to add the pvalue plots, which are fairly deeply embedded in kept_ontology
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gprofiler"]][[name]][["pvalue_plots"]][["bpp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gprofiler_bp"]]) + 2,
start_row = new_row, plotname = "gprofiler_bp",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["gprofiler_bp"]]) + 2
}
## write gprofiler MF data
if (!is.null(down_stuff[["gprofiler_mf"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gprofiler_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gprofiler"]][[name]][["pvalue_plots"]][["mfp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gprofiler_mf"]]) + 2,
start_row = new_row, plotname = "gprofiler_mf",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
new_row <- new_row + nrow(down_stuff[["gprofiler_mf"]]) + 2
}
## write gprofiler CC data
if (!is.null(down_stuff[["gprofiler_cc"]])) {
dfwrite <- write_xlsx(data = down_stuff[["gprofiler_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
if (isTRUE(add_plots)) {
a_plot <- kept_ontology[["down_gprofiler"]][[name]][["pvalue_plots"]][["ccp_plot_over"]]
try_result <- xlsx_insert_png(a_plot, wb = wb, sheet = sheet, width = 6, height = 6,
start_col = ncol(down_stuff[["gprofiler_cc"]]) + 2,
start_row = new_row, plotname = "gprofiler_cc",
savedir = excel_basename)
if (! "try-error" %in% class(try_result)) {
image_files <- c(image_files, try_result[["filename"]])
}
}
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
}
res <- openxlsx::saveWorkbook(wb, down_filename, overwrite = TRUE)
} ## End of name_list
for (img in image_files) {
removed <- file.remove(img)
}
return(res)
}
#' Write gene ontology tables for excel
#'
#' Combine the results from goseq, cluster profiler, topgo, and gostats and drop
#' them into excel. Hopefully with a relatively consistent look.
#'
#' @param goseq The goseq result from simple_goseq()
#' @param cluster The result from simple_clusterprofiler()
#' @param topgo Guess
#' @param gostats Yep, ditto
#' @param gprofiler woo hoo!
#' @param file the file to save the results.
#' @param dated date the excel file
#' @param n the number of ontology categories to include in each table.
#' @param overwritefile overwrite an existing excel file
#' @return the list of ontology information
#' @seealso [openxlsx] [simple_goseq()] [simple_clusterprofiler()] [simple_gostats()]
#' [simple_topgo()] [simple_gprofiler()]
#' @export
write_go_xls <- function(goseq, cluster, topgo, gostats, gprofiler,
file = "excel/merged_go",
dated = TRUE, n = 30, overwritefile = TRUE) {
n <- get0("n")
if (is.null(n)) {
n <- 30
}
file <- get0("file")
if (is.null(file)) {
file <- "excel/merged_go"
}
excel_dir <- dirname(file)
if (!file.exists(excel_dir)) {
dir.create(excel_dir, recursive = TRUE)
}
suffix <- ".xlsx"
file <- gsub(pattern = "\\.xlsx", replacement = "", x = file, perl = TRUE)
file <- gsub(pattern = "\\.xls", replacement = "", x = file, perl = TRUE)
filename <- NULL
if (isTRUE(dated)) {
timestamp <- format(Sys.time(), "%Y%m%d%H")
filename <- glue("{file}-{timestamp}{suffix}")
} else {
filename <- glue("{file}{suffix}")
}
if (file.exists(filename)) {
if (isTRUE(overwritefile)) {
backup_file(filename)
}
}
## Massage the goseq tables to match Najib's request
goseq_mf <- head(goseq[["mf_subset"]], n = n)
goseq_bp <- head(goseq[["bp_subset"]], n = n)
goseq_cc <- head(goseq[["cc_subset"]], n = n)
goseq_mf <- goseq_mf[, c(7, 1, 6, 2, 4, 5, 8)]
goseq_bp <- goseq_bp[, c(7, 1, 6, 2, 4, 5, 8)]
goseq_cc <- goseq_cc[, c(7, 1, 6, 2, 4, 5, 8)]
colnames(goseq_mf) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
colnames(goseq_bp) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
colnames(goseq_cc) <- c("Ontology", "Category", "Term", "Over p-value",
"Num. DE", "Num. in cat.", "Q-value")
## Massage the clusterProfiler tables similarly
cluster_mf <- head(as.data.frame(cluster[["mf_all"]]@result), n = n)
cluster_bp <- head(as.data.frame(cluster[["bp_all"]]@result), n = n)
cluster_cc <- head(as.data.frame(cluster[["cc_all"]]@result), n = n)
cluster_mf[["geneID"]] <- gsub(x = cluster_mf[["geneID"]], pattern = "/", replacement = " ")
cluster_bp[["geneID"]] <- gsub(x = cluster_bp[["geneID"]], pattern = "/", replacement = " ")
cluster_cc[["geneID"]] <- gsub(x = cluster_cc[["geneID"]], pattern = "/", replacement = " ")
cluster_mf[["ontology"]] <- "MF"
cluster_bp[["ontology"]] <- "BP"
cluster_cc[["ontology"]] <- "CC"
cluster_mf <- cluster_mf[, c(10, 1, 2, 5, 3, 4, 6, 7, 9, 8)]
cluster_bp <- cluster_bp[, c(10, 1, 2, 5, 3, 4, 6, 7, 9, 8)]
cluster_cc <- cluster_cc[, c(10, 1, 2, 5, 3, 4, 6, 7, 9, 8)]
colnames(cluster_mf) <- c("Ontology", "Category", "Term", "Over p-value", "Gene ratio",
"BG ratio", "Adj. p-value", "Q-value", "Count", "Genes")
colnames(cluster_bp) <- c("Ontology", "Category", "Term", "Over p-value", "Gene ratio",
"BG ratio", "Adj. p-value", "Q-value", "Count", "Genes")
colnames(cluster_cc) <- c("Ontology", "Category", "Term", "Over p-value", "Gene ratio",
"BG ratio", "Adj. p-value", "Q-value", "Count", "Genes")
## Now do the topgo data
topgo_mf <- head(topgo[["tables"]][["mf_interesting"]], n = n)
topgo_bp <- head(topgo[["tables"]][["bp_interesting"]], n = n)
topgo_cc <- head(topgo[["tables"]][["cc_interesting"]], n = n)
topgo_mf <- topgo_mf[, c(2, 1, 11, 6, 7, 8, 9, 10, 4, 3, 5)]
topgo_bp <- topgo_bp[, c(2, 1, 11, 6, 7, 8, 9, 10, 4, 3, 5)]
topgo_cc <- topgo_cc[, c(2, 1, 11, 6, 7, 8, 9, 10, 4, 3, 5)]
colnames(topgo_mf) <- c("Ontology", "Category", "Term", "Fisher p-value", "Q-value", "KS score",
"EL score", "Weight score", "Num. DE", "Num. in cat.", "Exp. in cat.")
colnames(topgo_bp) <- c("Ontology", "Category", "Term", "Fisher p-value", "Q-value", "KS score",
"EL score", "Weight score", "Num. DE", "Num. in cat.", "Exp. in cat.")
colnames(topgo_cc) <- c("Ontology", "Category", "Term", "Fisher p-value", "Q-value", "KS score",
"EL score", "Weight score", "Num. DE", "Num. in cat.", "Exp. in cat.")
## And the gostats data
gostats_mf <- head(gostats[["mf_over_all"]], n = n)
gostats_bp <- head(gostats[["bp_over_all"]], n = n)
gostats_cc <- head(gostats[["cc_over_all"]], n = n)
gostats_mf[["t"]] <- gsub(
x = gostats_mf[["Term"]], pattern = ".*\">(.*)</a>", replacement = "\\1")
gostats_bp[["t"]] <- gsub(
x = gostats_bp[["Term"]], pattern = ".*\">(.*)</a>", replacement = "\\1")
gostats_cc[["t"]] <- gsub(
x = gostats_cc[["Term"]], pattern = ".*\">(.*)</a>", replacement = "\\1")
gostats_mf[["Term"]] <- gsub(
x = gostats_mf[["Term"]], pattern = "<a href=\"(.*)\">.*", replacement = "\\1")
gostats_bp[["Term"]] <- gsub(
x = gostats_bp[["Term"]], pattern = "<a href=\"(.*)\">.*", replacement = "\\1")
gostats_cc[["Term"]] <- gsub(
x = gostats_cc[["Term"]], pattern = "<a href=\"(.*)\">.*", replacement = "\\1")
gostats_mf[["ont"]] <- "MF"
gostats_bp[["ont"]] <- "BP"
gostats_cc[["ont"]] <- "CC"
gostats_mf <- gostats_mf[, c(10, 1, 9, 2, 5, 6, 3, 4, 8, 7)]
gostats_bp <- gostats_bp[, c(10, 1, 9, 2, 5, 6, 3, 4, 8, 7)]
gostats_cc <- gostats_cc[, c(10, 1, 9, 2, 5, 6, 3, 4, 8, 7)]
colnames(gostats_mf) <- c("Ontology", "Category", "Term", "Fisher p-value", "Num. DE",
"Num. in cat.", "Odds ratio", "Exp. in cat.", "Q-value", "Link")
colnames(gostats_bp) <- c("Ontology", "Category", "Term", "Fisher p-value", "Num. DE",
"Num. in cat.", "Odds ratio", "Exp. in cat.", "Q-value", "Link")
colnames(gostats_cc) <- c("Ontology", "Category", "Term", "Fisher p-value", "Num. DE",
"Num. in cat.", "Odds ratio", "Exp. in cat.", "Q-value", "Link")
lst <- list("goseq_mf" = goseq_mf,
"goseq_bp" = goseq_bp,
"goseq_cc" = goseq_cc,
"cluster_mf" = cluster_mf,
"cluster_bp" = cluster_bp,
"cluster_cc" = cluster_cc,
"topgo_mf" = topgo_mf,
"topgo_bp" = topgo_bp,
"topgo_cc" = topgo_cc,
"gostats_mf" = gostats_mf,
"gostats_bp" = gostats_bp,
"gostats_cc" = gostats_cc)
wb <- openxlsx::createWorkbook(creator = "atb")
hs1 <- openxlsx::createStyle(fontColour = "#000000", halign = "LEFT",
textDecoration = "bold", border = "Bottom", fontSize = "30")
## This stanza will be repeated so I am just incrementing the new_row
new_row <- 1
sheet <- "goseq"
dfwrite <- write_xlsx(data = lst[["goseq_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
new_row <- new_row + nrow(lst[["goseq_mf"]]) + 2
dfwrite <- write_xlsx(data = lst[["goseq_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:7, widths = "auto")
new_row <- 1
sheet <- "clusterProfiler"
dfwrite <- write_xlsx(data = lst[["cluster_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
new_row <- new_row + nrow(lst[["cluster_bp"]]) + 2
dfwrite <- write_xlsx(data = lst[["cluster_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
new_row <- new_row + nrow(lst[["cluster_mf"]]) + 2
dfwrite <- write_xlsx(data = lst[["cluster_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
new_row <- 1
sheet <- "topgo"
dfwrite <- write_xlsx(data = lst[["topgo_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
new_row <- new_row + nrow(lst[["topgo_bp"]]) + 2
dfwrite <- write_xlsx(data = lst[["topgo_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
new_row <- new_row + nrow(lst[["topgo_mf"]]) + 2
dfwrite <- write_xlsx(data = lst[["topgo_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:11, widths = "auto")
new_row <- 1
sheet <- "gostats"
dfwrite <- write_xlsx(data = lst[["gostats_bp"]], wb = wb, sheet = sheet,
title = glue("BP Results from {sheet}."),
start_row = new_row)
links <- lst[["gostats_bp"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- lst[["gostats_bp"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
new_row <- new_row + nrow(lst[["gostats_bp"]]) + 2
dfwrite <- write_xlsx(data = lst[["gostats_mf"]], wb = wb, sheet = sheet,
title = glue("MF Results from {sheet}."),
start_row = new_row)
links <- lst[["gostats_mf"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- lst[["gostats_mf"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
new_row <- new_row + nrow(lst[["gostats_mf"]]) + 2
dfwrite <- write_xlsx(data = lst[["gostats_cc"]], wb = wb, sheet = sheet,
title = glue("CC Results from {sheet}."),
start_row = new_row)
links <- lst[["gostats_cc"]][["Link"]]
class(links) <- "hyperlink"
names(links) <- lst[["gostats_cc"]][["Category"]]
openxlsx::writeData(wb, sheet, x = links, startRow = new_row + 1, startCol = 10)
openxlsx::setColWidths(wb, sheet = sheet, cols = 2:9, widths = "auto")
res <- openxlsx::saveWorkbook(wb, file, overwrite = TRUE)
return(res)
}
## EOF
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.