# jam_save_sestats.R
#' Save SE contrast stats output
#'
#' Save SE contrast stats output
#'
#' This function is intended as a convenient method to export
#' a series of statistical tables into organized, formatted Excel
#' worksheets.
#'
#' @param sestats `list` object output from `se_contrast_stats()`
#' @param file `character` string indicating the filename to save.
#' When `file` is `NULL`, output is returned as a `list`, equivalent
#' to `type="list"`.
#' @param assay_names `character` string indicating which assay names
#' to save, stored in `dimnames(sestats$hit_array)$Signal`.
#' When `NULL` then all assay names are saved.
#' @param contrast_names `character` string indicating which contrasts
#' to save, stored in `dimnames(sestats$hit_array)$Contrasts`.
#' The default `NULL` will save all contrasts.
#' @param type `character` string indicating the type of file to save.
#' * `"xlsx"` - saves an Excel xlsx file using `jamba::writeOpenxlsx()`.
#' Each worksheet is renamed so the string length does not exceed
#' `max_nchar_sheetname`, whose default is 31 characters.
#' * `"list"` - returns a `list` of `data.frame` objects, equivalent
#' to the data to be stored in an output file.
#' This option will **not** save data to `file`.
#' @param data_content `character` string describing the data content
#' to include:
#' * `"contrasts","hits"` - include worksheets per `contrast_names`,
#' then assemble one `"hit sheet"` across all contrasts.
#' One hit sheet is created for each value in `assay_names`.
#' * `"contrasts"` - (default) include worksheets per `contrast_names`
#' * `"hits"` - include only one `"hit sheet"` per value in
#' `assay_names`.
#' @param max_nchar_sheetname `integer` number of characters allowed in
#' Microsoft Excel worksheet names, default 31 characters.
#' @param review_output `logical` indicating whether a summary of output
#' should be returned as a `data.frame` without exporting data. This
#' summary will indicate all worksheets to be saved, in addition
#' to the sheetName for each worksheet.
#' @param sheet_prefix `character` string with optional character prefix
#' to use when creating worksheet names.
#' @param use_assay_suffix `logical` indicating whether to include
#' `assay_names` as suffix when forming sheet names, when there is more
#' than one unique assay name to be saved. This step will
#' attempt to abbreviate `assay_names` by taking up to 4 characters
#' from each word in the assay name, where each word is
#' delimited by `"[-.:_ ]+"`.
#' Otherwise, sheet names are forced to be unique by taking a substring
#' of the contrast name of up to `max_nchar_sheetname`, passing any
#' duplicate strings to `jamba::makeNames()` with suffix `"_v"`
#' followed by an integer number.
#' @param width_factor `numeric` used to adjust relative column widths
#' in the output Excel worksheets.
#' @param colorSub `character` vector of colors, optional, used to define
#' categorical background colors for text string fields in Excel.
#' The `names(colorSub)` are matched to character strings to assign
#' colors.
#' @param hitRule,hitFormat,freezePaneColumn arguments passed to
#' `jamba::writeOpenxlsx()`.
#' @param verbose `logical` indicating whether to print verbose output.
#' @param ... additional arguments are passed to `jamba::writeOpenxlsx()`
#'
#' @export
save_sestats <- function
(sestats,
file=NULL,
assay_names=NULL,
contrast_names=NULL,
cutoff_names=NULL,
type=c("xlsx",
"list"),
data_content=c("data",
"hits"),
max_nchar_sheetname=31,
review_output=TRUE,
sheet_prefix=NULL,
use_assay_suffix=TRUE,
width_factor=1,
max_rows=NULL,
colorSub=NULL,
rename_contrasts=TRUE,
se=NULL,
rowData_colnames=NULL,
row_type="gene_name",
hitRule=c(-1, 0, 1),
hitFormat="#,##0",
freezePaneColumn=2,
verbose=FALSE,
...)
{
# validate arguments
type <- match.arg(type);
data_content <- match.arg(data_content,
choices=c("data", "hits"),
several.ok=TRUE);
if (length(file) == 0) {
type <- "list";
}
# determine which results to save
# validate assay_names
assay_names <- intersect(assay_names,
dimnames(sestats$hit_array)$Signal);
if (length(assay_names) == 0) {
assay_names <- dimnames(sestats$hit_array)$Signal;
}
# validate contrast_names
contrast_names <- intersect(contrast_names,
dimnames(sestats$hit_array)$Contrasts);
if (length(contrast_names) == 0) {
contrast_names <- dimnames(sestats$hit_array)$Contrasts;
}
# validate cutoff_names
cutoff_names <- intersect(cutoff_names,
dimnames(sestats$hit_array)$Cutoffs);
if (length(cutoff_names) == 0) {
cutoff_names <- head(dimnames(sestats$hit_array)$Cutoffs, 1);
}
# assembly export_df to describe what data will be exported
# assay_names <- paste0("assay", 1);
# cutoff_names <- paste0("cutoff", 1);
# contrast_names <- paste0("contrast", 1:5);
export_df <- data.frame(check.names=FALSE,
assay_names=rep(assay_names,
each=length(cutoff_names) * length(contrast_names)),
cutoff_names=rep(
rep(cutoff_names,
each=length(contrast_names)), length(assay_names)),
contrast_names=rep(
rep(contrast_names,
length(assay_names) * length(cutoff_names)))
)
# create sheet names
export_df$sheetName <- export_df$contrast_names;
export_df$saved <- ifelse("data" %in% data_content, "Yes", "No")
# optionally rename contrasts
if (rename_contrasts) {
export_df$sheetName <- tryCatch({
gsub("[:]+", ";",
contrast2comp(export_df$contrast_names))
}, error=function(e){
export_df$contrast_names
});
}
if (length(sheet_prefix) > 0) {
export_df$sheetName <- paste0(sheet_prefix,
export_df$sheetName);
}
# abbreviate assay_names
short_assay_names <- "";
if (use_assay_suffix &&
length(unique(export_df$assay_names)) > 1) {
for (n in 4:1) {
short_assay_names <- export_df$assay_names
ipattern <- paste0("([a-zA-Z]{1,",
n,
"})[a-zA-Z. ]*(_|$)")
short_assay_names <- gsub(ipattern,
"\\1",
short_assay_names)
test_nchar <- max(nchar(export_df$sheetName)) +
max(nchar(short_assay_names)) + 4;
if (test_nchar <= max_nchar_sheetname) {
break;
}
}
export_df$sheetName <- paste0(export_df$sheetName,
"_",
short_assay_names);
}
# optionally add hit sheets per assay_name
if ("hits" %in% data_content) {
hit_df <- data.frame(assay_names=assay_names,
cutoff_names=NA,
contrast_names="hits",
sheetName=paste("hit", assay_names),
saved="Yes")
export_df <- rbind(export_df, hit_df)
}
# quick function to ensure unique sheetName,
# up to max_nchar_sheetname nchar length
validate_sheetNames <- function
(sheetNames,
max_nchar_sheetname=31,
trim_n=3,
...)
{
sheetNames <- gsub("^[-.:_ ]+|[-.:_ ]+$", "", sheetNames);
is_toolong <- (nchar(sheetNames) > max_nchar_sheetname);
if (any(is_toolong)) {
sheetNames[is_toolong] <- substr(sheetNames, 1, max_nchar_sheetname)
}
sheetNames <- gsub("^[-.:_ ]+|[-.:_ ]+$", "", sheetNames);
is_dupe <- duplicated(sheetNames);
if (any(is_dupe)) {
sheetNames[is_dupe] <- jamba::makeNames(
substr(sheetNames[is_dupe], 1, max_nchar_sheetname - trim_n),
renameOnes=TRUE,
suffix="_v");
}
add_n <- 0;
while (any(nchar(sheetNames) > max_nchar_sheetname)) {
add_n <- add_n + 1;
sheetNames[is_dupe] <- jamba::makeNames(
substr(sheetNames[is_dupe], 1, max_nchar_sheetname - (trim_n + add_n)),
renameOnes=TRUE,
suffix="_v");
if (add_n > 3) {
break;
}
}
if (any(duplicated(sheetNames))) {
sheetNames <- validate_sheetNames(sheetNames,
max_nchar_sheetname,
trim_n + 1);
}
sheetNames
}
# ensure each sheet name is unique and no greater than max_nchar_sheetname
export_df$sheetName <- validate_sheetNames(export_df$sheetName,
max_nchar_sheetname);
if (length(max_rows) > 0) {
max_rows <- rep(max_rows, nrow(export_df));
export_df$max_rows <- max_rows;
}
if (TRUE %in% review_output) {
return(export_df);
}
# Iterate each stat table and save to Excel worksheet
append <- FALSE;
wb <- NULL;
export_dfs <- list();
hit_list_dfs <- list();
for (irow in seq_len(nrow(export_df))) {
assay_name <- export_df$assay_names[irow];
cutoff_name <- export_df$cutoff_names[irow];
contrast_name <- export_df$contrast_names[irow];
sheetName <- export_df$sheetName[irow];
if (verbose) {
jamba::printDebug("save_sestats(): ",
c("Preparing sheet (", irow, "): "),
sheetName,
sep="");
}
name_colnames <- NULL;
if ("hits" %in% export_df$contrast_names[irow]) {
if (TRUE %in% verbose) {
jamba::printDebug("save_sestats(): ",
"Assembling hit sheet for assay_name '", assay_name, "'");
}
# assemble one hit data.frame
name_colnames <- unique(unlist(lapply(hit_list_dfs[[assay_name]], function(xdf){
attr(xdf, "nameColumns")
})));
# make first column a factor to help order rows consistently
iDF <- jamba::mergeAllXY(
lapply(hit_list_dfs[[assay_name]], function(xdf){
xdf[,1] <- factor(xdf[,1], levels=unique(xdf[,1]))
xdf
}));
iDF <- jamba::mixedSortDF(iDF, byCols=1);
iDF[,1] <- as.character(iDF[,1]);
attr(iDF, "nameColumns") <- name_colnames;
} else {
iDF <- sestats$stats_dfs[[assay_name]][[contrast_name]];
if (length(max_rows) > 0) {
iDF <- head(iDF,
max_rows[irow]);
}
if (length(iDF) == 0 || nrow(iDF) == 0) {
if (verbose) {
jamba::printDebug("save_sestats(): ",
" No data availale to save.");
}
next;
}
iDF <- jamba::renameColumn(iDF,
from="probes",
to=row_type);
name_grep <- unique(c(
gsub("[()]", ".", row_type),
"probes",
"symbol",
"gene",
"protein"));
name_grep <- jamba::cPaste(sep="|",
name_grep[nchar(name_grep) > 0]);
name_colnames <- jamba::provigrep(name_grep, colnames(iDF));
icols <- jamba::provigrep(c(
name_grep,
"^hit ",
"."),
colnames(iDF));
iDF <- iDF[,icols, drop=FALSE];
# optionally add rowData_colnames
if (length(se) > 0 && length(rowData_colnames) > 0) {
rowData_colnames <- intersect(rowData_colnames,
colnames(SummarizedExperiment::rowData(se)));
if (length(rowData_colnames) > 0) {
imatch <- match(iDF[,1], rownames(se));
if (any(is.na(imatch))) {
jmatch <- match(rownames(iDF), rownames(se));
if (!any(is.na(jmatch))) {
imatch <- jmatch;
}
}
if (!any(is.na(imatch))) {
jDF <- data.frame(check.names=FALSE,
SummarizedExperiment::rowData(se[imatch,])[, rowData_colnames, drop=FALSE]);
iDF <- data.frame(check.names=FALSE,
iDF[, 1, drop=FALSE],
jDF,
iDF[, -1, drop=FALSE]);
# add to name_colnames
name_colnames <- c(name_colnames[1],
colnames(jDF),
name_colnames[-1]);
}
}
}
}
iDF$assay_name <- assay_name;
# detect column types
highlightColumns <- jamba::igrep("symbol|gene|descr", colnames(iDF));
hitColumns <- jamba::igrep("^hit ", colnames(iDF));
pvalueColumns <- setdiff(
jamba::igrep("p.val|pval|adjp|adj.p", colnames(iDF)),
hitColumns);
fcColumns <- jamba::igrep("^fold ", colnames(iDF));
lfcColumns <- jamba::igrep("^logFC ", colnames(iDF));
numColumns <- jamba::igrep(" mean$|^mgm ", colnames(iDF));
# adjust column widths
smallColumns <- unique(c(pvalueColumns,
fcColumns,
lfcColumns,
numColumns));
save_widths <- rep(10, ncol(iDF));
save_widths[length(save_widths)] <- 40;
save_widths[hitColumns] <- 20;
save_widths[smallColumns] <- 15;
save_widths <- save_widths * width_factor;
# Add attributes with Excel column types
attr(iDF, "colWidths") <- save_widths;
attr(iDF, "highlightColumns") <- highlightColumns;
attr(iDF, "hitColumns") <- hitColumns;
attr(iDF, "pvalueColumns") <- pvalueColumns;
attr(iDF, "fcColumns") <- fcColumns;
attr(iDF, "lfcColumns") <- lfcColumns;
attr(iDF, "numColumns") <- numColumns;
attr(iDF, "nameColumns") <- name_colnames;
# optionally assemble hit sheets
if ("hits" %in% data_content) {
# add the hit data.frame to a list to assemble later
# jamba::printDebug("head(iDF, 5):");print(head(iDF, 5));# debug
# jamba::printDebug("name_colnames:");print(name_colnames);# debug
# jamba::printDebug("hitColumns:");print(hitColumns);# debug
hit_list_dfs[[assay_name]][[sheetName]] <- iDF[, c(name_colnames,
colnames(iDF)[hitColumns]), drop=FALSE];
attr(hit_list_dfs[[assay_name]][[sheetName]],
"nameColumns") <- name_colnames;
}
# save to Excel
if ("xlsx" %in% type && "Yes" %in% export_df[irow, "saved"]) {
wb <- jamba::writeOpenxlsx(
file=NULL,
wb=wb,
append=append,
sheetName=sheetName,
x=iDF,
verbose=FALSE,
autoWidth=FALSE,
highlightColumns=highlightColumns,
hitColumns=hitColumns,
hitRule=hitRule,
hitFormat=hitFormat,
pvalueColumns=pvalueColumns,
pvalueRule=c(1e-05, 0.05, 1),
fcColumns=fcColumns,
lfcColumns=lfcColumns,
numColumns=numColumns,
freezePaneColumn=freezePaneColumn,
colWidths=save_widths,
colorSub=colorSub,
...);
append <- TRUE;
}
# add to list
if ("list" %in% type && "Yes" %in% export_df[irow, "saved"]) {
export_dfs[[sheetName]] <- iDF;
}
}
if ("xlsx" %in% type) {
openxlsx::saveWorkbook(wb=wb,
file=file,
overwrite=TRUE);
}
return(invisible(c(export_dfs,
list(key=export_df))));
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.