jamses: Jam SummarizedExperiment Stats

#' Compute contrast statistics on SummarizedExperiment data
#'
#' Compute contrast statistics on SummarizedExperiment data
#'
#' This function is essentially a wrapper around statistical methods
#' in the `limma` package, with additional steps to apply statistical
#' thresholds to define "statistical hits" by three main criteria:
#'
#' * P-value or adjusted P-value
#' * fold change
#' * max group mean
#'
#' This function is unique in that it applies the statistical methods
#' to one or more "signals" in the input `SummarizedExperiment` assays,
#' specifically intended to compare things like normalization methods.
#'
#' If multiple statistical thresholds are defined, each one is applied
#' in order, which is specifically designed to compare the effect of
#' applying different statistical thresholds. For example one may want
#' to pre-compute "statistical hits" using adjusted P-value 0.05, and 0.01;
#' or using fold change >= 1.5, or fold change >= 2.0. The underlying
#' statistics are the same, but a column indicating hits is created
#' for each set of thresholds.
#'
#' Hits are annotated:
#'
#'  * `-1` for down-regulation
#'  * `0` for un-changed (by the given criteria)
#'  * `1` for up-regulation
#'
#'  The results are therefore intended to feed directional Venn
#'  diagrams, which display the overlapping gene hits, and whether the
#'  directions are shared or opposed.
#'
#' This function can optionally apply the limma-voom workflow,
#' which involves calculating matrix weights using `limma::voom()`,
#' then applying those weights during the model fit.
#'
#' The output is intended to include several convenient formats:
#'
#' * `stats_dfs` - list of `data.frame` stats one per contrast
#' * `stats_df` - one `data.frame` with all stats together
#' * `hit_array` - `array` with three dimensions: signal; contrast; threshold
#' whose cells contain hit flags (-1, 0, 1) named by `rownames(se)`.
#'
#' Design and contrast matrices can be defined using the function
#' `jamses::groups_to_sedesign()`. That function assigns each sample
#' to a sample group, then assembles all relevant group contrasts
#' which involve only one-factor contrast at a time. It optionally
#' defines two-factor contrasts (contrast of contrasts) where
#' applicable.
#'
#' A subset of genes (`rownames(se)`) or samples (`colnames(se)`) can
#' be defined, to restrict calculations to use only the subset data.
#'
#' @param se `SummarizedExperiment` object.
#'    * Note that `colnames(se)` should match the samples in `sedesign`,
#'    or the `rownames(idesign)` when `sedesign` is not supplied.
#'    * Data is subset by `colnames(se)` using `isamples` when supplied,
#'    and `rownames(se)` when `igenes` is supplied.
#'    * Note argument `rowData_colnames` can be used to retain some
#'    `rowData(se)` columns in the stat `data.frame` summaries for
#'    convenience, particularly helpful when analyzing microarray data
#'    where the `rownames(se)` represent probe ID or assay ID.
#' @param assay_names `character` vector with one or more assay names
#'    from `names(assays(se))`.
#' @param adjp_cutoff `numeric` value threshold with the adjusted P-value
#'    at or below which a contrast result can be considered a statistical
#'    hit.
#'    This threshold is applied in addition to other `cutoff` values.
#' @param p_cutoff `numeric` value threshold with the unadjusted P-value
#'    at or below which a contrast result can be considered a statistical
#'    hit. This argument is not recommended, in favor of using `adjp_cutoff`.
#'    This threshold is applied in addition to other `cutoff` values.
#' @param fold_cutoff `numeric` value threshold indicating the normal
#'    absolute fold change at or above which a contrast result can be
#'    considered a statistical hit. For clarity, this threshold is normal
#'    space fold change, for example 2-fold would be `fold_cutoff=2`.
#' @param int_adjp_cutoff,int_p_cutoff,int_fold_cutoff optional thresholds
#'    used only when a two-way interaction style contrast is detected.
#'    These optional thresholds may be useful to apply more lenient criteria
#'    to interaction contrasts, but in that event are cautioned to be
#'    used for data mining exercises. Ideally, the thresholds are identical
#'    between pairwise and interaction contrasts, and ideally there are
#'    enough replicates in the data to support the interaction contrasts
#'    with sufficient confidence to make those comparisons.
#' @param mgm_cutoff `numeric` value threshold of the maximum group mean
#'    value required in each contrast for the contrast to be considered
#'    a statistical hit.
#'    The "max group mean" logic requires only one
#'    group in a contrast to be above this threshold, while all other
#'    groups can have values below the threshold.
#'    This threshold is applied in addition to other `cutoff` values.
#' @param ave_cutoff `numeric` value threshold of the average expression
#'    as reported by `limma::lmFit()`, within each normgroup if relevant,
#'    for the contrast to be considered a statistical hit.
#'    This threshold is applied in addition to other `cutoff` values.
#'    Typically the column "AvgExpr" is calculated as a row mean.
#' @param confint `logical` passed to `limma::topTable()` indicating
#'    whether to calculate 95% confidence intervals for log2 fold change
#'    `logFC` values. Alternatively it can be a `numeric` value between
#'    zero and one specifying a specific confidence interval.
#' @param floor_min,floor_value `numeric` minimum value (floor_min) at
#'    or below which `numeric` values in the assay data matrix are
#'    reverted to `floor_value` as a replacement. This option is
#'    valuable to set all `numeric` values at or below zero to zero,
#'    or to set all values at or below zero to `NA` in circumstances
#'    where zero indicates "no measurement" and would be more accurately
#'    represented as a missing measurement than a measurement of `0`.
#'    * Note: `floor_min` is applied before `handle_na`, so that this step
#'    is able to create `NA` values which will be handled appropriately
#'    based upon the option `handle_na`.
#' @param sedesign `SEDesign` object as defined by `groups_to_sedesign()`,
#'    with slotNames `"design"`, `"contrasts"`, and `"samples"`.
#'    The arguments `idesign` and `icontrasts` are ignored when this
#'    argument is defined.
#' @param icontrasts,idesign `numeric` matrices representing statistical
#'    contrasts, and sample-group design matrices. These values are ignored
#'    when `sedesign` is defined.
#' @param isamples `character` vector with optional subset of `colnames(se)`,
#'    by default it uses `colnames(se)` that are also defined in the design
#'    matrix.
#' @param igenes `character` vector with optional subset of `rownames(se)`,
#'    by default it uses all `rownames(se)`.
#' @param enforce_design `logical` (this option is not implemented).
#'    By default the design matrix is used to subset the input `colnames(se)`
#'    as needed, and `isamples` is used to subset the design matrix
#'    and corresponding contrasts as relevant.
#' @param use_voom `logical` indicating whether to apply `limma-voom`
#'    analysis steps. When applied, data is not scaled using `limma:voom()`,
#'    instead uses data as supplied.
#' @param voom_block_twostep `logical` indicating whether to perform the
#'    "two-step" voom methodology when `block` is also supplied.
#'    This workflow is recommended by voom authors:
#'    * call `voom()` first without `block` to determine overall `weights`
#'    * call `duplicateCorrelation()` if necessary, using the voom `weights`,
#'    and the `block` argument, to calculate `correlation`.
#'    * call `voom()` again using the `correlation`, `weights`, and `block`
#'    arguments. This produces improved `weights`.
#'    * call `duplicateCorrelation()` again with the updated `weights`,
#'    and `block` in order to calculate improved `correlation`.
#'    * Then proceed with `lmFit()` using appropriate `weights` using
#'    `block`; and appropriate `correlation` also using the proper `weights`
#'    and `block`.
#' @param posthoc_test `character` string indicating an optional post-hoc
#'    test to apply.
#'    * `"none"`: applies `limma::eBayes()` by default, the moderated t-test.
#'    * `"DEqMS"`: applies adjustment for proteomics measurements provided
#'    by the package `"DEqMS"`. See `posthoc_args`.
#' @param posthoc_args `list` named by the `posthoc_test` above.
#'    `"DEqMS"` recognizes two arguments, which are passed to
#'    `DEqMS::spectraCounteBayes()`:
#'    * `"PSM_counts"`: a `numeric` vector of peptide spectra matched, one
#'    per `igenes` or `rownames(se)`. These values are used by DEqMS to
#'    model variability based upon the number of spectra as the key
#'    measure of confidence.
#'    * `"fit.method"`: `character` name of the model to use, default
#'    is `"loess"`.
#' @param weights `numeric` non-negative precision weights passed to
#'    `limma::lmFit()`, either as a matrix with nrow and ncol that
#'    match `igenes` and `isamples`, respectively, or matching one
#'    of `length(igenes)` or `length(isamples)`. When `igenes` or `isamples`
#'    are not supplied, it uses `nrow(se)` or `ncol(se)`, respectively.
#' @param robust `logical` passed to `limma::eBayes()`, whether estimation
#'    of `df.prior` and `var.prior` should be robust against outlier
#'    sample variances.
#' @param handle_na `character` string indicating how to handle `NA`
#'    values in the data matrix, passed to `handle_na_values()`.
#'    * `"partial"`: Replace `NA` values with `na_value`,
#'    except when an entire group is `NA` the entire group is kept at `NA`.
#'    * `"full"`: Retain `NA` values, except when an entire group is `NA`
#'    the replicate values are replaced with `na_value`. The option
#'    `"full1"` may be preferred, since it only replaces one value
#'    in the group, therefore does not misrepresent the variability
#'    of the group as zero.
#'    * `"full1"`: Retain `NA` values, except when an entire group is `NA`,
#'    one replicate value in the group is replaced with `na_value`. By
#'    replacing only one replicate with `na_value` the group does not
#'    have a variance/dispersion, forcing the variance to be determined by
#'    the other group in the contrast, while still allowing an approximate
#'    fold change to be calculated. This option is suitable when there
#'    is a noise floor above zero, as it retains an approximate
#'    fold change while estimating a P-value using only the variance
#'    derived from the non-`NA` group.
#'    * `"all"`: replace all `NA` values with `na_value`.
#'    * `"none"`: perform no replacement of `NA` values.
#' @param na_value `numeric` passed to `handle_na_values()`
#' @param rowData_colnames `character` vector of colnames in `rowData(se)`
#'    that should be retained in each stat `data.frame` produced
#'    as output. The values in `rowData_colnames` are intersected
#'    with `colnames(rowData(se))` to determine which columns to keep.
#' @param collapse_by_gene `logical` (not currently implemented).
#' @param block `character`, `factor`, or `numeric` used as a blocking
#'    factor, using argument `block` in `limma::lmFit()` for example.
#'
#'    The argument can be supplied as one of the following:
#'    * `character` with one or more `colnames(colData(se))`
#'    * `character` vector named using values in `isamples` and/or
#'    `colnames(se)`
#'    * `character` vector without names, whose length must equal
#'    `length(isamples)` or when `isamples` is not provided,
#'    `ncol(se)`.
#' @param correlation optional inter-duplicate or inter-technical
#'    correlation matrix passed to `limma::lmFit()`.
#' @param max_correlation_rows `numeric` maximum number of rows in
#'    `imatrix` to use when calling `limma::duplicateCorrelation()`.
#'    This process only occurs when `block` is defined, `correlation=NULL`
#'    and `nrow(imatrix) > max_correlation_rows`. In this scenario,
#'    a random subset of rows are used to calculate `correlation`,
#'    then that `correlation` is used for `limma::lmFit()`. This
#'    process is intended to help very large data volumes, where
#'    the speed of `limma::duplicateCorrelation()` is impacted in
#'    quadratic manner by increasing number of rows, while also not
#'    improving the summary correlation value.
#' @param normgroup `character` or `factor` vector with length
#'    `ncol(se)` or `length(isamples)` when `isamples` is defined.
#'    Values define independent normalization groups, which performs
#'    independent analyses within each unique normalization group.
#'    This option is intended for convenience, enabling separate
#'    variance models for each normalization group, which is
#'    appropriate when analyzing very different sample types.
#'    During limma model fit, all samples in all groups are used by default,
#'    which may incorrectly estimate variance when the variability
#'    by row is not uniform across different sample types.
#'    When `normgroup=NULL` the default is to assume all samples are in
#'    the same `normgroup="bulk"`.
#'    Each subset of samples begins with the same `sedesign`, `idesign`,
#'    `icontrast`, however they are fed into `validate_sestats()` to
#'    subset the appropriate contrasts to use only samples within the
#'    current normgroup. As a result, any contrasts that span two
#'    normgroups will be removed, and will not appear in the output.
#'
#'    The argument can be supplied as one of the following:
#'    * `character` with one or more `colnames(colData(se))`
#'    * `character` vector named using values in `isamples` and/or
#'    `colnames(se)`
#'    * `character` vector without names, whose length must equal
#'    `length(isamples)` or when `isamples` is not provided,
#'    `ncol(se)`.
#' @param do_normgroups `logical` whether to enable normgroup processing,
#'    or to use the previous technique that kept all samples together.
#'    This argument may be removed in future, with recommendation to use
#'    `normgroup=NULL` to disable normalization group processing.
#'    Note that when `normgroup=NULL` the output should be identical
#'    with `do_normgroups=TRUE` and `do_normgroups=FALSE`.
#' @param seed `numeric` used to set a random seed with `set.seed()` for
#'    reproducibility. Use `seed=NULL` to avoid setting a random seed.
#'    Note this action will only affect downstream functions that
#'    employ some form of randomization.
#' @param verbose `logical` indicating whether to print verbose output.
#' @param ... additional arguments are ignored.
#'
#' @family jamses stats
#'
#' @examples
#' set.seed(123)
#' expr <- rnorm(20) + 7;
#' noise <- rnorm(120) / 5;
#' fold <- rnorm(20) / 2.5;
#' m <- matrix(expr + noise, ncol=6);
#' for (i in 4:6) {
#'    m[,i] <- m[,i] + fold;
#' }
#' # m <- matrix(rnorm(120)/2 + 5, ncol=6);
#' colnames(m) <- paste0(rep(c("Vehicle", "Treated"), each=3), 1:3)
#' rownames(m) <- paste0("row", seq_len(20))
#' # simulate some "hits"
#' m[1, 4:6] <- m[1, 4:6] + 2
#' m[2, 4:6] <- m[2, 4:6] + 1.5
#' m[3, 4:6] <- m[3, 4:6] - 1.3
#' # create SummarizedExperiment
#' se <- SummarizedExperiment::SummarizedExperiment(
#'    assays=list(counts=m),
#'    colData=data.frame(sample=colnames(m),
#'    group=factor(rep(c("Vehicle", "Treated"), each=3),
#'       c("Vehicle", "Treated"))),
#'    rowData=data.frame(measurement=rownames(m)))
#'
#' # assign colors
#' sample_color_list <- platjam::design2colors(se)
#'
#' # heatmap
#' heatmap_se(se, sample_color_list=sample_color_list)
#'
#' # create SEDesign
#' sedesign <- groups_to_sedesign(se, group_colnames="group")
#'
#' # plot the design and contrasts
#' plot_sedesign(sedesign)
#'
#' # limma contrasts
#' sestats <- se_contrast_stats(se=se,
#'    sedesign=sedesign,
#'    assay_names="counts")
#'
#' # print data.frame summary of hits
#' sestats_to_df(sestats)
#'
#' # plot the design with number of hits labeled
#' plot_sedesign(sedesign, sestats=sestats)
#'
#' # heatmap with hits
#' heatmap_se(se,
#'    sample_color_list=sample_color_list,
#'    sestats=sestats)
#'
#' # review stats table
#' stats_df <- sestats$stats_dfs$counts[["Treated-Vehicle"]]
#' head(stats_df)
#'
#' # volcano plot for one contrast
#' jamma::volcano_plot(stats_df)
#'
#' ###############################
#' # simulate a two-way model
#' adjust <- rnorm(20);
#' new_fold <- rnorm(20);
#' se2 <- cbind(se, se)
#' groups2 <- paste0(rep(c("WT", "KO"), each=6),
#'    "_",
#'    SummarizedExperiment::colData(se)$group)
#' groups2 <- factor(groups2, levels=unique(groups2))
#' colnames(se2) <- paste0(groups2, 1:3)
#' SummarizedExperiment::colData(se2)$sample <- colnames(se2);
#' SummarizedExperiment::colData(se2)$group <- groups2;
#' SummarizedExperiment::colData(se2)$genotype <- jamba::gsubOrdered("_.+", "",
#'    SummarizedExperiment::colData(se2)$group)
#' SummarizedExperiment::colData(se2)$treatment <- jamba::gsubOrdered("^.+_", "",
#'    SummarizedExperiment::colData(se2)$group)
#' SummarizedExperiment::assays(se2)$counts[,7:12] <- (
#'    SummarizedExperiment::assays(se2)$counts[,7:12] + adjust)
#' SummarizedExperiment::assays(se2)$counts[,10:12] <- (
#'    SummarizedExperiment::assays(se2)$counts[,10:12] + new_fold)
#'
#' # assign colors
#' sample_color_list2 <- platjam::design2colors(se2,
#'    class_colnames="genotype",
#'    color_sub=c(
#'       Vehicle="palegoldenrod",
#'       Treated="firebrick4"),
#'    group_colnames="group")
#'
#' # heatmap
#' hm2a <- heatmap_se(se2,
#'    sample_color_list=sample_color_list2,
#'    controlSamples=1:3,
#'    center_label="versus WT_Vehicle",
#'    column_title_rot=60,
#'    row_cex=0.4,
#'    column_cex=0.5,
#'    column_title_gp=grid::gpar(fontsize=8),
#'    column_split="group")
#' ComplexHeatmap::draw(hm2a, column_title=attr(hm2a, "hm_title"), merge_legends=TRUE)
#'
#' # heatmap centered by genotype
#' hm2b <- heatmap_se(se2,
#'    sample_color_list=sample_color_list2,
#'    controlSamples=c(1:3, 7:9),
#'    control_label="versus vehicle",
#'    column_gap=grid::unit(c(1, 3, 1), "mm"),
#'    centerby_colnames="genotype",
#'    column_title_rot=60,
#'    row_cex=0.4,
#'    column_cex=0.5,
#'    column_title_gp=grid::gpar(fontsize=8),
#'    column_split="group")
#' ComplexHeatmap::draw(hm2b, column_title=attr(hm2b, "hm_title"), merge_legends=TRUE)
#'
#' # create SEDesign
#' sedesign2 <- groups_to_sedesign(se2, group_colnames="group")
#'
#' # plot the design and contrasts
#' plot_sedesign(sedesign2, label_cex=0.7)
#' # note the two-way contrast can be flipped
#' plot_sedesign(sedesign2, flip_twoway=TRUE, label_cex=0.7)
#'
#' # limma contrasts
#' sestats2 <- se_contrast_stats(se=se2,
#'    sedesign=sedesign2,
#'    assay_names="counts")
#'
#' # print data.frame summary of hits
#' sestats_to_df(sestats2)
#'
#' # plot the design with number of hits labeled
#' plot_sedesign(sedesign2, sestats=sestats2, label_cex=0.7)
#'
#' # heatmap with hits
#' hm2hits <- heatmap_se(se2,
#'    sample_color_list=sample_color_list2,
#'    sestats=sestats2,
#'    controlSamples=c(1:3, 7:9),
#'    column_gap=grid::unit(c(1, 3, 1), "mm"),
#'    control_label="versus vehicle",
#'    centerby_colnames="genotype",
#'    row_split=3,
#'    row_cex=0.4,
#'    column_title_rot=60,
#'    column_cex=0.5,
#'    column_title_gp=grid::gpar(fontsize=8),
#'    column_split="group")
#' ComplexHeatmap::draw(hm2hits,
#'    column_title=attr(hm2hits, "hm_title"),
#'    merge_legends=TRUE)
#'
#' # venn diagram
#' hit_list <- hit_array_to_list(sestats2,
#'    contrast_names=c(1:2))
#' jamba::sdim(hit_list);
#' # names(hit_list) <- gsub(":", ",<br>\n", contrast2comp(names(hit_list)))
#' # vo <- venndir::venndir(hit_list, expand_fraction=0.1)
#' # venndir::venndir_legender(venndir_out=vo, setlist=hit_list)
#'
#' # vo <- venndir::venndir(hit_list, expand_fraction=0.1, proportional=TRUE)
#' # venndir::venndir_legender(venndir_out=vo, setlist=hit_list)
#'
#' @export
se_contrast_stats <- function
(se,
 assay_names,
 adjp_cutoff=0.05,
 p_cutoff=NULL,
 fold_cutoff=1.5,
 int_adjp_cutoff=adjp_cutoff,
 int_p_cutoff=p_cutoff,
 int_fold_cutoff=fold_cutoff,
 mgm_cutoff=NULL,
 ave_cutoff=NULL,
 confint=FALSE,
 floor_min=NULL,
 floor_value=NULL,
 sedesign=NULL,
 icontrasts=NULL,
 idesign=NULL,
 igenes=NULL,
 isamples=NULL,
 enforce_design=TRUE,
 use_voom=FALSE,
 voom_block_twostep=TRUE,
 posthoc_test=c("none",
    "DEqMS"),
 posthoc_args=list(DEqMS=list(
    PSM_counts=NULL,
    fit.method="loess")),
 weights=NULL,
 robust=FALSE,
 handle_na=c("full1",
    "full",
    "partial",
    "all",
    "none"),
 na_value=0,
 rowData_colnames=c("SYMBOL"),
 collapse_by_gene=FALSE,
 block=NULL,
 correlation=NULL,
 max_correlation_rows=10000,
 normgroup=NULL,
 do_normgroups=TRUE,
 seed=123,
 verbose=FALSE,
 ...)
{
   handle_na <- match.arg(handle_na);
   posthoc_test <- match.arg(posthoc_test);

   ## Validate input parameters
   isamples_was_assigned <- FALSE;
   if (length(isamples) == 0) {
      isamples <- colnames(se);
      isamples_was_assigned <- TRUE;
   }
   if (any(!isamples %in% colnames(se))) {
      stop("not all values in isamples are present in colnames(se)");
   }
   # isamples <- intersect(isamples, colnames(se));
   if (length(igenes) == 0) {
      igenes <- rownames(se);
   }
   igenes <- intersect(igenes, rownames(se));

   ####################################
   ## normgroup validation
   if (length(normgroup) == 0) {
      if (verbose) {
         jamba::printDebug("se_contrast_stats(): ",
            "normgroup defined 'bulk' for all samples");
      }
      normgroup <- jamba::nameVector(rep("bulk", length(isamples)),
         isamples);
   } else if (all(normgroup %in% colnames(SummarizedExperiment::colData(se)))) {
      if (verbose) {
         jamba::printDebug("se_contrast_stats(): ",
            "normgroup defined by matching colData(se) colnames");
      }
      normgroup <- jamba::nameVector(
         jamba::pasteByRowOrdered(
            data.frame(check.names=FALSE,
               SummarizedExperiment::colData(se[, isamples])[,normgroup, drop=FALSE])),
         isamples);
   } else if (length(names(normgroup)) == 0) {
      if (length(normgroup) == length(isamples)) {
         names(normgroup) <- isamples;
         # issue a warning
         if (isamples_was_assigned) {
            cli::cli_alert_info(paste0(
               "Note: {.var names(normgroup)} were assigned using colnames(se)",
               "in the order defined by {.var colnames(se)}. ",
               "Consider supplying names(normgroup) in future."));
         } else {
            cli::cli_alert_info(paste0(
               "Note: {.var names(normgroup)} were assigned using isamples",
               "in the order defined by argument {.var isamples}. ",
               "Consider supplying names(normgroup) in future."));
         }
      } else {
         stop("normgroup supplied as a vector must contain names(normgroup) which match isamples.");
      }
   } else if (!all(isamples %in% names(normgroup))) {
      stop("not all isamples are contained in names(normgroup)");
   } else {
      # order normgroup by isamples
      if (verbose) {
         jamba::printDebug("se_contrast_stats(): ",
            "names(normgroup) was matched to isamples");
      }
   }
   if (length(normgroup) > 0) {
      normgroup <- normgroup[isamples];
   }
   if (any(is.na(normgroup))) {
      cli::cli_abort(paste0(
         "{.var normgroup} must not contain NA values."));
      stop("normgroup must not contain NA values.");
   }
   if (!all(isamples %in% names(normgroup))) {
      stop("not all isamples are present in names(normgroup)");
   }

   ####################################
   ## validate block
   if (length(block) > 0) {
      if (all(block %in% colnames(SummarizedExperiment::colData(se)))) {
         if (verbose) {
            jamba::printDebug("se_contrast_stats(): ",
               "block defined by matching colData(se) colnames");
         }
         block <- jamba::nameVector(
            jamba::pasteByRowOrdered(
               data.frame(check.names=FALSE,
                  SummarizedExperiment::colData(se[,isamples])[,block, drop=FALSE])),
            isamples);
      } else if (length(names(block)) == 0) {
         if (length(block) == length(isamples)) {
            names(block) <- isamples;
            # issue a warning
            if (isamples_was_assigned) {
               cli::cli_alert_info(paste0(
                  "Note: {.var names(block)} were assigned using colnames(se)",
                  "in the order defined by {.var colnames(se)}. ",
                  "Consider supplying names(block) in future."));
            } else {
               cli::cli_alert_info(paste0(
                  "Note: {.var names(block)} were assigned using isamples",
                  "in the order defined by argument {.var isamples}. ",
                  "Consider supplying names(block) in future."));
            }
         } else {
            stop("block supplied as a vector must contain names(block) which match isamples.");
         }
      } else if (!all(isamples %in% names(block))) {
         cli::cli_abort(paste0(
            "{.var isamples} must all exist in {.var names(block)}."));
         stop("isamples must all exist in names(block)");
      } else {
         # block with names: all isamples must be found in names(block)
         if (!all(isamples %in% names(block))) {
            cli::cli_abort(paste0(
               "{.var isamples} must all exist in {.var names(block)}."));
            stop("isamples must all exist in names(block)");
         }
         # synchronize block order using isamples
         if (verbose) {
            jamba::printDebug("se_contrast_stats(): ",
               "names(block) was matched to isamples");
         }
      }
   }
   if (length(block) > 0) {
      block <- block[isamples];
      if (any(is.na(block))) {
         cli::cli_abort(paste0(
            "{.var block} must not contain NA values."));
         stop("block must not contain NA values.");
      }
   }

   ##################################
   # validate sedesign input
   # - use idesign,icontrasts only when sedesign is not provided
   if (length(sedesign) > 0 && "SEDesign" %in% class(sedesign)) {
      if (length(idesign) > 0 || length(icontrasts) > 0) {
         warning(paste0("Note when supplying sedesign,",
            "idesign and icontrasts are ignored."))
      }
      if (length(isamples) > 0) {
         if (verbose) {
            jamba::printDebug("se_contrast_stats(): ",
               "validate_sedesign()");
         }
         sedesign <- validate_sedesign(sedesign,
            samples=isamples,
            verbose=verbose);
      }
      isamples <- sedesign@samples;
      idesign <- sedesign@design;
      icontrasts <- sedesign@contrasts;
   } else {
      if (length(idesign) == 0) {
         stop("idesign must be defined.");
      }
      isamples <- intersect(isamples, rownames(idesign));
      idesign <- idesign[match(isamples, rownames(idesign)),,drop=FALSE];
      if (length(rownames(idesign)) == 0) {
         stop("rownames(idesign) must contain values matching colnames(se).");
      }
      icontrasts <- icontrasts[match(rownames(icontrasts), colnames(idesign)),,drop=FALSE];
      if (length(rownames(icontrasts)) == 0) {
         stop("rownames(icontrasts) must match values in colnames(idesign).");
      }
   }

   # confirm normgroup,block order matches isamples
   if (length(normgroup) > 0) {
      normgroup <- normgroup[isamples];
   }
   if (length(block) > 0) {
      block <- block[isamples];
   }

   # isamples_normgroup_list is a list separated by normgroup,
   # each vector will be analyzed independently
   isamples_normgroup_list <- split(isamples, normgroup[isamples]);


   ## prepare optional gene_df data.frame
   rowData_df <- NULL;
   rowData_colnames <- intersect(rowData_colnames,
      colnames(SummarizedExperiment::rowData(se)));
   if (length(rowData_colnames) > 0) {
      rowData_df <- data.frame(check.names=FALSE,
         stringsAsFactors=FALSE,
         probes=igenes,
         data.frame(check.names=FALSE,
            SummarizedExperiment::rowData(
               se[igenes, ])[, rowData_colnames, drop=FALSE])
      )
   }

   ## Iterate each assay_name
   ## Run statistical tests for gene data
   stats_hits_dfs1 <- lapply(jamba::nameVector(assay_names), function(signalSet) {
      retVals <- list();
      imatrix <- SummarizedExperiment::assays(se[igenes, isamples])[[signalSet]];
      if (length(imatrix) == 0) {
         return(NULL)
      }
      if (verbose) {
         jamba::printDebug("se_contrast_stats(): ",
            "Analyzing assay_name: ", signalSet);
      }

      #######################################################
      ## Optionally convert values at or below floor_min
      ## to floor_value.
      # - TODO: Stress test this section with sparse data
      #   to ensure this section does not break two-step Voom
      #   by creating missing data.
      if (length(floor_min) == 1 &&
            !is.na(floor_min) &&
            any(!is.na(imatrix) &
                  imatrix <= floor_min)) {
         if (verbose) {
            jamba::printDebug("se_contrast_stats(): ",
               c("Applying floor_min:",
                  floor_min,
                  ", replacing with floor_value:",
                  floor_value),
               sep="");
         }
         to_replace <- (!is.na(imatrix) & imatrix <= floor_min)
         imatrix[to_replace] <- floor_value;
      }

      # iterate each normgroup independently
      if (TRUE %in% do_normgroups) {
         normgroup_stats <- lapply(jamba::nameVectorN(isamples_normgroup_list), function(normgroup_name) {
            normgroup_samples <- isamples_normgroup_list[[normgroup_name]];
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Analyzing normgroup_name: ", normgroup_name);
            }
            # new imatrix_ng only uses samples in this normgroup
            imatrix_ng <- imatrix[,normgroup_samples, drop=FALSE];
            sestats_ng <- validate_sedesign(
               new("SEDesign",
                  design=idesign,
                  contrasts=icontrasts),
               samples=normgroup_samples);
            icontrasts_ng <- sestats_ng@contrasts;
            idesign_ng <- sestats_ng@design;
            if (length(icontrasts_ng) == 0 || ncol(icontrasts_ng) == 0 || nrow(icontrasts_ng) == 0) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   No contrasts were defined for this normgroup_name.",
                     fgText=c("darkorange2", "red3"));
               }
               return(NULL)
            }

            if (!"none" %in% handle_na && any(is.na(imatrix_ng))) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   Performing handle_na: ", handle_na);
               }
               imatrix_ng <- handle_na_values(imatrix_ng,
                  idesign=idesign_ng,
                  handle_na=handle_na,
                  na_value=na_value,
                  ...)[, normgroup_samples, drop=FALSE];
               colnames(imatrix_ng) <- normgroup_samples;
            }
            ## Optionally determine voom weights prior to running limma
            if (use_voom) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   Determining Voom weight matrix (within probe reps).");
               }
               imatrix_v <- voom_jam((2^imatrix_ng)-1,
                  design=idesign_ng,
                  normalize.method="none",
                  plot=FALSE,
                  verbose=verbose,
                  ...);
               weights <- imatrix_v$weights;
               rownames(weights) <- rownames(imatrix_ng);
               colnames(weights) <- colnames(imatrix_ng);
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   Determined Voom weight matrix.");
               }
            }

            #######################################################
            ## Calculate correlation when necessary
            ## - block is defined but not correlation, calculate correlation
            ## - Note that voom weights are included here if they were calculated
            calculate_correlation <- FALSE;
            use_block <- NULL;
            if (length(block) > 0) {
               use_block <- block[normgroup_samples];
            }
            if (length(unique(use_block)) > 1 &&
                  length(correlation) == 0) {
               calculate_correlation <- TRUE;
            }
            if (length(unique(use_block)) <= 1) {
               use_block <- NULL;
            }
            if (TRUE %in% calculate_correlation) {
               # use a random subset of rows to calculate correlation
               if (length(max_correlation_rows) == 1 &&
                     max_correlation_rows > 0 &&
                     nrow(imatrix_ng) > max_correlation_rows) {
                  if (length(seed) > 0) {
                     set.seed(head(seed, 1))
                  }
                  k <- sample(seq_len(nrow(imatrix_ng)),
                     size=max_correlation_rows)
               } else {
                  k <- seq_len(nrow(imatrix_ng))
               }
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "Calculating correlation using ",
                     jamba::formatInt(length(k)),
                     " rows.");
               }
               dupcor <- limma::duplicateCorrelation(
                  object=imatrix_ng[k, , drop=FALSE],
                  design=idesign_ng,
                  weights=weights[k, , drop=FALSE],
                  block=use_block)
               correlation <- dupcor$consensus;
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "correlation: ", correlation);
               }
            }

            #######################################################
            ## Re-calculate voom weights only when block is defined
            ## - Note it uses voom weights, and correlation
            if (length(unique(use_block)) > 1 &&
                  length(correlation) > 0 &&
                  TRUE %in% voom_block_twostep &&
                  TRUE %in% use_voom) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   Re-calculating Voom weight matrix (with block).");
               }
               imatrix_v <- voom_jam((2^imatrix_ng)-1,
                  design=idesign_ng,
                  normalize.method="none",
                  plot=FALSE,
                  block=use_block,
                  correlation=correlation,
                  weights=weights,
                  verbose=verbose,
                  ...);
               weights <- imatrix_v$weights;
               rownames(weights) <- rownames(imatrix_ng);
               colnames(weights) <- colnames(imatrix_ng);
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "   Determined Voom weight matrix.");
               }
               # Now optionally re-calculate correlation
               if (TRUE %in% calculate_correlation) {
                  if (verbose) {
                     jamba::printDebug("se_contrast_stats(): ",
                        "Re-calculating correlation using updated Voom weights, ",
                        jamba::formatInt(length(k)),
                        " rows.");
                  }
                  dupcor <- jamba::call_fn_ellipsis(
                     limma::duplicateCorrelation,
                     object=imatrix_ng[k, , drop=FALSE],
                     design=idesign_ng,
                     weights=weights[k, , drop=FALSE],
                     block=use_block,
                     ...)
                  correlation <- dupcor$consensus;
                  if (verbose) {
                     jamba::printDebug("se_contrast_stats(): ",
                        "Updated correlation: ", correlation);
                  }
               }
            }

            #######################################################
            ## Optionally convert zero (or less than zero) to NA
            # - TODO: Stress test this section with sparse data
            #   to ensure this section does not break two-step Voom
            #   by creating missing data.
            if (length(floor_min) == 1 &&
                  !is.na(floor_min) &&
                  any(!is.na(imatrix_ng) &
                        imatrix_ng <= floor_min)) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     c("Applying floor_min:",
                        floor_min,
                        ", replacing with floor_value:",
                        floor_value),
                     sep="");
               }
               to_replace <- (!is.na(imatrix_ng) & imatrix_ng <= floor_min)
               imatrix_ng[to_replace] <- floor_value;
            }

            #######################################################
            ## Run limma
            ## - future option to call DESeq2 equivalent steps
            rlr_result_ng <- run_limma_replicate(imatrix=imatrix_ng,
               idesign=idesign_ng,
               icontrasts=icontrasts_ng,
               weights=weights,
               robust=robust,
               verbose=verbose,
               adjp_cutoff=adjp_cutoff,
               p_cutoff=p_cutoff,
               fold_cutoff=fold_cutoff,
               mgm_cutoff=mgm_cutoff,
               int_adjp_cutoff=int_adjp_cutoff,
               int_p_cutoff=int_p_cutoff,
               int_fold_cutoff=int_fold_cutoff,
               ave_cutoff=ave_cutoff,
               rowData_df=rowData_df,
               collapse_by_gene=collapse_by_gene,
               block=use_block,
               correlation=correlation,
               posthoc_test=posthoc_test,
               posthoc_args=posthoc_args,
               ...);
            return(rlr_result_ng);
         });
         # end normgroup processing
         # now assemble normgroup_stats into rlr_result as before
         rlr_result_stats_dfs <- unlist(recursive=FALSE,
            lapply(names(normgroup_stats), function(rlr_name){
               rlr <- normgroup_stats[[rlr_name]];
               rlr$stats_dfs;
            }));
         if (verbose > 1) {
            jamba::printDebug("se_contrast_stats(): ",
               "ssdim(normgroup_stats):");
            print(jamba::ssdim(normgroup_stats));
         }
         rlr_result_stats_df_colnames <- unique(unlist(lapply(rlr_result_stats_dfs, colnames)));
         if (verbose > 1) {
            jamba::printDebug("se_contrast_stats(): ",
               "sdim(rlr_result_stats_dfs):");
            print(jamba::sdim(rlr_result_stats_dfs));
            # print(head(head(se_contrast_stats, 1)[[1]], 3));
            # print(head(tail(se_contrast_stats, 1)[[1]], 3));
         }
         rlr_result_stats_df <- jamba::mergeAllXY(rlr_result_stats_dfs)
         # rlr_result_stats_df <- rlr_result_stats_df[,rlr_result_stats_df_colnames, drop=FALSE];
         rlr_result <- list(
            stats_df=rlr_result_stats_df,
            stats_dfs=rlr_result_stats_dfs,
            rep_fits=lapply(normgroup_stats, function(rlr){rlr$rep_fits})
         )
      } else {
         if (!"none" %in% handle_na && any(is.na(imatrix))) {
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Performing handle_na: ", handle_na);
            }
            imatrix <- handle_na_values(imatrix[, isamples, drop=FALSE],
               idesign=idesign,
               handle_na=handle_na,
               na_value=na_value,
               ...)[, isamples, drop=FALSE];
            colnames(imatrix) <- isamples;
         }

         #######################################################
         ## Determine voom weights prior to running limma
         ## - note no blocking factor is included here
         if (use_voom) {
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Determining Voom weight matrix (within probe reps).");
            }
            imatrix_v <- voom_jam((2^imatrix)-1,
               design=idesign,
               normalize.method="none",
               plot=FALSE,
               verbose=verbose,
               ...);
            weights <- imatrix_v$weights;
            rownames(weights) <- rownames(imatrix);
            colnames(weights) <- colnames(imatrix);
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Determined Voom weight matrix.");
            }
         }

         #######################################################
         ## Optionally convert zero (or less than zero) to NA
         # - TODO: Stress test this section with sparse data
         #   to ensure this section does not break two-step Voom
         #   by creating missing data.
         if (length(floor_min) == 1 &&
               !is.na(floor_min) &&
               any(!is.na(imatrix) &
                     imatrix <= floor_min)) {
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  c("Applying floor_min:",
                     floor_min,
                     ", replacing with floor_value:",
                     floor_value),
                  sep="");
            }
            to_replace <- (!is.na(imatrix) & imatrix <= floor_min);
            imatrix[to_replace] <- floor_value;
         }

         #######################################################
         ## Calculate correlation when necessary
         ## - block is defined but not correlation, calculate correlation
         ## - Note that voom weights are included here if they were calculated
         calculate_correlation <- FALSE;
         if (length(unique(block)) > 1 &&
               length(correlation) == 0) {
            calculate_correlation <- TRUE;
         }
         if (TRUE %in% calculate_correlation) {
            # use a random subset of rows to calculate correlation
            if (length(max_correlation_rows) == 1 &&
                  max_correlation_rows > 0 &&
                  nrow(imatrix) > max_correlation_rows) {
               if (length(seed) > 0) {
                  set.seed(head(seed, 1))
               }
               k <- sample(seq_len(nrow(imatrix)),
                  size=max_correlation_rows)
            } else {
               k <- seq_len(nrow(imatrix))
            }
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "Calculating correlation using ",
                  jamba::formatInt(length(k)),
                  " rows.");
            }
            dupcor <- jamba::call_fn_ellipsis(
               limma::duplicateCorrelation,
               object=imatrix[k, , drop=FALSE],
               design=design,
               weights=weights[k, , drop=FALSE],
               block=block,
               ...)
            correlation <- dupcor$consensus;
         }

         #######################################################
         ## Re-calculate voom weights only when block is defined
         ## - Note it uses voom weights, and correlation
         if (length(unique(block)) > 1 &&
               TRUE %in% voom_block_twostep &&
               TRUE %in% use_voom) {
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Re-calculating Voom weight matrix (with block).");
            }
            imatrix_v <- voom_jam((2^imatrix)-1,
               design=idesign,
               normalize.method="none",
               plot=FALSE,
               block=block,
               correlation=correlation,
               weights=weights,
               verbose=verbose,
               ...);
            weights <- imatrix_v$weights;
            rownames(weights) <- rownames(imatrix);
            colnames(weights) <- colnames(imatrix);
            if (verbose) {
               jamba::printDebug("se_contrast_stats(): ",
                  "   Determined Voom weight matrix.");
            }
            # Now optionally re-calculate correlation
            if (TRUE %in% calculate_correlation) {
               if (verbose) {
                  jamba::printDebug("se_contrast_stats(): ",
                     "Re-calculating correlation using updated Voom weights, ",
                     jamba::formatInt(length(k)),
                     " rows.");
               }
               dupcor <- jamba::call_fn_ellipsis(
                  limma::duplicateCorrelation,
                  object=imatrix[k, , drop=FALSE],
                  design=design,
                  weights=weights[k, , drop=FALSE],
                  block=block,
                  ...)
               correlation <- dupcor$consensus;
            }
         }

         #######################################################
         ## Run limma
         ## - future option to call DESeq2 equivalent steps
         rlr_result <- run_limma_replicate(imatrix=imatrix,
            idesign=idesign,
            icontrasts=icontrasts,
            weights=weights,
            robust=robust,
            verbose=verbose,
            adjp_cutoff=adjp_cutoff,
            p_cutoff=p_cutoff,
            fold_cutoff=fold_cutoff,
            mgm_cutoff=mgm_cutoff,
            int_adjp_cutoff=int_adjp_cutoff,
            int_p_cutoff=int_p_cutoff,
            int_fold_cutoff=int_fold_cutoff,
            ave_cutoff=ave_cutoff,
            rowData_df=rowData_df,
            collapse_by_gene=collapse_by_gene,
            block=block,
            correlation=correlation,
            posthoc_test=posthoc_test,
            posthoc_args=posthoc_args,
            ...);
      }
      ## rlr_result is a list
      ## - statsDF
      ## - statsDFs
      ## - repFits=list(subFit1,subFit2,subFit3)
      return(rlr_result);
   });

   ## Assemble list of statsDF
   stats_df <- lapply(jamba::rmNULL(stats_hits_dfs1), function(i){
      i$stats_df;
   });
   ret_list <- list(stats_df=stats_df);

   ## Assemble list of statsDFs
   stats_dfs <- lapply(stats_hits_dfs1, function(i){
      i$stats_dfs;
   });
   ret_list$stats_dfs <- stats_dfs;

   ## list of named lists
   stats_hits <- lapply(stats_dfs, function(iDFs){
      lapply(iDFs, function(iDF){
         iHitCols <- jamba::nameVector(
            jamba::provigrep("^hit[ .]", colnames(iDF)));
         if (verbose) {
            jamba::printDebug("se_contrast_stats(): ",
               "iHitCols:", iHitCols);
         }
         lapply(iHitCols, function(iHitCol){
            iHitRows <- (!is.na(iDF[,iHitCol]) & iDF[,iHitCol] != 0);
            jamba::nameVector(
               iDF[iHitRows,iHitCol],
               rownames(iDF)[iHitRows]);
         });
      });
   });
   if (verbose > 1) {
      jamba::printDebug("se_contrast_stats(): ",
         "ssdim(stats_hits[[1]]):");
      print(jamba::ssdim(stats_hits[[1]]));
   }

   ## array of named lists
   all_cutoffs_list <- lapply(stats_hits[[1]], function(i){
      gsub("[ ]+[^ ]+$", "", names(i))
   })
   all_cutoffs <- unique(unlist(all_cutoffs_list));
   arrayDim <- c(length(all_cutoffs),
      length(stats_hits[[1]]),
      length(stats_hits));
   arrayDimnames <- list(all_cutoffs,
      names(stats_hits[[1]]),
      names(stats_hits));
   names(arrayDimnames) <- c("Cutoffs",
      "Contrasts",
      "Signal");
   if (verbose) {
      jamba::printDebug("se_contrast_stats(): ",
         "arrayDim:",
         arrayDim);
   }

   # assemble hit_array
   # - allow for missing entries
   # first define the array indices
   kji <- jamba::rbindList(lapply(names(stats_hits), function(i){
      jamba::rbindList(lapply(names(stats_hits[[i]]), function(j){
         jamba::rbindList(lapply(names(stats_hits[[i]][[j]]), function(k){
            k1 <- gsub("[ ][^ ]+$", "", k);
            kji <- cbind(match(k1, arrayDimnames[[1]]),
               match(j, arrayDimnames[[2]]),
               match(i, arrayDimnames[[3]]))
            kji
         }))
      }))
   }))
   hit_array <- array(dim=arrayDim,
      dimnames=arrayDimnames);
   # fill data into the array, which somehow converts it into a list
   # - thanks R
   hit_array[kji] <- unlist(recursive=FALSE,
      unlist(recursive=FALSE,
         stats_hits))
   # create the array again using the list data
   hit_array <- array(dim=arrayDim,
      data=hit_array,
      dimnames=arrayDimnames);

   ret_list$hit_array <- hit_array;
   ret_list$hit_list <- stats_hits;

   ## Add design and contrast data used
   ret_list$idesign <- idesign;
   ret_list$icontrasts <- icontrasts;
   if (length(normgroup) > 0 && TRUE %in% do_normgroups) {
      ret_list$normgroup <- normgroup;
   }

   return(ret_list);
}


#' Handle NA values in a numeric matrix
#'
#' Handle NA values in a numeric matrix
#'
#' This function provides reasonable alternatives intended to
#' manage the presence of missing data encoded as `NA` values
#' in a numeric matrix.
#'
#' The alternatives are defined by argument `handle_na`:
#'
#' * `"full"`: Retain `NA` values, except when an entire group is `NA`
#' it is replaced with `na_value`. This option is intended for sparse data
#' where non-NA values are accepted as real measurements
#' for each group, and where a group with all NA values should be
#' retained for statistical contrasts by assigning `na_value`.
#' This method essentially keeps the data as-is, except when groups
#' are otherwise entirely `NA` the value `na_value` is used in
#' order to retain any relevant contrasts that involve this group.
#' * `"full1"`: Similar to `"full"`, retain `NA` values, except
#' when an entire group is `NA`, then replace only one entry
#' with `na_value`. This option is intended to keep data as-is,
#' except to retain groups that are otherwise entirely `NA`.
#' In these groups, only one `na_value` is used in order to
#' prevent the group from contributing toward group variability
#' or dispersion calculations.
#' * `"partial"`: Replace `NA` values with `na_value`, except
#' when an entire group is `NA` the entire group is kept at `NA`.
#' * `"all"`: Replace all `NA` values with `na_value`.
#' * `"none"`: Perform no replacement of `NA` values.
#'
#' @return `numeric` matrix with equal dimensions as input `x`,
#'    where `NA` values have been handled as defined by `handle_na`.
#'
#' @family jamses stats
#'
#' @param x `numeric` matrix
#' @param idesign `numeric` matrix with `rownames(idesign)` equal to
#'    `colnames(x)`, containing `0` or `1` to fill the design matrix.
#' @param handle_na `character` string to determine the method used
#'    to handle NA values in `x`.
#' @param na_value `numeric` or `NA` used to handle NA values.
#' @param na_weight `numeric` weight between `0` and `1` used for `NA`
#'    values in the weight matrix, used when `return_weights=TRUE`.
#' @param return_weights `logical` indicating whether to include a
#'    weight matrix as an attribute with name `"weights"`.
#' @param verbose `logical` indicating whether to print verbose output.
#' @param ... additional arguments are ignored.
#'
#'
#' @export
handle_na_values <- function
(x,
 idesign,
 handle_na=c("full1",
    "full",
    "partial",
    "none",
    "all"),
 na_value=0,
 na_weight=0,
 return_weights=FALSE,
 verbose=FALSE,
 ...)
{
   handle_na <- match.arg(handle_na);
   xNA <- is.na(x);
   groupL <- multienrichjam::im2list(idesign);
   groupV <- jamba::nameVector(
      rep(names(groupL),
         lengths(groupL)),
      unlist(groupL));
   # revert to input order
   groupV <- groupV[match(rownames(idesign), names(groupV))];
   if ("partial" %in% handle_na) {
      ## We replace NA with zero, except when an entire
      ## group is NA, then we leave it as NA
      # x[xNA] <- 0;
      if (verbose) {
         jamba::printDebug("handle_na_values(): ",
            c("Filling singlet NA with ",
               NAvalue,
               ", leaving full group NA as-is."),
            sep="");
      }
      x <- jamba::rowGroupMeans(x[,names(groupV),drop=FALSE],
         # groups=rep(names(groupL), lengths(groupL)),
         groups=groupV,
         rowStatsFunc=function(x,...){
            x1 <- x;
            x1[is.na(x)] <- na_value;
            # changed from rowMins()
            x1[apply(is.na(x)*1, 1, min) == 1,] <- NA;
            x1;
         });
   } else if ("full" %in% handle_na) {
      ## We replace NA with na_value only when an entire group is NA
      ## in order to retain the group in final results.
      ## Otherwise, partial NA values within a group are kept NA.
      if (verbose) {
         jamba::printDebug("handle_na_values(): ",
            c("Leaving singlet NA as-is, filling full-group NA with ",
               na_value),
            sep="");
      }
      x <- jamba::rowGroupMeans(x[,names(groupV),drop=FALSE],
         # groups=rep(names(groupL), lengths(groupL)),
         groups=groupV,
         rowStatsFunc=function(x,...){
            x1 <- x;
            x1[is.na(x)] <- NA;
            # changed from rowMins()
            # x1[rowMins(is.na(x)*1) == 1, ] <- na_value;
            x1[apply(is.na(x)*1, 1, min) == 1, ] <- NA;
            x1;
         });
   } else if ("full1" %in% handle_na) {
      ## Same as "full" in that this method replaces NA only
      ## when an entire group contains NA values, however, it
      ## only replaces one entry with na_value in order to
      ## prevent the value from contributing toward estimate of
      ## group variability.
      if (verbose) {
         jamba::printDebug("handle_na_values(): ",
            c("Filling only full-group NA with one instance of ",
               na_value,
               " per group."),
            sep="");
      }
      ## 0.0.60.900 - slightly modified to avoid copying data x to x1
      x <- jamba::rowGroupMeans(x[,names(groupV),drop=FALSE],
         # groups=rep(names(groupL), lengths(groupL)),
         groups=groupV,
         rowStatsFunc=function(x,...){
            # x1 <- x;
            full_na_rows <- (apply(is.na(x)*1, 1, min) == 1);
            if (any(full_na_rows)) {
               # x1[full_na_rows, 1] <- na_value;
               x[full_na_rows, 1] <- na_value;
            }
            x;
         });
            ## unclear why the following step was used
            # x1[is.na(x)] <- NA;

            ## changed from previous matrixStats::rowMins
            # x1[matrixStats::rowMins(is.na(x)*1) == 1, 1] <- na_value;
            ## 0.0.60.900 - fixed bug from copy-paste above
            # x1[apply(is.na(x)*1, 1, min) == 1, ] <- NA;
   } else if ("all" %in% handle_na) {
      if (verbose) {
         jamba::printDebug("handle_na_values(): ",
            c("Replacing all NA values with ",
               na_value),
            sep="");
      }
      if (any(xNA)) {
         x[xNA] <- na_value;
      }
   }
   ## define weight matrix
   if (TRUE %in% return_weights) {
      weights <- jamba::noiseFloor(1-(xNA),
         minimum=na_weight);
      attr(x, "weights") <- weights
   }

   return(x);
}

#' Limma-voom customized for Jam
#'
#' Limma-voom customized for Jam
#'
#' This function is based directly upon `limma::voom()` with a
#' small adjustment to handle the presence of `NA` values, which
#' otherwise causes the `stats::lowess()` output to be clearly
#' incorrect. The correction removes `NA` values during this step,
#' producing a result as expected.
#'
#' @family jamses stats
#'
#' @export
voom_jam <- function
(counts,
 design=NULL,
 lib.size=NULL,
 normalize.method="none",
 block=NULL,
 correlation=NULL,
 weights=NULL,
 span=0.5,
 plot=FALSE,
 save.plot=TRUE,
 verbose=FALSE,
 ...)
{
   out <- list()

   ## Check counts
   if(is(counts,"DGEList")) {
      out$genes <- counts$genes
      out$targets <- counts$samples
      if(is.null(design) && diff(range(as.numeric(counts$sample$group)))>0) {
         design <- model.matrix(~group,
            data=counts$samples)
      }
      if(is.null(lib.size)) {
         lib.size <- with(counts$samples,
            lib.size * norm.factors);
      }
      counts <- counts$counts;
   } else {
      isExpressionSet <- suppressPackageStartupMessages(is(counts,"ExpressionSet"))
      if(isExpressionSet) {
         if(length(Biobase::fData(counts))) {
            out$genes <- Biobase::fData(counts)
         }
         if(length(Biobase::pData(counts))) {
            out$targets <- Biobase::pData(counts)
         }
         counts <- Biobase::exprs(counts)
      } else {
         counts <- as.matrix(counts)
      }
   }

   n <- nrow(counts);
   if (n < 2L) {
      stop("Need at least two rows to fit a mean-variance trend.");
   }

   ## Check design
   if (is.null(design)) {
      design <- matrix(1, ncol(counts), 1);
      rownames(design) <- colnames(counts);
      colnames(design) <- "GrandMean";
   }

   ## Check lib.size
   if (is.null(lib.size)) {
      lib.size <- colSums(counts,
         na.rm=TRUE);
   }
   if (verbose) {
      jamba::printDebug("voom_jam(): ",
         "lib.size:", format(digits=2,
            big.mark=",",
            scientific=FALSE,
            lib.size));
      jamba::printDebug("voom_jam(): ",
         "span:", format(digits=2,
            big.mark=",",
            span));
   }

   ## Fit linear model to log2-counts-per-million
   y <- t(log2(t(counts + 0.5) / (lib.size + 1) * 1e6));
   y <- limma::normalizeBetweenArrays(y,
      method=normalize.method);
   fit <- limma::lmFit(y,
      design,
      block=block,
      correlation=correlation,
      weights=weights,
      ...);
   if (is.null(fit$Amean)) {
      fit$Amean <- rowMeans(y,
         na.rm=TRUE);
   }

   NWithReps <- sum(fit$df.residual > 0L)
   if (NWithReps < 2L) {
      if (NWithReps == 0L) {
         warning("The experimental design has no replication. Setting weights to 1.")
      }
      if (NWithReps == 1L) {
         warning("Only one gene with any replication. Setting weights to 1.")
      }
      out$E <- y;
      out$weights <- y;
      out$weights[] <- 1;
      out$design <- design;
      if (is.null(out$targets)) {
         out$targets <- data.frame(lib.size=lib.size);
      } else {
         out$targets$lib.size <- lib.size;
      }
      return(new("EList", out));
   }

   ## Fit lowess trend to sqrt-standard-deviations by log-count-size
   sx <- fit$Amean + mean(log2(lib.size + 1)) - log2(1e6);
   sy <- sqrt(fit$sigma);
   allzero <- (rowSums(counts, na.rm=TRUE)==0 %in% c(TRUE,NA));
   if (verbose) {
      jamba::printDebug("voom_jam(): ",
         "head(allzero, 10):");
      print(head(allzero, 10));
   }
   if (any(allzero)) {
      sx <- sx[!allzero];
      sy <- sy[!allzero];
   }
   ## 03dec2019: Major change in lowess logic (below)
   ## If there are any NA values in sx or sy, the results
   ## are highly variable, and not correct.
   ## y1 <- rnorm(5000);
   ## y1[sample(1:5000, size=20)] <- NA;
   ## x1 <- seq(from=0.01, to=10, length.out=5000);
   ## plot(x1, y1);
   ## lines(lowess(x1, y1, f=2/3), col="red");
   ## lines(lowess(x1[!is.na(y1)], y1[!is.na(y1)], f=2/3), col="green");
   #l <- stats::lowess(sx,
   #   sy,
   #   f=span);
   s_no_na <- (!is.na(sx) & !is.na(sy));
   l <- stats::lowess(sx[s_no_na],
      sy[s_no_na],
      f=span,
      ...);
   out$sx <- sx;
   out$sy <- sy;
   out$l <- l;
   if (plot) {
      jamba::plotSmoothScatter(x=sx,
         y=sy,
         xlab="log2(count size + 0.5)",
         ylab="sqrt(standard deviation)",
         pch=16,
         cex=0.25);
      title("voom: Mean-variance trend");
      lines(l, col="red");
   }

   ## Make interpolating rule
   f <- tryCatch({
      approxfun(l,
         rule=2,
         ties=list("ordered", mean));
   }, error=function(e){
      jamba::printDebug("Error in voom_jam() approxfun():",
         fgText="red");
      #print(e);
      jamba::printDebug("head(fit$Amean, 20):");
      print(head(fit$Amean, 20));
      jamba::printDebug("head(sx, 20):");
      print(head(sx, 20));
      jamba::printDebug("head(fit$sigma, 20):");
      print(head(fit$sigma, 20));
      jamba::printDebug("head(sy, 20):");
      print(head(sy, 20 ));
      jamba::printDebug("str(l):");
      print(str(l));
      stop("voom_jam() failed.");
   });

   ## Find individual quarter-root fitted counts
   if (fit$rank < ncol(design)) {
      if (verbose) {
         jamba::printDebug("voom_jam(): ",
            "Using subset of fit$rank, length(fit$rank):",
            length(fit$rank));
      }
      j <- fit$pivot[seq_len(fit$rank)];
      fitted.values <- fit$coef[,j,drop=FALSE] %*% t(fit$design[,j,drop=FALSE])
   } else {
      fitted.values <- fit$coef %*% t(fit$design)
   }

   fitted.cpm <- 2^fitted.values;
   fitted.count <- 1e-6 * t(t(fitted.cpm) * (lib.size + 1));
   fitted.logcount <- log2(fitted.count);

   ## Apply trend to individual observations
   w <- 1 / f(fitted.logcount)^4;
   dim(w) <- dim(fitted.logcount);

   ## Output
   rownames(w) <- rownames(y);
   colnames(w) <- colnames(y);
   out$E <- y;
   out$weights <- w;
   out$design <- design;
   out$other <- list(fitted.values=fitted.values);
   if (is.null(out$targets)) {
      out$targets <- data.frame(lib.size=lib.size);
   } else {
      out$targets$lib.size <- lib.size;
   }
   if (save.plot) {
      out$voom.xy <- list(x=sx,
         y=sy,
         xlab="log2( count size + 0.5 )",
         ylab="Sqrt( standard deviation )");
      out$voom.line <- l;
   }
   new("EList", out);
}

#' Run limma contrasts with optional probe replicates
#'
#' Run limma contrasts with optional probe replicates
#'
#' This function is called by `se_contrast_stats()` to perform
#' the comparisons defined as contrasts. The `se_contrast_stats()`
#' function operates on a `SummarizedExperiment` object,
#' this function operates on the `numeric` `matrix` values
#' directly.
#'
#' This function also calls `ebayes2dfs()` which extracts
#' each contrast result as a `data.frame`, whose column names
#' are modified to include the contrast names.
#'
#' This function optionally (not yet ported from previous
#' implementation) detects replicate probes, and performs
#' the internal correlation calculations recommended by
#' `limma user guide` for replicate probes. In that case,
#' it detects each level of probe replication so that
#' each can be properly calculated. For example, Agilent
#' human 4x44 arrays often contain a large number of probes
#' with 8 replicates; a subset of probes with 4 replicates;
#' then the remaining probes (the majority overall) have
#' only one replicate each. In that case, this function
#' splits data into 8-replicate, 4-replicate, and 1-replicate
#' subsets, calculates correlations on the 8-replicate and
#' 4-replicate subsets separately, then runs limma calculations
#' on the three subsets independently, then merges the results
#' into one large table. The end result is that the
#' final table contains one row per unique probe after
#' adjusting for probe replication properly in each scenario.
#' As the Agilent microarray layout is markedly less widely
#' used that in past, the priority to port this methodology
#' is quite low.
#'
#' @return `list` with the following entries:
#'    * "stats_df": `data.frame` with all individual `data.frame` per contrast,
#'    merged together.
#'    * "stats_df": `list` of individual `data.frame` per contrast, each
#'    result is the output from `ebayes2dfs()`.
#'    * "rep_fits": `list` of various intermediate model fits, dependent
#'    upon whether limma, limma-voom, or limma-DEqMS were used.
#'
#' @inheritParams ebayes2dfs
#' @param correlation `numeric` or `NULL` passed to `limma::lmFit()`.
#'    Note that when `block` is defined (and non-empty), and when
#'    `correlation=NULL`, the correlation will be calculated by
#'    calling `limma::duplicateCorrelation()`.
#' @param seed `numeric` value used to define `set.seed()` for reproducibility.
#'    To avoid setting seed, use `seed=NULL`.
#'
#' @family jamses stats
#'
#' @export
run_limma_replicate <- function
(imatrix,
 idesign,
 icontrasts,
 weights=NULL,
 robust=FALSE,
 adjust.method="BH",
 confint=FALSE,
 trim_colnames=c("t",
    "B",
    "F",
    "sca.t"),
 adjp_cutoff=0.05,
 p_cutoff=NULL,
 fold_cutoff=1.5,
 int_adjp_cutoff=adjp_cutoff,
 int_p_cutoff=p_cutoff,
 int_fold_cutoff=fold_cutoff,
 mgm_cutoff=NULL,
 ave_cutoff=NULL,
 block=NULL,
 rowData_df=NULL,
 collapse_by_gene=FALSE,
 correlation=NULL,
 posthoc_test=c("none",
    "DEqMS"),
 posthoc_args=list(DEqMS=list(
    PSM_counts=NULL,
    fit.method="loess")),
 seed=123,
 verbose=FALSE,
 ...)
{
   # validate arguments
   posthoc_test <- match.arg(posthoc_test);

   # prepate ExpressionSet
   if (verbose) {
      jamba::printDebug("run_limma_replicate(): ",
         "Creating ExpressionSet object.");
   }
   imatrixES <- Biobase::ExpressionSet(assayData=imatrix,
      featureData=new("AnnotatedDataFrame",
         data=data.frame(
            probes=rownames(imatrix),
            row.names=rownames(imatrix)
         )
      )
   );

   ## lmFit
   if (verbose) {
      jamba::printDebug("run_limma_replicate(): ",
         "Performing model fit with limma::lmFit().");
   }
   subFit1 <- limma::lmFit(imatrixES,
      design=idesign,
      weights=weights,
      block=block,
      correlation=correlation,
      ...);
   if (length(rownames(subFit1$coefficients)) == 0) {
      rownames(subFit1$coefficients) <- rownames(subFit1$genes);
   }

   ## Add Amean values if not calculated (probe means)
   if (length(subFit1$Amean) == 0) {
      if (verbose) {
         jamba::printDebug("run_limma_replicate(): ",
            "Adding Amean values since lmFit did not.");
      }
      subFit1$Amean <- jamba::nameVector(
         rowMeans(imatrix, na.rm=TRUE),
         rownames(imatrix));
   }

   ## run contrasts on the limma model
   if (verbose) {
      jamba::printDebug("run_limma_replicate(): ",
         "Running contrasts.fit on subFit1");
   }
   subFit2 <- limma::contrasts.fit(fit=subFit1,
      contrasts=icontrasts);

   ## eBayes adjustment for signal-based noise
   if (verbose) {
      jamba::printDebug("run_limma_replicate(): ",
         "Running eBayes on subFit2");
   }
   subFit3 <- limma::eBayes(subFit2,
      robust=robust);
   #if (nrow(subMatrix) == ndups) {
   #   subFit3 <- subFit3[1,];
   #}

   ## define colnames to rename to include the contrast name
   renameCols <- c("logFC",
      "P.Value",
      "adj.P.Val",
      "sca.p",
      "sca.adj.pval",
      "t",
      "F",
      "B",
      "CI.L",
      "CI.R",
      "AveExpr");

   ## optionally apply post-hoc tests here, driving example is DEqMS
   subFit4 <- NULL;
   if ("DEqMS" %in% posthoc_test) {
      if (!jamba::check_pkg_installed("DEqMS")) {
         stop(paste("The Bioconductor package DEqMS is not installed,",
            "but posthoc_test='DEqMS'"));
      }
      PSM_counts <- posthoc_args$DEqMS$PSM_counts;
      if (length(posthoc_args$DEqMS$fit.method) == 0) {
         posthoc_args$DEqMS$fit.method <- "loess";
      }
      if (length(PSM_counts) == 0) {
         stop(paste("posthoc_args$DEqMS$PSM_counts is empty,",
            "and is required when using posthoc_test='DEqMS'."));
      }
      if (!all(rownames(subFit3$coefficients) %in% names(PSM_counts))) {
         print(head(subFit3$coefficients));
         stop(paste("all rownames(subFit3$coefficients) must be present in",
            "names(PSM_counts) when using posthoc_test='DEqMS'."));
      }
      subFit3$count <- PSM_counts[rownames(subFit3$coefficients)];
      subFit4 <- DEqMS::spectraCounteBayes(subFit3,
         fit.method=posthoc_args$DEqMS$fit.method);
      renameCols <- c(renameCols,
         "sca.p",
         "sca.adj.pval",
         "sca.t",
         "count");
   }

   ## Get summary table for each contrast
   contrastNames <- colnames(subFit3$contrast);

   dimNum <- nrow(subFit3$coefficients);

   ## top table for each contrast
   if (TRUE) {
      stats_dfs <- ebayes2dfs(lmFit3=subFit3,
         lmFit1=subFit1,
         lmFit4=subFit4,
         define_hits=TRUE,
         trim_colnames=trim_colnames,
         adjp_cutoff=adjp_cutoff,
         p_cutoff=p_cutoff,
         fold_cutoff=fold_cutoff,
         int_adjp_cutoff=int_adjp_cutoff,
         int_p_cutoff=int_p_cutoff,
         int_fold_cutoff=int_fold_cutoff,
         mgm_cutoff=mgm_cutoff,
         ave_cutoff=ave_cutoff,
         posthoc_test=posthoc_test,
         rowData_df=rowData_df,
         verbose=verbose,
         ...);
   } else {
      stats_dfs <- lapply(jamba::nameVector(contrastNames), function(contrastName) {
         tt <- limma::topTable(subFit3,
            coef=contrastName,
            number=Inf,
            adjust.method=adjust.method,
            confint=confint);

         ## Rename columns to include the contrast
         ## Note: this step could rename "probes" or "genes"
         tt <- jamba::renameColumn(tt,
            from=c(renameCols),
            to=c(paste(renameCols, contrastName)));

         if (verbose) {
            jamba::printDebug("run_limma_replicate(): ",
               "contrastName:",
               contrastName,
               " head(topTable):");
            print(head(tt, 5));
         }
         tt;
      } );
   }

   # produce stats_df for a merged data.frame
   # TODO: make this section optional, it is time-consuming,
   # and potentially more appropriate in a separate function.
   stats_df_colnames <- unique(
      unlist(lapply(stats_dfs, colnames)));
   stats_df <- NULL;
   stats_df <- jamba::mergeAllXY(stats_dfs);
   stats_df <- stats_df[, stats_df_colnames, drop=FALSE];
   # 0.0.47.900: do not define rownames() since they add object.size
   # and are not trustworthy identifiers as rownames.
   # if (length(jamba::tcount(stats_df[,1], minCount=2)) == 0) {
   #    rownames(stats_df) <- stats_df[,1];
   # }
   # subFit4 is included below, but removed by jamba::rmNULL() when empty
   return(
      list(stats_df=stats_df,
         stats_dfs=stats_dfs,
         rep_fits=jamba::rmNULL(list(
            lmFit1=subFit1,
            lmFit2=subFit2,
            lmFit3=subFit3,
            lmFit4=subFit4))
      )
   )
}


#' Convert limma eBayes fit to data.frame with annotated hits
#'
#' Convert limma eBayes fit to data.frame with annotated hits
#'
#' This function is called by `run_limma_replicate()` as
#' an extension to `limma::topTable()`, that differs in that
#' it is performed for each contrast in the input `lmFit3` object.
#'
#' By default the columns include the contrast, so that each `data.frame`
#' is self-described.
#'
#' When `define_hits=TRUE`, then statistical thresholds are applied
#' to define a set of statistical hits. The thresholds available include:
#'
#' 1. `adjp_cutoff` - applied to `"adj.P.Val"` for adjusted P-value.
#' 2. `p_cutoff` - applied to `"P.Value"` for raw, unadjusted P-value.
#' 3. `fold_cutoff` - normal space fold change, applied to `"logFC"`
#'    by using `log2(fold_cutoff)`.
#' 4. `mgm_cutoff` - max group mean, applied to the highest group mean
#'    value involved in each specific contrast.
#' 5. `ave_cutoff` - applied to `"AveExpr"` which represents the mean
#'    value across all sample groups.
#'
#' Note that `mgm_cutoff` requires input `lmFit1` which stores the
#' group mean values used in the limma workflow.
#'
#' Note also there are optional arguments specific to interaction
#' contrasts, which in this context is assumed to be a
#' "fold change of fold changes" style of contrast, for example:
#' `(groupA-groupB)-(groupC-groupD)`. The purpose is distinct interaction
#' thresholds is to enable reasonable data mining, sometimes with
#' somewhat more lenient thresholds for interaction contrasts.
#' For example, one may use `adjp_cutoff=0.01` and `int_adjp_cutoff=0.05`,
#' or `fold_cutoff=2` and `int_fold_cutoff=1.5`.
#'
#' By default, `rename_headers=TRUE` causes colnames to include the
#' contrast, for example renaming colname `"logFC"` to `"logFC contrastA"`.
#' This change helps reinforce the source of the statistical results,
#' and allows the `data.frame` results to be merged together using
#' `base::merge()`.
#'
#' Indeed, `merge_df=TRUE` will cause all `data.frame` results to be
#' merged into one large `data.frame`, using `jamba::mergeAllXY()`.
#'
#' @return `list` with one `data.frame` per contrast defined in
#'    the input `lmFit3` object. When `define_hits=TRUE` there
#'    will be one column per statistical threshold, named `"hit"`
#'    followed by an abbreviation of the statistical thresholds
#'    which were applied.
#'    When `merge_df=TRUE` the returned data will be one
#'    `data.frame` object.
#'
#' @family jamses stats
#'
#' @param lmFit3 object returned by `limma::eBayes()`.
#' @param lmFit1 object returned by `limma::lmFit()`, optional.
#' @param lmFit4 object returned by `posthoc_test="DEqMS"` in
#'    `run_limma_replicate()`.
#' @param define_hits `logical` indicating whether to define hits
#'    using the statistical thresholds.
#' @param adjp_cutoff,p_cutoff,fold_cutoff,mgm_cutoff,ave_cutoff `numeric`
#'    values representing the appropriate statistical threshold,
#'    or `NULL` when a threshold should not be applied.
#' @param int_adjp_cutoff,int_p_cutoff,int_fold_cutoff `numeric`
#'    thresholds to apply only to interaction contrasts.
#' @param confint `logical` passed to `limma::topTable()`, which defines
#'    whether to return confidence intervals for each log2 fold change.
#' @param use_cutoff_colnames `logical` whether to include the
#'    statistical thresholds abbreviated in the `"hit"` colname,
#'    when `define_hits=TRUE`.
#' @param rename_headers `logical` indicating whether to rename
#'    statistical colnames returned by `limma::topTable()` to the
#'    colnames include the contrast name.
#' @param return_fold `logical` whether to return an additional column
#'    with the signed fold change, see `log2fold_to_fold()`.
#' @param merge_df `logical` indicating whether to merge the final
#'    `data.frame` list into one `data.frame`.
#' @param include_ave_expr `logical` indicating whether to retain
#'    the column `"AveExpr"`. This column can be misleading, especially
#'    if the `mgm` (max group mean) threshold is used when determining
#'    statistical hits. This column is mainly useful in reviewing limma
#'    output, since it uses the `"AveExpr"` values to apply its moderated
#'    variance statistic.
#' @param include_group_means `logical` indicating whether to include each
#'    group mean along with the relevant contrast. These values are
#'    helpful, in that they should exactly represent the reported `logFC`
#'    value. Sometimes it is helpful and comforting to see the exact values
#'    used in that calculation.
#' @param rowData_df `data.frame` representing optional rowData annotation
#'    to be retained in the resulting stat `data.frame`. This argument
#'    is usually defined using `rowData_colnames` in `se_contrast_stats()`,
#'    which uses corresponding columns from `rowData(se)`.
#' @param collapse_by_gene `logical` indicating whether to apply
#'    `collapse_stats_by_gene` which chooses one "best" exemplar per gene
#'    when there are multiple rows that represent the same gene.
#' @param rename_contrasts `logical` (inactive) which will in future allow
#'    for automated renaming of contrasts.
#' @param sep `character` string used as a delimiter in certain output
#'    colnames.
#' @param int_grep `character` string used to recognize contrasts which
#'    are considered "interaction contrasts". The default pattern recognizes
#'    any contrasts that contain multiple fold changes, recognized by the
#'    presence of more than one hypen `"-"` in the contrast name.
#' @param verbose `logical` indicating whether to print verbose output.
#'
#' @export
ebayes2dfs <- function
(lmFit3=NULL,
 lmFit1=NULL,
 lmFit4=NULL,
 define_hits=TRUE,
 adjp_cutoff=0.05,
 p_cutoff=NULL,
 fold_cutoff=1.5,
 int_adjp_cutoff=adjp_cutoff,
 int_p_cutoff=p_cutoff,
 int_fold_cutoff=fold_cutoff,
 mgm_cutoff=NULL,
 ave_cutoff=NULL,
 confint=FALSE,
 use_cutoff_colnames=TRUE,
 rename_headers=TRUE,
 return_fold=TRUE,
 merge_df=FALSE,
 include_ave_expr=FALSE,
 include_group_means=TRUE,
 transform_means=c("none", "exp2signed", "10^"),
 rowData_df=NULL,
 collapse_by_gene=FALSE,
 rename_contrasts=FALSE,
 sep=" ",
 int_grep="[(].+-.+-.+[)]|-.+-",
 trim_colnames=c("t",
    "B",
    "F",
    "sca.t"),
 posthoc_test=c("none",
    "DEqMS"),
 verbose=FALSE,
 ...)
{
   ## Purpose is to convert the lmFit3 results of eBayes() into a list of data.frames
   ##
   ## Note the cutoffFold is normal space fold change, but converted to log2fold to compare with limma output
   ## Note cutoffMaxGroupmean is in whatever units are sent to limma... usually log2 intensity or log2 counts
   ##
   ## collapseByGene=TRUE will attempt to produce per-gene results, using pre-defined logic to select the best
   ## exemplar(s) among multiple probes for the same gene.
   ##    1. choose statistical hits first
   ##       a. if multiple hits, same direction, choose them all.
   ##       b. if multiple hits, diff direction,
   ##          i.  mark this gene as multi-direction
   ##          ii. choose best P-value, then take hits with same direction
   ##    2. if no statistical hits, choose entry(ies) above maxMean signal cutoff
   ##    3. If no hits, and no entries above signal cutoff, choose all entries
   ##
   ## renameContrasts=TRUE will rename a fully described contrast into a more human-readable
   ## contrast, e.g.
   ## from: dHSA10GR_EtOH-SW13GR_EtOH
   ## to:   dHSA10GR-SW13GR (EtOH)
   ##
   ## intCutoffPVal and intCutoffAdjPVal are intended to allow
   ## applying a different P-value threshold for interaction effects.
   ##
   ## confint is used by topTable(), when FALSE no confidence intervals are
   ## calculated; when TRUE, by default it calculates 0.95 confidence
   ## intervals, reported as logFC upper and lower bounds, CI.L and CI.R,
   ## respectively.
   ##
   ## Optionally transform the AveExpr values, most useful when reporting normal space values which are stored in log space
   transform_means <- match.arg(transform_means);
   posthoc_test <- match.arg(posthoc_test);

   ## cutoffMaxGroupMean requires lmFit1
   if (!define_hits) {
      mgm_cutoff <- NULL;
   }
   if ((length(jamba::rmNA(mgm_cutoff)) > 0 || include_group_means) && length(lmFit1) == 0) {
      #stop("To use cutoffMaxGroupMean, lmFit1 must be supplied, from which the group means are obtained.");
      jamba::printDebug("ebayes2dfs(): ",
         c("lmFit1 is required for mgm_cutoff or include_group_means. Setting ",
            "mgm_cutoff=NULL",
            ", and ",
            "include_group_means=FALSE"),
         sep="");
      include_group_means <- FALSE;
      mgm_cutoff <- NULL;
   }

   ## TODO: Allow some curation of labels here
   contrastNames <- colnames(coef(lmFit3));
   contrastLabels <- jamba::nameVector(contrastNames, contrastNames);
   is_interaction <- grepl(int_grep,
      ignore.case=TRUE,
      contrastNames);

   # define hits is applied only to each relevant contrast
   define_hits <- rep(define_hits,
      length.out=length(contrastNames));
   names(define_hits) <- contrastNames;

   if (any(define_hits)) {
      ## Add cutoff parameters to the colnames, optional
      cutoff_l <- list(
         mgm=mgm_cutoff,
         p=p_cutoff,
         adjp=adjp_cutoff,
         fc=fold_cutoff);
      cutoff_df <- as.data.frame(jamba::rmNULL(cutoff_l));
      if (length(cutoff_df) == 0 || nrow(cutoff_df) == 0) {
         define_hits[!is_interaction] <- FALSE;
         cutoff_df <- NULL;
      }

      int_cutoff_l <- list(
         mgm=mgm_cutoff,
         p=int_p_cutoff,
         adjp=int_adjp_cutoff,
         fc=int_fold_cutoff);
      int_cutoff_df <- as.data.frame(jamba::rmNULL(int_cutoff_l));
      if (length(int_cutoff_df) == 0 || nrow(int_cutoff_df) == 0) {
         define_hits[is_interaction] <- FALSE;
         int_cutoff_df <- NULL;
      }
   }

   # define cutoff strings for contrasts and interaction contrasts
   if (any(define_hits)) {
      if (length(cutoff_df) > 0) {
         cutoff_string_df <- cutoff_df;
         for (i in colnames(cutoff_df)) {
            cutoff_string_df[[i]] <- paste0(i, cutoff_df[[i]]);
         }
         cutoff_string <- jamba::pasteByRow(cutoff_string_df,
            sep=sep);
      }
      if (length(int_cutoff_df) > 0) {
         int_cutoff_string_df <- int_cutoff_df;
         for (i in colnames(int_cutoff_df)) {
            int_cutoff_string_df[[i]] <- paste0(i, int_cutoff_df[[i]]);
         }
         int_cutoff_string <- jamba::pasteByRow(int_cutoff_string_df,
            sep=sep);
      }

      for (i in colnames(cutoff_df)) {
         cutoff_string_df[[i]] <- paste0(i, cutoff_df[[i]]);
         ## if column is "int"
         ## remove if values are all identical to "non-int" cutoff
         if (jamba::igrepHas("^int", i)) {
            j <- gsub("^int", "", i);
            if (j %in% colnames(cutoff_df)) {
               if (all(cutoff_df[[i]] == cutoff_df[[j]])) {
                  cutoff_string_df[,i] <- list(NULL);
               }
            }
         }
      }
      if (verbose) {
         # jamba::printDebug("ebayes2dfs(): ",
         #    "cutoff_df:");
         # print(cutoff_df);
         # jamba::printDebug("ebayes2dfs(): ",
         #    "int_cutoff_df:");
         # print(int_cutoff_df);
         # jamba::printDebug("ebayes2dfs(): ",
         #    "cutoff_string_df:");
         # print(cutoff_string_df);
         jamba::printDebug("ebayes2dfs(): ",
            "cutoff_string:",
            cutoff_string);
         # jamba::printDebug("ebayes2dfs(): ",
         #    "int_cutoff_string_df:");
         # print(int_cutoff_string_df);
         jamba::printDebug("ebayes2dfs(): ",
            "int_cutoff_string:",
            int_cutoff_string);
      }
      #return(cutoff_string_df);
   }

   ## assign rownames if not present in lmFit1$coefficients
   if (length(lmFit1) > 0 &&
         length(rownames(lmFit1$coefficients)) == 0) {
      jamba::printDebug("ebayes2dfs(): ",
         c("Note there are no",
            " rownames(lmFit1$coefficients)"),
         sep="",
         fgText=c("darkorange1", "red1"));
      if ("genes" %in% names(lmFit1)) {
         rownames(lmFit1$coefficients) <- rownames(lmFit1$genes);
      }
   }

   ## assign rownames if not present in lmFit3$coefficients
   if (length(rownames(lmFit3$coefficients)) == 0) {
      jamba::printDebug("ebayes2dfs(): ",
         c("Note there are no",
            " rownames(lmFit3$coefficients)"),
         sep="",
         fgText=c("darkorange1", "red1"));
      if ("genes" %in% names(lmFit3)) {
         rownames(lmFit3$coefficients) <- rownames(lmFit3$genes);
      }
   }

   ## TODO: handle cases with zero residual degrees of freedom,
   ## where we would not have a P-value but still have fold changes.
   ## Examples would be per-patient fold changes.

   ## lmTopTables is a list:
   ## - named by contrastNames
   ## - containing elements "iTopTableDF"
   ## - if collapseByGene=TRUE
   ##    - "iTopTableByGene"
   ##    - "multiDirProbes"
   lmTopTables <- lapply(jamba::nameVector(contrastNames), function(i){
      retVals <- list();
      iLabel <- contrastLabels[i];
      ## Detect whether the contrast is an interaction effect (2-way or 3-way)
      ## or is a simple pairwise style t-test
      isInteraction <- jamba::igrepHas(int_grep, iLabel);
      if (verbose) {
         if (isInteraction) {
            jamba::printDebug("ebayes2dfs(): ",
               c("Interaction effect detected for contrast:",
                  iLabel),
               sep="",
               fgText=c("darkorange1", "purple"));
         } else {
            jamba::printDebug("ebayes2dfs(): ",
               c("Evaluting contrast:",
                  iLabel),
               sep="");
         }
      }
      #if (rename_contrasts) {
      #   iLabel <- contrast2comp(iLabel);
      #}

      # confirm there is some data available in "genes" by using
      # rownames(coefficients) as a backup plan
      if (!"genes" %in% names(lmFit3)) {
         lmFit3$genes <- data.frame(
            check.names=FALSE,
            stringsAsFactors=FALSE,
            probes=rownames(lmFit3$coefficients));
      }

      ## Assemble top table, handling single replicate data in a specific way
      if (!any(lmFit3$df.residual > 0)) {
         jamba::printDebug("ebayes2dfs(): ",
            "No values with df.residual > 0.",
            fgText=c("darkorange1", "red1"));
         if (confint) {
            iTopTable <- data.frame(
               check.names=FALSE,
               stringsAsFactors=FALSE,
               lmFit3$genes,
               logFC=lmFit3$coefficients[,i],
               CI.L=lmFit3$coefficients[,i],
               CI.R=lmFit3$coefficients[,i],
               adj.P.Val=1,
               P.Value=1,
               AveExpr=lmFit3$Amean);
         } else {
            iTopTable <- data.frame(
               check.names=FALSE,
               stringsAsFactors=FALSE,
               lmFit3$genes,
               logFC=lmFit3$coefficients[,i],
               adj.P.Val=1,
               P.Value=1,
               AveExpr=lmFit3$Amean);
         }
      } else {
         # optional post-hoc test
         if (length(lmFit4) > 0) {
            if ("DEqMS" %in% posthoc_test) {
               if (verbose) {
                  jamba::printDebug("ebayes2dfs(): ",
                     c("DEqMS::outputResult(lmFit4) for contrast: ",
                        iLabel),
                     sep="");
                  print(head(lmFit4$coefficients));
                  print(head(lmFit4$sca.t));
               }
               # determine coefficient index position
               # with only one coefficient the colname gets dropped
               # from the numeric matrix lmFit4$sca.t
               # produced by DEqMS. So DEqMS::outputResult() fails to find the
               # column by name, and must refer to the column by integer number
               coef_col <- match(i,
                  colnames(lmFit4$coefficients))
               iTopTable <- DEqMS::outputResult(lmFit4,
                  coef_col=coef_col);
               if (verbose) {
                  jamba::printDebug("ebayes2dfs(): ",
                     "head(iTopTable): ");
                  print(head(head(iTopTable)));
               }
            } else {
               if (verbose) {
                  jamba::printDebug("ebayes2dfs(): ",
                     c("limma::topTable(lmFit4) for contrast: ",
                        iLabel),
                     sep="");
               }
               iTopTable <- limma::topTable(
                  lmFit4,
                  coef=i,
                  sort.by="none",
                  number=nrow(lmFit4),
                  confint=confint);
            }
         } else {
            # post-hoc test was not used
            if (verbose) {
               jamba::printDebug("ebayes2dfs(): ",
                  c("limma::topTable(lmFit3) for contrast: ",
                     iLabel),
                  sep="");
            }
            iTopTable <- limma::topTable(
               lmFit3,
               coef=i,
               sort.by="none",
               number=nrow(lmFit3),
               confint=confint);
         }
      }
      ## Optionally remove extraneous colnames
      if (length(trim_colnames) > 0) {
         trimmed_colnames <- setdiff(colnames(iTopTable),
            trim_colnames)
         iTopTable <- iTopTable[, trimmed_colnames, drop=FALSE];
      }

      ## Optionally include group mean and maxgroupmean values
      if (length(lmFit1) > 0) {
         ## Improve maxGroupMean by using the specific groups included in the contrast
         iCoefCols <- names(which(lmFit3$contrasts[,i] != 0));
         iCoefLabs <- paste(iCoefCols,
            sep=sep,
            "mean");
         coef_match <- match(rownames(iTopTable),
            rownames(lmFit1$coefficients));
         gm_m <- jamba::renameColumn(
            lmFit1$coefficients[coef_match, iCoefCols, drop=FALSE],
            from=iCoefCols,
            to=iCoefLabs);
         # 29nov2022: replace rowMaxs() due to persistent Segfaults
         # caused by matrixStats::rowMaxs() 'memory not mapped'.
         #
         mgm <- apply(gm_m, 1, function(imax){
            max(imax, na.rm=TRUE)
         })
         # mgm <- matrixStats::rowMaxs(gm_m,
         #    na.rm=TRUE);
         if (include_group_means) {
            iTopTable[,colnames(gm_m)] <- gm_m;
         }
         iTopTable[,"mgm"] <- mgm;
      }

      ## Optionally merge rowData_df
      gene_colnames <- head(colnames(iTopTable), 1);
      if (length(rowData_df) > 0 &&
            length(dim(rowData_df)) == 2 &&
            ncol(rowData_df) > 1) {
         probe_colname <- head(colnames(iTopTable), 1);
         row_match <- match(iTopTable[[probe_colname]],
            rowData_df$probes)
         if (all(is.na(row_match))) {
            if (verbose) {
               jamba::printDebug("ebayes2dfs(): ",
                  "adding rowData_df, no rows matched.");
            }
         } else {
            if (verbose) {
               jamba::printDebug("ebayes2dfs(): ",
                  "adding rowData_df colnames:",
                  colnames(rowData_df)[-1]);
            }
            iTopTable <- data.frame(check.names=FALSE,
               stringsAsFactors=FALSE,
               iTopTable[, 1, drop=FALSE],
               rowData_df[row_match, -1, drop=FALSE],
               iTopTable[, -1, drop=FALSE])
            gene_colnames <- head(colnames(iTopTable),
               ncol(rowData_df))
         }
      }

      ## Apply statistical thresholds
      if (TRUE %in% define_hits[i] || TRUE %in% collapse_by_gene) {
         probe_colname <- head(colnames(iTopTable), 1);
         if (TRUE %in% collapse_by_gene) {
            if (verbose) {
               jamba::printDebug("ebayes2dfs(): ",
                  "collapse_by_gene.");
            }
            gene_colname <- head(jamba::provigrep(
               c("^SYMBOL$",
                  "SYMBOL",
                  "GeneName",
                  "geneSymbol",
                  "gene"),
               colnames(iTopTable)), 1);
            isGenes <- jamba::nameVector(iTopTable[,gene_colname],
               rownames(iTopTable));
         }
         pcol <- "p";
         adjpcol <- "adjp";
         foldcol <- "fc";

         ## iterate each set of thresholds by row in cutoff_df
         ## but only iterate unique set of cutoff thresholds
         if (isInteraction) {
            k_rows <- match(unique(int_cutoff_string),
               int_cutoff_string);
            use_cutoff_string <- int_cutoff_string;
            use_cutoff_df <- int_cutoff_df;
         } else {
            k_rows <- match(unique(cutoff_string),
               cutoff_string);
            use_cutoff_string <- cutoff_string;
            use_cutoff_df <- cutoff_df;
         }
         for (k in k_rows) {
            hit_colname <- paste("hit",
               sep=sep,
               use_cutoff_string[k]);
            mgm_cutoff <- ifelse("mgm" %in% colnames(use_cutoff_df),
               use_cutoff_df[k, "mgm"],
               NA);
            ave_cutoff <- ifelse("ave" %in% colnames(use_cutoff_df),
               use_cutoff_df[k, "ave"],
               NA);
            p_cutoff <- ifelse(pcol %in% colnames(use_cutoff_df),
               use_cutoff_df[k, pcol],
               NA);
            adjp_cutoff <- ifelse(adjpcol %in% colnames(use_cutoff_df),
               use_cutoff_df[k, adjpcol],
               NA);
            fold_cutoff <- ifelse(foldcol %in% colnames(use_cutoff_df),
               use_cutoff_df[k, foldcol],
               NA);

            if (verbose) {
               jamba::printDebug("ebayes2dfs(): ",
                  c("hit_colname:",
                     hit_colname),
                  sep="");
            }

            ## Define colnames to use for each cutoff
            adjp_colname<- "adj.P.Val"
            p_colname <- "P.Value"
            logfc_colname <- "logFC"
            mgm_colname <- "mgm"
            ave_colname <- "AveExpr"
            if ("DEqMS" %in% posthoc_test) {
               adjp_colname <- "sca.adj.pval";
               p_colname <- "sca.P.Value";
            }

            ## Call utility function to get hit flags
            hit_values <- mark_stat_hits(x=iTopTable,
               adjp_cutoff=adjp_cutoff,
               p_cutoff=p_cutoff,
               fold_cutoff=fold_cutoff,
               mgm_cutoff=mgm_cutoff,
               ave_cutoff=ave_cutoff,
               adjp_colname=adjp_colname,
               p_colname=p_colname,
               logfc_colname=logfc_colname,
               mgm_colname=mgm_colname,
               ave_colname=ave_colname);
            iTopTable[[hit_colname]] <- hit_values;
         }
      }

      ## Optionally transform intensities, e.g. exponentiating log2 values
      ## Note that the 2^ conversion now subtracts 1, since the typical
      ## transformation is log2(1+x)
      if (FALSE) {
         if (transformAveExpr %in% c("2^")) {
            #iTopTable[,"AveExpr"] <- 2^iTopTable[,"AveExpr"];
            iTopTable[,"AveExpr"] <- 2^iTopTable[,"AveExpr"] - 1;
            if ("maxGroupMean" %in% colnames(iTopTable)) {
               #iTopTable[,"maxGroupMean"] <- 2^iTopTable[,"maxGroupMean"];
               iTopTable[,"maxGroupMean"] <- 2^iTopTable[,"maxGroupMean"] - 1;
            }
         } else if (transformAveExpr %in% c("10^")) {
            #iTopTable[,"AveExpr"] <- 10^iTopTable[,"AveExpr"];
            iTopTable[,"AveExpr"] <- 10^iTopTable[,"AveExpr"] - 1;
            if ("maxGroupMean" %in% colnames(iTopTable)) {
               #iTopTable[,"maxGroupMean"] <- 10^iTopTable[,"maxGroupMean"];
               iTopTable[,"maxGroupMean"] <- 10^iTopTable[,"maxGroupMean"] - 1;
            }
         }
      }
      ## Optionally convert log2 fold change to normal fold change
      if (TRUE %in% return_fold && !"fold" %in% colnames(iTopTable)) {
         iTopTable[,"fold"] <- log2fold_to_fold(iTopTable[,"logFC"]);
      }

      ################################################
      ## Per-gene collapse:
      #
      # Note we define per-gene rows using the first stats hit criteria,
      # otherwise too many columns will be created.
      # E.g. changing the stats hit criteria changes the prioritization
      # of probes to include in the per-gene row, so it affects
      # the fold change, groupMean, P-Value, and adj-P-Val.
      # The current solution is to use only the first hit filters,
      # so only one set of these values is propagated downstream.
      # However, the "hit" columns can represent multiple stats hit criteria.
      #
      # TODO: implement the interaction P-value cutoffs here as well
      # Disabled this section for now
      if (FALSE && TRUE %in% collapse_by_gene) {
         if (verbose) {
            jamba::printDebug("collapseByGene iTopTable:");
            print(head(iTopTable));
            jamba::printDebug("geneColname:", geneColname);
         }
         # Note that DEqMS should not ever proceed here since the
         # method is inherently based upon per-gene logic.
         iTopTableByGeneL <- collapseTopTableByGene(iTopTable,
            geneColname=geneColname,
            aveExprColname="AveExpr",
            maxGroupMeanColname="maxGroupMean",
            adjPvalColname="adj.P.Val",
            PvalueColname="P.Value",
            logFCcolname="logFC",
            cutoffAveExpr=ave_cutoff[1],
            cutoffMaxGroupMean=mgm_cutoff[1],
            cutoffAdjPVal=adjp_cutoff[1],
            cutoffPVal=p_cutoff[1],
            cutoffFold=fold_cutoff[1]);
         iTopTableByGene <- iTopTableByGeneL$iTopTableByGene;
         retVals$top_bygene_df <- iTopTableByGene;
         retVals$multidir_probes <- iTopTableByGeneL$multiDirProbes;
      }

      ## Optionally rename column headers to include the contrast name
      # Note: gene_colnames is defined above, so that it can include
      # colnames(rowData_df)
      # gene_colnames <- intersect(colnames(iTopTable),
      #    c(colnames(lmFit3$genes),
      #       "gene",
      #       probe_colname));

      # optionally remove AveExpr
      if (!TRUE %in% include_ave_expr) {
         trimmed_colnames1 <- jamba::unvigrep("AveExpr", colnames(iTopTable))
         iTopTable <- iTopTable[, trimmed_colnames1, drop=FALSE];
      }

      # rename headers to include the contrast name
      if (TRUE %in% rename_headers) {
         ## Note we do rename maxGroupMean now,
         ## since we calculate it per each specific contrast
         #
         # rename_from <- jamba::vigrep(c("^AveExpr$|^mgm$|p.value|adj.p.val|sca.adj.pval|^sca.t$|^count$|^logFC$|^fold$"),
         #    colnames(iTopTable));
         rename_from <- setdiff(
            jamba::unvigrep("AveExpr| mean$|^gene|^symbol|^probe",
               colnames(iTopTable)),
            gene_colnames);
         rename_to <- paste(rename_from,
            iLabel,
            sep=sep);
         # remove repeat blank spaces, leading/trailing spaces
         rename_to <- gsub("^[ ]+|[ ]+$", "",
            gsub("[ \t\r\n]+", " ",
               rename_to));
         if (verbose > 1) {
            jamba::printDebug("ebayes2dfs(): ",
               "rename stat columns:");
            print(data.frame(rename_from, rename_to));
         }
         iTopTable <- jamba::renameColumn(iTopTable,
            from=rename_from,
            to=rename_to);
      }

      # re-order colnames
      if (TRUE) {
         neworder_colnames <- jamba::provigrep(c(
            "^probe|^gene|symbol|accession|accnum",
            paste0("^hit", sep),
            paste0("^logFC", sep),
            paste0("^fold", sep),
            "^sca.p.value",
            "^sca.adj.pval",
            "^p.value",
            "^adj.p.val",
            paste0("^mgm", sep),
            " mean$",
            paste0("^AveExpr", sep),
            "."),
            colnames(iTopTable));
         neworder_colnames <- unique(c(gene_colnames,
            neworder_colnames));
         iTopTable <- iTopTable[, neworder_colnames, drop=FALSE];
      }
      rownames(iTopTable) <- jamba::makeNames(iTopTable[,1]);

      retVals$top_df <- iTopTable;
      retVals;
   });

   ## Now prepare the data to return
   if (TRUE %in% merge_df) {
      lmTopTablesAll <- jamba::mergeAllXY(lapply(lmTopTables, function(i){
         i$top_df;
      }));
      if (FALSE && TRUE %in% collapse_by_gene) {
         lmTopTablesAllG <- jamba::mergeAllXY(lapply(lmTopTables, function(i){
            i$top_bygene_df;
         }));
      }
      ## Re-order columns so the genes, then hits, appear first
      colname_order <- unique(c(gene_colnames,
         jamba::provigrep(c("^hit", "."),
            colnames(lmTopTablesAll))));
      lmTopTablesAll <- lmTopTablesAll[, colname_order, drop=FALSE];
      rownames(lmTopTablesAll) <- jamba::makeNames(
         lmTopTablesAll[, 1]);
   } else {
      lmTopTablesAll <- lapply(lmTopTables, function(i){
         i$top_df;
      });
      if (FALSE && TRUE %in% collapse_by_gene) {
         lmTopTablesAllG <- lapply(lmTopTables, function(i){
            i$top_bygene_df;
         });
      }
   }

   if (any(define_hits)) {
      if (length(cutoff_df) > 0) {
         attr(lmTopTablesAll, "cutoff_df") <- cutoff_df;
      }
      if (length(int_cutoff_df) > 0) {
         attr(lmTopTablesAll, "int_cutoff_df") <- int_cutoff_df;
      }
   }
   if (FALSE && TRUE %in% collapse_by_gene) {
      if (any(define_hits)) {
         if (length(cutoff_df) > 0) {
            attr(lmTopTablesAllG, "cutoff_df") <- cutoff_df;
         }
         if (length(int_cutoff_df) > 0) {
            attr(lmTopTablesAllG, "int_cutoff_df") <- int_cutoff_df;
         }
      }
      retList <- list(top_df=lmTopTablesAll,
         top_bygene_df=lmTopTablesAllG);
      return(retList);
   }
   return(lmTopTablesAll);
}