R/jam_filter_contrast_names.R

#' Filter contrast names
#'
#' Filter contrast names
#'
#' Utility function to help filter a large set of contrasts to
#' a smaller set of comparisons versus specific factor level
#' controls.
#'
#' This function was motivated by a design with 5 treatments and 3 genotypes,
#' the "all-by-all" pairwise strategy produced 45 oneway contrasts, and
#' 75 contrasts overall.
#'
#' After filtering for comparisons versus the first level per
#' experimental factor, it produced 22 oneway contrasts, and 30 overall.
#'
#' This function is still being tested to determine effective ways
#' to pare down an enormous number of potential contrasts to the
#' minimal set of "useful contrasts". The examples show a few use cases.
#'
#' @family jam experiment design
#'
#' @param contrast_names `character` vector of contrast names
#' @param sedesign `SEDesign` object
#' @param factor_controls optional `character` vector with one value per
#'    experiment factor column used in the contrast names. Typically the
#'    first observed value in each column is taken as the overall control,
#'    using only the baseline control group.
#' @param apply_to_sedesign `logical` indicating when `sedesign` is supplied,
#'    whether to return an `SEDesign` object with update `contrasts()`
#'    consistent with the contrast_names generated by this function.
#' @param must_be_control `logical` indicating whether the control factor
#'    level must also be the control in factor comparisons.
#'    When `must-be_control=TRUE` (default) the factor comparison must
#'    be in the form (Test-Control).
#'    When `must_be_control=FALSE` it permits the control factor level
#'    to be present as the test or control: (Control-Test) or (Test-Control).
#' @param verbose `logical` indicating whether to print verbose output.
#' @param ... additional arguments are ignored.
#'
#' @examples
#' group_names <- paste0(
#'    rep(c("UL3", "dH1A", "dH1B"), each=5), "_",
#'    c("Veh", "DEX", "PMA", "SF", "Ins"))
#' sedesign <- groups_to_sedesign(group_names)
#' plot_sedesign(sedesign,
#'    which_contrasts=jamba::unvigrep("[(]", contrast_names(sedesign)),
#'    arrow_ex=0.2, twoway_lwd=1, contrast_style="none")
#' title(main="45 total oneway and twoway contrasts")
#'
#' new_contrast_names <- filter_contrast_names(contrast_names(sedesign))
#' new_contrast_names
#'
#' # apply to sedesign directly
#' sedesign2 <- filter_contrast_names(sedesign=sedesign, apply_to_sedesign=TRUE)
#' plot_sedesign(sedesign2,
#'    which_contrasts=jamba::unvigrep("[(]", contrast_names(sedesign2)),
#'    arrow_ex=0.2, twoway_lwd=1, contrast_style="none")
#' title(main="22 oneway contrasts (vs UL3 / vs Veh)")
#'
#' # subset for contrasts involving certain control values
#' # SF is the control for factors Ins,SF, so we filter for comparisons
#' # where SF is the control, using `must_be_control=TRUE` (default)
#' # but the contrast must involve SF or Ins directly
#' contrast_names_sf <- filter_contrast_names(
#'    contrast_names=jamba::vigrep("SF|Ins",
#'    contrast_names(sedesign)))
#' data.frame(jamba::rbindList(strsplit(contrast2comp(contrast_names_sf), ":")))
#'
#' # must_be_control=FALSE allows any orientation involving "UL3" or "SF"
#' # it therefore also permits "SF-Veh"
#' contrast_names_sf <- filter_contrast_names(
#'    contrast_names=jamba::vigrep("SF|Ins", contrast_names(sedesign)),
#'    must_be_control=FALSE,
#'    factor_controls=c(Genotype="dH1A", Treatment="SF"))
#' data.frame(jamba::rbindList(strsplit(contrast2comp(contrast_names_sf), ":")))
#'
#' # use must_be_control=c(FALSE, TRUE) to enforce only on the second factor
#' # 1. do not require UL3 to be the control in the comparison
#' # 2. do require SF to be the control in the comparison
#' contrast_names_sf <- filter_contrast_names(
#'    contrast_names=jamba::vigrep("SF|Ins", contrast_names(sedesign)),
#'    must_be_control=c(FALSE, TRUE),
#'    factor_controls=c(Genotype="dH1A", Treatment="SF"))
#' data.frame(jamba::rbindList(strsplit(contrast2comp(contrast_names_sf), ":")))
#'
#' # for the example below, use this form
#' contrast_names_sf <- filter_contrast_names(jamba::vigrep("SF|Ins", contrast_names(sedesign)))
#' data.frame(jamba::rbindList(strsplit(contrast2comp(contrast_names_sf), ":")))
#'
#' # Veh is the control for all other groups, omit Ins in these comparisons
#' contrast_names_veh <- filter_contrast_names(jamba::unvigrep("Ins", contrast_names(sedesign)))
#' data.frame(jamba::rbindList(strsplit(contrast2comp(contrast_names_veh), ":")))
#'
#' use_contrasts <- unique(c(contrast_names_veh, contrast_names_sf))
#' sedesign3 <- sedesign;
#' contrast_names(sedesign3) <- use_contrasts
#'
#' # show contrasts after filtering
#' jamba::rbindList(strsplit(contrast2comp(contrast_names(sedesign3)), ":"))
#'
#' # show all one-way contrasts
#' plot_sedesign(sedesign3,
#'    which_contrasts=jamba::unvigrep("[(]", contrast_names(sedesign3)),
#'    arrow_ex=0.5, twoway_lwd=1, contrast_style="none")
#' title(main="22 oneway contrasts (filtered)")
#'
#' # show only the two-way contrasts
#' plot_sedesign(sedesign3,
#'    which_contrasts=jamba::vigrep("[(]", contrast_names(sedesign3)),
#'    arrow_ex=0.5, twoway_lwd=1, contrast_style="none")
#' title(main="8 twoway contrasts (filtered)")
#'
#' # same two-way contrasts, showing flipped orientation
#' plot_sedesign(sedesign3,
#'    flip_twoway=TRUE,
#'    which_contrasts=jamba::vigrep("[(]", contrast_names(sedesign3)),
#'    arrow_ex=0.5, twoway_lwd=1, contrast_style="none")
#' title(main="8 twoway contrasts (flipped)")
#'
#' @export
filter_contrast_names <- function
(contrast_names=NULL,
 sedesign=NULL,
 factor_controls=NULL,
 apply_to_sedesign=FALSE,
 must_be_control=TRUE,
 verbose=FALSE,
 ...)
{
   #
   if (length(contrast_names) == 0) {
      if (length(sedesign) > 0) {
         contrast_names_v <- contrast_names(sedesign)
      } else {
         stop("Either contrast_names or sedesign must be provided.")
      }
   } else {
      contrast_names_v <- contrast_names;
   }

   # determine comps
   comps <- contrast2comp(contrast_names_v)
   comps_df <- data.frame(jamba::rbindList(
      strsplit(comps, ":")))

   if (length(factor_controls) == 0) {
      # first control level per factor
      factors_df <- data.frame(
         jamba::rbindList(strsplit(
            jamba::rbindList(
            strsplit(
               jamba::unvigrep("[(]", contrast_names_v),
               "-"))[,2],
            "_")))
      # factors_df - take first value that appears per column
      factor_controls <- sapply(factors_df, head, 1)
      factor_controls
   } else if (length(factor_controls) != ncol(comps_df)) {
      stop("factor_controls must equal the number of experimental factors.")
   } else {
      if (length(names(factor_controls)) > 0) {
         # ensure names are unique
         names(factor_controls) <- jamba::makeNames(names(factor_controls));
         # assign to comps_df
         colnames(comps_df) <- names(factor_controls);
      } else {
         names(factor_controls) <- colnames(comps_df);
      }
   }
   if (TRUE %in% verbose) {
      jamba::printDebug("filter_contrast_names(): ",
         "factor_controls: ",
         paste0(names(factor_controls), ": ", factor_controls))
   }

   # define a filter for each comp factor
   must_be_control <- rep(must_be_control, length.out=length(factor_controls));
   names(must_be_control) <- names(factor_controls);
   comp_filters <- lapply(names(factor_controls), function(ifactor){
      # allow one factor OR
      # factor comparison that involves the control factor level
      if (TRUE %in% must_be_control[[ifactor]]) {
         grepl(
            paste0("^[^-]+$",
               "|",
               # control group "-Control"
               paste0("-", factor_controls[ifactor], "$")
            ),
            comps_df[[ifactor]])
      } else {
         grepl(
            paste0("^[^-]+$",
               "|",
               # control group "-Control"
               paste0("-", factor_controls[ifactor], "$"),
               "|",
               # test group "Control-"
               paste0("^", factor_controls[ifactor], "-")
            ),
            comps_df[[ifactor]])
      }
   })
   # take the intersection of filters
   comp_filter <- Reduce("&", comp_filters)
   new_comps_df <- subset(comps_df, comp_filter)
   rownames(new_comps_df) <- seq_len(nrow(new_comps_df))
   # new_comps_df

   # convert to comps
   new_comps <- jamba::pasteByRow(new_comps_df, sep=":")

   # convert comp back to contrast names
   new_contrasts <- unname(comp2contrast(new_comps))

   # optionally re-apply to sedesign
   if (length(sedesign) > 0 && TRUE %in% apply_to_sedesign) {
      if (TRUE %in% verbose) {
         jamba::printDebug("filter_contrast_names(): ",
            "Re-applying to input ", "sedesign", ".")
      }
      contrast_names(sedesign) <- new_contrasts;
      return(sedesign);
   }

   # return new contrast_names
   return(new_contrasts)
}
jmw86069/jamses documentation built on May 31, 2024, 1:36 p.m.