R/filter_summary.R
In pmartR: Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

Documented in print.customFilterSummary print.cvFilterSummary print.imdanovaFilterSummary print.moleculeFilterSummary print.proteomicsFilterSummary print.rmdFilterSummary print.RNAFiltSummary print.totalCountFiltSummary summary.customFilt summary.cvFilt summary.imdanovaFilt summary.moleculeFilt summary.proteomicsFilt summary.rmdFilt summary.RNAFilt summary.totalCountFilt

#' Molecule Filter Summary
#'
#' Provide summary of a moleculeFilt S3 object
#'
#' @param object S3 object of class 'moleculeFilt' created by
#'   \code{\link{molecule_filter}}
#' @param min_num integer value specifying the minimum number of times each
#'   feature must be observed across all samples. Default value is NULL.
#' @param ... further arguments passed to or from other methods
#' @return a summary table giving the number of biomolecules by number of
#'   observed values across all samples. If min_num is specified, the numbers of
#'   biomolecules to be filtered and to be retained based on the specified
#'   threshold are reported. If, upon creation of moleculeFilt object,
#'   \code{use_groups = TRUE} or \code{use_batches = TRUE} were specified, the
#'   numbers reported by the summary are based on groups and/or batches.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' myfilter <- molecule_filter(omicsData = pep_object)
#' summary(myfilter)
#' summary(myfilter, min_num = 2)
#'
#' @seealso \code{\link{molecule_filter}}
#'
#' @author Lisa Bramer, Kelly Stratton
#'
#' @export
#' @rdname summary.moleculeFilt
#' @name summary.moleculeFilt

summary.moleculeFilt <- function(object, min_num = NULL, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  if (!is.null(min_num)) {
    # check that min_num is not a vector #
    if (length(min_num) > 1) stop("min_num must be of length 1")
    # check that min_num is numeric >= 0 #
    if (!is.numeric(min_num) | min_num < 0) stop("min_num must be an integer >= 0")
    # check that min_num is an integer #
    if (min_num %% 1 != 0) stop("min_num must be an integer >= 0")
    # check that min_num is less than the max number of observations #
    if (min_num > max(filter_object$Num_Observations)) stop("min_num cannot be greater than the number of samples")
  }

  # return the numeric version of plot, the threshold used, the number that would be tested and the number that would not be tested

  # how many peptides appear in the dataset once, twice, 3 times, etc.
  cut_data <- table(cut(filter_object$Num_Observations, breaks = -1:max(filter_object$Num_Observations)))
  cumcounts <- cumsum(cut_data)
  pep_observation_counts <- data.frame(check.names = FALSE, num_observations = 0:(length(cumcounts) - 1), frequency_counts = cumcounts)

  if (!is.null(min_num)) {
    # get number molecules tested
    num_not_tested <- pep_observation_counts$frequency_counts[pep_observation_counts$num_observations == (min_num - 1)]

    # get number molecules not tested
    num_tested <- pep_observation_counts$frequency_counts[nrow(pep_observation_counts)] - num_not_tested
  } else {
    num_tested = NULL
    num_not_tested = NULL
    min_num = NULL
  }

  res <- list(pep_observation_counts = pep_observation_counts[-1, ], min_num = min_num, num_not_filtered = num_tested, num_filtered = num_not_tested)

  class(res) = c("moleculeFilterSummary", "list")

  attr(res, "use_batch") <- attr(filter_object, "use_batch")
  attr(res, "use_groups") <- attr(filter_object, "use_groups")

  return(res)
}



#' Molecule Filter Print Method
#'
#' Print method for moleculeFilt S3 object
#' @param x the moleculeFilt summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.moleculeFilterSummary

print.moleculeFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  cat("\nSummary of Molecule Filter \n----------------------------------\n")
  if (attr(object, "use_batch") & !attr(object, "use_groups")) {
    cat("\nMinimum Number is Minimum Number per Batch \n----------------------------------\n")
  }
  if (!attr(object, "use_batch") & attr(object, "use_groups")) {
    cat("\nMinimum Number is Minimum Number per Group \n----------------------------------\n")
  }
  if (attr(object, "use_batch") & attr(object, "use_groups")) {
    cat("\nMinimum Number is Minimum Number per Batch and Group \n----------------------------------\n")
  }

  if (!is.null(object$min_num)) {
    cat(sprintf("Minimum Number:%d\nFiltered:%d\nNot Filtered:%d\n\n", object$min_num, object$num_filtered, object$num_not_filtered))
  }
  print(object$pep_observation_counts, row.names = FALSE)
}

#' RNA Filter Summary
#'
#' Provide summary of a RNAFilt S3 object
#'
#' @param object S3 object of class 'RNAFilt' created by
#'   \code{\link{RNA_filter}}.
#' @param min_nonzero integer or float between 0 and 1. Cut-off for number of
#'   unique biomolecules with non-zero counts or as a proportion of total
#'   biomolecules. Defaults to NULL.
#' @param size_library integer cut-off for sample library size (i.e. number of
#'   reads). Defaults to NULL.
#' @param ... further arguments passed to or from other methods
#'
#' @return a summary table giving the minimum, maximum, 1st and 3rd quartiles,
#'   mean and standard deviation for library size (the number of unique
#'   biomolecules with non-zero observations per sample), and the proportion of
#'   non-zero observations over the total number of biomolecules.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' myfilter <- RNA_filter(omicsData = rnaseq_object)
#' summary(myfilter)
#' summary(myfilter, min_nonzero = 2)
#'
#' @author Rachel Richardson
#'
#' @seealso \code{\link{RNA_filter}}
#'
#' @export
#' @rdname summary.RNAFilt
#' @name summary.RNAFilt

summary.RNAFilt <- function(object,
                            size_library = NULL,
                            min_nonzero = NULL,
                            ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # Have a looksie at the class of the filter object.
  if (!inherits(filter_object, "RNAFilt")) {
    # Fezzik, tear his arms off.
    stop("filter_object must be of class RNAFilt")
  }

  ## Checks for size_library as single integer
  if (!is.null(size_library) &&
    (length(size_library) > 1 ||
      size_library %% 1 != 0 ||
      size_library > max(filter_object$LibrarySize)
    )
  ) stop(
    paste0(
      "size_library must be integer of length 1 less than max library size (",
      max(filter_object$LibrarySize),
      ")"
    )
  )

  ## Checks for min_nonzero as single numeric
  if (!is.null(min_nonzero)) {
    ## Length
    if (length(min_nonzero) > 1) stop("min_nonzero must be length 1")

    ## proportion or int
    if (!(min_nonzero %% 1 == 0 || (min_nonzero > 0 && min_nonzero < 1))) stop(
      "min_nonzero must be integer or numeric between 0 and 1."
    )

    ## Within appropriate bounds
    if (min_nonzero %% 1 == 0 && min_nonzero > max(filter_object$NonZero)) stop(
      paste0(
        "min_nonzero exceeds maximum number of non-zero biomolecules (",
        max(filter_object$NonZero),
        ")"
      )
    )

    if (min_nonzero %% 1 != 0 &&
      min_nonzero > max(filter_object$ProportionNonZero)) stop(
      paste0(
        "min_nonzero exceeds maximum proportion of non-zero biomolecules (",
        signif(max(filter_object$ProportionNonZero)),
        ")"
      )
    )
  }

  #---------------
  temp_obj <- filter_object
  if (!is.null(min_nonzero)) {
    column_use <- ifelse(min_nonzero %% 1 == 0,
      "NonZero", "ProportionNonZero"
    )
    temp_obj <- temp_obj[temp_obj[[column_use]] >= min_nonzero, ]
  }

  if (!is.null(size_library)) {
    temp_obj <- temp_obj[temp_obj[["LibrarySize"]] >= size_library, ]
  }

  ## Pre-filter
  df <- as.data.frame(check.names = FALSE, apply(temp_obj[-1], 2, summary))
  df["SD", ] <- apply(temp_obj[-1], 2, sd)

  rmved <- !(filter_object[[colnames(filter_object)[[1]]]] %in% temp_obj[[colnames(filter_object)[[1]]]])

  res <- list(
    Summary = df,
    samples_filtered = filter_object[[colnames(filter_object)[[1]]]][rmved],
    num_filtered = sum(rmved),
    num_not_filtered = sum(!rmved),
    nonzero_thresh = min_nonzero,
    library_thresh = size_library
  )

  class(res) = c("RNAFiltSummary", class(res))

  return(res)
}


#' RNA Filter Print Method
#'
#' Print method for summary of RNAFilt
#' @param x the RNAFilt summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.RNAFiltSummary
print.RNAFiltSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  cat("\nSummary of RNA Filter \n----------------------------------\n")

  if (!is.null(object$library_thresh)) {
    cat(sprintf(
      "Library size cut-off: %d\n\n",
      object$library_thresh
    ))
  }

  if (!is.null(object$nonzero_thresh)) {
    if (object$nonzero_thresh %% 1 == 0) {
      cat(sprintf(
        "N Non-zero biomolecule cut-off: %d\n\n",
        object$nonzero_thresh
      ))
    } else {
      cat(sprintf(
        "Proportion Non-zero biomolecule cut-off: %f\n\n",
        object$nonzero_thresh
      ))
    }
  }

  if (length(object$samples_filtered) > 0) {
    cat(sprintf(
      "Samples Filtered:%d\n Samples Not Filtered:%d\n\n",
      object$num_filtered,
      object$num_not_filtered
    ))
  }

  print(object$Summary)
}


#' Total Count Filter Summary
#'
#' Provide summary of a totalCountFilt S3 object
#'
#' @param object S3 object of class 'totalCountFilt' created by
#' \code{\link{total_count_filter}}.
#' @param min_count numeric value greater than 1 and less than the value
#' given by filter_object$Total_Count. Values below min_count are filtered out.
#' Default value is NULL.
#' @param ... further arguments passed to or from other methods
#' @return a summary of the Total Count values, number of zero values, and
#' non-zero values. If a min_count is provided the biomolecules that would be
#' filtered at this threshold are reported.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' \donttest{
#' library(pmartRdata)
#' myfilt <- total_count_filter(omicsData = rnaseq_object)
#' summary(myfilt, min_count = 15)
#' }
#'
#' @author Rachel Richardson
#'
#' @seealso \code{\link{total_count_filter}}
#'
#' @export
#' @rdname summary.totalCountFilt
#' @name summary.totalCountFilt
summary.totalCountFilt <- function(object, min_count = NULL, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # checks for cv_threshold if not null #
  if (!is.null(min_count)) {
    # check that cv_threshold is numeric
    if (!is.numeric(min_count)) stop("cv_threshold must be numeric of length 1")
    # chack that cv_threshold is of length 1
    if (length(min_count) > 1) stop("cv_threshold must be numeric of length 1")
    # check that cv_threshold is more than 1 and less than max CV value
    if (min_count <= 1 | min_count >= max(filter_object$Total_Counts, na.rm = TRUE)) stop("cv_threshold must be greater than 1 and less than the maximum CV value")
  }

  # get summary of Total_Counts #
  count_sum <- summary(filter_object$Total_Counts)

  # get biomolecules to filter if min_count is not NULL #
  if (!is.null(min_count)) {
    filt <- as.character(filter_object[filter_object$Total_Counts < min_count, 1])
    if (length(filt) == 0) filt <- NULL
  } else filt <- NULL

  res <- list(
    Summary_all = count_sum,
    filtered_biomolecules = length(filt)
  )

  class(res) = c("totalCountFiltSummary", "list")

  return(res)
}

#' Total Count Filter Print Method
#'
#' Print method for summary of Total Count filter
#' @param x the Total Count filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.totalCountFiltSummary
#'
print.totalCountFiltSummary <- function(x, ...) {
  object <- x

  # create output #
  cat(c(
    "\nSummary of Total Count Filter\n----------------------\nCounts:\n",
    capture.output(object$Summary_all)
  ), sep = "\n")
  cat(c("\nNumber Filtered Biomolecules:", paste(object$filtered_biomolecules, collapse = ", "), "\n\n"))
}


#' Proteomics Filter Summary
#'
#' Provide summary of a proteomicsFilt S3 object
#'
#' @param object S3 object of class 'proteomicsFilt' created by
#'   \code{\link{proteomics_filter}}.
#' @param min_num_peps optional integer value between 1 and the maximum
#'   number of peptides that map to a protein in the data. The value specifies
#'   the minimum number of peptides that must map to a protein. Any protein with
#'   less than \code{min_num_peps} mapping to it will be returned as a protein
#'   that should be filtered. Default value is NULL.
#' @param redundancy logical indicator of whether to filter out 'degenerate' or 'redundant'
#'   peptides (i.e. peptides mapping to multiple proteins) (TRUE) or not
#'   (FALSE). Default value is FALSE.
#' @param ... further arguments passed to or from other methods
#' @return a summary table giving the number of Observed Proteins per Peptide
#'   and number of Observed Peptides per Protein. If min_num_peps is specified
#'   and/or redundancy is TRUE, the number of biomolecules to be filtered with
#'   the specified threshold(s) are reported.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' myfilt <- proteomics_filter(omicsData = pep_object)
#' summary(myfilt, redundancy = TRUE) # there are no degenerate peptides to filter out
#' summary(myfilt, min_num_peps = 2)
#'
#' @author Lisa Bramer
#'
#' @seealso \code{\link{proteomics_filter}}
#'
#' @rdname summary.proteomicsFilt
#' @name summary.proteomicsFilt
#' @export
summary.proteomicsFilt <- function(object, min_num_peps = NULL, redundancy = FALSE,
                                   ...) {
  filter_object <- object
  counts_by_pro <- filter_object$counts_by_pro
  counts_by_pep <- filter_object$counts_by_pep
  
  pro_id <- names(counts_by_pro)[names(counts_by_pro) != "n"]
  pep_id <- names(counts_by_pep)[names(counts_by_pep) != "n"]
  
  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # error checks for min_num_peps, if not NULL #
  if (!is.null(min_num_peps)) {
    # check that min_num_peps is not a vector #
    if (length(min_num_peps) > 1) stop("min_num_peps must be of length 1")
    # check that min_num_peps is numeric and >=1 #
    if (!is.numeric(min_num_peps) | min_num_peps < 1) stop("min_num_peps must be an integer greater than or equal to 1")
    # check that min_num_peps is an integer #
    if (min_num_peps %% 1 != 0) stop("min_num_peps must be an integer greater than or equal to 1")
    # check that min_num_peps is of length 1 #
    if (length(min_num_peps) != 1) stop("min_num_peps must be of length 1")
    # check that min_num_peps is less than the total number of peptides #
    if (min_num_peps > max(max(counts_by_pep$n), max(filter_object$counts_by_pro$n))) stop("min_num_peps cannot be greater than the total number of peptides")
  }
  # check that redundancy is logical #
  if (!is.logical(redundancy)) stop("redundancy must be either TRUE or FALSE")


  pep_sum <- summary(counts_by_pep$n)
  pro_sum <- summary(counts_by_pro$n)

  if (redundancy) {
    count_bypep <- counts_by_pep
    
    # pull peptides with more than one row in e_meta.
    degen_peptides <- as.character(
      data.frame(check.names = FALSE, count_bypep[which(count_bypep$n > 1), ])[, pep_id]
    )
    
    ## identify any proteins that now will not have peptides mapping to them ##
    ## find rows in e_meta that correspond to peptides to be filtered ##
    pepfilt.ids = which(attr(filter_object, "e_meta")[, pep_id] %in% degen_peptides)
    
    # Fish out the indices for the proteins that will be filtered.
    add_prots <- as.character(setdiff(
      attr(filter_object, "e_meta")[pepfilt.ids, pro_id],
      attr(filter_object, "e_meta")[-pepfilt.ids, pro_id]
    ))
    
    if (length(add_prots) == 0) {
      add_prots = NULL
    }
    
    if (length(degen_peptides) == 0) {
      degen_peptides = NULL
    }
    
    pepe <- list(
      e_data_remove = degen_peptides,
      e_meta_remove = add_prots
    )
  } else {
    pepe <- list(
      e_data_remove = NULL,
      e_meta_remove = NULL
    )
  }
  
  # Check if min_num_peps is not NULL.
  if (!is.null(min_num_peps)) {
    if (!is.null(pepe$e_data_remove)) {
      # find the which proteins had peptides removed.
      peps_rmv_by_pro <- attr(filter_object, "e_meta") %>% 
        dplyr::filter(!!dplyr::sym(pep_id) %in% pepe$e_data_remove) %>%
        dplyr::group_by(!!dplyr::sym(pro_id)) %>%
        dplyr::summarise(n = dplyr::n())
      
      # decrement the counts of proteins that had redundant peptides removed.
      temp_counts <- counts_by_pro 
      decr_idx = match(peps_rmv_by_pro[[pro_id]],temp_counts[[pro_id]])
      temp_counts[decr_idx,]$n <- temp_counts[decr_idx,]$n - peps_rmv_by_pro$n 
    } else {
      temp_counts <- counts_by_pro
    }
    
    # Find all rows with proteins that map to fewer peptides than min_num_peps.
    pro_idx <- which(temp_counts$n < min_num_peps)
    
    # pull proteins with less than min_num_peps.
    pro_filt <- as.character(counts_by_pro[pro_idx, pro_id])
    
    # Find rows in e_meta that correspond to proteins to be filtered.
    protfilt.ids = which(attr(filter_object, "e_meta")[, pro_id] %in% pro_filt)
    
    # Extricate the indices for the peptides that will be filtered.
    pep_filt <- as.character(setdiff(
      attr(filter_object, "e_meta")[protfilt.ids, pep_id],
      attr(filter_object, "e_meta")[-protfilt.ids, pep_id]
    ))
    
    if (length(pep_filt) == 0) {
      pep_filt = NULL
    }
    
    if (length(pro_filt) == 0) {
      pro_filt = NULL
    }
    
    pepe2 <- list(
      e_meta_remove = pro_filt,
      e_data_remove = pep_filt
    )
  } else {
    pepe2 <- list(
      e_data_remove = NULL,
      e_meta_remove = NULL
    )
  }

  ## consolidate pepe and pepe2 ##
  filter_object_new <- list(e_meta_remove = unique(c(pepe$e_meta_remove, pepe2$e_meta_remove)), e_data_remove = unique(c(pepe$e_data_remove, pepe2$e_data_remove)))

  num_filtered <- lapply(filter_object_new, length)
  # return the 5 number summary for both parts of the filter, give total number of peps and/or prots filtered
  res <- list(num_per_pep = pep_sum, num_per_pro = pro_sum, num_pep_filtered = num_filtered$e_data_remove, num_pro_filtered = num_filtered$e_meta_remove)
  res$num_pro_notfiltered = nrow(counts_by_pro) - res$num_pro_filtered
  res$num_pep_notfiltered = nrow(counts_by_pep) - res$num_pep_filtered
  class(res) = c("proteomicsFilterSummary", "list")

  return(res)
}

#' Proteomics Filter Print Method
#'
#' Print method for summary of proteomics filter
#' @param x the proteomics filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.proteomicsFilterSummary
#'
print.proteomicsFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  catmat <- data.frame(check.names = FALSE, 
    c(object$num_per_pep, " ", "Peptides", object$num_pep_filtered, object$num_pep_notfiltered),
    c(object$num_per_pro, " ", "Proteins", object$num_pro_filtered, object$num_pro_notfiltered)
  )
  colnames(catmat) <- c("Obs Proteins Per Peptide", "Obs Peptides Per Protein")
  rownames(catmat) <- c(names(object$num_per_pep), " ", "  ", "Filtered", "Not Filtered")

  cat("\nSummary of Proteomics Filter \n\n")
  cat(capture.output(catmat), sep = "\n")
  cat("\n")
}

#' IMD-ANOVA Filter Summary
#'
#' Provide summary of a imdanovaFilt S3 object
#'
#' @param object S3 object of class 'imdanovaFilt' created by
#'   \code{\link{imdanova_filter}}.
#' @param min_nonmiss_gtest integer value specifying the minimum number of
#'   non-missing feature values allowed per group for \code{gtest_filter}.
#'   Defaults to NULL. Suggested value is 3.
#' @param min_nonmiss_anova integer value specifying the minimum number of
#'   non-missing feature values allowed per group for \code{anova_filter}.
#'   Defaults to NULL. Suggested value is 2.
#' @param comparisons data frame with columns for "Control" and "Test"
#'   containing the different comparisons of interest. Comparisons will be made
#'   between the Test and the corresponding Control (e.g. Control is the
#'   reference group).
#' @param ... further arguments passed to or from other methods
#'
#' @return If min_nonmiss_gtest or min_nonmiss_anova is specified, the number of
#'   biomolecules to be filtered with the specified threshold are reported.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' mypep <- group_designation(omicsData = pep_object, main_effects = "Phenotype")
#' myfilt <- imdanova_filter(omicsData = mypep)
#' summary(myfilt, min_nonmiss_anova = 2, min_nonmiss_gtest = 3)
#'
#' @seealso \code{\link{imdanova_filter}}
#'
#' @author Lisa Bramer
#'
#' @rdname summary.imdanovaFilt
#' @name summary.imdanovaFilt
#' @export
summary.imdanovaFilt <- function(object, min_nonmiss_anova = NULL,
                                 min_nonmiss_gtest = NULL, comparisons = NULL, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  ## initial checks ##

  # it is fine if both min_nonmiss_anova and min_nonmiss_gtest are NULL in the summary function #

  # check that if they aren't NULL, min_nonmiss_anova and min_nonmiss_gtest are numeric, >=2 and >=3, respectively,
  # and neither are bigger than the minimum group size (group_sizes in an attribute of the filter_object, see below) #
  if (!is.null(min_nonmiss_anova)) {
    # check that min_nonmiss_anova is not a vector #
    if (length(min_nonmiss_anova) > 1) stop("min_nonmiss_anova must be of length 1")
    # check that min_nonmiss_anova is numeric >= 2 #
    if (!is.numeric(min_nonmiss_anova) | min_nonmiss_anova < 2) stop("min_nonmiss_anova must be an integer >= 2")
    # check that min_nonmiss_anova is an integer #
    if (min_nonmiss_anova %% 1 != 0) stop("min_nonmiss_anova must be an integer >= 2")
    # check that min_nonmiss_anova is less than the minimum group size among non-singleton groups #
    nonsingleton_groups <- attributes(filter_object)$nonsingleton_groups
    if (min_nonmiss_anova > min(attributes(filter_object)$group_sizes$n_group[which(attributes(filter_object)$group_sizes$Group %in% nonsingleton_groups)])) stop("min_nonmiss_anova cannot be greater than the minimum group size")
  }
  if (!is.null(min_nonmiss_gtest)) {
    # check that min_nonmiss_gtest is not a vector #
    if (length(min_nonmiss_gtest) > 1) stop("min_nonmiss_gtest must be of length 1")
    # check that min_nonmiss_gtest is numeric >= 3 #
    if (!is.numeric(min_nonmiss_gtest) | min_nonmiss_gtest < 3) stop("min_nonmiss_gtest must be an integer >= 3")
    # check that min_nonmiss_gtest is an integer #
    if (min_nonmiss_gtest %% 1 != 0) stop("min_nonmiss_gtest must be an integer >= 3")
    # check that min_nonmiss_gtest is less than the minimum group size #
    nonsingleton_groups <- attributes(filter_object)$nonsingleton_groups
    if (min_nonmiss_gtest > min(attributes(filter_object)$group_sizes$n_group[which(attributes(filter_object)$group_sizes$Group %in% nonsingleton_groups)])) stop("min_nonmiss_gtest cannot be greater than the minimum group size")
  }

  ## end of initial checks ##

  # Check if data are paired. If they are we will filter biomolecules on paired
  # differences.
  if (!is.null(
    attr(attr(attr(filter_object, "omicsData"), "group_DF"), "pair_id")
  )) {
    # Create an omicsData object on the differences.
    diff_omicsData <- as.diffData(attr(filter_object, "omicsData"))

    # Create a new filter object with the differenced data. (For future readers:
    # I meant to use the word "differenced". I hope it made you chuckle.) We
    # need this object for the counts based on differences instead of pairs
    # because later we will filter and perform statistics on the differences.
    filter_object <- imdanova_filter(diff_omicsData)
  }

  my_names <- as.character(attr(filter_object, "group_sizes")$Group)

  # if min_nonmiss_anova is not provided, the vector of zeros will indicate nothing needs removing
  inds_rm_anova <- rep(0, nrow(filter_object))

  # if min_nonmiss_gtest is not provided, the vector of zeros will indicate nothing needs removing
  inds_rm_gtest <- rep(0, nrow(filter_object))

  # Determine what will be filtered: ANOVA -------------------------------------

  # if min_nonmiss_anova is provided
  if (!is.null(min_nonmiss_anova)) {
    # Check if there are no main effects. This can occur when data are paired.
    if ("paired_diff" %in% my_names) {
      inds_rm_anova <- filter_object$paired_diff < min_nonmiss_anova
    } else {
      # dplyr mumbo jumbo!
      # Determine which groups meet the min_nonmiss_anova criterion.
      inds_rm_anova <- filter_object %>%
        # Determine which groups have non-missing counts above the cutoff.
        dplyr::mutate(
          dplyr::across(dplyr::any_of(my_names), ~ . >= min_nonmiss_anova)
        ) %>%
        # Count the number of groups above the cutoff.
        dplyr::mutate(
          n_groups = rowSums(dplyr::across(dplyr::any_of(my_names)))
        )

      # Determine what will be filtered when comparisons is NULL.
      if (is.null(comparisons)) {
        # dplyr mumbo jumbo!
        # When comparisons are NULL we need at least two groups (across all
        # groups) that have counts above the min_nonmiss_anova threshold.
        inds_rm_anova <- inds_rm_anova %>%
          # Count the number of groups above the cutoff.
          dplyr::mutate(insufficient = n_groups < 2) %>%
          dplyr::pull(insufficient)
      } else {
        # Grab the groups in the Test and Control columns.
        testers <- unique(comparisons$Test)
        controllers <- unique(comparisons$Control)
        combiners <- unique(c(testers, controllers))

        # Sum across the unique groups in test, control, and combined (the
        # unique set of groups from both test and control). This will be used to
        # determine which rows need to be filtered depending on which scenario
        # we are in.
        inds_rm_anova <- inds_rm_anova %>%
          dplyr::mutate(
            n_test = rowSums(dplyr::across(dplyr::all_of(testers))),
            n_control = rowSums(dplyr::across(dplyr::all_of(controllers))),
            n_combine = rowSums(dplyr::across(dplyr::all_of(combiners)))
          )

        # Scenario 1: one group is compared to multiple other groups.
        if (length(controllers) == 1) {
          inds_rm_anova <- inds_rm_anova %>%
            # Count the number of groups above the cutoff.
            dplyr::mutate(insufficient = n_groups < 2) %>%
            # We can't filter the rows like the code does in applyFilt because
            # the length of inds_rm_anova and inds_rm_gest need to be the same.
            # Filtering either inds_rm_anova or inds_rm_gtest would break the
            # code at the end that counts the number of biomolecules that are
            # either filtered or not filtered.
            #
            # Change values in insufficient according to the values in n_test
            # and n_control. Rows in insufficient without any groups above the
            # cutoff in test or control must be TRUE (the corresponding
            # biomolecule is removed).
            #
            # We can't test one thing against nothing :)
            dplyr::mutate(
              insufficient = dplyr::case_when(
                n_test == 0 | n_control == 0 ~ TRUE,
                TRUE ~ insufficient
              )
            ) %>%
            dplyr::pull(insufficient)

          # Scenario 2: Some groups are compared to some other groups. In this
          # scenario a group can be in both test and control.
        } else {
          inds_rm_anova <- inds_rm_anova %>%
            # Count the number of groups above the cutoff.
            dplyr::mutate(insufficient = n_groups < 2) %>%
            # We can't filter the rows like the code does in applyFilt because
            # the length of inds_rm_anova and inds_rm_gest need to be the same.
            # Filtering either inds_rm_anova or inds_rm_gtest would break the
            # code at the end that counts the number of biomolecules that are
            # either filtered or not filtered.
            #
            # Change values in insufficient according to the values in n_test,
            # n_control and n_combine. Rows in insufficient without any groups
            # above the cutoff in control or test and rows where there is at
            # least one group in test or control but the count of combined
            # groups is less than two need to be TRUE (the corresponding
            # biomolecule is removed).
            #
            # We can't test one thing against nothing nor can we test one thing
            # against itself :)
            dplyr::mutate(
              insufficient = dplyr::case_when(
                (n_test == 0 | n_control == 0) |
                  (n_test > 0 & n_control > 0 & n_combine < 2) ~ TRUE,
                TRUE ~ insufficient
              )
            ) %>%
            dplyr::pull(insufficient)
        }
      }
    }
  }

  # Determine what will be filtered: G-Test ------------------------------------

  # if min_nonmiss_gtest is provided
  if (!is.null(min_nonmiss_gtest)) {
    # Check if there are no main effects. This can occur when data are paired.
    if ("paired_diff" %in% my_names) {
      # We cannot run a gtest when there are no main effects. Create a vector of
      # zeros so the gtest will not affect the counting below.
      inds_rm_gtest <- rep(0, dim(filter_object)[[1]])
    } else {
      # Determine what will be filtered when comparisons is NULL.
      if (is.null(comparisons)) {
        # dplyr mumbo jumbo!
        # When comparisons are NULL we need at least one group (across all
        # groups) that has a count above min_nonmiss_gtest.
        inds_rm_gtest <- filter_object %>%
          # Determine which groups have non-missing counts above the cutoff.
          dplyr::mutate(
            # Find groups above G-Test criterion.
            dplyr::across(dplyr::all_of(my_names), ~ . >= min_nonmiss_gtest),
            # Count number of groups above G-Test criterion.
            n_groups = rowSums(dplyr::across(dplyr::all_of(my_names))),
            # Determine which biomolecules have insufficient data.
            insufficient = n_groups == 0
          ) %>%
          dplyr::pull(insufficient)

        # Determine what will be filtered when custom comparisons are provided.
      } else {
        # Grab the groups in both the Test and Control columns.
        combiners <- unique(c(comparisons$Test, comparisons$Control))

        # dplyr mumbo jumbo!
        inds_rm_gtest <- filter_object %>%
          dplyr::mutate(
            # Find groups specified in comparisons above G-Test criterion.
            dplyr::across(dplyr::all_of(combiners), ~ . >= min_nonmiss_gtest),
            # Count number of groups above G-Test criterion.
            n_groups = rowSums(dplyr::across(dplyr::all_of(combiners))),
            # Determine which biomolecules have insufficient data.
            insufficient = n_groups == 0
          ) %>%
          dplyr::pull(insufficient)
      }
    }
  }

  # Count number of things that will be filtered -------------------------------

  # if-statement for the case where both min_nonmiss_anova and min_nonmiss_gtest
  # are non-NULL. Note: num_not_tested will be the count of (inds_rm_anova +
  # inds_rm_gtest) that sum to 2. That is the intersection of both filters.
  if (!is.null(min_nonmiss_anova) & !is.null(min_nonmiss_gtest)) {
    # We will count differently if the data are paired and there are no main
    # effects. In this case we only need the sum of inds_rm_anova and
    # inds_rm_gtest to be greater than zero.
    if ("paired_diff" %in% my_names) {
      # get number molecules not tested
      num_filtered <- sum((inds_rm_anova + inds_rm_gtest) > 0)
    } else {
      # get number molecules not tested
      num_filtered <- sum((inds_rm_anova + inds_rm_gtest) > 1)
    }

    # get number molecules tested
    num_not_filtered <- nrow(filter_object) - num_filtered
  } else if (!is.null(min_nonmiss_anova) | !is.null(min_nonmiss_gtest)) {
    # get number molecules not tested
    num_filtered <- sum((inds_rm_anova + inds_rm_gtest) > 0)

    # get number molecules tested
    num_not_filtered <- nrow(filter_object) - num_filtered
  } else {
    num_not_filtered = NULL
    num_filtered = NULL
  }

  res <- list(
    pep_observation_counts = nrow(filter_object),
    num_filtered = num_filtered,
    num_not_filtered = num_not_filtered
  )

  class(res) = c("imdanovaFilterSummary", "list")

  return(res)
}


#' IMD-ANOVA Filter Print Method
#'
#' Print method for summary of imdanova filter
#' @param x the imdanova filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.imdanovaFilterSummary
#'
print.imdanovaFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  if (is.null(object$num_filtered)) {
    object$num_filtered <- NA
  }

  if (is.null(object$num_not_filtered)) {
    object$num_not_filtered <- NA
  }

  catmat <- data.frame(check.names = FALSE, c(object$pep_observation_counts, object$num_filtered, object$num_not_filtered))
  rownames(catmat) <- c("Total Observations: ", "Filtered: ", "Not Filtered: ")

  colnames(catmat) <- NULL

  cat("\nSummary of IMD-ANOVA Filter\n")
  cat(capture.output(catmat), sep = "\n")
}


#' RMD Filter Summary
#'
#' Provide summary of a rmdFilt S3 object
#'
#' @param object S3 object of class 'rmdFilt' created by
#'   \code{\link{rmd_filter}}.
#' @param pvalue_threshold A threshold for the Robust Mahalanobis Distance (RMD)
#'   p-value. All samples with p-values below the threshold will be filtered
#'   out. Default value is NULL. Suggested value is 0.0001
#' @param ... further arguments passed to or from other methods
#' @return a summary of the p-values associated with running RMD-PAV across all
#'   samples. If a p-value threshold is provided the samples that would be
#'   filtered at this threshold are reported.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' mymetab <- group_designation(omicsData = metab_object, main_effects = "Phenotype")
#' mymetab <- edata_transform(omicsData = mymetab, data_scale = "log2")
#' myfilt <- rmd_filter(omicsData = mymetab)
#' summary(myfilt, pvalue_threshold = 0.001)
#'
#' @author Lisa Bramer, Kelly Stratton
#'
#' @seealso \code{\link{rmd_filter}}
#'
#' @export
#' @rdname summary.rmdFilt
#' @name summary.rmdFilt
summary.rmdFilt <- function(object, pvalue_threshold = NULL, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # check that pvalue_threshold is numeric [0,1] #
  if (!is.null(pvalue_threshold)) {
    if (!is.numeric(pvalue_threshold)) stop("pvalue_threshold must be numeric between 0 and 1")
    if (pvalue_threshold < 0 | pvalue_threshold > 1) stop("pvalue_threshold must be numeric between 0 and 1")
  }

  # get metrics used #
  samp_id <- names(attr(filter_object, "group_DF"))[1]
  metrics <- attributes(filter_object)$metrics

  # get samples filtered out, if pvalue_threshold is specified #
  filt <- NULL
  if (!is.null(pvalue_threshold)) {
    filt <- as.character(filter_object[filter_object$pvalue < pvalue_threshold, samp_id])
    if (length(filt) == 0) filt <- NULL
  }

  # Check if data are paired. If they are we will make sure no pairs are split
  # (e.g., one sample in a pair will be filtered but the other sample will not).
  if (!is.null(attr(attr(filter_object, "group_DF"), "pair_id"))) {
    # Snatch the sample name and pair name as they will be used in multiple
    # places. It will save some typing. ... However, all the typing I just saved
    # has probably been undone by writing this comment.
    sample_name <- attr(filter_object, "fdata_cname")
    pair_name <- attr(attr(filter_object, "group_DF"), "pair_id")

    # !#!#!#!#!#!#!#!#!#!
    # The following code checks if the samples in a pair will be split. For
    # example, one sample in a pair will be filtered and another sample in the
    # pair will not be filtered. If a pair is split remove ALL samples
    # associated with that pair.
    # !#!#!#!#!#!#!#!#!#!

    # Snag the associated pair IDs for the samples that will be filtered.
    filtered_pairs <- attr(filter_object, "fdata") %>%
      dplyr::filter(!!dplyr::sym(sample_name) %in% filt) %>%
      dplyr::pull(!!dplyr::sym(pair_name))

    # Go back to f_data and nab all the sample names corresponding to the pair
    # IDs associated with the original samples that were selected for removal.
    # These sample names will be used to filter the data. This vector will not
    # be the same as the original vector if any pairs were split. If there
    # were no pairs split the two vectors will be the same.
    filt <- attr(filter_object, "fdata") %>%
      dplyr::filter(!!dplyr::sym(pair_name) %in% filtered_pairs) %>%
      dplyr::pull(!!dplyr::sym(sample_name)) %>%
      as.character()
  }

  # for display #
  if (is.null(filt)) {
    filt <- "NULL"
  }

  res <- list(pvalue = summary(filter_object$pvalue), metrics = metrics, filtered_samples = filt)

  class(res) = c("rmdFilterSummary", "list")

  return(res)
}

#' RMD Filter Print Method
#'
#' Print method for summary of RMD filter
#' @param x the RMD filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.rmdFilterSummary
#'
print.rmdFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  cat(c("\nSummary of RMD Filter\n----------------------\nP-values:\n", capture.output(object$pvalue)), sep = "\n")
  cat(c("\nMetrics Used:", paste(object$metrics, collapse = ", "), "\n"))
  cat(c("\nFiltered Samples:", paste(object$filtered_samples, collapse = ", "), "\n\n"))
}


#' Coefficient of Variation (CV) Filter Summary
#'
#' Provide summary of a cvFilt S3 object
#'
#' @param object S3 object of class 'cvFilt' created by
#'   \code{\link{cv_filter}}.
#' @param cv_threshold numeric value greater than 1 and less than the value
#'   given by filter_object$CV. CV values above cv_threshold are filtered out.
#'   Default value is NULL.
#' @param ... further arguments passed to or from other methods
#' @return a summary of the CV values, number of NA values, and non-NA values.
#'   If a CV threshold is provided, the biomolecules that would be filtered
#'   based on this threshold are reported.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' mypep <- group_designation(omicsData = pep_object, main_effects = "Phenotype")
#' to_filter <- cv_filter(omicsData = mypep, use_groups = TRUE)
#' summary(to_filter, cv_threshold = 30)
#'
#' @author Lisa Bramer
#'
#' @seealso \code{\link{cv_filter}}
#'
#' @export
#' @rdname summary.cvFilt
#' @name summary.cvFilt
summary.cvFilt <- function(object, cv_threshold = NULL, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # checks for cv_threshold if not null #
  if (!is.null(cv_threshold)) {
    # check that cv_threshold is numeric
    if (!is.numeric(cv_threshold)) stop("cv_threshold must be numeric of length 1")
    # chack that cv_threshold is of length 1
    if (length(cv_threshold) > 1) stop("cv_threshold must be numeric of length 1")
    # check that cv_threshold is more than 1 and less than max CV value
    if (cv_threshold <= 1 | cv_threshold >= max(filter_object$CV, na.rm = TRUE)) stop("cv_threshold must be greater than 1 and less than the maximum CV value")
  }

  # get rid of NAs #
  new_object <- filter_object[!is.na(filter_object$CV), ]

  # get summary of CVs #
  CVs <- summary(new_object$CV)

  if (!is.null(attributes(filter_object)$tot_nas)) {
    # get total NAs, total non-NAs #
    tot_NAs <- attributes(filter_object)$tot_nas
    tot_non_NAs <- nrow(filter_object) - tot_NAs
  } else {
    # get total zeros, total non-zeros #
    tot_zeros <- attributes(filter_object)$tot_zeros
    tot_non_zeros <- nrow(filter_object) - tot_zeros
  }

  # get biomolecules to filter if cv_threshold is not NULL #
  filt <- NULL
  if (!is.null(cv_threshold)) {
    filt <- as.character(new_object[new_object$CV > cv_threshold, 1])
    if (length(filt) == 0) filt <- NULL
  }

  if (!is.null(attributes(filter_object)$tot_nas)) {
    res <- list(
      CVs = CVs,
      tot_NAs = tot_NAs,
      tot_non_NAs = tot_non_NAs,
      filtered_biomolecules = length(filt)
    )
  } else {
    res <- list(
      CVs = CVs,
      tot_zeros = tot_zeros,
      tot_non_zeros = tot_non_zeros,
      filtered_biomolecules = length(filt)
    )
  }

  class(res) = c("cvFilterSummary", "list")

  return(res)
}

#' CV Filter Print Method
#'
#' Print method for summary of CV filter
#' @param x the CV filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.cvFilterSummary
#'
print.cvFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # create output #
  cat(c("\nSummary of Coefficient of Variation (CV) ",
        "Filter\n----------------------\nCVs:\n",
        capture.output(object$CVs)),
      sep = "\n")
  if (!is.null(object$tot_NAs)) {
    cat(c("\nTotal NAs:", object$tot_NAs))
    cat(c("\nTotal Non-NAs:", object$tot_non_NAs, "\n"))
  } else {
    cat(c("\nTotal zeros:", object$tot_zeros))
    cat(c("\nTotal Non-zeros:", object$tot_non_zeros, "\n"))
  }
  cat(c("\nNumber Filtered Biomolecules:", 
        paste(object$filtered_biomolecules, collapse = ", "), "\n\n"))
}


#' Custom Filter Summary
#'
#' Provide summary of a customFilt S3 object
#'
#' @param object S3 object of class 'customFilt' created by
#'   \code{\link{custom_filter}}.
#' @param ... further arguments passed to or from other methods
#'
#' @return a summary of the items in e_data, f_data, and e_meta that will be
#'   removed as a result of applying the custom filter.
#'
#' @examplesIf requireNamespace("pmartRdata", quietly = TRUE)
#' library(pmartRdata)
#' to_filter <- custom_filter(omicsData = metab_object, e_data_remove = "fumaric acid",
#'                            f_data_remove = "Sample_1_Phenotype2_B")
#' summary(to_filter)
#'
#' to_filter2 <- custom_filter(omicsData = metab_object, 
#'                             f_data_keep = metab_object$f_data$SampleID[1:10])
#' summary(to_filter2)
#'
#' @author Lisa Bramer
#'
#' @seealso \code{\link{custom_filter}}
#'
#' @export
#' @rdname summary.customFilt
#' @name summary.customFilt
summary.customFilt <- function(object, ...) {
  filter_object <- object

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  # get omicsData object #
  omicsData <- attr(filter_object, "omicsData")
  summary_orig <- summary(omicsData)

  # get names #
  edata_id <- get_edata_cname(omicsData)
  emeta_id <- get_emeta_cname(omicsData)
  samp_id <- get_fdata_cname(omicsData)

  # get counts #
  num_samples <- attr(filter_object, "num_samples")
  num_edata <- attr(filter_object, "num_edata")
  num_emeta <- attr(filter_object, "num_emeta") ## is null where not used

  # apply the filter #
  filtered_data <- applyFilt(filter_object, omicsData)
  summary_filt <- summary(filtered_data)

  mode_rmv <- !is.null(filter_object$e_data_remove) |
    !is.null(filter_object$f_data_remove) |
    !is.null(filter_object$e_meta_remove)

  mode_kp <- !is.null(filter_object$e_data_keep) |
    !is.null(filter_object$f_data_keep) |
    !is.null(filter_object$e_meta_keep)

  # if filter_object contains removes
  if (mode_rmv) {
    # samples #
    if (!is.null(filter_object$f_data_remove)) {
      samps_filt <- length(filter_object$f_data_remove)
      samps_left <- num_samples - samps_filt
    } else {
      samps_filt <- 0
      samps_left <- num_samples
    }

    # e_data #
    if (!is.null(filter_object$e_data_remove)) {
      edata_filt <- length(filter_object$e_data_remove)
      edata_left <- num_edata - edata_filt
    } else {
      edata_filt <- num_edata - nrow(filtered_data$e_data)
      edata_left <- nrow(filtered_data$e_data)
    }

    # e_meta #
    if (!is.null(num_emeta)) {
      if (!is.null(filter_object$e_meta_remove)) {
        emeta_filt <- length(filter_object$e_meta_remove)
        emeta_left <- num_emeta - emeta_filt
      } else {
        emeta_filt <- num_emeta - length(
          unique(filtered_data$e_meta[, emeta_id])
        )
        emeta_left <- length(unique(filtered_data$e_meta[, emeta_id]))
      }
    }

    disp_colnames <- c("Filtered", "Remaining", "Total")
  }

  # if filter_object contains keeps
  # tags of `_left` and `_filt` correspond to items kept and items not kept
  if (mode_kp) {
    # samples #
    if (!is.null(filter_object$f_data_keep)) {
      samps_left <- length(filter_object$f_data_keep)
      samps_filt <- num_samples - samps_left
    } else {
      samps_left <- num_samples
      samps_filt <- 0
    }

    # e_data #
    if (!is.null(filter_object$e_data_keep)) {
      edata_left <- length(filter_object$e_data_keep)
      edata_filt <- num_edata - edata_left
    } else {
      edata_left <- nrow(filtered_data$e_data)
      edata_filt <- num_edata - nrow(filtered_data$e_data)
    }

    # e_meta #
    if (!is.null(num_emeta)) {
      if (!is.null(filter_object$e_meta_keep)) {
        emeta_left <- length(filter_object$e_meta_keep)
        emeta_filt <- num_emeta - emeta_left
      } else {
        emeta_left <- length(unique(filtered_data$e_meta[, emeta_id]))
        emeta_filt <- num_emeta - length(
          unique(filtered_data$e_meta[, emeta_id])
        )
      }
    }

    disp_colnames <- c("Discarded", "Kept", "Total")
  }

  # Display #

  ## Display text ##
  edata_plural_text <- ifelse(length(grep("s$", edata_id)) > 0, " ", "s ")
  emeta_plural_text <- ifelse(length(grep("s$", emeta_id)) > 0, " ", "s ")
  samp_plural_text <- ifelse(length(grep("s$", samp_id)) > 0, " ", "s ")
  samp_id <- paste(samp_id, samp_plural_text, "(f_data)", sep = "")
  edata_id <- paste(edata_id, edata_plural_text, "(e_data)", sep = "")
  display_emeta_id <- paste(emeta_id, emeta_plural_text, "(e_meta)", sep = "")

  ## construct data frame ##
  if (is.null(emeta_id)) {
    disp <- data.frame(check.names = FALSE, 
      c(samps_filt, edata_filt),
      c(samps_left, edata_left),
      c(num_samples, num_edata)
    )
    rownames(disp) <- c(samp_id, edata_id)
  } else {
    disp <- data.frame(check.names = FALSE, 
      c(samps_filt, edata_filt, emeta_filt),
      c(samps_left, edata_left, emeta_left),
      c(num_samples, num_edata, num_emeta)
    )
    rownames(disp) <- c(samp_id, edata_id, display_emeta_id)
  }

  colnames(disp) <- disp_colnames

  class(disp) = c("customFilterSummary", "data.frame")

  return(disp)
}


#' Custom Filter Print Method
#'
#' Print method for summary of custom filter
#' @param x the custom filter summary to print
#' @param ... further arguments passed to or from other methods
#'
#' @return No return value, prints details about x
#' 
#' @export
#' @name print.customFilterSummary
#'
print.customFilterSummary <- function(x, ...) {
  object <- x

  # Make sure we only have valid arguments
  if (length(list(...)) > 0) {
    warning("unused argument(s): ",
             toString(as.list(tail(match.call(), length(list(...))))))
  }
  
  ## Display output ##
  cat("\nSummary of Custom Filter\n\n")
  print.data.frame(object)
  cat("\n")
}
Any scripts or data that you put into this service are public.
pmartR documentation built on June 10, 2025, 9:10 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
pmartR
Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

R/filter_summary.R
In pmartR: Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

Try the pmartR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pmartR Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

R/filter_summary.R In pmartR: Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

Try the pmartR package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

pmartR
Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data

R/filter_summary.R
In pmartR: Panomics Marketplace - Quality Control and Statistical Analysis for Panomics Data