R/agg_results.R

Defines functions agg_results

Documented in agg_results

#' Aggregate results
#'
#' Aggregate enrichment results generated by \link[MSTExplorer]{gen_results}.
#' Counts unique entries in \code{count_var} while grouping by \code{group_var}.
#'
#' Each group is summarised into one row containing: the grouping column(s),
#' the number of unique \code{count_var} values
#' (named \code{n_<count_var>s}, lower-cased),
#' the number of unique genes (\code{n_genes}) and the collapsed gene list
#' (\code{genes}; both \code{NA} when \code{phenos} has no
#' \code{gene_symbol} column), the rounded means of \code{effect}, \code{q}
#' and \code{sd_from_mean}, and the collapsed, line-wrapped unique
#' \code{count_var} values (named after the lower-cased \code{count_var}).
#' @param sep Separator for collapsed character columns.
#' @inheritParams plot_bar_summary
#' @inheritParams HPOExplorer::make_phenos_dataframe
#' @inheritParams HPOExplorer::make_network_object
#' @returns Aggregated \link[data.table]{data.table}
#'
#' @export
#' @import data.table
#' @importFrom stringr str_wrap
#' @import HPOExplorer
#' @examples
#' phenos <- subset_results(filters = list(CellType="Microglia"))
#' agg_res <- agg_results(phenos = phenos)
agg_results <- function(phenos,
                        count_var = "hpo_name",
                        group_var = "CellType",
                        sep="; ",
                        verbose = TRUE){
  # Placeholders for columns referenced via data.table non-standard evaluation.
  effect <- sd_from_mean <- gene_symbol <- NULL;
  messager("Aggregating results by",
           paste0("group_var=",paste(shQuote(group_var),collapse = "/")),
           v=verbose)
  phenos <- HPOExplorer::add_hpo_name(phenos)
  #### Validate inputs ####
  # Checked after add_hpo_name(), since the default count_var ("hpo_name")
  # may only exist once that call has run.
  if (!is.character(count_var) || length(count_var) != 1L) {
    stop("count_var must be a single column name.", call. = FALSE)
  }
  missing_cols <- setdiff(c(count_var, group_var), names(phenos))
  if (length(missing_cols) > 0) {
    stop("Column(s) not found in phenos: ",
         paste(shQuote(missing_cols), collapse = ", "),
         call. = FALSE)
  }
  # Evaluated once here rather than once per group inside the aggregation.
  has_genes <- "gene_symbol" %in% names(phenos)
  #### Aggregate ####
  # Grouping already yields exactly one row per group with the grouping
  # column(s) first, so no de-duplication or column re-selection is needed.
  counts_df <- phenos[, {
    # Look the column up by name (existence validated above) instead of
    # parsing and evaluating count_var as R code.
    count_values <- get(count_var)
    list(
      count = data.table::uniqueN(count_values),
      n_genes = if (has_genes) {
        data.table::uniqueN(gene_symbol, na.rm = TRUE)
      } else {
        NA_integer_
      },
      # Drop NA so the gene list agrees with n_genes (which ignores NA)
      # and never contains a literal "NA" entry.
      genes = if (has_genes) {
        paste(unique(gene_symbol[!is.na(gene_symbol)]), collapse = sep)
      } else {
        NA_character_
      },
      mean_effect = round(mean(effect), 3),
      mean_q = round(mean(q), 3),
      mean_sd_from_mean = round(mean(sd_from_mean), 3),
      # NOTE(review): the previous code wrapped this in
      # paste(..., sep = "<br>"), which is a no-op on a single vector, so
      # line breaks are the "\n" inserted by str_wrap(). Confirm whether
      # HTML "<br>" breaks were intended before changing the output.
      values = stringr::str_wrap(paste(unique(count_values), collapse = sep))
    )
  }, by = group_var]
  #### Rename generic columns after count_var ####
  count_name <- tolower(count_var)
  data.table::setnames(counts_df,
                       c("count", "values"),
                       c(paste0("n_", count_name, "s"), count_name))
  counts_df
}
neurogenomics/MultiEWCE documentation built on May 7, 2024, 1:52 p.m.