R/get_representative_solutions.R

Defines functions get_representative_solutions

Documented in get_representative_solutions

#' Extract representative solutions from a matrix of ARIs
#'
#' Following clustering with `batch_snf`, a matrix of pairwise ARIs that show
#' how related each cluster solution is to each other can be generated by the
#' `calc_aris` function. Partitioning of the ARI matrix can be done by
#' visual inspection of `meta_cluster_heatmap()` results or by
#' `shiny_annotator`. Given the indices of meta cluster boundaries, this
#' function will return a single representative solution from each meta cluster
#' based on maximum average ARI to all other solutions within that meta
#' cluster.
#'
#' @param aris Matrix of adjusted rand indices from `calc_aris()`
#' @param sol_df Output of `batch_snf` containing cluster solutions.
#' @param filter_fn Optional function to filter the meta-cluster by prior to
#'  maximum average ARI determination. This can be useful if you are explicitly
#'  trying to select a solution that meets a certain condition, such as only
#'  picking from the 4 cluster solutions within a meta cluster. An example
#'  valid function could be `fn <- function(x) x[x$"nclust" == 4, ]`.
#' @return The provided solutions data frame reduced to just one row per meta
#'  cluster defined by the split vector.
#' @export
#' @examples
#' # dl <- data_list(
#' #     list(subc_v, "subcortical_volume", "neuroimaging", "continuous"),
#' #     list(income, "household_income", "demographics", "continuous"),
#' #     list(pubertal, "pubertal_status", "demographics", "continuous"),
#' #     list(anxiety, "anxiety", "behaviour", "ordinal"),
#' #     list(depress, "depressed", "behaviour", "ordinal"),
#' #     uid = "unique_id"
#' # )
#' # 
#' # sc <- snf_config(
#' #     dl = dl,
#' #     n_solutions = 20,
#' #     min_k = 20,
#' #     max_k = 50
#' # )
#' # 
#' # sol_df <- batch_snf(dl, sc)
#' # 
#' # ext_sol_df <- extend_solutions(
#' #     sol_df,
#' #     dl = dl,
#' #     min_pval = 1e-10 # p-values below 1e-10 will be thresholded to 1e-10
#' # )
#' # 
#' # # Calculate pairwise similarities between cluster solutions
#' # sol_aris <- calc_aris(sol_df)
#' # 
#' # # Extract hierarchical clustering order of the cluster solutions
#' # meta_cluster_order <- get_matrix_order(sol_aris)
#' # 
#' # # Identify meta cluster boundaries with shiny app or trial and error
#' # # ari_hm <- meta_cluster_heatmap(sol_aris, order = meta_cluster_order)
#' # # shiny_annotator(ari_hm)
#' # 
#' # # Result of meta cluster examination
#' # split_vec <- c(2, 5, 12, 17)
#' # 
#' # ext_sol_df <- label_meta_clusters(ext_sol_df, split_vec, meta_cluster_order)
#' # 
#' # # Extracting representative solutions from each defined meta cluster
#' # rep_solutions <- get_representative_solutions(sol_aris, ext_sol_df)
get_representative_solutions <- function(aris,
                                         sol_df,
                                         filter_fn = NULL) {
    # Ensure solutions data frame is mc-labeled
    if (any(is.na(sol_df$"mc"))) {
        metasnf_error(
            "Identifying representative solutions can only be done for meta",
            " cluster labeled solutions data frames. See ?label_meta_clusters",
            " to assign meta cluster labels to a solutions data frame."
        )
    }
    ## Re-sort the solutions data frame based on the aris
    aris <- data.frame(aris)
    ## Extract and assign meta cluster labels
    aris$"mc" <- sol_df$"mc"
    mcs <- unique(sol_df$"mc")
    # Iterate through the meta clusters and keep the representative solution
    rep_solutions <- data.frame()
    for (mc in mcs) {
        # Subset to just those solutions and ARIs within the MC
        mc_sm <- sol_df[sol_df$"mc" == mc, ]
        mc_ari <- aris[aris$"mc" == mc, ]
        mc_ari$"mc" <- NULL
        # The most representative solution based on total ARI within MC
        mc_sm$"total_aris" <- rowSums(mc_ari)
        if (!is.null(filter_fn)) {
            mc_sm <- filter_fn(mc_sm)
        }
        rep_mc <- which(mc_sm$"total_aris" == max(mc_sm$"total_aris"))[1]
        rep_solution <- mc_sm[rep_mc, ]
        rep_solution$"total_aris" <- NULL
        rep_solutions <- rbind(rep_solutions, rep_solution)
    }
    # Assign mcs to the representative solutions
    rep_solutions <- rep_solutions[order(rep_solutions$"solution"), ]
    return(rep_solutions)
}

Try the metasnf package in your browser

Any scripts or data that you put into this service are public.

metasnf documentation built on April 3, 2025, 5:40 p.m.