Nothing
#' @title Calculate Overlap and Similarity Coefficients between Feature Lists
#' @description This function calculates the Overlap, Jaccard, and Soerensen-Dice coefficients to quantify
#' the similarity between feature lists. In addition to feature importance and permutation importance,
#' you can provide a custom list of feature names to be included in the overlap calculation.
#' @param pipeline_results A PipelineResults object containing the fitted pipelines, cross-validation results, selected features,
#' mean performance, and mean feature importances.
#' @param custom_lists An optional named list of character vectors. Each character vector should contain feature names.
#' The names of the list will be used as names in the resulting overlap coefficient matrices.
#' @return A list containing lists of matrices, where each list corresponds to a different type of feature list (inbuilt feature importance, permutation importance, and custom lists if provided).
#' Within each of these lists, there are three matrices showing the Overlap, Jaccard, and Soerensen-Dice coefficients for the feature lists:
#' - @field overlap: A matrix showing the Overlap coefficients.
#' - @field jaccard: A matrix showing the Jaccard coefficients.
#' - @field soerensen: A matrix showing the Soerensen-Dice coefficients.
#' These matrices compare the feature lists against each other, providing a numerical measure of their similarity.
#' Note: If permutation importance data is not present in the `pipeline_results`, the corresponding list entry will be absent.
#' @importFrom tmod modOverlaps
#' @examples
#' # Basic Usage with Mock Data
#' # Create a mock PipelineResults object with minimal data
#' mock_pipeline_results <- new("PipelineResults",
#' inbuilt_feature_importance = list(
#' "FeatureSet1" = data.frame(feature = c("feature1", "feature2")),
#' "FeatureSet2" = data.frame(feature = c("feature2", "feature3"))),
#' permutation_importance = list(
#' "FeatureSet1" = data.frame(feature = c("feature3", "feature4")),
#' "FeatureSet2" = data.frame(feature = c("feature1", "feature4"))))
#'
#' # Calculate overlap coefficients without custom lists
#' overlap_results <- calculate_overlap_coefficients(mock_pipeline_results)
#'
#'
#' # Including Custom Lists
#' # Create custom feature lists
#' custom_feature_lists <- list("CustomList1" = c("feature5", "feature6"),
#' "CustomList2" = c("feature6", "feature7"))
#'
#' # Calculate overlap coefficients with custom lists
#' overlap_results_custom <- calculate_overlap_coefficients(mock_pipeline_results,
#' custom_feature_lists)
#'
#' @export
calculate_overlap_coefficients <- function(pipeline_results, custom_lists = NULL) {
# Check if input object belongs to the PipelineResults class
if (!inherits(pipeline_results, "PipelineResults")) {
stop("The input object does not belong to the PipelineResults class.")
}
# Function to create feature lists and calculate coefficients
get_coefficients <- function(importances) {
# Create feature lists
feature_lists <- lapply(importances, function(x) x)
names(feature_lists) <- names(importances)
# Calculate the overlap coefficients and round to 2 decimal places
calculate_coefficients <- function(stat) {
coefficients <- tmod::modOverlaps(modules = feature_lists, mset = NULL, stat = stat)
round(coefficients, 2)
}
overlap.coef_features <- calculate_coefficients("overlap")
j.coef_features <- calculate_coefficients("jaccard")
s.coef_features <- calculate_coefficients("soerensen")
# Return the results as a list of matrices
return(list(overlap = overlap.coef_features, jaccard = j.coef_features, soerensen = s.coef_features))
}
# Extract feature names from mean_feature_importances
feature_importances_list <- lapply(pipeline_results@inbuilt_feature_importance, function(df) df$feature)
# Check if permutation_importances exist and apply function
permutation_importance_coefficients <- NULL
if (length(pipeline_results@permutation_importance) != 0) {
# Extract feature names from permutation_importances
permutation_importances_list <- lapply(pipeline_results@permutation_importance, function(df) df$feature)
} else {
permutation_importances_list <- NULL
}
if (!is.null(permutation_importances_list)) {
all_lists <- list("features" = feature_importances_list, "permutations" = permutation_importances_list)
} else all_lists <- list("features" = feature_importances_list)
if (!is.null(custom_lists)) {
all_lists <- lapply(all_lists, function(lst) c(lst, custom_lists))
}
result_lists <- lapply(all_lists, get_coefficients)
if (!is.null(permutation_importances_list)) {
names(result_lists) <- c("inbuilt_feature_importance_coefficient", "permutation_importance_coefficients")
} else names(result_lists) <- c('inbuilt_feature_importance_coefficient')
# Return both lists of coefficients
return(result_lists)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.