Nothing
#' @rdname get_colocboost_summary
#'
#' @title Get summary tables from a ColocBoost output.
#'
#' @description `get_colocboost_summary` get colocalization and trait-specific summary table
#' with or without the outcomes of interest.
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param summary_level When \code{summary_level = 1}, return basic summary table for colocalization results. See details in `get_ucos_summary` function when \code{summary_level = 2}.
#' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis.
#' @param interest_outcome Optional vector specifying a subset of outcomes from \code{outcome_names} to focus on. When provided, only colocalization events that include at least one of these outcomes will be returned.
#' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.
#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.
#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.
#'
#' @return A list containing results from the ColocBoost analysis:
#' \itemize{
#' \item When \code{summary_level = 1} (default):
#' \itemize{
#' \item \code{cos_summary}: A summary table for colocalization events with the following columns:
#' \itemize{
#' \item \code{focal_outcome}: The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}.
#' \item \code{colocalized_outcomes}: Colocalized outcomes for colocalization confidence set (CoS)
#' \item \code{cos_id}: Unique identifier for colocalization confidence set (CoS)
#' \item \code{purity}: Minimum absolute correlation of variables within colocalization confidence set (CoS)
#' \item \code{top_variable}: The variable with highest variant colocalization probability (VCP)
#' \item \code{top_variable_vcp}: Variant colocalization probability for the top variable
#' \item \code{cos_npc}: Normalized probability of colocalization
#' \item \code{min_npc_outcome}: Minimum normalized probability of colocalized traits
#' \item \code{n_variables}: Number of variables in colocalization confidence set (CoS)
#' \item \code{colocalized_index}: Indices of colocalized variables
#' \item \code{colocalized_variables}: List of colocalized variables
#' \item \code{colocalized_variables_vcp}: Variant colocalization probabilities for all colocalized variables
#' }
#' }
#' \item When \code{summary_level = 2}:
#' \itemize{
#' \item \code{cos_summary}: As described above
#' \item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects
#' }
#' \item When \code{summary_level = 3}:
#' \itemize{
#' \item \code{cos_summary}: As described above
#' \item \code{ucos_summary}: A summary table for trait-specific (uncolocalized) effects
#' \item \code{ambiguous_cos_summary}: A summary table for ambiguous colocalization events from trait-specific effects
#' }
#' }
#' @details When \code{summary_level = 1}, additional details and examples are introduced in \code{\link{get_cos_summary}}.
#' When \code{summary_level = 2} or \code{summary_level = 3}, additional details for trait-specific effects and ambiguous
#' colocalization events are included. See \code{\link{get_ucos_summary}} for details on these tables.
#'
#' @examples
#' # colocboost example
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 3
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1
#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized)
#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2
#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3
#' Y <- matrix(0, N, L)
#' for (l in 1:L) {
#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1)
#' }
#' res <- colocboost(X = X, Y = Y)
#' get_colocboost_summary(res)
#'
#' @source See detailed instructions in our tutorial portal:
#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html}
#'
#' @family colocboost_inference
#' @export
#'
get_colocboost_summary <- function(cb_output,
summary_level = 1,
outcome_names = NULL,
interest_outcome = NULL,
region_name = NULL,
min_abs_corr_between_ucos = 0.5,
median_abs_corr_between_ucos = 0.8) {
if (!inherits(cb_output, "colocboost")) {
stop("Input must from colocboost output!")
}
cos_summary <- get_cos_summary(cb_output, outcome_names, interest_outcome, region_name)
if (summary_level == 1) {
return(list("cos_summary" = cos_summary))
}
if (summary_level == 2){
ucos_summary <- get_ucos_summary(cb_output, outcome_names, region_name)
return(list("cos_summary" = cos_summary,
"ucos_summary" = ucos_summary))
}
if (summary_level == 3){
ucos_summary <- get_ucos_summary(
cb_output, outcome_names, region_name,
ambiguous_cos = TRUE,
min_abs_corr_between_ucos = min_abs_corr_between_ucos,
median_abs_corr_between_ucos = median_abs_corr_between_ucos
)
return(list("cos_summary" = cos_summary,
"ucos_summary" = ucos_summary$ucos_summary,
"ambiguous_cos_summary" = ucos_summary$ambiguous_cos_summary))
}
}
#' @rdname get_robust_colocalization
#'
#' @title Recalibrate and summarize robust colocalization events.
#'
#' @description `get_robust_colocalization` get the colocalization by discarding the weaker colocalization events or colocalized outcomes
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param cos_npc_cutoff Minimum threshold of normalized probability of colocalization (NPC) for CoS.
#' @param npc_outcome_cutoff Minimum threshold of normalized probability of colocalized traits in each CoS.
#' @param pvalue_cutoff Maximum threshold of marginal p-values of colocalized variants on colocalized traits in each CoS.
#' @param weight_fudge_factor The strength to integrate weight from different outcomes, default is 1.5
#' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95).
#'
#' @return A \code{"colocboost"} object with some or all of the following elements:
#'
#' \item{cos_summary}{A summary table for colocalization events.}
#' \item{vcp}{The variable colocalized probability for each variable.}
#' \item{cos_details}{A object with all information for colocalization results.}
#' \item{data_info}{A object with detailed information from input data}
#' \item{model_info}{A object with detailed information for colocboost model}
#' \item{ucos_from_cos}{A object with information for trait-specific effects if exists after removing weaker signals.}
#'
#' @examples
#' # colocboost example
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 3
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1
#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized)
#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2
#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3
#' Y <- matrix(0, N, L)
#' for (l in 1:L) {
#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1)
#' }
#' res <- colocboost(X = X, Y = Y)
#' res$cos_details$cos$cos_index
#' filter_res <- get_robust_colocalization(res, cos_npc_cutoff = 0.5, npc_outcome_cutoff = 0.2)
#' filter_res$cos_details$cos$cos_index
#'
#' @source See detailed instructions in our tutorial portal:
#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html}
#'
#' @family colocboost_inference
#' @export
get_robust_colocalization <- function(cb_output,
cos_npc_cutoff = 0.5,
npc_outcome_cutoff = 0.2,
pvalue_cutoff = NULL,
weight_fudge_factor = 1.5,
coverage = 0.95) {
if (!inherits(cb_output, "colocboost")) {
stop("Input must from colocboost object!")
}
if (is.null(cb_output$cos_details)) {
warning("No colocalization results in this region!")
return(cb_output)
}
if (npc_outcome_cutoff == 0 && cos_npc_cutoff == 0 && is.null(pvalue_cutoff)) {
message("All possible colocalization events are reported regardless of their relative evidence compared to uncolocalized events (cos_npc_cutoff = 0 and npc_outcome_cutoff = 0).")
return(cb_output)
} else {
if (is.null(pvalue_cutoff)) {
message(paste0(
"Extracting colocalization results with cos_npc_cutoff = ", cos_npc_cutoff, " and npc_outcome_cutoff = ", npc_outcome_cutoff, ".\n",
"Keep only CoS with cos_npc >= ", cos_npc_cutoff, ". ",
"For each CoS, keep the outcomes configurations that the npc_outcome >= ", npc_outcome_cutoff, "."
))
} else {
if (pvalue_cutoff > 1 | pvalue_cutoff < 0) {
warning("Please check the pvalue cutoff in [0,1].")
return(cb_output)
}
if (npc_outcome_cutoff == 0 && cos_npc_cutoff == 0) {
message(paste0(
"Extracting colocalization results with pvalue_cutoff = ", pvalue_cutoff, ".\n",
"For each CoS, keep the outcomes configurations that pvalue of variants for the outcome < ", pvalue_cutoff, "."
))
} else {
message(paste0(
"Extracting colocalization results with pvalue_cutoff = ", pvalue_cutoff, ", cos_npc_cutoff = ", cos_npc_cutoff, ", and npc_outcome_cutoff = ", npc_outcome_cutoff, ".\n",
"Keep only CoS with cos_npc >= ", cos_npc_cutoff, ". ",
"For each CoS, keep the outcomes configurations that pvalue of variants for the outcome < ", pvalue_cutoff, " and npc_outcome >", npc_outcome_cutoff, "."
))
}
}
}
remove_cos <- function(cb_output, remove_idx = NULL) {
if (length(remove_idx) == 0) {
return(cb_output)
}
ncos <- length(cb_output$cos_details$cos$cos_index)
if (ncos == 0 | length(remove_idx) == ncos) {
cb_output$vcp <- NULL
cb_output$cos_details <- NULL
cb_output <- c(cb_output, list(vcp = NULL, cos_details = NULL))
return(cb_output)
}
cos_details <- cb_output$cos_details
cos_details$cos_top_variables <- cos_details$cos_top_variables[-remove_idx, , drop = FALSE]
cos_details$cos$cos_index <- cos_details$cos$cos_index[-remove_idx]
cos_details$cos$cos_variables <- cos_details$cos$cos_variables[-remove_idx]
cos_details$cos_outcomes$outcome_index <- cos_details$cos_outcomes$outcome_index[-remove_idx]
cos_details$cos_outcomes$outcome_name <- cos_details$cos_outcomes$outcome_name[-remove_idx]
cos_details$cos_vcp <- cos_details$cos_vcp[-remove_idx]
cos_details$cos_weights <- cos_details$cos_weights[-remove_idx]
cos_details$cos_npc <- cos_details$cos_npc[-remove_idx]
cos_details$cos_min_npc_outcome <- cos_details$cos_min_npc_outcome[-remove_idx]
cos_details$cos_purity$min_abs_cor <- as.matrix(cos_details$cos_purity$min_abs_cor)[-remove_idx, -remove_idx, drop = FALSE]
cos_details$cos_purity$median_abs_cor <- as.matrix(cos_details$cos_purity$median_abs_cor)[-remove_idx, -remove_idx, drop = FALSE]
cos_details$cos_purity$max_abs_cor <- as.matrix(cos_details$cos_purity$max_abs_cor)[-remove_idx, -remove_idx, drop = FALSE]
vcp <- as.vector(1 - apply(1 - do.call(cbind, cos_details$cos_vcp), 1, prod))
names(vcp) <- cb_output$data_info$variables
cb_output$vcp <- vcp
cb_output$cos_details <- cos_details
return(cb_output)
}
get_npuc <- function(npc_outcome) {
max_idx <- which.max(npc_outcome)
npc_outcome[max_idx] * prod(1 - npc_outcome[-max_idx])
}
cos_details <- cb_output$cos_details
coloc_outcome_index <- coloc_outcome <- list()
colocset_names <- cos_min_npc_outcome <- cos_npc <- c()
for (i in 1:length(cos_details$cos$cos_index)) {
cos_npc_config <- cos_details$cos_outcomes_npc[[i]]
npc_outcome <- cos_npc_config$npc_outcome
pos_pass <- which(npc_outcome >= npc_outcome_cutoff)
if (!is.null(pvalue_cutoff)) {
cos_tmp <- cos_details$cos$cos_index[[i]]
cos_trait <- cos_details$cos_outcomes$outcome_index[[i]]
minPV <- sapply(cos_trait, function(tmp) {
z <- cb_output$data_info$z[[tmp]][cos_tmp]
pv <- pchisq(z^2, 1, lower.tail = FALSE)
min(pv)
})
pos_pass_pvalue <- which(minPV <= pvalue_cutoff)
if (length(pos_pass_pvalue) == 0) {
pos_pass <- NULL
} else {
pos_pass_pvalue <- match(cos_trait[pos_pass_pvalue], cos_npc_config$outcomes_index)
pos_pass <- intersect(pos_pass_pvalue, pos_pass)
}
}
if (length(pos_pass) == 0) {
coloc_outcome_index[[i]] <- 0
coloc_outcome[[i]] <- 0
cos_min_npc_outcome[i] <- 0
cos_npc[i] <- 0
colocset_names[i] <- paste0("remove", i)
} else {
cos_min_npc_outcome[i] <- min(npc_outcome[pos_pass])
if (length(pos_pass) > 1){
cos_npc[i] <- 1 - get_npuc(npc_outcome[pos_pass])
} else {
cos_npc[i] <- 0 # since single-trait remain
}
coloc_outcome_index[[i]] <- sort(cos_npc_config$outcomes_index[pos_pass])
coloc_outcome[[i]] <- rownames(cos_npc_config)[pos_pass]
colocset_names[i] <- paste0("cos", i, ":", paste0(paste0("y", coloc_outcome_index[[i]]), collapse = "_"))
if (grepl("merged", names(cos_details$cos$cos_index)[i])) {
colocset_names[i] <- paste0(colocset_names[i], ":merged")
}
}
}
names(coloc_outcome) <- names(coloc_outcome_index) <- names(cos_min_npc_outcome) <- names(cos_npc) <- colocset_names
cos_details$cos_outcomes <- list("outcome_index" = coloc_outcome_index, "outcome_name" = coloc_outcome)
cos_details$cos_min_npc_outcome <- cos_min_npc_outcome
cos_details$cos_npc <- cos_npc
# - VCP
cos_weights <- lapply(1:length(cos_details$cos_outcomes$outcome_index), function(idx) {
w <- cos_details$cos_weights[[idx]]
config_idx <- cos_details$cos_outcomes$outcome_index[[idx]]
if (length(config_idx) == 1) {
if (config_idx == 0) {
return(matrix(1, nrow = nrow(cos_details$cos_weights[[idx]])))
}
}
w_outcome <- colnames(w)
config_outcome <- paste0("outcome", config_idx)
pos <- which(w_outcome %in% config_outcome)
w[, pos, drop = FALSE]
})
cos_details$cos_weights <- cos_weights
int_weight <- lapply(cos_weights, get_integrated_weight, weight_fudge_factor = weight_fudge_factor)
names(int_weight) <- names(cos_weights) <- colocset_names
vcp <- as.vector(1 - apply(1 - do.call(cbind, int_weight), 1, prod))
names(vcp) <- cb_output$data_info$variables
cb_output$vcp <- vcp
cos_details$cos_vcp <- int_weight
# - resummary results
cos_re_idx <- lapply(int_weight, function(w) {
unlist(get_in_cos(w, coverage = coverage))
})
cos_re_var <- lapply(cos_re_idx, function(idx) {
cb_output$data_info$variables[idx]
})
coloc_csets <- list("cos_index" = cos_re_idx, "cos_variables" = cos_re_var)
cos_details$cos <- coloc_csets
# - hits variables in each csets
coloc_hits <- coloc_hits_variablenames <- coloc_hits_names <- c()
for (i in 1:length(int_weight)) {
inw <- int_weight[[i]]
if (length(unique(inw)) == 1) {
coloc_hits <- c(coloc_hits, 0)
coloc_hits_variablenames <- c(coloc_hits_variablenames, 0)
coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i])
} else {
pp <- which(inw == max(inw))
coloc_hits <- c(coloc_hits, pp)
coloc_hits_variablenames <- c(coloc_hits_variablenames, cb_output$data_info$variables[pp])
if (length(pp) == 1) {
coloc_hits_names <- c(coloc_hits_names, names(int_weight)[i])
} else {
coloc_hits_names <- c(coloc_hits_names, paste0(names(int_weight)[i], ".", 1:length(pp)))
}
}
}
coloc_hits <- data.frame("top_index" = coloc_hits, "top_variables" = coloc_hits_variablenames)
rownames(coloc_hits) <- coloc_hits_names
cos_details$cos_top_variables <- coloc_hits
cb_output$cos_details <- cos_details
# remove CoS does not include outcomes pass npc_outcome_cutoff
remove <- grep("remove", colocset_names)
cb_output <- remove_cos(cb_output, remove_idx = remove)
cos_details <- cb_output$cos_details
# remove CoS does not pass cos_npc_cutoff
remove <- which(cos_details$cos_npc < cos_npc_cutoff)
cb_output <- remove_cos(cb_output, remove_idx = remove)
cos_details <- cb_output$cos_details
# remove CoS only with one trait
n_outcome <- sapply(cos_details$cos_outcomes$outcome_index, length)
single <- which(n_outcome == 1)
if (length(single) == length(n_outcome)) {
# - all remaining the single outcome
cb_output$cos_details <- cb_output$vcp <- NULL
cb_output <- c(cb_output, list(vcp = NULL, cos_details = NULL))
} else if (length(single) != 0 & length(single) != length(n_outcome)) {
# - partial remaining the single outcome
ucos_from_cos <- list(
"ucos" = list(
"ucos_index" = cos_details$cos$cos_index[single],
"ucos_variables" = cos_details$cos$cos_variables[single]
),
"ucos_outcomes" = list(
"outcome_idx" = cos_details$cos_outcomes$outcome_index[single],
"outcome_name" = cos_details$cos_outcomes$outcome_name[single]
),
"ucos_weight" = cos_details$cos_weights[single],
"ucos_purity" = list(
"min_abs_cor" = as.matrix(cos_details$cos_purity$min_abs_cor)[single, single, drop = FALSE],
"median_abs_cor" = as.matrix(cos_details$cos_purity$median_abs_cor)[single, single, drop = FALSE],
"max_abs_cor" = as.matrix(cos_details$cos_purity$max_abs_cor)[single, single, drop = FALSE]
),
"ucos_top_variables" = cos_details$cos_top_variables[single, , drop = FALSE]
)
cb_output$ucos_from_cos <- ucos_from_cos
cb_output <- remove_cos(cb_output, remove_idx = single)
}
# - refine and output
class(cb_output) <- "colocboost"
cb_output$cos_summary <- get_cos_summary(cb_output)
return(cb_output)
}
#' @rdname get_ambiguous_colocalization
#'
#' @title Get ambiguous colocalization events from trait-specific (uncolocalized) effects.
#'
#' @description `get_ambiguous_colocalization` get the colocalization by discarding the weaker colocalization events or colocalized outcomes
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.
#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.
#' @param tol A small, non-negative number specifying the convergence tolerance for checking the overlap of the variables in different sets.
#'
#' @return A \code{"colocboost"} object of colocboost output with additional elements:
#' \item{ambiguous_cos}{If exists, a list of ambiguous trait-specific (uncolocalized) effects.}
#'
#' @examples
#' data(Ambiguous_Colocalization)
#' test_colocboost_results <- Ambiguous_Colocalization$ColocBoost_Results
#' res <- get_ambiguous_colocalization(test_colocboost_results)
#' names(res$ambiguous_cos)
#'
#' @source See detailed instructions in our tutorial portal:
#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html}
#'
#' @family colocboost_inference
#' @export
get_ambiguous_colocalization <- function(cb_output,
min_abs_corr_between_ucos = 0.5,
median_abs_corr_between_ucos = 0.8,
tol = 1e-9) {
if (!inherits(cb_output, "colocboost")) {
stop("Input must from colocboost output!")
}
if (!("ucos_details" %in% names(cb_output))) {
warning(
"Since you want to extract ambiguous colocalization from trait-specific (uncolocalized) sets,",
" but there is no output of ucos_details from colocboost.\n",
" Please run colocboost model with output_level=2!"
)
return(cb_output)
}
if (is.null(cb_output$ucos_details)){
message("No trait-specific (uncolocalized) effects in this region!")
return(cb_output)
}
ucos_details <- cb_output$ucos_details
nucos <- length(ucos_details$ucos$ucos_index)
if (nucos == 1) {
message("Only one trait-specific (uncolocalized) effect in this region!")
return(cb_output)
}
# Function to merge ambiguous ucos
merge_sets <- function(vec) {
split_lists <- lapply(vec, function(x) as.numeric(unlist(strsplit(x, ";"))))
result <- list()
while (length(split_lists) > 0) {
current <- split_lists[[1]]
split_lists <- split_lists[-1]
repeat {
overlap_index <- NULL
for (i in seq_along(split_lists)) {
if (length(intersect(current, split_lists[[i]])) > 0) {
overlap_index <- i
break
}
}
if (!is.null(overlap_index)) {
current <- union(current, split_lists[[overlap_index]])
split_lists <- split_lists[-overlap_index]
} else {
break
}
}
result <- c(result, list(paste(sort(current), collapse = ";")))
}
return(result)
}
purity <- ucos_details$ucos_purity
min_abs_cor <- purity$min_abs_cor
median_abs_cor <- purity$median_abs_cor
max_abs_cor <- purity$max_abs_cor
is_ambiguous <- (min_abs_cor > min_abs_corr_between_ucos) *
(abs(max_abs_cor - 1) < tol) *
(median_abs_cor > median_abs_corr_between_ucos)
diag(is_ambiguous) <- 0 # no need to check within ucos
if (sum(is_ambiguous) == 0){
message("No ambiguous colocalization events!")
return(cb_output)
} else {
message("There exists the ambiguous colocalization events from trait-specific effects. Extracting!")
}
temp <- sapply(1:nrow(is_ambiguous), function(x) {
tt <- c(x, which(is_ambiguous[x, ] != 0))
return(paste0(sort(tt), collapse = ";"))
})
temp <- merge_sets(temp)
potential_merged <- lapply(temp, function(x) as.numeric(unlist(strsplit(x, ";"))))
potential_merged <- potential_merged[which(sapply(potential_merged, length) >= 2)]
ambiguous_events <- list()
ambiguous_ucos_names <- c()
for (i in 1:length(potential_merged)) {
idx <- potential_merged[[i]]
test_outcome <- unique(unlist(ucos_details$ucos_outcomes$outcome_index[idx]))
if (length(test_outcome) == 1) next
ambiguous_ucos_names[i] <- paste0(names(ucos_details$ucos$ucos_index)[idx], collapse = ";")
tmp <- list(
ambiguous_cos = list(
ucos_index = ucos_details$ucos$ucos_index[idx],
ucos_variables = ucos_details$ucos$ucos_variables[idx]
),
ambiguous_cos_overlap = list(
ucos_index = Reduce(intersect, ucos_details$ucos$ucos_index[idx]),
ucos_variables = Reduce(intersect, ucos_details$ucos$ucos_variables[idx])
),
ambiguous_cos_union = list(
ucos_index = Reduce(union, ucos_details$ucos$ucos_index[idx]),
ucos_variables = Reduce(union, ucos_details$ucos$ucos_variables[idx])
),
ambiguous_cos_outcomes = list(
outcome_idx = unique(unlist(ucos_details$ucos_outcomes$outcome_index[idx])),
outcome_name = unique(unlist(ucos_details$ucos_outcomes$outcome_name[idx]))
),
ambigous_cos_weight = ucos_details$ucos_weight[idx],
ambigous_cos_purity = list(
min_abs_cor = min_abs_cor[idx, idx],
median_abs_cor = median_abs_cor[idx, idx],
max_abs_cor = max_abs_cor[idx, idx]
)
)
w <- tmp$ambigous_cos_weight
w <- do.call(cbind, w)
tmp$recalibrated_cos_vcp <- get_integrated_weight(w)
tmp$recalibrated_cos <- list(
"cos_index" = unlist(get_in_cos(tmp$recalibrated_cos_vcp)),
"cos_variables" = lapply(unlist(get_in_cos(tmp$recalibrated_cos_vcp)), function(idx) cb_output$data_info$variables[idx])
)
ambiguous_events[[i]] <- tmp
}
names(ambiguous_events) <- ambiguous_ucos_names
message(paste("There are", length(ambiguous_events), "ambiguous trait-specific effects."))
cb_output$ambiguous_cos <- ambiguous_events
return(cb_output)
}
#' @rdname get_cos_summary
#'
#' @title Get colocalization summary table from a ColocBoost output.
#'
#' @description `get_cos_summary` get the colocalization summary table with or without the outcomes of interest.
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis.
#' @param interest_outcome Optional vector specifying a subset of outcomes from \code{outcome_names} to focus on. When provided, only colocalization events that include at least one of these outcomes will be returned.
#' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.
#'
#' @return A summary table for colocalization events with the following columns:
#' \item{focal_outcome}{The focal outcome being analyzed if exists. Otherwise, it is \code{FALSE}.}
#' \item{colocalized_outcomes}{Colocalized outcomes for colocalization confidence set (CoS) }
#' \item{cos_id}{Unique identifier for colocalization confidence set (CoS) }
#' \item{purity}{Minimum absolute correlation of variables with in colocalization confidence set (CoS) }
#' \item{top_variable}{The variable with highest variant colocalization probability (VCP) }
#' \item{top_variable_vcp}{Variant colocalization probability for the top variable}
#' \item{cos_npc}{Normalized probability of colocalization}
#' \item{min_npc_outcome}{Minimum normalized probability of colocalized traits}
#' \item{n_variables}{Number of variables in colocalization confidence set (CoS)}
#' \item{colocalized_index}{Indices of colocalized variables}
#' \item{colocalized_variables}{List of colocalized variables}
#' \item{colocalized_variables_vcp}{Variant colocalization probabilities for all colocalized variables}
#'
#' @examples
#' # colocboost example
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 3
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1
#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized)
#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2
#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3
#' Y <- matrix(0, N, L)
#' for (l in 1:L) {
#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1)
#' }
#' res <- colocboost(X = X, Y = Y)
#' get_cos_summary(res)
#'
#' @source See detailed instructions in our tutorial portal:
#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html}
#'
#' @keywords colocboost_inference
#' @family colocboost_utilities
#' @export
get_cos_summary <- function(cb_output,
outcome_names = NULL,
interest_outcome = NULL,
region_name = NULL) {
if (!inherits(cb_output, "colocboost")) {
stop("Input must from colocboost output!")
}
coloc_csets <- cb_output$cos_details$cos$cos_index
if (length(coloc_csets) != 0) {
analysis_outcome <- cb_output$data_info$outcome_info$outcome_names
if (!is.null(outcome_names)) {
analysis_outcome <- outcome_names
}
coloc_outcome <- lapply(cb_output$cos_details$cos_outcomes$outcome_index, function(idx) analysis_outcome[idx])
coloc_sets <- cb_output$cos_details$cos$cos_index
if (!is.null(cb_output$cos_warnings)) {
message(cb_output$cos_warnings$warning_message)
}
vcp <- as.numeric(cb_output$vcp)
summary_table <- matrix(NA, nrow = length(coloc_sets), ncol = 12)
colnames(summary_table) <- c(
"focal_outcome", "colocalized_outcomes", "cos_id", "purity",
"top_variable", "top_variable_vcp", "cos_npc", "min_npc_outcome", "n_variables",
"colocalized_index", "colocalized_variables", "colocalized_variables_vcp"
)
summary_table <- as.data.frame(summary_table)
summary_table[, 1] <- FALSE
summary_table[, 2] <- unlist(sapply(coloc_outcome, function(tmp) paste0(tmp, collapse = "; ")))
summary_table[, 3] <- names(coloc_sets)
summary_table[, 4] <- as.numeric(diag(as.matrix(cb_output$cos_details$cos_purity$min_abs_cor)))
summary_table[, 5] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) tmp[1]))
summary_table[, 6] <- sapply(coloc_sets, function(tmp) max(vcp[tmp]))
summary_table[, 7] <- round(as.numeric(cb_output$cos_details$cos_npc), 4)
summary_table[, 8] <- round(as.numeric(cb_output$cos_details$cos_min_npc_outcome), 4)
summary_table[, 9] <- as.numeric(sapply(coloc_sets, length))
summary_table[, 10] <- unlist(sapply(coloc_sets, function(tmp) paste0(tmp, collapse = "; ")))
summary_table[, 11] <- unlist(sapply(cb_output$cos_details$cos$cos_variables, function(tmp) paste0(tmp, collapse = "; ")))
summary_table[, 12] <- unlist(sapply(coloc_sets, function(tmp) paste0(vcp[tmp], collapse = "; ")))
if (!is.null(region_name)) {
summary_table$region_name <- region_name
}
# - if focal colocalization
focal_outcome_idx <- which(cb_output$data_info$outcome_info$is_focal)
if (length(focal_outcome_idx) != 0) {
focal_outcome <- analysis_outcome[focal_outcome_idx]
tmp <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome))
if.focal <- sapply(coloc_outcome, function(cp) {
tt <- sapply(focal_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp))
all(sapply(tt, length) != 0)
})
summary_table$focal_outcome <- ifelse(if.focal, focal_outcome, FALSE)
summary_table <- summary_table[order(summary_table$focal_outcome == "FALSE"), ]
if (sum(if.focal) == 0) {
warning("No colocalization with focal outcomes.")
}
}
# - if extract only interest outcome colocalization
if (!is.null(interest_outcome)) {
tmp <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), analysis_outcome))
if (all(sapply(tmp, length) != 0)) {
if.interest <- sapply(coloc_outcome, function(cp) {
tt <- sapply(interest_outcome, function(tmp) grep(paste0(tmp, "\\b"), cp))
all(sapply(tt, length) != 0)
})
summary_table$interest_outcome <- paste0(interest_outcome, collapse = "; ")
summary_table <- summary_table[which(if.interest), ]
if (sum(if.interest) == 0) {
warning("No colocalization with interest outcomes.")
}
} else {
warning("Interest outcome is not in the analysis outcomes, please check.")
}
}
} else {
summary_table <- NULL
}
return(summary_table)
}
#' @rdname get_ucos_summary
#'
#' @title Get trait-specific summary table from a ColocBoost output.
#'
#' @description `get_ucos_summary` produces a trait-specific summary table for uncolocalized (single-trait)
#' associations from ColocBoost results. This is particularly useful for examining trait-specific signals
#' or for summarizing results from single-trait FineBoost analyses.
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param outcome_names Optional vector of names of outcomes, which has the same order as Y in the original analysis.
#' @param region_name Optional character string. When provided, adds a column with this gene name to the output table for easier filtering in downstream analyses.
#' @param ambiguous_cos Logical indicating whether to include ambiguous colocalization events. The default is FALSE.
#' @param min_abs_corr_between_ucos Minimum absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.5.
#' @param median_abs_corr_between_ucos Median absolute correlation for variants across two trait-specific (uncolocalized) effects to be considered colocalized. The default is 0.8.
#'
#' @return A list containing:
#' \itemize{
#' \item \code{ucos_summary}: A summary table for trait-specific, uncolocalized associations with the following columns:
#' \itemize{
#' \item \code{outcomes}: Outcome being analyzed
#' \item \code{ucos_id}: Unique identifier for trait-specific confidence sets
#' \item \code{purity}: Minimum absolute correlation of variables within trait-specific confidence sets
#' \item \code{top_variable}: The variable with highest variant-level probability of association (VPA)
#' \item \code{top_variable_vpa}: Variant-level probability of association (VPA) for the top variable
#' \item \code{ucos_npc}: Normalized probability of causal association for the trait-specific confidence set
#' \item \code{n_variables}: Number of variables in trait-specific confidence set
#' \item \code{ucos_index}: Indices of variables in the trait-specific confidence set
#' \item \code{ucos_variables}: List of variables in the trait-specific confidence set
#' \item \code{ucos_variables_vpa}: Variant-level probability of association (VPA) for all variables in the confidence set
#' \item \code{region_name}: Region name if provided through the region_name parameter
#' }
#' \item \code{ambiguous_cos_summary}: A summary table for ambiguous colocalization events with the following columns:
#' \itemize{
#' \item \code{outcomes}: Outcome in the ambiguous colocalization event
#' \item \code{ucos_id}: Unique identifiers for the ambiguous event
#' \item \code{min_between_purity}: Minimum absolute correlation between variables across trait-specific sets in the ambiguous event
#' \item \code{median_between_purity}: Median absolute correlation between variables across trait-specific sets in the ambiguous event
#' \item \code{overlap_idx}: Indices of variables that overlap between ambiguous trait-specific sets
#' \item \code{overlap_variables}: Names of variables that overlap between ambiguous trait-specific sets
#' \item \code{n_recalibrated_variables}: Number of variables in the recalibrated colocalization set from an ambiguous event
#' \item \code{recalibrated_index}: Indices of variables in the recalibrated colocalization set from an ambiguous event
#' \item \code{recalibrated_variables}: Names of variables in the recalibrated colocalization set from an ambiguous event
#' \item \code{recalibrated_variables_vcp}: Variant colocalization probabilities for recalibrated variables from an ambiguous event
#' \item \code{region_name}: Region name if provided through the region_name parameter
#' }
#' }
#'
#' @examples
#' # colocboost example with single trait analysis
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 1 # Only one trait for single-trait analysis
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5 # SNP10 affects the trait
#' true_beta[80, 1] <- 0.2 # SNP11 also affects the trait but with lower effect
#' Y <- X %*% true_beta + rnorm(N, 0, 1)
#' res <- colocboost(X = X, Y = Y, output_level = 2)
#' # Get the trait-specifc effect summary
#' get_ucos_summary(res)
#'
#' @source See detailed instructions in our tutorial portal:
#' \url{https://statfungen.github.io/colocboost/articles/Interpret_ColocBoost_Output.html}
#'
#' @keywords colocboost_inference
#' @family colocboost_utilities
#' @export
get_ucos_summary <- function(cb_output, outcome_names = NULL, region_name = NULL,
ambiguous_cos = FALSE,
min_abs_corr_between_ucos = 0.5,
median_abs_corr_between_ucos = 0.8) {
# - check input
if (!inherits(cb_output, "colocboost")) {
stop("Input must from colocboost object!")
}
if (!("ucos_details" %in% names(cb_output))) {
warning(
"Since you want to extract trait-specific (uncolocalized) sets,",
" but there is no output of ucos_details from colocboost.\n",
" Please run colocboost model with output_level=2!"
)
return(NULL)
}
if (is.null(cb_output$ucos_details)){
message("No trait-specific (uncolocalized) effects in this region!")
return(NULL)
}
specific_cs <- cb_output$ucos_details
cs_outcome <- cb_output$data_info$outcome_info$outcome_names
if (!is.null(outcome_names)) {
cs_outcome <- outcome_names
}
vpa <- as.numeric(cb_output$vpa)
if (length(vpa) == 0){
w <- do.call(cbind, specific_cs$ucos_weight)
vpa <- as.vector(1 - apply(1 - w, 1, prod))
}
summary_table <- matrix(NA, nrow = length(specific_cs$ucos$ucos_index), ncol = 9)
colnames(summary_table) <- c(
"outcomes", "ucos_id", "purity",
"top_variable", "top_variable_vpa", "n_variables", "ucos_index",
"ucos_variables", "ucos_variables_vpa"
)
summary_table <- as.data.frame(summary_table)
summary_table[, 1] <- cs_outcome[unlist(specific_cs$ucos_outcomes$outcome_index)]
summary_table[, 2] <- names(specific_cs$ucos$ucos_index)
summary_table[, 3] <- as.numeric(diag(as.matrix(specific_cs$ucos_purity$min_abs_cor)))
summary_table[, 4] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) tmp[1]))
summary_table[, 5] <- sapply(specific_cs$ucos$ucos_index, function(tmp) max(vpa[tmp]))
summary_table[, 6] <- as.numeric(sapply(specific_cs$ucos$ucos_index, length))
summary_table[, 7] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(tmp, collapse = "; ")))
summary_table[, 8] <- unlist(sapply(specific_cs$ucos$ucos_variables, function(tmp) paste0(tmp, collapse = "; ")))
summary_table[, 9] <- unlist(sapply(specific_cs$ucos$ucos_index, function(tmp) paste0(vpa[tmp], collapse = "; ")))
if (!is.null(region_name)) {
summary_table$region_name <- region_name
}
summary_table <- as.data.frame(summary_table)
if (!ambiguous_cos) return(summary_table)
# advanced summary for ambiguous colocalization at post-processing
output_summary <- list(
ucos_summary = summary_table,
ambiguous_cos_summary = NULL
)
test <- get_ambiguous_colocalization(
cb_output,
min_abs_corr_between_ucos = min_abs_corr_between_ucos,
median_abs_corr_between_ucos = median_abs_corr_between_ucos
)
if (length(test$ambiguous_cos) == 0) return(output_summary)
ambiguous_results <- test$ambiguous_cos
ambiguous_summary <- matrix(NA, nrow = length(ambiguous_results), ncol = 10)
colnames(ambiguous_summary) <- c(
"outcomes", "ucos_id", "min_between_purity", "median_between_purity",
"overlap_idx", "overlap_variables", "n_recalibrated_variables",
"recalibrated_index", "recalibrated_variables", "recalibrated_variables_vcp"
)
ambiguous_summary[, 1] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambiguous_cos_outcomes$outcome_name, collapse = "; ")))
ambiguous_summary[, 2] <- names(ambiguous_results)
ambiguous_summary[, 3] <- sapply(ambiguous_results, function(tmp) max(tmp$ambigous_cos_purity$min_abs_cor[lower.tri(tmp$ambigous_cos_purity$min_abs_cor)]) )
ambiguous_summary[, 4] <- sapply(ambiguous_results, function(tmp) max(tmp$ambigous_cos_purity$median_abs_cor[lower.tri(tmp$ambigous_cos_purity$median_abs_cor)]) )
ambiguous_summary[, 5] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambiguous_cos_overlap$ucos_index, collapse = "; ")))
ambiguous_summary[, 6] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$ambiguous_cos_overlap$ucos_variables, collapse = "; ")))
ambiguous_summary[, 7] <- as.numeric(sapply(ambiguous_results, function(tmp) length(tmp$recalibrated_cos$cos_index)))
ambiguous_summary[, 8] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos$cos_index, collapse = "; ")))
ambiguous_summary[, 9] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos$cos_variables, collapse = "; ")))
ambiguous_summary[, 10] <- unlist(sapply(ambiguous_results, function(tmp) paste0(tmp$recalibrated_cos_vcp[tmp$recalibrated_cos$cos_index], collapse = "; ")))
if (!is.null(region_name)) {
ambiguous_summary$region_name <- region_name
}
output_summary$ambiguous_cos_summary <- as.data.frame(ambiguous_summary)
return(output_summary)
}
#' Extract CoS at different coverage
#'
#' @description `get_cos` extracts colocalization confidence sets (CoS) at different coverage levels
#' from ColocBoost results. When genotype data (X) or correlation matrix (Xcorr) is provided, it
#' can also calculate and filter CoS based on purity statistics, ensuring that variants within
#' each CoS are sufficiently correlated.
#'
#' @param cb_output Output object from `colocboost` analysis
#' @param coverage A number between 0 and 1 specifying the \dQuote{coverage} of the estimated colocalization confidence sets (CoS) (default is 0.95).
#' @param X Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided.
#' @param Xcorr Correlation matrix of correlations between variables. Alternative to X.
#' @param n_purity The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics.
#' @param min_abs_corr The minimum absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics.
#' @param median_abs_corr The median absolute correlation value of variants in a CoS to be considered pass (\dQuote{purity}) statistics.
#' When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled.
#'
#' @return A list of indices of variables in each CoS.
#'
#' @examples
#' # colocboost example
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 3
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5 # SNP10 affects trait 1
#' true_beta[10, 2] <- 0.4 # SNP10 also affects trait 2 (colocalized)
#' true_beta[50, 2] <- 0.3 # SNP50 only affects trait 2
#' true_beta[80, 3] <- 0.6 # SNP80 only affects trait 3
#' Y <- matrix(0, N, L)
#' for (l in 1:L) {
#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1)
#' }
#' res <- colocboost(X = X, Y = Y)
#' get_cos(res, coverage = 0.99, X = X)
#' get_cos(res, coverage = 0.99, X = X, min_abs_corr = 0.95)
#'
#' @family colocboost_utilities
#' @export
get_cos <- function(cb_output, coverage = 0.95, X = NULL, Xcorr = NULL, n_purity = 100, min_abs_corr = 0.5, median_abs_corr = NULL) {
if (is.null(cb_output$cos_details$cos)){
warning("No colocalization results in this region!")
return(list("cos" = NULL, "cos_purity" = NULL))
}
# Refine CoS based on different coverage
cos_vcp <- cb_output$cos_details$cos_vcp
cos_diff_coverage <- lapply(cos_vcp, function(w) {
unlist(get_in_cos(w, coverage = coverage))
})
names(cos_diff_coverage) <- paste0(names(cos_diff_coverage), "_coverage_", coverage)
# Check purity if X or Xcorr exist
cos_purity <- NULL
if (!is.null(X) || !is.null(Xcorr)){
cos_purity <- get_cos_purity(cos_diff_coverage, X = X, Xcorr = Xcorr, n_purity = n_purity)
# Extract within-CoS purity
ncos <- nrow(cos_purity[[1]])
within_purity <- matrix(NA, nrow = ncos, ncol = 3)
colnames(within_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor")
rownames(within_purity) <- names(cos_diff_coverage)
within_purity[, 1] <- diag(cos_purity$min_abs_cor)
within_purity[, 2] <- diag(cos_purity$max_abs_cor)
within_purity[, 3] <- diag(cos_purity$median_abs_cor)
# Check purity
if (is.null(median_abs_corr)) {
is_pure <- which(within_purity[, 1] >= min_abs_corr)
} else {
is_pure <- which(within_purity[, 1] >= min_abs_corr | within_purity[, 3] >= median_abs_corr)
}
# Filter impured CoS
if (length(is_pure) == 0) {
cos_diff_coverage <- NULL
cos_purity <- NULL
} else if (length(is_pure) < ncos){
cos_diff_coverage <- cos_diff_coverage[is_pure]
cos_purity <- lapply(cos_purity, function(tmp) tmp[is_pure, is_pure, drop = FALSE] )
}
}
cos_refined <- list("cos" = cos_diff_coverage, "cos_purity" = cos_purity)
return(cos_refined)
}
#' Get integrated weight from different outcomes
#' @keywords cb_get_functions
#' @noRd
get_integrated_weight <- function(weights, weight_fudge_factor = 1.5) {
av <- apply(weights, 1, function(w) prod(w^(weight_fudge_factor / ncol(weights))))
return(av / sum(av))
}
#' Extract CoS based on coverage
#' @keywords cb_get_functions
#' @noRd
get_in_cos <- function(weights, coverage = 0.95) {
# Get indices in decreasing weight order
temp <- order(weights, decreasing = TRUE)
weights_ordered <- weights[temp]
# Find position where cumsum exceeds coverage
pos <- min(which(cumsum(weights_ordered) >= coverage))
# Get the actual threshold weight value
weight_thresh <- weights_ordered[pos]
# Find all indices with weights >= threshold
indices <- which(weights >= weight_thresh)
# Order these indices by their weights in decreasing order
csets <- indices[order(weights[indices], decreasing = TRUE)]
return(list(csets))
}
#' Calculate purity within and in-between CoS
#'
#' @description Calculate purity statistics between all pairs of colocalization confidence sets (CoS)
#'
#' @param cos List of variables in CoS
#' @param X Genotype matrix of values of the p variables. Used to compute correlations if Xcorr is not provided.
#' @param Xcorr Correlation matrix of correlations between variables. Alternative to X.
#' @param n_purity The maximum number of CoS variables used in calculating the correlation (\dQuote{purity}) statistics.
#' When the number of variables included in the CoS is greater than this number, the CoS variables are randomly subsampled.
#'
#' @return A list containing three matrices (min_abs_cor, max_abs_cor, median_abs_cor) with
#' purity statistics for all pairs of CoS. Diagonal elements represent within-CoS purity.
#'
#' @examples
#' # colocboost example
#' set.seed(1)
#' N <- 1000
#' P <- 100
#' # Generate X with LD structure
#' sigma <- 0.9^abs(outer(1:P, 1:P, "-"))
#' X <- MASS::mvrnorm(N, rep(0, P), sigma)
#' colnames(X) <- paste0("SNP", 1:P)
#' L <- 3
#' true_beta <- matrix(0, P, L)
#' true_beta[10, 1] <- 0.5
#' true_beta[10, 2] <- 0.4
#' true_beta[50, 2] <- 0.3
#' true_beta[80, 3] <- 0.6
#' Y <- matrix(0, N, L)
#' for (l in 1:L) {
#' Y[, l] <- X %*% true_beta[, l] + rnorm(N, 0, 1)
#' }
#' res <- colocboost(X = X, Y = Y)
#' cos_res <- get_cos(res, coverage = 0.8)
#' get_cos_purity(cos_res$cos, X = X)
#'
#' @family colocboost_utilities
#' @export
get_cos_purity <- function(cos, X = NULL, Xcorr = NULL, n_purity = 100) {
# Check inputs
if (is.null(cos) || length(cos) == 0) return(NULL)
if (is.null(X) && is.null(Xcorr)) stop("Either X or Xcorr must be provided")
if (is.numeric(cos)) cos <- list(cos)
# Get CoS names
cos_names <- names(cos)
ncos <- length(cos)
if (is.null(cos_names)) cos_names <- paste0("cos_", 1:ncos)
# If only one CoS, just return the purity as a matrix
if (ncos == 1) {
purity_stats <- get_purity(cos[[1]], X = X, Xcorr = Xcorr, n = n_purity)
cos_purity <- lapply(1:3, function(ii) {
mat <- matrix(purity_stats[ii], 1, 1)
rownames(mat) <- colnames(mat) <- cos_names
return(mat)
})
names(cos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor")
return(cos_purity)
}
# Initialize empty matrices for purity values
empty_matrix <- matrix(NA, ncos, ncos)
colnames(empty_matrix) <- rownames(empty_matrix) <- cos_names
# Initialize purity matrices with diagonal values (within-CoS purity)
cos_purity <- lapply(1:3, function(ii) empty_matrix )
# Compute within-CoS purity
for (i in 1:ncos){
purity_stats <- get_purity(cos[[i]], X = X, Xcorr = Xcorr, n = n_purity)
for (ii in 1:3) {
cos_purity[[ii]][i, i] <- purity_stats[ii]
}
}
# Compute between-CoS purity for every pair
for (i in 1:(ncos - 1)) {
for (j in (i + 1):ncos) {
cos1 <- cos[[i]]
cos2 <- cos[[j]]
purity_stats <- get_between_purity(cos1, cos2, X = X, Xcorr = Xcorr)
# Update all three purity matrices
for (ii in 1:3) {
cos_purity[[ii]][i, j] <- cos_purity[[ii]][j, i] <- purity_stats[ii]
}
}
}
names(cos_purity) <- c("min_abs_cor", "max_abs_cor", "median_abs_cor")
return(cos_purity)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.