Nothing
#' @importFrom stats as.dist cutree hclust
colocboost_assemble_cos <- function(cb_obj,
coverage = 0.95,
weight_fudge_factor = 1.5,
check_null = 0.1,
check_null_method = "profile",
dedup = TRUE,
overlap = TRUE,
n_purity = 100,
min_abs_corr = 0.5,
sec_coverage_thresh = 0.8,
median_abs_corr = NULL,
min_cluster_corr = 0.8,
median_cos_abs_corr = 0.8,
tol = 1e-9) {
if (!inherits(cb_obj, "colocboost")) {
stop("Input must from colocboost function!")
}
cb_model <- cb_obj$cb_model
cb_model_para <- cb_obj$cb_model_para
cb_data <- cb_obj$cb_data
# define the confident sets for colocalization
update <- cb_model_para$update_status
pos.coloc <- which(colSums(update) > 1)
if (length(pos.coloc) == 0) {
ll <- list(
"cos" = NULL,
"evidence_strength" = NULL,
"requested_coverage" = coverage
)
} else if (length(pos.coloc) == 1) {
# try pos.coloc = pos.coloc[15]
coloc_outcomes <- which(update[, pos.coloc] == 1)
avWeight <- get_avWeigth(cb_model, coloc_outcomes, update, pos.coloc, name_weight = T)
# - check purity for each outcome
check_purity <- c()
for (iiii in 1:length(coloc_outcomes)) {
X_dict <- cb_data$dict[coloc_outcomes[iiii]]
tmp <- w_purity(avWeight[, iiii, drop = FALSE],
X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX,
N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh,
min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr,
miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss
)
check_purity[iiii] <- length(tmp) == 1
}
if (sum(check_purity) < 2) {
is_non_null <- NULL
} else {
pos_purity <- which(check_purity)
avWeight <- avWeight[, pos_purity, drop = FALSE]
coloc_outcomes <- coloc_outcomes[pos_purity]
weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor)
coloc_cos <- get_in_cos(weights, coverage = coverage)
evidence_strength <- sum(weights[coloc_cos[[1]]])
# ----- null filtering
res_temp <- check_null_post(cb_obj, coloc_cos, coloc_outcomes,
check_null = check_null,
check_null_method = check_null_method
)
cs_change <- res_temp$cs_change
is_non_null <- res_temp$is_non_null
}
if (length(is_non_null) == 0) {
ll <- list(
"cos" = NULL,
"evidence_strength" = NULL,
"requested_coverage" = coverage
)
} else {
purity <- c()
pos <- as.numeric(unlist(coloc_cos))
for (i in coloc_outcomes) {
X_dict <- cb_data$dict[i]
if (!is.null(cb_data$data[[X_dict]]$XtX)) {
pos <- match(pos, setdiff(1:cb_model_para$P, cb_data$data[[i]]$variable_miss))
}
p_tmp <- matrix(get_purity(pos,
X = cb_data$data[[X_dict]]$X,
Xcorr = cb_data$data[[X_dict]]$XtX,
N = cb_data$data[[i]]$N, n = n_purity
), 1, 3)
purity <- c(purity, list(p_tmp))
}
purity_all <- Reduce(pmax, purity)
purity_all <- as.data.frame(purity_all)
colnames(purity_all) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr")
if (is.null(median_abs_corr)) {
is_pure <- (purity_all[, 1] >= min_abs_corr)
} else {
is_pure <- (purity_all[, 1] >= min_abs_corr | purity_all[, 3] >= median_abs_corr)
}
if (!is_pure) {
ll <- list(
"cos" = NULL,
"evidence_strength" = NULL,
"requested_coverage" = coverage
)
} else {
row_names <- "cos1"
names(coloc_cos) <- row_names
rownames(purity_all) <- row_names
coloc_outcomes <- list(coloc_outcomes)
ll <- list(
"cos" = coloc_cos,
"purity" = purity_all,
"evidence_strength" = evidence_strength,
"requested_coverage" = coverage,
"cs_change" = cs_change,
"avWeight" = list(avWeight),
"coloc_outcomes" = coloc_outcomes
)
}
}
} else {
coloc_candidate <- update[, pos.coloc]
coloc_candidate <- apply(coloc_candidate, 2, paste0, collapse = ",")
coloc_temp <- table(coloc_candidate)
# iterations for each colocalization sets
pos_coloc_sets <- lapply(1:length(coloc_temp), function(x) {
which(coloc_candidate == names(coloc_temp)[x])
})
names(pos_coloc_sets) <- names(coloc_temp)
# - define coloc_sets
coloc_sets <- avWeight_coloc_sets <-
total_change_Loglik_coloc <- evidence_strength_coloc <-
cs_change_coloc <- coloc_outcomes_sets <- list()
flag <- 0
for (i in 1:length(coloc_temp)) {
pos_temp_coloc_each <- pos_coloc_sets[[i]]
coloc_outcomes <- which(unlist(strsplit(names(coloc_temp)[i], ",")) == 1)
# - if only one iteration for this coloc_set
if (length(pos_temp_coloc_each) == 1) {
avWeight <- get_avWeigth(cb_model, coloc_outcomes, update, pos.coloc[pos_temp_coloc_each], name_weight = T)
# - check purity for each outcome
check_purity <- c()
for (iiii in 1:length(coloc_outcomes)) {
X_dict <- cb_data$dict[coloc_outcomes[iiii]]
tmp <- w_purity(avWeight[, iiii, drop = FALSE],
X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX,
N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh,
min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr,
miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss
)
check_purity[iiii] <- length(tmp) == 1
}
if (sum(check_purity) < 2) {
is_non_null <- NULL
} else {
pos_purity <- which(check_purity)
avWeight <- avWeight[, pos_purity, drop = FALSE]
coloc_outcomes <- coloc_outcomes[pos_purity]
weights <- get_integrated_weight(avWeight, weight_fudge_factor = weight_fudge_factor)
coloc_cos <- get_in_cos(weights, coverage = coverage)
evidence_strength <- sum(weights[coloc_cos[[1]]])
# ----- null filtering
res_temp <- check_null_post(cb_obj, coloc_cos, coloc_outcomes,
check_null = check_null,
check_null_method = check_null_method
)
cs_change <- res_temp$cs_change
is_non_null <- res_temp$is_non_null
}
if (length(is_non_null) > 0) {
flag <- flag + 1
coloc_sets[[flag]] <- as.numeric(unlist(coloc_cos))
total_change_Loglik_coloc[[flag]] <- "ONE"
avWeight_coloc_sets[[flag]] <- avWeight
evidence_strength_coloc[[flag]] <- evidence_strength
cs_change_coloc[[flag]] <- cs_change
coloc_outcomes_sets[[flag]] <- coloc_outcomes
}
} else {
av <- lapply(coloc_outcomes, function(ii) {
get_avWeigth(cb_model, ii, update, pos.coloc[pos_temp_coloc_each])
})
weight_coloc <- do.call(cbind, av)
# Hierachical Clustering iteration based on sequenced weights
cormat <- get_cormat(t(weight_coloc))
hc <- hclust(as.dist(1 - cormat))
n_cluster <- get_n_cluster(hc, cormat, min_cluster_corr = min_cluster_corr)$n_cluster
index <- cutree(hc, n_cluster)
B <- sapply(1:n_cluster, function(t) as.numeric(index == t))
B <- as.matrix(B)
# --- define the confident sets
LogLik_change <- abs(diff(cb_model_para$profile_loglik)[pos.coloc[pos_temp_coloc_each]])
avWeight <- apply(B, 2, function(t) {
idx <- which(t == 1)
w <- LogLik_change[idx]
check_purity <- list()
for (iiii in 1:length(coloc_outcomes)) {
weight_cluster <- t(av[[iiii]][idx, , drop = FALSE])
X_dict <- cb_data$dict[coloc_outcomes[iiii]]
tmp <- w_purity(weight_cluster,
X = cb_data$data[[X_dict]]$X, Xcorr = cb_data$data[[X_dict]]$XtX,
N = cb_data$data[[coloc_outcomes[iiii]]]$N, n = n_purity, coverage = sec_coverage_thresh,
min_abs_corr = min_abs_corr, median_abs_corr = median_abs_corr,
miss_idx = cb_data$data[[coloc_outcomes[iiii]]]$variable_miss
)
check_purity[[iiii]] <- tmp
}
# check_purity <- unique(as.numeric(check_purity))
check_purity <- Reduce("intersect", check_purity)
if (length(check_purity) != 0) {
w <- w[check_purity]
w_tmp <- t(weight_coloc[idx, , drop = FALSE])
weight_cluster <- w_tmp[, check_purity]
avW <- as.matrix(weight_cluster) %*% as.matrix(w) / sum(w)
} else {
avW <- rep(0, cb_model_para$P * length(coloc_outcomes))
}
return(avW)
})
avWeight <- as.matrix(avWeight)
avWeight_each <- lapply(1:length(coloc_outcomes), function(x) {
start <- (x - 1) * cb_model_para$P + 1
end <- x * cb_model_para$P
return(as.matrix(avWeight[c(start:end), ]))
})
avWeight_coloc <- lapply(1:ncol(B), function(x) {
a <- lapply(avWeight_each, function(xx) {
xx[, x]
})
a <- do.call(cbind, a)
colnames(a) <- paste0("outcome", coloc_outcomes)
return(a)
})
total_change <- t(B) %*% as.matrix(LogLik_change) / colSums(B)
coloc_cos <- evidence_strength <- avWeight_csets <- list()
total_change_csets <- c()
fl <- 1
for (i.w in 1:length(avWeight_coloc)) {
w <- avWeight_coloc[[i.w]]
if (sum(w) != 0) {
weights <- get_integrated_weight(w, weight_fudge_factor = weight_fudge_factor)
csets <- get_in_cos(weights, coverage = coverage)
coloc_cos[[fl]] <- unlist(csets)
evidence_strength[[fl]] <- sum(weights[coloc_cos[[fl]]])
avWeight_csets[[fl]] <- w
total_change_csets <- c(total_change_csets, total_change[i.w])
fl <- fl + 1
}
}
if (length(coloc_cos) != 0) {
avWeight_coloc <- avWeight_csets
total_change <- total_change_csets
# ----- null filtering
res_temp <- check_null_post(cb_obj, coloc_cos, coloc_outcomes,
check_null = check_null,
check_null_method = check_null_method
)
cs_change <- res_temp$cs_change
is_non_null <- res_temp$is_non_null
avWeight_coloc <- avWeight_csets
if (length(is_non_null) > 0) {
coloc_cos <- coloc_cos[is_non_null]
evidence_strength <- evidence_strength[is_non_null]
cs_change <- cs_change[is_non_null, ]
avWeight_coloc <- avWeight_coloc[is_non_null]
total_change <- total_change[is_non_null]
for (i.b in 1:length(is_non_null)) {
flag <- flag + 1
coloc_sets[[flag]] <- as.numeric(coloc_cos[[i.b]])
total_change_Loglik_coloc[[flag]] <- total_change[i.b]
avWeight_coloc_sets[[flag]] <- avWeight_coloc[[i.b]]
evidence_strength_coloc[[flag]] <- evidence_strength[i.b]
cs_change_coloc[[flag]] <- cs_change[i.b, ]
coloc_outcomes_sets[[flag]] <- coloc_outcomes
}
}
}
}
}
# --- filter 2*: remove overlap confidence sets
if (length(coloc_sets) >= 2) {
if (overlap) {
is_overlap <- c()
ncsets <- length(coloc_sets)
for (i2 in 1:(ncsets - 1)) {
for (j in (i2 + 1):ncsets) {
cset1 <- coloc_sets[[i2]]
cset2 <- coloc_sets[[j]]
if (all(cset1 %in% cset2) | all(cset2 %in% cset1)) {
if (cb_model_para$L == 2) {
is_overlap <- c(
is_overlap,
check_two_overlap_sets(total_change_Loglik_coloc, i2, j)
)
} else {
outcome1 <- sort(colnames(avWeight_coloc_sets[[i2]]))
outcome2 <- sort(colnames(avWeight_coloc_sets[[j]]))
if (identical(outcome1, outcome2)) {
is_overlap <- c(
is_overlap,
check_two_overlap_sets(total_change_Loglik_coloc, i2, j)
)
} else {
if (all(outcome1 %in% outcome2)) {
is_overlap <- c(is_overlap, i2)
} else if (all(outcome2 %in% outcome1)) {
is_overlap <- c(is_overlap, j)
}
}
}
}
}
}
is_overlap <- unique(is_overlap)
if (length(is_overlap) > 0) {
coloc_sets <- coloc_sets[-is_overlap]
evidence_strength_coloc <- evidence_strength_coloc[-is_overlap]
cs_change_coloc <- cs_change_coloc[-is_overlap]
avWeight_coloc_sets <- avWeight_coloc_sets[-is_overlap]
total_change_Loglik_coloc <- total_change_Loglik_coloc[-is_overlap]
coloc_outcomes_sets <- coloc_outcomes_sets[-is_overlap]
}
}
}
# --- filter 2*: remove overlap confidence sets based on median_cos_abs_corr
if (length(coloc_sets) >= 2) {
if (overlap) {
# calculate between purity
ncsets <- length(coloc_sets)
min_between <- max_between <- ave_between <- matrix(0, nrow = ncsets, ncol = ncsets)
for (i.between in 1:(ncsets - 1)) {
for (j.between in (i.between + 1):ncsets) {
cset1 <- coloc_sets[[i.between]]
cset2 <- coloc_sets[[j.between]]
res <- list()
for (i in 1:cb_model_para$L) {
X_dict <- cb_data$dict[i]
res[[i]] <- get_between_purity(cset1, cset2,
X = cb_data$data[[X_dict]]$X,
Xcorr = cb_data$data[[X_dict]]$XtX,
miss_idx = cb_data$data[[i]]$variable_miss,
P = cb_model_para$P
)
}
res <- Reduce(pmax, res)
min_between[i.between, j.between] <- min_between[j.between, i.between] <- res[1]
max_between[i.between, j.between] <- max_between[j.between, i.between] <- res[2]
ave_between[i.between, j.between] <- ave_between[j.between, i.between] <- res[3]
}
}
is.between <- (min_between > min_abs_corr) * (abs(max_between - 1) < tol) * (ave_between > median_cos_abs_corr)
if (sum(is.between) != 0) {
temp <- sapply(1:nrow(is.between), function(x) {
tt <- c(x, which(is.between[x, ] != 0))
return(paste0(sort(tt), collapse = ";"))
})
temp <- merge_sets(temp)
potential_merged <- lapply(temp, function(x) as.numeric(unlist(strsplit(x, ";"))))
potential_merged <- potential_merged[which(sapply(potential_merged, length) >= 2)]
coloc_sets_merged <- avWeight_merged <-
cs_change_merged <- evidence_strength_merged <-
coloc_outcomes_merged <- list()
is_merged <- c()
for (i.m in 1:length(potential_merged)) {
temp_set <- as.numeric(potential_merged[[i.m]])
is_merged <- c(is_merged, temp_set)
# define merged set
coloc_sets_merged <- c(coloc_sets_merged, list(unique(unlist(coloc_sets[temp_set]))))
# refine avWeight
merged <- do.call(cbind, avWeight_coloc_sets[temp_set])
unique_coloc_outcomes <- unique(colnames(merged))
coloc_p_merged <- as.numeric(gsub("[^0-9.]+", "", unique_coloc_outcomes))
coloc_outcomes_merged <- c(
coloc_outcomes_merged,
list(coloc_p_merged[order(coloc_p_merged)])
)
temp <- lapply(unique_coloc_outcomes, function(tt) {
pos <- which(colnames(merged) == tt)
tmp <- as.matrix(rowMeans(as.matrix(merged[, pos])))
colnames(tmp) <- tt
return(tmp)
})
temp <- do.call(cbind, temp)
temp <- temp[, order(unique_coloc_outcomes)]
# colnames(temp) <- unique_coloc_outcomes
avWeight_merged <- c(avWeight_merged, list(temp))
# refine cs_change
cs_change_merged <- c(
cs_change_merged,
list(do.call(pmax, cs_change_coloc[temp_set]))
)
# refine evidence strength
evidence_strength_merged <- c(
evidence_strength_merged,
list(min(unlist(evidence_strength_coloc[temp_set])))
)
}
coloc_sets_single <- coloc_sets[-is_merged]
avWeight_sets_single <- avWeight_coloc_sets[-is_merged]
cs_change_single <- cs_change_coloc[-is_merged]
evidence_strength_single <- evidence_strength_coloc[-is_merged]
coloc_outcomes_single <- coloc_outcomes_sets[-is_merged]
# --- combine merged and single
coloc_sets <- c(coloc_sets_single, coloc_sets_merged)
evidence_strength_coloc <- c(evidence_strength_single, evidence_strength_merged)
cs_change_coloc <- c(cs_change_single, cs_change_merged)
avWeight_coloc_sets <- c(avWeight_sets_single, avWeight_merged)
coloc_outcomes_sets <- c(coloc_outcomes_single, coloc_outcomes_merged)
}
}
}
# --- filter 3: purity
if (length(coloc_sets) != 0) {
purity <- vector(mode = "list", length = length(coloc_sets))
for (ee in 1:length(coloc_sets)) {
coloc_t <- coloc_outcomes_sets[[ee]]
p_tmp <- c()
for (i3 in coloc_t) {
pos <- coloc_sets[[ee]]
X_dict <- cb_data$dict[i3]
if (!is.null(cb_data$data[[X_dict]]$XtX)) {
pos <- match(pos, setdiff(1:cb_model_para$P, cb_data$data[[i3]]$variable_miss))
}
tmp <- matrix(get_purity(pos,
X = cb_data$data[[X_dict]]$X,
Xcorr = cb_data$data[[X_dict]]$XtX,
N = cb_data$data[[i3]]$N, n = n_purity
), 1, 3)
p_tmp <- rbind(p_tmp, tmp)
}
purity[[ee]] <- matrix(apply(p_tmp, 2, max), 1, 3)
}
purity_all <- do.call(rbind, purity)
purity_all <- as.data.frame(purity_all)
colnames(purity_all) <- c("min_abs_corr", "mean_abs_corr", "median_abs_corr")
if (is.null(median_abs_corr)) {
is_pure <- which(purity_all[, 1] >= min_abs_corr)
} else {
is_pure <- which(purity_all[, 1] >= min_abs_corr | purity_all[, 3] >= median_abs_corr)
}
if (length(is_pure) > 0) {
coloc_sets <- coloc_sets[is_pure]
evidence_strength_coloc <- unlist(evidence_strength_coloc[is_pure])
cs_change_coloc <- do.call(rbind, cs_change_coloc[is_pure])
avWeight_coloc_sets <- avWeight_coloc_sets[is_pure]
coloc_outcomes_sets <- coloc_outcomes_sets[is_pure]
purity_all <- purity_all[is_pure, ]
row_names <- paste0("cos", is_pure)
names(coloc_sets) <- row_names
rownames(purity_all) <- row_names
rownames(cs_change_coloc) <- row_names
if (length(coloc_sets) == 1) {
ll <- list(
"cos" = coloc_sets,
"purity" = purity_all,
"evidence_strength" = evidence_strength_coloc,
"requested_coverage" = coverage,
"cs_change" = as.matrix(cs_change_coloc),
"avWeight" = avWeight_coloc_sets,
"coloc_outcomes" = coloc_outcomes_sets
)
} else {
# Re-order CS list and purity rows based on purity.
ordering <- order(purity_all[, 1], decreasing = TRUE)
ll <- list(
"cos" = coloc_sets[ordering],
"purity" = purity_all[ordering, ],
"evidence_strength" = evidence_strength_coloc[ordering],
"requested_coverage" = coverage,
"cs_change" = cs_change_coloc[ordering, ],
"avWeight" = avWeight_coloc_sets[ordering],
"coloc_outcomes" = coloc_outcomes_sets[ordering]
)
}
} else {
ll <- list(
"cos" = NULL,
"evidence_strength" = NULL,
"requested_coverage" = coverage
)
}
} else {
ll <- list(
"cos" = NULL,
"evidence_strength" = NULL,
"requested_coverage" = coverage
)
}
}
out <- list("cos" = ll)
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.