Nothing
#' @title Inference of LUCID model based on bootstrap resampling
#'
#' @description Generate \code{R} bootstrap replicates of LUCID parameters and
#' derive confidence interval (CI) based on bootstrap. Bootstrap replicates are
#' generated by nonparametric resampling, implemented with the \code{ordinary}
#' method of \code{boot::boot}. Supports \code{lucid_model = "early"},
#' \code{lucid_model = "parallel"}, and \code{lucid_model = "serial"}.
#'
#' @param G Exposures, a numeric vector, matrix, or data frame. Categorical variable
#' should be transformed into dummy variables. If a matrix or data frame, rows
#' represent observations and columns correspond to variables.
#' @param Z Omics data: for LUCID early integration, a numeric matrix/data frame; for LUCID in
#' parallel, a list of numeric matrices/data frames. Rows correspond to observations
#' and columns correspond to variables.
#' @param Y Outcome, a numeric vector. Categorical variable is not allowed. Binary
#' outcome should be coded as 0 and 1.
#' @param lucid_model Specifying LUCID model, "early" for early integration,
#' "parallel" for LUCID in parallel, "serial" for LUCID in serial.
#' Bootstrap inference is implemented for all three model types.
#' @param CoG Optional, covariates to be adjusted for estimating the latent cluster.
#' A numeric vector, matrix or data frame. Categorical variable should be transformed
#' into dummy variables.
#' @param CoY Optional, covariates to be adjusted for estimating the association
#' between latent cluster and the outcome. A numeric vector, matrix or data frame.
#' Categorical variable should be transformed into dummy variables.
#' @param model A LUCID model fitted by \code{estimate_lucid}.
#' If the fitted model uses nonzero penalties, \code{boot_lucid} will
#' automatically refit a zero-penalty model as fallback because bootstrap
#' inference is only supported for \code{Rho_G = Rho_Z_Mu = Rho_Z_Cov = 0}.
#' @param conf A numeric scalar between 0 and 1 to specify confidence level(s)
#' of the required interval(s).
#' @param R An integer to specify number of bootstrap replicates for LUCID model.
#' If feasible, it is recommended to set R >= 1000.
#' @param verbose A flag indicates whether detailed information
#' is printed in console. Default is FALSE.
#'
#' @return A list containing:
#' \item{beta}{Bootstrap CI table(s) for G-to-X effects. For
#' \code{lucid_model = "parallel"}, this is a list by omics layer and includes
#' the multinomial intercept plus exposures in \code{G} (not \code{CoG}).}
#' \item{mu}{Bootstrap CI table(s) for cluster-specific means of omics features.
#' For \code{lucid_model = "parallel"}, this is a list by omics layer.}
#' \item{gamma}{Bootstrap CI table for X-to-Y parameters.}
#' \item{stage}{For \code{lucid_model = "serial"}, a list of stage-wise CI tables
#' (each stage contains \code{beta}, \code{mu}, and \code{gamma} for the final stage only).}
#' \item{bootstrap}{The \code{boot} object returned by \code{boot::boot}.}
#'
#' @export
#'
#' @import boot
#' @import progress
#'
#' @examples
#' \donttest{
#' # use simulated data
#' G <- sim_data$G[1:300, , drop = FALSE]
#' Z <- sim_data$Z[1:300, , drop = FALSE]
#' Y_normal <- sim_data$Y_normal[1:300]
#'
#' # fit lucid model
#' fit1 <- estimate_lucid(G = G, Z = Z, Y = Y_normal, lucid_model = "early",
#' family = "normal", K = 2,
#' seed = 1008)
#'
#' # conduct bootstrap resampling
#' boot1 <- suppressWarnings(
#' boot_lucid(G = G, Z = Z, Y = Y_normal,
#' lucid_model = "early", model = fit1, R = 5)
#' )
#'
#' # Use 90% CI
#' boot2 <- suppressWarnings(
#' boot_lucid(G = G, Z = Z, Y = Y_normal, lucid_model = "early",
#' model = fit1, R = 5, conf = 0.9)
#' )
#' }
boot_lucid <- function(G,
Z,
Y,
lucid_model = c("early", "parallel","serial"),
CoG = NULL,
CoY = NULL,
model,
conf = 0.95,
R = 100,
verbose = FALSE) {
lucid_model <- match.arg(lucid_model)
model <- normalize_bootstrap_model(
model = model,
lucid_model = lucid_model,
G = G,
Z = Z,
Y = Y,
CoG = CoG,
CoY = CoY
)
# prepare data for bootstrap (boot function require data in a matrix form,
# list data structure doesn't work)
if(!is.null(model$select) &&
(!is.null(model$select$selectG) || !is.null(model$select$selectZ)) &&
(has_unselected_feature(model$select$selectG) || has_unselected_feature(model$select$selectZ))) {
stop("Refit LUCID model with selected feature first then conduct bootstrap inference")
}
if (lucid_model == "serial" && has_unselected_feature_serial(model)) {
stop("Refit serial LUCID model with selected feature first then conduct bootstrap inference")
}
if (lucid_model == "early"){
# ========================== Early Integration ==========================
G <- as.matrix(G)
Z <- as.matrix(Z)
Y <- as.matrix(Y)
dimG <- ncol(G)
dimZ <- ncol(Z)
dimCoG <- if (is.null(CoG)) 0 else ncol(as.matrix(CoG))
dimCoY <- if (is.null(CoY)) 0 else ncol(as.matrix(CoY))
K <- model$K
alldata <- as.data.frame(cbind(G, Z, Y, CoG, CoY))
# bootstrap
if(verbose){
cat(paste0("Use Bootstrap resampling to derive ", 100 * conf, "% CI for LUCID \n"))
}
#initialize progress bar object
pb <- progress::progress_bar$new(total = R + 1)
bootstrap <- boot::boot(data = alldata,
statistic = lucid_par_early,
R = R,
dimG = dimG,
dimZ = dimZ,
dimCoY = dimCoY,
dimCoG = dimCoG,
model = model,
prog = pb)
# bootstrap CIs
ci <- gen_ci(bootstrap,
conf = conf)
# organize CIs
beta <- ci[1:((K - 1) * dimG), ]
mu <- ci[((K - 1) * dimG + 1): ((K - 1) * dimG + K * dimZ), ]
gamma <- ci[-(1:((K - 1) * dimG + K * dimZ)), ]
return(list(beta = beta,
mu = mu,
gamma = gamma,
bootstrap = bootstrap))
} else if (lucid_model == "parallel"){
# ========================== Lucid in Parallel ==========================
G <- as.matrix(G)
Gnames_exposure <- colnames(G)
if(is.null(Gnames_exposure) || length(Gnames_exposure) != ncol(G)) {
Gnames_exposure <- paste0("G", seq_len(ncol(G)))
}
Y <- as.matrix(Y)
if(!is.list(Z)) {
stop("For lucid_model = 'parallel', input 'Z' should be a list of matrices/data frames")
}
Z <- lapply(Z, as.matrix)
nOmics <- length(Z)
if(nOmics == 0) {
stop("For lucid_model = 'parallel', input 'Z' should contain at least one omics layer")
}
dimG <- ncol(G)
dimZ <- sapply(Z, ncol)
dimCoG <- if (is.null(CoG)) 0 else ncol(as.matrix(CoG))
dimCoY <- if (is.null(CoY)) 0 else ncol(as.matrix(CoY))
K <- model$K
if(length(K) != nOmics) {
stop("Length of model$K does not match number of omics layers in Z")
}
z_combined <- do.call(cbind, Z)
alldata <- as.data.frame(cbind(G, z_combined, Y, CoG, CoY))
# define template from fitted model to enforce fixed bootstrap statistic length
template <- extract_parallel_boot_vector(
model = model,
dimG = dimG,
dimZ = dimZ,
Gnames_exposure = Gnames_exposure
)
template_names <- names(template)
n_template <- length(template)
if(verbose){
cat(paste0("Use Bootstrap resampling to derive ", 100 * conf, "% CI for LUCID in parallel \n"))
}
pb <- progress::progress_bar$new(total = R + 1)
bootstrap <- boot::boot(data = alldata,
statistic = lucid_par_parallel,
R = R,
dimG = dimG,
dimZ = dimZ,
dimCoY = dimCoY,
dimCoG = dimCoG,
model = model,
Gnames_exposure = Gnames_exposure,
template_names = template_names,
n_template = n_template,
prog = pb)
ci <- gen_ci(bootstrap, conf = conf)
# split outputs by layer for easier consumption
n_beta <- as.integer((K - 1) * (dimG + 1))
n_mu <- as.integer(K * dimZ)
beta <- vector("list", nOmics)
mu <- vector("list", nOmics)
idx_start <- 1
for(i in seq_len(nOmics)) {
idx_end <- idx_start + n_beta[i] - 1
beta[[i]] <- ci[idx_start:idx_end, , drop = FALSE]
idx_start <- idx_end + 1
}
for(i in seq_len(nOmics)) {
idx_end <- idx_start + n_mu[i] - 1
mu[[i]] <- ci[idx_start:idx_end, , drop = FALSE]
idx_start <- idx_end + 1
}
gamma <- ci[idx_start:nrow(ci), , drop = FALSE]
names(beta) <- paste0("Layer", seq_len(nOmics))
names(mu) <- paste0("Layer", seq_len(nOmics))
return(list(beta = beta,
mu = mu,
gamma = gamma,
bootstrap = bootstrap))
} else if (lucid_model == "serial"){
# ========================== Lucid in Serial ==========================
G <- as.matrix(G)
Y <- as.matrix(Y)
dimG <- ncol(G)
dimCoG <- if (is.null(CoG)) 0 else ncol(as.matrix(CoG))
dimCoY <- if (is.null(CoY)) 0 else ncol(as.matrix(CoY))
z_flat <- flatten_serial_Z(Z)
alldata <- as.data.frame(cbind(G, z_flat$flat, Y, CoG, CoY))
template <- extract_serial_boot_template(model)
template_names <- names(template$vector)
n_template <- length(template_names)
if(verbose){
cat(paste0("Use Bootstrap resampling to derive ", 100 * conf, "% CI for LUCID in serial \n"))
}
pb <- progress::progress_bar$new(total = R + 1)
bootstrap <- boot::boot(
data = alldata,
statistic = lucid_par_serial,
R = R,
dimG = dimG,
dimCoY = dimCoY,
dimCoG = dimCoG,
z_meta = z_flat$meta,
model = model,
template_names = template_names,
n_template = n_template,
prog = pb
)
ci <- gen_ci(bootstrap, conf = conf)
stage_ci <- split_serial_boot_ci(ci = ci, stage_layout = template$stage_layout)
return(list(
stage = stage_ci,
bootstrap = bootstrap
))
}
}
# function to calculate parameters of early LUCID model. use as statisitc input for
# boot function.
lucid_par_early <- function(data, indices, model, dimG, dimZ, dimCoY, dimCoG, prog) {
#display progress with each run of the function
prog$tick()
# prepare data
d <- data[indices, ]
G <- as.matrix(d[, 1:dimG])
Z <- as.matrix(d[, (dimG + 1):(dimG + dimZ)])
Y <- as.matrix(d[, (dimG + dimZ + 1)])
CoG <- CoY <- NULL
K <- model$K
if(dimCoG > 0){
CoG <- as.matrix(d[, (dimG + dimZ + 2):(dimG + dimZ + dimCoG + 1)])
}
if(dimCoY > 0 && dimCoG > 0){
CoY <- as.matrix(d[, (dimG + dimZ + dimCoG + 2):ncol(d)])
}
if(dimCoY > 0 && dimCoG == 0){
CoY <- as.matrix(d[, (dimG + dimZ + 2):ncol(d)])
}
# fit lucid model
seed <- sample(1:2000, 1)
em_ctrl <- model$em_control
rG <- if(!is.null(model$Rho$Rho_G)) model$Rho$Rho_G else 0
rMu <- if(!is.null(model$Rho$Rho_Z_Mu)) model$Rho$Rho_Z_Mu else 0
rCov <- if(!is.null(model$Rho$Rho_Z_Cov)) model$Rho$Rho_Z_Cov else 0
tol_fit <- if(!is.null(em_ctrl$tol)) em_ctrl$tol else 0.001
max_itr_fit <- if(!is.null(em_ctrl$max_itr)) em_ctrl$max_itr else 1000
max_tot_fit <- if(!is.null(em_ctrl$max_tot.itr)) em_ctrl$max_tot.itr else 10000
invisible(capture.output(try_lucid <- try(est_lucid(G = G,
Z = Z,
Y = Y,
CoY = CoY,
CoG = CoG,
lucid_model = "early",
family = model$family,
init_omic.data.model = model$init_omic.data.model,
K = K,
tol = tol_fit,
max_itr = max_itr_fit,
max_tot.itr = max_tot_fit,
Rho_G = rG,
Rho_Z_Mu = rMu,
Rho_Z_Cov = rCov,
init_impute = model$init_impute,
init_par = model$init_par,
seed = seed))))
if("try-error" %in% class(try_lucid)){
n_par <- (K - 1) * dimG + K * dimZ + length(model$res_Gamma$beta)
par_lucid <- rep(NA_real_, n_par)
} else{
beta_col_idx <- 1 + seq_len(dimG)
beta_exposure <- try_lucid$res_Beta[-1, beta_col_idx, drop = FALSE]
par_lucid <- c(as.vector(t(beta_exposure)),
as.vector(t(try_lucid$res_Mu)),
try_lucid$res_Gamma$beta)
G_names <- as.vector(sapply(2:K, function(x) {
paste0(colnames(try_lucid$res_Beta)[beta_col_idx],
".cluster", x)
}))
Z_names <- as.vector(sapply(1:K, function(x) {
paste0(colnames(try_lucid$res_Mu),
".cluster", x)
}))
if(is.null(names(try_lucid$res_Gamma$beta))) {
Y_names <- paste0("cluster", 1:K)
} else {
Y_names <- names(try_lucid$res_Gamma$beta)
}
names(par_lucid) <- c(G_names, Z_names, Y_names)
converge <- TRUE
}
return(par_lucid)
}
# function to calculate parameters of parallel LUCID model. use as statistic input for
# boot function.
lucid_par_parallel <- function(data, indices, model, dimG, dimZ, dimCoY, dimCoG,
Gnames_exposure, template_names, n_template, prog) {
prog$tick()
d <- data[indices, , drop = FALSE]
col_start <- 1
G <- as.matrix(d[, col_start:(col_start + dimG - 1), drop = FALSE])
col_start <- col_start + dimG
nOmics <- length(dimZ)
Z <- vector("list", nOmics)
for(i in seq_len(nOmics)) {
zdim <- dimZ[i]
Z[[i]] <- as.matrix(d[, col_start:(col_start + zdim - 1), drop = FALSE])
col_start <- col_start + zdim
}
Y <- as.matrix(d[, col_start, drop = FALSE])
col_start <- col_start + 1
CoG <- CoY <- NULL
if(dimCoG > 0) {
CoG <- as.matrix(d[, col_start:(col_start + dimCoG - 1), drop = FALSE])
col_start <- col_start + dimCoG
}
if(dimCoY > 0) {
CoY <- as.matrix(d[, col_start:(col_start + dimCoY - 1), drop = FALSE])
}
seed <- sample(1:2000, 1)
rG <- if(!is.null(model$Rho$Rho_G)) model$Rho$Rho_G else 0
rMu <- if(!is.null(model$Rho$Rho_Z_Mu)) model$Rho$Rho_Z_Mu else 0
rCov <- if(!is.null(model$Rho$Rho_Z_Cov)) model$Rho$Rho_Z_Cov else 0
em_ctrl <- model$em_control
tol_fit <- if(!is.null(em_ctrl$tol)) em_ctrl$tol else 0.001
max_itr_fit <- if(!is.null(em_ctrl$max_itr)) em_ctrl$max_itr else 1000
max_tot_fit <- if(!is.null(em_ctrl$max_tot.itr)) em_ctrl$max_tot.itr else 10000
invisible(capture.output(try_lucid <- try(est_lucid(
G = G,
Z = Z,
Y = Y,
CoY = CoY,
CoG = CoG,
lucid_model = "parallel",
family = model$family,
init_omic.data.model = model$init_omic.data.model,
K = model$K,
tol = tol_fit,
max_itr = max_itr_fit,
max_tot.itr = max_tot_fit,
init_impute = model$init_impute,
init_par = model$init_par,
useY = model$useY,
Rho_G = rG,
Rho_Z_Mu = rMu,
Rho_Z_Cov = rCov,
seed = seed
), silent = TRUE)))
if("try-error" %in% class(try_lucid)) {
par_lucid <- rep(NA_real_, n_template)
names(par_lucid) <- template_names
} else {
par_raw <- extract_parallel_boot_vector(
model = try_lucid,
dimG = dimG,
dimZ = dimZ,
Gnames_exposure = Gnames_exposure
)
par_lucid <- align_boot_vector(par_raw, template_names = template_names)
}
return(par_lucid)
}
# Normalize input model for bootstrap:
# bootstrap CI is only supported on zero-penalty models.
normalize_bootstrap_model <- function(model, lucid_model, G, Z, Y, CoG = NULL, CoY = NULL) {
rho_vals <- get_model_rho_values(model)
if (all(abs(rho_vals) <= sqrt(.Machine$double.eps))) {
return(model)
}
warning(
paste0(
"Bootstrap CI is only supported for zero-penalty models ",
"(Rho_G = Rho_Z_Mu = Rho_Z_Cov = 0). ",
"Detected nonzero penalty (Rho_G=", format(rho_vals[1], digits = 6),
", Rho_Z_Mu=", format(rho_vals[2], digits = 6),
", Rho_Z_Cov=", format(rho_vals[3], digits = 6),
"). Falling back to a zero-penalty refit before bootstrap."
),
call. = FALSE
)
refit_bootstrap_zero_penalty(
model = model,
lucid_model = lucid_model,
G = G,
Z = Z,
Y = Y,
CoG = CoG,
CoY = CoY
)
}
get_model_rho_values <- function(model) {
get_scalar <- function(x) {
if (is.null(x)) return(0)
v <- suppressWarnings(as.numeric(x))
v <- v[is.finite(v)]
if (length(v) == 0) return(0)
v[1]
}
rho <- model$Rho
c(
Rho_G = get_scalar(rho$Rho_G),
Rho_Z_Mu = get_scalar(rho$Rho_Z_Mu),
Rho_Z_Cov = get_scalar(rho$Rho_Z_Cov)
)
}
refit_bootstrap_zero_penalty <- function(model, lucid_model, G, Z, Y, CoG = NULL, CoY = NULL) {
em_ctrl <- model$em_control
tol_fit <- if(!is.null(em_ctrl$tol)) em_ctrl$tol else 0.001
max_itr_fit <- if(!is.null(em_ctrl$max_itr)) em_ctrl$max_itr else 1000
max_tot_fit <- if(!is.null(em_ctrl$max_tot.itr)) em_ctrl$max_tot.itr else 10000
seed_fit <- if(!is.null(model$seed) && length(model$seed) > 0 && is.finite(model$seed[1])) {
as.integer(model$seed[1])
} else {
123
}
useY_fit <- if(!is.null(model$useY)) model$useY else TRUE
invisible(capture.output(
refit_try <- try(
estimate_lucid(
lucid_model = lucid_model,
G = G,
Z = Z,
Y = Y,
CoG = CoG,
CoY = CoY,
K = model$K,
init_omic.data.model = model$init_omic.data.model,
useY = useY_fit,
tol = tol_fit,
max_itr = max_itr_fit,
max_tot.itr = max_tot_fit,
Rho_G = 0,
Rho_Z_Mu = 0,
Rho_Z_Cov = 0,
family = model$family,
seed = seed_fit,
init_impute = model$init_impute,
init_par = model$init_par,
verbose = FALSE
),
silent = TRUE
)
))
if("try-error" %in% class(refit_try)) {
stop(
paste0(
"Bootstrap CI requires a zero-penalty model and fallback refit failed. ",
"Please fit estimate_lucid(..., Rho_G = 0, Rho_Z_Mu = 0, Rho_Z_Cov = 0) and retry."
)
)
}
refit_try
}
# recursively detect if any feature is unselected
has_unselected_feature <- function(x) {
if(is.null(x)) {
return(FALSE)
}
if(is.list(x)) {
return(any(vapply(x, has_unselected_feature, logical(1))))
}
x_logical <- as.logical(x)
any(!x_logical, na.rm = TRUE)
}
# Extract a fixed-shape vector of parallel parameters:
# (all layer beta exposure effects for non-reference clusters,
# all layer mu values, gamma coefficients).
extract_parallel_boot_vector <- function(model, dimG, dimZ, Gnames_exposure = NULL) {
K <- as.integer(model$K)
nOmics <- length(K)
if(!is.null(Gnames_exposure) && length(Gnames_exposure) == dimG) {
Gnames <- Gnames_exposure
} else {
Gnames <- model$var.names$Gnames
if(!is.null(Gnames) && length(Gnames) >= dimG) {
Gnames <- Gnames[seq_len(dimG)]
} else {
Gnames <- paste0("G", seq_len(dimG))
}
}
Znames <- model$var.names$Znames
if(is.null(Znames) || length(Znames) != nOmics) {
Znames <- lapply(seq_len(nOmics), function(i) paste0("Z", i, "_", seq_len(dimZ[i])))
}
beta_all <- NULL
beta_names <- NULL
beta_list <- model$res_Beta$Beta
if(is.null(beta_list) && is.list(model$res_Beta)) {
beta_list <- model$res_Beta
}
for(i in seq_len(nOmics)) {
beta_mat <- normalize_parallel_beta(beta_i = beta_list[[i]], K_i = K[i], Gnames = Gnames)
beta_vec <- as.vector(t(beta_mat))
beta_name_i <- as.vector(sapply(2:K[i], function(k) {
paste0("Layer", i, ".", colnames(beta_mat), ".cluster", k)
}))
beta_all <- c(beta_all, beta_vec)
beta_names <- c(beta_names, beta_name_i)
}
mu_all <- NULL
mu_names <- NULL
for(i in seq_len(nOmics)) {
z_names_i <- Znames[[i]]
if(is.null(z_names_i) || length(z_names_i) != dimZ[i]) {
z_names_i <- paste0("Z", i, "_", seq_len(dimZ[i]))
}
mu_mat <- normalize_parallel_mu(mu_i = model$res_Mu[[i]], K_i = K[i], Znames_i = z_names_i)
mu_vec <- as.vector(t(mu_mat))
mu_name_i <- as.vector(sapply(seq_len(K[i]), function(k) {
paste0("Layer", i, ".", z_names_i, ".cluster", k)
}))
mu_all <- c(mu_all, mu_vec)
mu_names <- c(mu_names, mu_name_i)
}
gamma_all <- extract_parallel_gamma(model)
gamma_names <- names(gamma_all)
if(is.null(gamma_names)) {
gamma_names <- paste0("gamma", seq_along(gamma_all))
}
gamma_names <- paste0("Y.", gamma_names)
par <- c(beta_all, mu_all, as.numeric(gamma_all))
names(par) <- c(beta_names, mu_names, gamma_names)
par
}
normalize_parallel_beta <- function(beta_i, K_i, Gnames) {
K_i <- as.integer(K_i)
dimG <- length(Gnames)
target <- matrix(NA_real_, nrow = max(K_i - 1, 0), ncol = dimG + 1)
colnames(target) <- c("(Intercept)", Gnames)
if(K_i <= 1 || is.null(beta_i)) {
return(target)
}
if(is.null(dim(beta_i))) {
beta_i <- matrix(beta_i, nrow = 1)
} else {
beta_i <- as.matrix(beta_i)
}
# keep non-reference clusters only
if(nrow(beta_i) == K_i) {
beta_i <- beta_i[2:K_i, , drop = FALSE]
} else if(nrow(beta_i) >= (K_i - 1)) {
beta_i <- beta_i[seq_len(K_i - 1), , drop = FALSE]
}
beta_exposure <- matrix(NA_real_, nrow = nrow(beta_i), ncol = dimG + 1)
colnames(beta_exposure) <- c("(Intercept)", Gnames)
if(!is.null(colnames(beta_i))) {
idx_int <- match("(Intercept)", colnames(beta_i))
if(!is.na(idx_int)) {
beta_exposure[, 1] <- beta_i[, idx_int, drop = TRUE]
} else if(ncol(beta_i) >= 1) {
beta_exposure[, 1] <- beta_i[, 1, drop = TRUE]
}
idx <- match(Gnames, colnames(beta_i))
valid <- which(!is.na(idx))
if(length(valid) > 0) {
beta_exposure[, 1 + valid] <- beta_i[, idx[valid], drop = FALSE]
} else {
# Fallback for naming mismatches: use positional exposure columns.
offset <- if(ncol(beta_i) >= 1 && grepl("Intercept", colnames(beta_i)[1], fixed = TRUE)) 2 else 1
if(offset <= ncol(beta_i)) {
n_fill <- min(dimG, ncol(beta_i) - offset + 1)
if(n_fill > 0) {
beta_exposure[, 1 + seq_len(n_fill)] <- beta_i[, seq.int(offset, length.out = n_fill), drop = FALSE]
}
}
}
} else {
if(ncol(beta_i) >= 1) {
beta_exposure[, 1] <- beta_i[, 1, drop = TRUE]
}
if(ncol(beta_i) >= (dimG + 1)) {
beta_exposure[, 1 + seq_len(dimG)] <- beta_i[, 2:(dimG + 1), drop = FALSE]
} else {
n_fill <- min(dimG, ncol(beta_i))
if(n_fill > 0) {
beta_exposure[, 1 + seq_len(n_fill)] <- beta_i[, seq_len(n_fill), drop = FALSE]
}
}
}
n_fill <- min(nrow(target), nrow(beta_exposure))
if(n_fill > 0) {
target[seq_len(n_fill), ] <- beta_exposure[seq_len(n_fill), , drop = FALSE]
}
target
}
normalize_parallel_mu <- function(mu_i, K_i, Znames_i) {
K_i <- as.integer(K_i)
dimZ_i <- length(Znames_i)
target <- matrix(NA_real_, nrow = K_i, ncol = dimZ_i)
colnames(target) <- Znames_i
if(is.null(mu_i)) {
return(target)
}
if(is.null(dim(mu_i))) {
mu_i <- matrix(mu_i, nrow = K_i)
} else {
mu_i <- as.matrix(mu_i)
}
if(nrow(mu_i) == dimZ_i && ncol(mu_i) == K_i) {
mu_i <- t(mu_i)
}
n_row <- min(K_i, nrow(mu_i))
n_col <- min(dimZ_i, ncol(mu_i))
if(n_row > 0 && n_col > 0) {
target[seq_len(n_row), seq_len(n_col)] <- mu_i[seq_len(n_row), seq_len(n_col), drop = FALSE]
}
target
}
extract_parallel_gamma <- function(model) {
gamma <- NULL
if(!is.null(model$res_Gamma$fit)) {
gamma <- try(coef(model$res_Gamma$fit), silent = TRUE)
if(inherits(gamma, "try-error")) {
gamma <- NULL
}
}
if(is.null(gamma)) {
family_parallel <- to_parallel_family(model$family)
if(family_parallel == "gaussian") {
gamma <- model$res_Gamma$Gamma$mu
} else {
gamma <- model$res_Gamma$fit$coefficients
}
}
gamma
}
align_boot_vector <- function(par_raw, template_names) {
par <- rep(NA_real_, length(template_names))
names(par) <- template_names
if(length(par_raw) == 0) {
return(par)
}
par_names <- names(par_raw)
if(!is.null(par_names) && all(!is.na(par_names))) {
idx <- match(par_names, template_names)
valid <- which(!is.na(idx))
# Use name-based alignment only when complete; otherwise use positional fallback.
if(length(valid) == length(par_raw)) {
par[idx[valid]] <- as.numeric(par_raw[valid])
return(par)
}
}
n_fill <- min(length(par), length(par_raw))
par[seq_len(n_fill)] <- as.numeric(par_raw[seq_len(n_fill)])
par
}
has_unselected_feature_serial <- function(model) {
if (is.null(model$submodel) || !is.list(model$submodel)) {
return(FALSE)
}
any(vapply(model$submodel, function(sm) {
if (is.null(sm$select)) {
return(FALSE)
}
has_unselected_feature(sm$select$selectG) || has_unselected_feature(sm$select$selectZ)
}, logical(1)))
}
flatten_serial_Z <- function(Z) {
leaf_mats <- list()
leaf_id <- 0L
recurse <- function(node) {
if (is.list(node)) {
children <- vector("list", length(node))
for (i in seq_along(node)) {
children[[i]] <- recurse(node[[i]])
}
names(children) <- names(node)
return(list(kind = "list", children = children, names = names(node)))
}
z_mat <- as.matrix(node)
if (!is.numeric(z_mat)) {
stop("All serial Z blocks must be numeric.")
}
leaf_id <<- leaf_id + 1L
leaf_mats[[leaf_id]] <<- z_mat
list(kind = "leaf", leaf_id = leaf_id, ncol = ncol(z_mat), colnames = colnames(z_mat))
}
meta <- recurse(Z)
flat <- do.call(cbind, leaf_mats)
list(flat = flat, meta = meta)
}
restore_serial_Z_from_data <- function(d, col_start, z_meta) {
idx <- as.integer(col_start)
recurse <- function(meta) {
if (identical(meta$kind, "leaf")) {
cols <- idx:(idx + meta$ncol - 1L)
z_mat <- as.matrix(d[, cols, drop = FALSE])
if (!is.null(meta$colnames) && length(meta$colnames) == meta$ncol) {
colnames(z_mat) <- meta$colnames
}
idx <<- idx + meta$ncol
return(z_mat)
}
out <- vector("list", length(meta$children))
for (i in seq_along(meta$children)) {
out[[i]] <- recurse(meta$children[[i]])
}
names(out) <- meta$names
out
}
list(Z = recurse(z_meta), next_col = idx)
}
build_serial_transition_labels_boot <- function(submodels) {
n_stage <- length(submodels)
out <- vector("list", n_stage)
out[[1]] <- character(0)
for (i in seq.int(2, n_stage)) {
prev <- submodels[[i - 1L]]
if (inherits(prev, "early_lucid")) {
k_prev <- as.integer(prev$K)
if (length(k_prev) > 0 && !is.na(k_prev) && k_prev > 1) {
out[[i]] <- paste0("Stage", i - 1L, ".cluster", seq.int(2, k_prev))
} else {
out[[i]] <- character(0)
}
} else if (inherits(prev, "lucid_parallel")) {
k_prev <- as.integer(prev$K)
labels <- character(0)
for (layer_idx in seq_along(k_prev)) {
if (!is.na(k_prev[layer_idx]) && k_prev[layer_idx] > 1) {
labels <- c(labels, paste0("Stage", i - 1L, ".Layer", layer_idx,
".cluster", seq.int(2, k_prev[layer_idx])))
}
}
out[[i]] <- labels
} else {
out[[i]] <- character(0)
}
}
out
}
extract_early_stage_vector <- function(stage_model, is_last_stage, transition_labels = character(0)) {
K <- as.integer(stage_model$K)
beta_mat <- as.matrix(stage_model$res_Beta)
if (is.null(dim(beta_mat))) {
beta_mat <- matrix(beta_mat, nrow = 1)
}
if (nrow(beta_mat) == K) {
beta_use <- beta_mat[2:K, , drop = FALSE]
cluster_ids <- 2:K
} else if (nrow(beta_mat) == (K - 1)) {
beta_use <- beta_mat
cluster_ids <- 2:K
} else {
beta_use <- beta_mat
cluster_ids <- seq_len(nrow(beta_use))
}
if (nrow(beta_use) == 0) {
beta_use <- matrix(numeric(0), nrow = 0, ncol = ncol(beta_mat))
cluster_ids <- integer(0)
}
n_feat <- max(0, ncol(beta_use) - 1L)
if (length(transition_labels) > 0 && n_feat > 0) {
feat_names <- transition_labels[seq_len(min(length(transition_labels), n_feat))]
if (length(feat_names) < n_feat) {
feat_names <- c(feat_names, paste0("PrevStageCluster", seq.int(length(feat_names) + 1L, n_feat)))
}
beta_col_names <- c("(Intercept)", feat_names)
} else {
beta_col_names <- colnames(beta_use)
if (is.null(beta_col_names) || length(beta_col_names) != ncol(beta_use)) {
beta_col_names <- c("(Intercept)", paste0("G", seq_len(n_feat)))
} else {
beta_col_names[1] <- "(Intercept)"
}
}
if (ncol(beta_use) == length(beta_col_names)) {
colnames(beta_use) <- beta_col_names
}
beta_vec <- as.numeric(t(beta_use))
beta_names <- as.vector(sapply(cluster_ids, function(k) paste0(beta_col_names, ".cluster", k)))
if (length(beta_vec) == length(beta_names)) {
names(beta_vec) <- beta_names
}
mu_mat <- as.matrix(stage_model$res_Mu)
if (is.null(dim(mu_mat))) {
mu_mat <- matrix(mu_mat, nrow = K)
}
if (nrow(mu_mat) != K && ncol(mu_mat) == K) {
mu_mat <- t(mu_mat)
}
z_names <- stage_model$var.names$Znames
if (is.null(z_names) || length(z_names) != ncol(mu_mat)) {
z_names <- paste0("Z", seq_len(ncol(mu_mat)))
}
colnames(mu_mat) <- z_names
mu_vec <- as.numeric(t(mu_mat))
mu_names <- as.vector(sapply(seq_len(nrow(mu_mat)), function(k) paste0(z_names, ".cluster", k)))
if (length(mu_vec) == length(mu_names)) {
names(mu_vec) <- mu_names
}
gamma_vec <- numeric(0)
if (isTRUE(is_last_stage)) {
gamma_raw <- stage_model$res_Gamma$beta
gamma_vec <- as.numeric(gamma_raw)
gamma_names <- names(gamma_raw)
if (is.null(gamma_names) || length(gamma_names) != length(gamma_vec)) {
gamma_names <- paste0("gamma", seq_along(gamma_vec))
}
names(gamma_vec) <- paste0("Y.", gamma_names)
}
vec <- c(beta_vec, mu_vec, gamma_vec)
list(vec = vec, layout = list(type = "early", n_beta = length(beta_vec),
n_mu = length(mu_vec), n_gamma = length(gamma_vec)))
}
extract_parallel_stage_vector <- function(stage_model, is_last_stage, transition_labels = character(0)) {
K <- as.integer(stage_model$K)
dimZ <- as.integer(sapply(stage_model$Z, ncol))
if (length(transition_labels) > 0) {
g_names <- transition_labels
} else {
g_names <- stage_model$var.names$Gnames
if (is.null(g_names) || length(g_names) == 0) {
b1 <- stage_model$res_Beta$Beta[[1]]
p <- if (!is.null(b1)) max(0, ncol(as.matrix(b1)) - 1L) else 0L
g_names <- paste0("G", seq_len(p))
}
}
dimG_stage <- length(g_names)
par <- extract_parallel_boot_vector(
model = stage_model,
dimG = dimG_stage,
dimZ = dimZ,
Gnames_exposure = g_names
)
n_beta_layer <- as.integer((K - 1L) * (dimG_stage + 1L))
n_mu_layer <- as.integer(K * dimZ)
n_beta <- sum(n_beta_layer)
n_mu <- sum(n_mu_layer)
n_keep <- n_beta + n_mu
if (!isTRUE(is_last_stage)) {
par <- par[seq_len(n_keep)]
}
n_gamma <- max(0L, length(par) - n_keep)
list(vec = par, layout = list(type = "parallel", n_beta = n_beta, n_mu = n_mu,
n_gamma = n_gamma, n_beta_layer = n_beta_layer,
n_mu_layer = n_mu_layer))
}
extract_serial_boot_template <- function(model) {
if (is.null(model$submodel) || !is.list(model$submodel) || length(model$submodel) == 0) {
stop("Input serial model does not contain valid submodels.")
}
submodels <- model$submodel
n_stage <- length(submodels)
transition_labels <- build_serial_transition_labels_boot(submodels)
vec_all <- numeric(0)
stage_layout <- vector("list", n_stage)
idx_start <- 1L
for (i in seq_len(n_stage)) {
is_last <- (i == n_stage)
sm <- submodels[[i]]
stage_obj <- if (inherits(sm, "early_lucid")) {
extract_early_stage_vector(sm, is_last_stage = is_last, transition_labels = transition_labels[[i]])
} else if (inherits(sm, "lucid_parallel")) {
extract_parallel_stage_vector(sm, is_last_stage = is_last, transition_labels = transition_labels[[i]])
} else {
stop("Unsupported submodel class in serial bootstrap.")
}
stage_vec <- as.numeric(stage_obj$vec)
local_names <- names(stage_obj$vec)
if (is.null(local_names) || length(local_names) != length(stage_vec)) {
local_names <- paste0("param", seq_along(stage_vec))
}
prefixed_names <- paste0("Stage", i, "::", local_names)
names(stage_vec) <- prefixed_names
idx_end <- idx_start + length(stage_vec) - 1L
stage_layout[[i]] <- c(stage_obj$layout, list(start = idx_start, end = idx_end,
local_names = local_names,
prefixed_names = prefixed_names))
vec_all <- c(vec_all, stage_vec)
idx_start <- idx_end + 1L
}
list(vector = vec_all, stage_layout = stage_layout)
}
lucid_par_serial <- function(data, indices, model, dimG, dimCoY, dimCoG,
z_meta, template_names, n_template, prog) {
prog$tick()
d <- data[indices, , drop = FALSE]
col_start <- 1L
G <- as.matrix(d[, col_start:(col_start + dimG - 1L), drop = FALSE])
col_start <- col_start + dimG
z_rec <- restore_serial_Z_from_data(d = d, col_start = col_start, z_meta = z_meta)
Z <- z_rec$Z
col_start <- z_rec$next_col
Y <- as.matrix(d[, col_start, drop = FALSE])
col_start <- col_start + 1L
CoG <- CoY <- NULL
if (dimCoG > 0) {
CoG <- as.matrix(d[, col_start:(col_start + dimCoG - 1L), drop = FALSE])
col_start <- col_start + dimCoG
}
if (dimCoY > 0) {
CoY <- as.matrix(d[, col_start:(col_start + dimCoY - 1L), drop = FALSE])
}
seed <- sample(1:2000, 1)
rG <- if (!is.null(model$Rho$Rho_G)) model$Rho$Rho_G else 0
rMu <- if (!is.null(model$Rho$Rho_Z_Mu)) model$Rho$Rho_Z_Mu else 0
rCov <- if (!is.null(model$Rho$Rho_Z_Cov)) model$Rho$Rho_Z_Cov else 0
em_ctrl <- model$em_control
tol_fit <- if (!is.null(em_ctrl$tol)) em_ctrl$tol else 0.001
max_itr_fit <- if (!is.null(em_ctrl$max_itr)) em_ctrl$max_itr else 1000
max_tot_fit <- if (!is.null(em_ctrl$max_tot.itr)) em_ctrl$max_tot.itr else 10000
invisible(capture.output(try_lucid <- try(estimate_lucid(
G = G, Z = Z, Y = Y, CoY = CoY, CoG = CoG,
lucid_model = "serial", family = model$family,
init_omic.data.model = model$init_omic.data.model, K = model$K,
tol = tol_fit, max_itr = max_itr_fit, max_tot.itr = max_tot_fit,
init_impute = model$init_impute, init_par = model$init_par,
useY = model$useY, Rho_G = rG, Rho_Z_Mu = rMu, Rho_Z_Cov = rCov,
seed = seed
), silent = TRUE)))
if ("try-error" %in% class(try_lucid)) {
par_lucid <- rep(NA_real_, n_template)
names(par_lucid) <- template_names
return(par_lucid)
}
par_raw <- extract_serial_boot_template(try_lucid)$vector
align_boot_vector(par_raw = par_raw, template_names = template_names)
}
split_serial_boot_ci <- function(ci, stage_layout) {
out <- vector("list", length(stage_layout))
for (i in seq_along(stage_layout)) {
lay <- stage_layout[[i]]
stage_ci <- ci[lay$start:lay$end, , drop = FALSE]
rownames(stage_ci) <- lay$local_names
nb <- lay$n_beta
nm <- lay$n_mu
ng <- lay$n_gamma
beta_ci <- if (nb > 0) stage_ci[seq_len(nb), , drop = FALSE] else NULL
mu_ci <- if (nm > 0) stage_ci[seq.int(nb + 1L, nb + nm), , drop = FALSE] else NULL
gamma_ci <- if (ng > 0) stage_ci[seq.int(nb + nm + 1L, nb + nm + ng), , drop = FALSE] else NULL
if (identical(lay$type, "parallel")) {
beta_list <- vector("list", length(lay$n_beta_layer))
mu_list <- vector("list", length(lay$n_mu_layer))
names(beta_list) <- paste0("Layer", seq_along(beta_list))
names(mu_list) <- paste0("Layer", seq_along(mu_list))
if (!is.null(beta_ci)) {
st <- 1L
for (j in seq_along(lay$n_beta_layer)) {
nj <- lay$n_beta_layer[j]
beta_list[[j]] <- beta_ci[seq.int(st, st + nj - 1L), , drop = FALSE]
st <- st + nj
}
}
if (!is.null(mu_ci)) {
st <- 1L
for (j in seq_along(lay$n_mu_layer)) {
nj <- lay$n_mu_layer[j]
mu_list[[j]] <- mu_ci[seq.int(st, st + nj - 1L), , drop = FALSE]
st <- st + nj
}
}
out[[i]] <- list(beta = beta_list, mu = mu_list, gamma = gamma_ci)
} else {
out[[i]] <- list(beta = beta_ci, mu = mu_ci, gamma = gamma_ci)
}
}
out
}
#' @title generate bootstrp ci (normal, basic and percentile)
#'
#' @param x an object return by boot function
#' @param conf A numeric scalar between 0 and 1 to specify confidence level(s)
#' of the required interval(s).
#'
#' @return a matrix, the first column is the point estimate from original model
#'
gen_ci <- function(x, conf = 0.95) {
t0 <- x$t0
res_ci <- NULL
for (i in 1:length(t0)) {
ci <- try(boot::boot.ci(x,
index = i,
conf = conf,
type = c("norm", "perc")), silent = TRUE)
if("try-error" %in% class(ci)) {
temp_ci <- rep(NA_real_, 4)
} else {
norm_ci <- if(!is.null(ci$normal) && length(ci$normal) >= 3) ci$normal[2:3] else c(NA_real_, NA_real_)
perc_ci <- if(!is.null(ci$percent) && length(ci$percent) >= 5) ci$percent[4:5] else c(NA_real_, NA_real_)
temp_ci <- c(norm_ci, perc_ci)
}
res_ci <- rbind(res_ci,
temp_ci)
}
res <- cbind(t0, res_ci)
colnames(res) <- c("estimate",
"norm_lower", "norm_upper",
"perc_lower", "perc_upper")
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.