#' Bayesian linear mixed model with multiple random effects
#'
#' @param fit \code{vb} fit from \code{vb_fit_rare_common}
#' @param epsilon a threshold for the increase of the variance
#' @param maf_beta a filtering threshold for the weight of parameter beta
#' @param maf_u a filtering threshold for the weight of parameter U
#' @usage vb_predictive(fit, epsilon = 1e-3, maf_beta = 0.5, maf_u = 0.5)
#' @examples
#' fit <- vb_fit_rarecommon(y = y_train, genotype = data, weight.type = "wss")
#' vb_predictive(fit)
#' @export
vb_predictive <- function(x, ...) {
UseMethod("vb_predictive")
}
#' @export
vb_predictive.vb <- function(fit, epsilon = 1e-3, maf_beta = 0.5, maf_u = 0.5) {
#------------------------------------------------------------------------------------------------------------
# (0) initial
y <- NULL
B <- "Yes"
index_test <- fit$index_test # index of the NA value in Y (index for testing samples)
# index_fixed_marker = fit$index_fixed_marker # index of markers for fixed term
N <- fit$N # total num of samples
M <- fit$M # num of regions in the testing data
Px <- fit$Px # num of SNPs in the testing data
P <- fit$DIM # num of SNPs in the each region
w_beta <- fit$E_w_beta # weight of beta
w_u <- fit$E_w_u # weight of u
geno_fix <- fit$genotype0
# all data - fixed term
geno_random <- lapply(1:M, function(m) attr(fit, "UD")[[m]]) # all data - random term
geno_background <- fit$gamma_lambda # all data - background term
X <- fit$X # data for training - fixed term
Z <- fit$Z # data for training - random term
m_u <- fit$m_u # mean of u
m_beta <- fit$m_beta # mean of beta
m_b <- fit$m_b # mean of background
post_sigma2e <- fit$post_sigma2e # sigma2 of error
post_sigma2b <- fit$post_sigma2b # sigma2 of error
var_beta <- fit$post_sigma2beta # sigma2 of error
var_u <- fit$post_sigma2u # sigma2 of error
yc <- fit$y
center <- fit$center
scale <- fit$scale
#------------------------------------------------------------------------------------------------------------
# (1) sorting fixed effects by w_beta
delta_sort_PVE_beta <- rep(-Inf, Px)
# obtain the var_beta order index
# (1) sorting random effects by w_u
delta_sort_PVE_u <- rep(-Inf, M)
# obtain the var_u order index
lst_u <- sort(var_u, index.return = TRUE, decreasing = TRUE)
#------------------------------------------------------------------------------------------------------------
# (2) fixed effects - PVE and select top markers
# PVE - the total proportion of variance in phenotype explained by the sparse effects and random effects terms together
# calculate and store the PVE_beta
PVE_beta <- sapply(1:Px, function(j) var(as.matrix(X[, j]) %*% m_beta[j]) / (post_sigma2e + var_beta + sum(var_u) + post_sigma2b))
# finial index for fixed term # select top regions based on index_delta_PVE_beta
fix_index <- which(w_beta >= maf_beta)
# (2) random effects - PVE and select top regions -------------------------------------------------------------
# calculate and store the PVE_u
PVE_u <- sapply(1:M, function(m) var_u[[m]] / (post_sigma2e + var_beta + sum(var_u) + post_sigma2b))
# sort the PVE_u by the pi_u (decreasing) (add 0 to be first for further calculation)
sort_PVE_u <- append(0, sapply(1:M, function(m) sum(PVE_u[lst_u$ix][1:m])))
# index of change
if (length(sort_PVE_u) > 1) {
delta_sort_PVE_u <- sapply(1:M, function(m) delta_sort_PVE_u[m] <- sort_PVE_u[m + 1] - sort_PVE_u[m])
index_delta_PVE_u <- which(abs(delta_sort_PVE_u) > epsilon)
len <- min(length(index_delta_PVE_u), ceiling(0.15 * M))
} else if (length(sort_PVE_u) == 1) {
delta_sort_PVE_u <- sort_PVE_u
index_delta_PVE_u <- 1
len <- 1
}
random_index <- lapply(lst_u, `[`, lst_u$x %in% head(lst_u$x, len))
#------------------------------------------------------------------------------------------------------------
# (3) calculate the PGE - the proportion of genetic variance explained by the sparse effects terms
#------------------------------------------------------------------------------------------------------------
# (4) fixed effects - calculate the predictive dist - fixed term mean
# data for testing - fixed term
# X0 <- geno_fix[index_test, index_fixed_marker]
X0 <- geno_fix[index_test, ]
# Predictive mean
# all
# beta_pred0 <- lapply(1:Px, function(j) as.matrix(X0[,j]) %*% w_beta[j] %*% m_beta[j] )
beta_pred0 <- lapply(1:Px, function(j) as.matrix(X0[, j]) %*% m_beta[j])
# extract the selected markers for pred
beta_pred00 <- beta_pred0[fix_index]
if (length(fix_index) != 0) {
beta_pred <- Reduce(`+`, beta_pred00) + rep(0, length(index_test))
} else {
beta_pred <- as.vector(rep(0, length(index_test)))
}
# (4) random effects - calculate the predictive dist - random term mean
# data for testing - random term
# data for testing - random term
Z0 <- lapply(1:M, function(m) geno_random[[m]][-index_test, ])
Z1 <- lapply(1:M, function(m) geno_random[[m]][index_test, ])
# Predictive mean
# all
# u0
u_0 <- lapply(1:M, function(m) Z0[[m]] %*% m_u[[m]])
# beta0 for u - prediction step 1
u0_be <- lapply(1:M, function(m) ginv(crossprod(Z0[[m]])) %*% t(Z0[[m]]) %*% u_0[[m]])
# u hat for u - prediction step 2
u_pred0 <- lapply(1:M, function(m) Z1[[m]] %*% u0_be[[m]])
# extract the selected regions for pred
u_pred00 <- u_pred0[random_index$ix]
if (length(random_index$ix) != 0) {
u_pred <- Reduce(`+`, u_pred00) + rep(0, length(index_test))
} else {
u_pred <- as.vector(rep(0, length(index_test)))
}
# (4) background - calculate the predictive dist - background term mean
if (B == "Yes") {
background_pred <- geno_background[index_test, ] %*% m_b
} else {
background_pred <- as.vector(rep(0, length(index_test)))
}
# Predictive variance
#------------------------------------------------------------------------------------------------------------
# (5) calculate the correlation - accuracy
pred_value <- scale(beta_pred + u_pred + background_pred, T, T)
m <- center
s <- sqrt(scale)
pred <- (drop(s) * pred_value) + m
n_test <- length(index_test)
# pred = beta_pred + u_pred + background_pred
return(as.numeric(pred))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.