#' Bayesian linear mixed model with multiple random effects for family data
#'
#' @param fit \code{vb} fit from \code{vb_fit_family}
#' @param epsilon a threshold for the increase of the variance
#' @param maf_beta a filtering threshold for the weight of parameter beta
#' @param maf_u a filtering threshold for the weight of parameter U
#' @usage vb_predictive_family(fit, epsilon = 1e-3, maf_beta = 0.5, maf_u = 0.5)
#' @examples
#' fit <- vb_fit_rarecommon(y = y_train, genotype = data, weight.type = "wss")
#' vb_predictive(fit)
#' @export
vb_predictive_family <- function(x, ...) {
UseMethod("vb_predictive_family")
}
#' @export
#--------------------------------------------------------------------#
# Prediction
#--------------------------------------------------------------------#
vb_predictive_family <- function(fit, y = NULL,epsilon = 1e-3, B = "No", verbose = FALSE, maf_beta, maf_u){
#------------------------------------------------------------------------------------------------------------
# (0) initial
index_test = fit$index_test # index of the NA value in Y (index for testing samples)
# index_fixed_marker = fit$index_fixed_marker # index of markers for fixed term
N <- fit$N # total num of samples
M <- fit$M # num of regions in the testing data
Px <- fit$Px # num of SNPs in the testing data
P <- fit$DIM # num of SNPs in the each region
w_beta <- fit$E_w_beta # weight of beta
w_u <- fit$E_w_u # weight of u
geno_fix <- fit$genotype0
# all data - fixed term
geno_random <- lapply(1:M, function(m) attr(fit,"UD")[[m]] ) # all data - random term
geno_background <- fit$gamma_lambda # all data - background term - shared E
geno_background1 <- fit$gamma_lambda1 # all data - background term - kinship
X <- fit$X # data for training - fixed term
Z <- fit$Z # data for training - random term
m_u <- fit$m_u # mean of u
m_beta <- fit$m_beta # mean of beta
m_b <- fit$m_b # mean of background
m_b1 <- fit$m_b1 # mean of background
post_sigma2e <- fit$post_sigma2e # sigma2 of error
post_sigma2b <- fit$post_sigma2b # sigma2 of error
var_beta <- fit$post_sigma2beta # sigma2 of error
var_u <- fit$post_sigma2u # sigma2 of error
yc <- fit$y
center <- fit$center
scale <- fit$scale
#------------------------------------------------------------------------------------------------------------
# (1) sorting fixed effects by w_beta
delta_sort_PVE_beta <- rep(-Inf, Px)
# obtain the var_beta order index
# var_beta <- sapply(1:Px, function(j) var(as.matrix(X[,j]) %*% w_beta[j] %*% m_beta[j]) )
# lst_beta <- sort(var_beta, index.return=TRUE, decreasing=TRUE)
# (1) sorting random effects by w_u
delta_sort_PVE_u <- rep(-Inf, M)
# obtain the var_u order index
# var_u <- sapply(1:M, function(m) var(Z[[m]] %*% (drop(w_u[[m]]) * diag(1, P[[m]])) %*% m_u[[m]]) )
lst_u <- sort(var_u, index.return=TRUE, decreasing=TRUE)
#------------------------------------------------------------------------------------------------------------
# (2) fixed effects - PVE and select top markers
# PVE - the total proportion of variance in phenotype explained by the sparse effects and random effects terms together
# calculate and store the PVE_beta
PVE_beta <- sapply(1:Px, function(j) var(as.matrix(X[,j]) %*% m_beta[j])/(post_sigma2e + var_beta + sum(var_u) + post_sigma2b) )
# finial index for fixed term # select top regions based on index_delta_PVE_beta
fix_index <- which(w_beta>=maf_beta)
# (2) random effects - PVE and select top regions -------------------------------------------------------------
# calculate and store the PVE_u
PVE_u <- sapply(1:M, function(m) var_u[[m]]/(post_sigma2e + var_beta + sum(var_u) + post_sigma2b) )
#PVE_u <- sapply(1:M, function(j) var(TEST$m_u[,j])/var(y) ) # calculate and store the PVE_u
# sort the PVE_u by the pi_u (decreasing) (add 0 to be first for further calculation)
sort_PVE_u <- append(0, sapply(1:M, function(m) sum(PVE_u[lst_u$ix][1:m]) ))
# index of change
if(length(sort_PVE_u)>1){
delta_sort_PVE_u <- sapply(1:M, function(m) delta_sort_PVE_u[m] <- sort_PVE_u[m + 1] - sort_PVE_u[m] )
index_delta_PVE_u <- which( abs(delta_sort_PVE_u) > epsilon )
len <- min(length(index_delta_PVE_u), ceiling(0.15*M))
}else if(length(sort_PVE_u)==1){
delta_sort_PVE_u <- sort_PVE_u
index_delta_PVE_u <- 1
len <- 1
}
# finial index for random term # select top regions based on index_delta_PVE_u
# random_index <- lapply(lst_u, `[`, lst_u$x %in% head(lst_u$x,(index_delta_PVE_u[1] - 1)) )
random_index <- lapply(lst_u, `[`, lst_u$x %in% head(lst_u$x,len) )
#------------------------------------------------------------------------------------------------------------
# (3) calculate the PGE - the proportion of genetic variance explained by the sparse effects terms
# PGE <- sum(var_beta[fix_index$ix]) / (sum(var_beta[fix_index$ix]) + sum(var_u[random_index$ix]))
#------------------------------------------------------------------------------------------------------------
# (4) fixed effects - calculate the predictive dist - fixed term mean
# data for testing - fixed term
# X0 <- geno_fix[index_test, index_fixed_marker]
X0 <- geno_fix[index_test,]
# Predictive mean
# all
# beta_pred0 <- lapply(1:Px, function(j) as.matrix(X0[,j]) %*% w_beta[j] %*% m_beta[j] )
beta_pred0 <- lapply(1:Px, function(j) as.matrix(X0[,j]) %*% m_beta[j] )
# extract the selected markers for pred
beta_pred00 <- beta_pred0[fix_index]
if (length(fix_index)!=0){
beta_pred <- Reduce(`+`, beta_pred00) + rep(0, length(index_test))
}else{
beta_pred <- as.vector(rep(0, length(index_test)) )
}
# (4) random effects - calculate the predictive dist - random term mean
# data for testing - random term
# data for testing - random term
Z0 <- lapply(1:M, function(m) geno_random[[m]][-index_test,] )
Z1 <- lapply(1:M, function(m) geno_random[[m]][index_test,] )
# Predictive mean
# all
# u_pred0 <- lapply(1:M, function(m) Z0[[m]] %*% (drop(w_u[[m]]) * diag(1, P[[m]])) %*% m_u[[m]] )
# u0
u_0 <- lapply(1:M, function(m) Z0[[m]] %*% m_u[[m]] )
# beta0 for u - prediction step 1
u0_be <- lapply(1:M, function(m) ginv(crossprod(Z0[[m]])) %*% t(Z0[[m]]) %*% u_0[[m]] )
# u hat for u - prediction step 2
u_pred0 <- lapply(1:M, function(m) Z1[[m]] %*% u0_be[[m]] )
# extract the selected regions for pred
u_pred00 <- u_pred0[random_index$ix]
if (length(random_index$ix)!=0){
u_pred <- Reduce(`+`, u_pred00) + rep(0, length(index_test))
}else{
u_pred <- as.vector( rep(0,length(index_test)))
}
# (4) background - calculate the predictive dist - background term mean
if (B == "Yes"){
background_pred <- geno_background[index_test,] %*% m_b
background_pred1 <- geno_background1[index_test,] %*% m_b1
}else{
background_pred <- as.vector( rep(0,length(index_test)))
background_pred1 <- as.vector( rep(0,length(index_test)))
}
# Predictive variance
# s_pred <- sqrt(1/model$lambda + diag(X_test %*% model$S %*% t(X_test)))
#------------------------------------------------------------------------------------------------------------
# Predictive variance
#------------------------------------------------------------------------------------------------------------
# (5) calculate the correlation - accuracy
pred_value <- scale(beta_pred + u_pred + background_pred + background_pred1, T, T)
m <- center
s <- sqrt(scale)
pred <- (drop(s) * pred_value) + m
n_test <- length(index_test)
# pred = beta_pred + u_pred + background_pred
return(as.numeric(pred))
}
#------------------------------------------------------------#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.