Nothing
#' fit a robust Bayesian variable selection
#'
#' fit a robust Bayesian variable selection model for G×E interactions.
#'
#' @keywords models
#' @param X the matrix of predictors (genetic factors) without intercept. Each row should be an observation vector. X will be centered and a column of 1 will be added to the X matrix
#' as the intercept.
#' @param Y the response variable. The current version of roben only supports continuous response.
#' @param E a matrix of environmental factors. E will be centered. The interaction terms between X (G factors) and E will be automatically created and included in the model.
#' @param clin a matrix of clinical variables. Clinical variables are not subject to penalty.
#' @param iterations the number of MCMC iterations.
#' @param burn.in the number of iterations for burn-in.
#' @param robust logical flag. If TRUE, robust methods will be used.
#' @param sparse logical flag. If TRUE, spike-and-slab priors will be used to shrink coefficients of irrelevant covariates to zero exactly.
#' @param structure three choices are available. "sparsegroup" for sparse-group selection, which is a bi-level selection on both group-level and individual-level. "group" for selection on group-level only. "individual" for selection on individual-level only.
#' @param hyper a named list of hyperparameters.
#' @param debugging logical flag. If TRUE, progress will be output to the console and extra information will be returned.
#'
#' @details Consider the data model described in "\code{\link{data}}":
#' \deqn{Y_{i} = \alpha_{0} + \sum_{t=1}^{q}\alpha_{t}Clin_{it} + \sum_{m=1}^{k}\theta_{m}E_{im} + \sum_{j=1}^{p} \big(U_{ij}^\top\beta_{j}\big) +\epsilon_{i},}
#' where the main and interaction effects of the \eqn{j}th genetic variant is corresponding to the coefficient vector \eqn{\beta_{j}=(\beta_{j1}, \beta_{j2},\ldots,\beta_{jL})^\top}.
#'
#' When structure="sparsegroup" (default setting), selection will be conducted on both individual and group levels (bi-level selection):
#' \itemize{
#' \item \strong{Group-level selection:} by determining whether \eqn{||\beta_{j}||_{2}=0}, we can know if the \eqn{j}th genetic variant has any effect at all.
#' \item \strong{Individual-level selection:} investigate whether the \eqn{j}th genetic variant has main effect, G\eqn{\times}E interaction or both, by determining which components in \eqn{\beta_{j}} has non-zero values.
#' }
#' If structure="group", only group-level selection will be conducted on \eqn{||\beta_{j}||_{2}}. If structure="individual", only individual-level selection will be conducted on each \eqn{\beta_{jl}}, (\eqn{l=1,\ldots,L}).
#'
#' When sparse=TRUE (default), spike--and--slab priors are imposed on individual and/or group levels to identify important main and interaction effects. Otherwise, Laplacian shrinkage will be used.
#'
#' When robust=TRUE (default), the distribution of \eqn{\epsilon_{i}} is defined as a Laplace distribution with density
#' \eqn{
#' f(\epsilon_{i}|\nu) = \frac{\nu}{2}\exp\left\{-\nu |\epsilon_{i}|\right\}
#' }, (\eqn{i=1,\dots,n}), which leads to a Bayesian formulation of LAD regression. If robust=FALSE, \eqn{\epsilon_{i}} follows a normal distribution.
#' \cr
#'
#' Both \eqn{X} and \eqn{E} will be centered before the generation of interaction terms, in order to prevent the multicollinearity between main effects and interaction terms.
#' \cr
#'
#' Users can modify the hyper-parameters by providing a named list of hyper-parameters via the argument `hyper'.
#' The list can have the following named components
#' \itemize{
#' \item a0, b0: shape parameters of the Beta priors (\eqn{\pi^{a_{0}-1}(1-\pi)^{b_{0}-1}}) on \eqn{\pi_{0}}.
#' \item a1, b1: shape parameters of the Beta priors (\eqn{\pi^{a_{1}-1}(1-\pi)^{b_{1}-1}}) on \eqn{\pi_{1}}.
#' \item c1, c2: the shape parameter and the rate parameter of the Gamma prior on \eqn{\nu}.
#' \item d1, d2: the shape parameter and the rate parameter of the Gamma priors on \eqn{\eta}.
#' }
#' Please check the references for more details about the prior distributions.
#'
#' @seealso \code{\link{GxESelection}}
#'
#' @examples
#' data(GxE_small)
#'
#' ## default method
#' iter=5000
#' fit=roben(X, Y, E, clin, iterations = iter)
#' fit$coefficient
#'
#' ## Ture values of parameters of main G effects and interactions
#' coeff$GE
#'
#' ## Compute TP and FP
#' sel=GxESelection(fit)
#' pos=which(sel$indicator != 0)
#' tp=length(intersect(which(coeff$GE != 0), pos))
#' fp=length(pos)-tp
#' list(tp=tp, fp=fp)
#'
#' \donttest{
#' ## alternative: robust group selection
#' fit=roben(X, Y, E, clin, iterations=iter, structure="g")
#' fit$coefficient
#'
#' ## alternative: non-robust sparse group selection
#' fit=roben(X, Y, E, clin, iterations=iter, robust=FALSE)
#' fit$coefficient
#' }
#'
#' @export
roben <- function(X, Y, E, clin=NULL, iterations=10000, burn.in=NULL, robust=TRUE, sparse=TRUE, structure=c("sparsegroup","group","individual"), hyper=NULL, debugging=FALSE)
{
structure = match.arg(structure)
this.call = match.call()
intercept = TRUE
if(iterations<1) stop("iterations must be a positive integer.")
if(is.null(burn.in)){
BI = floor(iterations/2)
}else if(burn.in>=1){
BI = as.integer(burn.in)
}else{
stop("burn.in must be a positive integer.")
}
if(iterations<=BI) stop("iterations must be larger than burn.in.")
dat = Data.matrix(X, Y, E, clin, intercept, debugging)
xx = dat$xx
y = dat$y
CLC = dat$CLC
n = dat$n; s = dat$s
env = dat$env
size = dat$size
G.names = dat$G.names
E.names = dat$E.names
clin.names = dat$clin.names
CLC.names = colnames(CLC)
nclc = ncol(CLC)
if(debugging) message("No. of G: ", s, "\tNo. of E: ", env, "\tNo. of G+GxE: ", ncol(xx), "\n")
clcxx = cbind(CLC, xx)
lasso.fit = glmnet::glmnet(clcxx, y, family="gaussian",alpha=1,nlambda=50)
prop = colSums(as.matrix(lasso.fit$beta) != 0)/ncol(clcxx)
ind = which.min(abs(prop-min(0.5, stats::median(prop), n/ncol(clcxx))))
coeff.array = lasso.fit$beta[,ind]
coeff.array[1] = lasso.fit$a0[ind]
# coeff.array = lasso.fit$beta[,floor(length(lasso.fit$lambda)/2)]
# coeff.array[1] = lasso.fit$a0[floor(length(lasso.fit$lambda)/2)]
# lasso.cv = glmnet::cv.glmnet(clcxx,y,alpha=1,nfolds=5)
# lambda.cv = lasso.cv$lambda.min;
# lasso.fit = glmnet::glmnet(clcxx, y, family="gaussian",alpha=1,nlambda=50)
# coeff.array = as.vector(stats::predict(lasso.fit, s=lambda.cv, type="coefficients"))[-2]
hatAlpha = coeff.array[c(1:nclc)]
hatBeta = matrix(coeff.array[-c(1:nclc)], ncol = s)
if(robust){
out = Robust(xx, y, CLC, s, size, iterations, hatAlpha, hatBeta, sparse, structure, hyper, debugging)
}else{
out = NonRobust(xx, y, CLC, s, size, iterations, hatAlpha, hatBeta, sparse, structure, hyper, debugging)
}
coeff.main = apply(out$GS.alpha[-(1:BI),,drop=FALSE], 2, stats::median); names(coeff.main) = CLC.names;
coeff.GE = matrix(apply(out$GS.beta[-(1:BI),], 2, stats::median), size, dimnames=list(c("main",E.names),G.names))
Int = coeff.main[1]
coeff.E = utils::tail(coeff.main, env); #names(coeff.E) = E.names;
coeff.clin = utils::head(coeff.main[-1], -env)
if(env>0){
coeff.clin = utils::head(coeff.main[-1], -env)
}else{
coeff.clin = coeff.main[-1]
}
coefficient = list(Int=Int, clin=coeff.clin, E=coeff.E, GE=coeff.GE)
fit = list(call = this.call, posterior = out, coefficient=coefficient, burn.in = BI, iterations=iterations, design=list(xx=xx, CLC=CLC))
# if(debugging && sparse) fit$debugList = out$debugList;
class(fit)=c("roben", class(out))
fit
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.