set_init: Gather initial variational parameters provided by the user.

View source: R/set_hyper_init.R

set_initR Documentation

Gather initial variational parameters provided by the user.

Description

This function must be used to provide initial values for the variational parameters used in locus.

Usage

set_init(
  d,
  p,
  gam_vb,
  mu_beta_vb,
  sig2_beta_vb,
  tau_vb,
  link = "identity",
  ind_bin = NULL,
  q = NULL,
  alpha_vb = NULL,
  sig2_alpha_vb = NULL,
  sig2_inv_vb = NULL,
  G = NULL
)

Arguments

d

Number of responses.

p

Number of candidate predictors.

gam_vb

Matrix of size p x d with initial values for the variational parameter yielding posterior probabilities of inclusion.

mu_beta_vb

Matrix of size p x d with initial values for the variational parameter yielding regression coefficient estimates for predictor-response pairs included in the model.

sig2_beta_vb

Vector of length d, for link = "identity" and for link = "mix", of length 1 for link = "probit", and a matrix of size p x d, for link = "logit", with initial values for the variational parameter yielding estimates of effect variances for predictor-response pairs included in the model. For link = "identity" and link = "mix", these values are the same for all the predictors (as a result of the predictor variables being standardized before the variational algorithm). For link = "probit", they are the same for all the predictors and responses.

tau_vb

Vector of length d, for link = "identity", and of length d_cont = d - length(ind_bin) (number of continuous responses), for link = "mix", with initial values for the variational parameter yielding estimates for the continuous response residual precisions. Must be NULL for link = "logit" and link = "probit".

link

Response link. Must be "identity" for linear regression, "logit" for logistic regression, "probit" for probit regression, or "mix" for a mix of identity and probit link functions (in this case, the indices of the binary responses must be gathered in argument ind_bin, see below).

ind_bin

If link = "mix", vector of indices corresponding to the binary variables in Y. Must be NULL if link != "mix".

q

Number of covariates. Default is NULL, for Z NULL.

alpha_vb

Matrix of size q x d with initial values for the variational parameter yielding regression coefficient estimates for covariate-response pairs. Default is NULL, for Z NULL.

sig2_alpha_vb

Matrix of size q x d for link = "identity", for link = "logit" and for link = "mix" with initial values for the variational parameter yielding estimates of effect variances for covariate-response pairs. Vector of length q for link = "probit". Default is NULL, for Z NULL.

sig2_inv_vb

Initial parameters necessary when G is non-NULL. Its inverse square root corresponds to the typical size of non-zero effects. Must be NULL if G is NULL.

G

Number of candidate predictor groups when using the group selection model from the locus function. Default is NULL, for no group selection.

Details

The locus function can also be used with default initial parameter choices (without using set_init) by setting its argument list_init to NULL.

Value

An object of class "init" preparing user initial values for the variational parameters in a form that can be passed to the locus function.

See Also

set_hyper, locus

Examples

seed <- 123; set.seed(seed)

###################
## Simulate data ##
###################

## Examples using small problem sizes:
##
n <- 200; p <- 200; p0 <- 20; d <- 20; d0 <- 15; q <- 2

## Candidate predictors (subject to selection)
##
# Here we simulate common genetic variants (but any type of candidate
# predictors can be supplied).
# 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele

X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)

shuff_x_ind <- sample(p)
X <- cbind(X_act, X_inact)[, shuff_x_ind]

bool_x_act <- shuff_x_ind <= p0

pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))

## Covariates (not subject to selection)
##
Z <- matrix(rnorm(n * q), nrow = n)

alpha <-  matrix(rnorm(q * d), nrow = q)

## Gaussian responses
##
Y_act <- matrix(rnorm(n * d0, mean = X_act %*% beta, sd = 0.5), nrow = n)
Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
shuff_y_ind <- sample(d)
Y <- cbind(Y_act, Y_inact)[, shuff_y_ind] + Z %*% alpha

## Binary responses
##
Y_bin <- ifelse(Y > 0, 1, 0)

########################
## Infer associations ##
########################

## Continuous responses
##

# No covariate
#
# gam_vb chosen so that the prior mean number of responses associated with
# each candidate predictor is 1/4.
gam_vb <- matrix(rbeta(p * d, shape1 = 1, shape2 = 4*d-1), nrow = p)
mu_beta_vb <- matrix(rnorm(p * d), nrow = p)
tau_vb <- 1 / apply(Y, 2, var)
sig2_beta_vb <- 1 / rgamma(d, shape = 2, rate = 1 / tau_vb)

list_init_g <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb, tau_vb,
                        link = "identity")

# We take p0_av = p0 (known here); this choice may result in variable
# selections that are (too) conservative in some cases. In practice, it is
# advised to set p0_av as a slightly overestimated guess of p0, or perform
# cross-validation using function `set_cv'.

vb_g <- locus(Y = Y, X = X, p0_av = p0, link = "identity",
              list_init = list_init_g)

# With covariates
#
alpha_vb <- matrix(rnorm(q * d), nrow = q)
sig2_alpha_vb <- 1 / matrix(rgamma(q * d, shape = 2, rate = 1), nrow = q)

list_init_g_z <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb, tau_vb,
                          link = "identity", q = q,
                          alpha_vb = alpha_vb,
                          sig2_alpha_vb = sig2_alpha_vb)

vb_g_z <- locus(Y = Y, X = X, p0_av = p0, Z = Z, link = "identity",
                list_init = list_init_g_z)

## Binary responses
##
# gam_vb chosen so that the prior mean number of responses associated with
# each candidate predictor is 1/4.
sig2_beta_vb_logit <- 1 / t(replicate(p, rgamma(d, shape = 2, rate = 1)))

list_init_logit <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb_logit,
                            tau_vb = NULL, link = "logit", q = q,
                            alpha_vb = alpha_vb,
                            sig2_alpha_vb = sig2_alpha_vb)

vb_logit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "logit",
                  list_init = list_init_logit)

sig2_alpha_vb_probit <- sig2_alpha_vb[, 1]
sig2_beta_vb_probit <- sig2_beta_vb[1]
list_init_probit <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb_probit,
                             tau_vb = NULL, link = "probit", q = q,
                             alpha_vb = alpha_vb,
                             sig2_alpha_vb = sig2_alpha_vb_probit)

vb_probit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "probit",
                   list_init = list_init_probit)

## Mix of continuous and binary responses
##
Y_mix <- cbind(Y, Y_bin)
ind_bin <- (d+1):(2*d)

# gam_vb chosen so that the prior mean number of responses associated with
# each candidate predictor is 1/4.
gam_vb_mix <- matrix(rbeta(p * 2*d, shape1 = 1, shape2 = 8*d-1), nrow = p)
mu_beta_vb_mix <- matrix(rnorm(p * 2*d), nrow = p)
sig2_beta_vb_mix <- 1 / c(rgamma(d, shape = 2, rate = 1 / tau_vb),
                          rgamma(d, shape = 2, rate = 1))
alpha_vb_mix <- matrix(rnorm(q * 2*d), nrow = q)
sig2_alpha_vb_mix <- 1 / matrix(rgamma(q * 2*d, shape = 2, rate = 1), nrow = q)

list_init_mix <- set_init(2*d, p, gam_vb_mix, mu_beta_vb_mix,
                          sig2_beta_vb_mix, tau_vb, link = "mix",
                          ind_bin = ind_bin, q = q,
                          alpha_vb = alpha_vb_mix,
                          sig2_alpha_vb = sig2_alpha_vb_mix)

vb_mix <- locus(Y = Y_mix, X = X, p0_av = p0, Z = Z, link = "mix",
                ind_bin = ind_bin, list_init = list_init_mix)


hruffieux/locus documentation built on Jan. 10, 2024, 10:07 p.m.