bayesMultiGroupRR: Bayesian hierarchical multi-group random regression model for...

Description Usage Arguments Details Value See Also Examples

Description

Bayesian hierarchical multi-group random regression model for genomic prediction.

Usage

1

Arguments

data

A list. The data for fitting the model:

name structure
group index vector with group memberships
X marker matrix with genotypes in rows
y vector with phenotypic data

Details

This function is a minimal wrapper around stan and it only supplies the model file argument and the data. All other argument must be taken from stan.

Value

A stanfit-class object.

See Also

See stan, for which this function is a minimal wrapper.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Stan model file
stanfile <- system.file('extdat', 'bayesMultiGroupRR.stan', package = 'bayesMultiGroupRR')

## Load packages and set options. ------------------------------------------------------------

library('rstan')
library('magrittr')
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rm(list = ls())

## copied from https://github.com/DominikMueller64/dmisc/blob/master/R/equal_split.R.
equal_split <- function(x, n, random = TRUE, beginning = FALSE) {
  len <- length(x)
  base_size <- len %/% n
  rem <- len %% n
  sizes <- rep(base_size, times = n)
  if (rem > 0L) {
    ix <- if (beginning) seq.int(from = 1L, to = rem) else sample.int(n, size = rem)
    sizes[ix] <- sizes[ix] + 1L
  }

  s <- seq_len(length(x))
  ret <- purrr::map(sizes, function(size) {
    if (random) {
      smp <- sample(x = s, size = size)
    } else {
      smp <- s[seq.int(from = 1L, to = size)]
    }
    s <<- setdiff(s, smp)
    x[smp]
  })
  if (rem > 0L)
    attr(ret, which = 'indices') <- ix
  ret
}

## Load data ---------------------------------------------------------------------------------

data('wheat', package = 'BGLR')
X <- 2 * wheat.X
rm(list = grep(pattern = 'wheat*', x = ls(), value = TRUE))

## Set parameters ----------------------------------------------------------------------------

h2 <- 0.9  ## heritability
n_loci <- min(ncol(X), 20L)  ## number of (known) loci
n_groups <- 5L  ## number of groups (sub-populations)
base <- 0.90
  ## The true correlation matrix of the locus effects.
(true_cor_eff <- base^abs(outer(seq_len(n_groups), seq_len(n_groups), FUN = `-`)))
  ## Sampling true locus effects.
true_eff <- mvtnorm::rmvnorm(n = n_loci, sigma = true_cor_eff) %>% split(., f = col(.))
  ## Empirical covariances.
(emp_cov_eff <- cor(do.call(what = cbind, true_eff)))

## Prepare data ------------------------------------------------------------------------------

  ## Subset data to the desired number of loci.
X <- X[, sort(sample.int(n = ncol(X), size = n_loci)), drop = FALSE]
  ## Split the genotypes into (randomly sampled!) groups.
X <- dmisc::equal_split(x = split(x = X, f = row(X)), n = n_groups, random = TRUE)
  ## Compute genetic values.
g <- purrr::map2(X, true_eff, function(x, e) purrr::map_dbl(x, ~.x %*% e))
  ## Vector indication group-membership of individual observations.
group_size <- X %>% purrr::map_dbl(length)
group <- rep(seq_along(group_size), times = group_size)
n <- sum(group_size)  ## Total number of observations.
  ## Flatten genotypes to a list of matrices.
X <- X %>% purrr::map(~do.call(what = 'rbind', args = .x))
  ## Standardize genotypes within groups. Remove first homozygous loci!
## X <- X %>% purrr::map(scale)
  ## Flatten genotypes to matrix.
X <- X %>% do.call(what = 'rbind')
  ## Flatten genetic values to vector
g <- purrr::flatten_dbl(g)
  ## Add some noise.
y <- g + rnorm(n = n, sd = sqrt(var(g) * (1 - h2) / h2))

dat <- list(n = n,
            n_groups = n_groups,
            n_loci = n_loci,
            group = group,
            y = y,
            X = X)
## Not run: 
## Fit model.
fit <- stan(file = 'stanfile.stan', data = dat, iter = 500L, warmup = 100L,
            chains = 4L, verbose = TRUE, include = FALSE)

## Inspect traceplots of chains.
rstan::traceplot(fit, 'mu') # overall mean
rstan::traceplot(fit, 'beta')  # group-specific means
rstan::traceplot(fit, 'alpha_mu')  # overall locus effects
## rstan::traceplot(fit, 'alpha') # group-specific locus effects (too much)
rstan::traceplot(fit, 'sigma_e') # residual standard deviation
rstan::traceplot(fit, 'sigma_alpha_mu') # overall locus effect standard deviation
rstan::traceplot(fit, 'sigma_alpha') # group-specific locus effect standard deviation
rstan::traceplot(fit, 'Omega') # correlations
rstan::traceplot(fit, 'h2') # heritability (locus - level!)
rstan::traceplot(fit, 'theta') # linear predictor (for genetic values)

## Compute posterior covariance matrix, correlation matrix and linear predictors (GEBVs).
tmp <- rstan::get_posterior_mean(fit, 'Sigma')[, 'mean-all chains']
(post_mean_cov_eff <- matrix(data = tmp, ncol = n_groups))
cov2cor(post_mean_cov_eff)
tmp <- rstan::get_posterior_mean(fit, 'Omega')[, 'mean-all chains']
(post_mean_cor_eff <- matrix(data = tmp, ncol = n_groups))
post_theta <- rstan::get_posterior_mean(fit, 'theta')[, 'mean-all chains']

## Compute the prediction accuracy on per-group basis.
purrr::map2(split(g, f = group), split(post_theta, f = group), ~cor(.x, .y))


## End(Not run)

DominikMueller64/bayesMultiGroupRR documentation built on May 6, 2019, 2:52 p.m.