Description Usage Arguments Details Value See Also Examples
Bayesian hierarchical multi-group random regression model for genomic prediction.
1 |
data |
A list. The data for fitting the model:
|
This function is a minimal wrapper around stan
and it only supplies
the model file argument and the data. All other argument must be taken from stan
.
A stanfit-class
object.
See stan
, for which this function is a minimal wrapper.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 | # Stan model file
stanfile <- system.file('extdat', 'bayesMultiGroupRR.stan', package = 'bayesMultiGroupRR')
## Load packages and set options. ------------------------------------------------------------
library('rstan')
library('magrittr')
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
rm(list = ls())
## copied from https://github.com/DominikMueller64/dmisc/blob/master/R/equal_split.R.
equal_split <- function(x, n, random = TRUE, beginning = FALSE) {
len <- length(x)
base_size <- len %/% n
rem <- len %% n
sizes <- rep(base_size, times = n)
if (rem > 0L) {
ix <- if (beginning) seq.int(from = 1L, to = rem) else sample.int(n, size = rem)
sizes[ix] <- sizes[ix] + 1L
}
s <- seq_len(length(x))
ret <- purrr::map(sizes, function(size) {
if (random) {
smp <- sample(x = s, size = size)
} else {
smp <- s[seq.int(from = 1L, to = size)]
}
s <<- setdiff(s, smp)
x[smp]
})
if (rem > 0L)
attr(ret, which = 'indices') <- ix
ret
}
## Load data ---------------------------------------------------------------------------------
data('wheat', package = 'BGLR')
X <- 2 * wheat.X
rm(list = grep(pattern = 'wheat*', x = ls(), value = TRUE))
## Set parameters ----------------------------------------------------------------------------
h2 <- 0.9 ## heritability
n_loci <- min(ncol(X), 20L) ## number of (known) loci
n_groups <- 5L ## number of groups (sub-populations)
base <- 0.90
## The true correlation matrix of the locus effects.
(true_cor_eff <- base^abs(outer(seq_len(n_groups), seq_len(n_groups), FUN = `-`)))
## Sampling true locus effects.
true_eff <- mvtnorm::rmvnorm(n = n_loci, sigma = true_cor_eff) %>% split(., f = col(.))
## Empirical covariances.
(emp_cov_eff <- cor(do.call(what = cbind, true_eff)))
## Prepare data ------------------------------------------------------------------------------
## Subset data to the desired number of loci.
X <- X[, sort(sample.int(n = ncol(X), size = n_loci)), drop = FALSE]
## Split the genotypes into (randomly sampled!) groups.
X <- dmisc::equal_split(x = split(x = X, f = row(X)), n = n_groups, random = TRUE)
## Compute genetic values.
g <- purrr::map2(X, true_eff, function(x, e) purrr::map_dbl(x, ~.x %*% e))
## Vector indication group-membership of individual observations.
group_size <- X %>% purrr::map_dbl(length)
group <- rep(seq_along(group_size), times = group_size)
n <- sum(group_size) ## Total number of observations.
## Flatten genotypes to a list of matrices.
X <- X %>% purrr::map(~do.call(what = 'rbind', args = .x))
## Standardize genotypes within groups. Remove first homozygous loci!
## X <- X %>% purrr::map(scale)
## Flatten genotypes to matrix.
X <- X %>% do.call(what = 'rbind')
## Flatten genetic values to vector
g <- purrr::flatten_dbl(g)
## Add some noise.
y <- g + rnorm(n = n, sd = sqrt(var(g) * (1 - h2) / h2))
dat <- list(n = n,
n_groups = n_groups,
n_loci = n_loci,
group = group,
y = y,
X = X)
## Not run:
## Fit model.
fit <- stan(file = 'stanfile.stan', data = dat, iter = 500L, warmup = 100L,
chains = 4L, verbose = TRUE, include = FALSE)
## Inspect traceplots of chains.
rstan::traceplot(fit, 'mu') # overall mean
rstan::traceplot(fit, 'beta') # group-specific means
rstan::traceplot(fit, 'alpha_mu') # overall locus effects
## rstan::traceplot(fit, 'alpha') # group-specific locus effects (too much)
rstan::traceplot(fit, 'sigma_e') # residual standard deviation
rstan::traceplot(fit, 'sigma_alpha_mu') # overall locus effect standard deviation
rstan::traceplot(fit, 'sigma_alpha') # group-specific locus effect standard deviation
rstan::traceplot(fit, 'Omega') # correlations
rstan::traceplot(fit, 'h2') # heritability (locus - level!)
rstan::traceplot(fit, 'theta') # linear predictor (for genetic values)
## Compute posterior covariance matrix, correlation matrix and linear predictors (GEBVs).
tmp <- rstan::get_posterior_mean(fit, 'Sigma')[, 'mean-all chains']
(post_mean_cov_eff <- matrix(data = tmp, ncol = n_groups))
cov2cor(post_mean_cov_eff)
tmp <- rstan::get_posterior_mean(fit, 'Omega')[, 'mean-all chains']
(post_mean_cor_eff <- matrix(data = tmp, ncol = n_groups))
post_theta <- rstan::get_posterior_mean(fit, 'theta')[, 'mean-all chains']
## Compute the prediction accuracy on per-group basis.
purrr::map2(split(g, f = group), split(post_theta, f = group), ~cor(.x, .y))
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.