Nothing
################################################################################
#' @importFrom bigsparser as_SFBM
#' @export
bigsparser::as_SFBM
################################################################################
#' LDpred2
#'
#' LDpred2. Tutorial at \url{https://privefl.github.io/bigsnpr/articles/LDpred2.html}.
#'
#' For reproducibility, `set.seed()` can be used to ensure that two runs of
#' LDpred2 give the exact same results (since v1.10).
#'
#' @inheritParams snp_ldsc2
#' @param corr Sparse correlation matrix as an [SFBM][bigsparser::SFBM-class].
#' If `corr` is a dsCMatrix or a dgCMatrix, you can use `as_SFBM(corr)`.
#' @param h2 Heritability estimate.
#'
#' @return `snp_ldpred2_inf`: A vector of effects, assuming an infinitesimal model.
#'
#' @export
#'
#' @rdname LDpred2
#'
snp_ldpred2_inf <- function(corr, df_beta, h2) {
assert_df_with_names(df_beta, c("beta", "beta_se", "n_eff"))
assert_lengths(rows_along(corr), cols_along(corr), rows_along(df_beta))
assert_pos(df_beta$beta_se, strict = TRUE)
assert_pos(h2, strict = TRUE)
N <- df_beta$n_eff
scale <- sqrt(N * df_beta$beta_se^2 + df_beta$beta^2)
beta_hat <- df_beta$beta / scale
beta_inf <- bigsparser::sp_solve_sym(
corr, beta_hat, add_to_diag = ncol(corr) / (h2 * N))
beta_inf * scale
}
################################################################################
#' @param grid_param A data frame with 3 columns as a grid of hyper-parameters:
#' - `$p`: proportion of causal variants
#' - `$h2`: heritability (captured by the variants used)
#' - `$sparse`: boolean, whether a sparse model is sought
#' They can be run in parallel by changing `ncores`.
#' @param burn_in Number of burn-in iterations.
#' @param num_iter Number of iterations after burn-in.
#' @inheritParams bigsnpr-package
#' @param ind.corr Indices to "subset" `corr`, as if this was run with
#' `corr[ind.corr, ind.corr]` instead. No subsetting by default.
#' @param return_sampling_betas Whether to return all sampling betas (after
#' burn-in)? This is useful for assessing the uncertainty of the PRS at the
#' individual level (see \doi{10.1101/2020.11.30.403188}).
#' Default is `FALSE` (only returns the averaged final vectors of betas).
#' If `TRUE`, only one set of parameters is allowed.
#'
#' @return `snp_ldpred2_grid`: A matrix of effect sizes, one vector (column)
#' for each row of `grid_param`. Missing values are returned when strong
#' divergence is detected. If using `return_sampling_betas`, each column
#' corresponds to one iteration instead (after burn-in).
#'
#' @importFrom doRNG `%dorng%`
#'
#' @export
#'
#' @rdname LDpred2
#'
snp_ldpred2_grid <- function(corr, df_beta, grid_param,
burn_in = 50,
num_iter = 100,
ncores = 1,
return_sampling_betas = FALSE,
ind.corr = cols_along(corr)) {
assert_df_with_names(df_beta, c("beta", "beta_se", "n_eff"))
assert_df_with_names(grid_param, c("p", "h2", "sparse"))
assert_lengths(ind.corr, rows_along(df_beta))
stopifnot(all(ind.corr %in% cols_along(corr)))
assert_pos(df_beta$beta_se, strict = TRUE)
assert_pos(grid_param$h2, strict = TRUE)
assert_cores(ncores)
N <- df_beta$n_eff
scale <- sqrt(N * df_beta$beta_se^2 + df_beta$beta^2)
beta_hat <- df_beta$beta / scale
if (!return_sampling_betas) {
ord <- with(grid_param, order(-p, sparse, -h2)) # large p first
grid <- grid_param[ord, ]
bigparallelr::register_parallel(ncores)
# LDpred2-grid models
res_list <- foreach(
h2 = grid$h2, p = grid$p, sparse = grid$sparse,
.export = "ldpred2_gibbs_one") %dorng% {
ldpred2_gibbs_one(
corr = corr,
beta_hat = beta_hat,
n_vec = N,
ind_sub = ind.corr - 1L,
h2 = h2,
p = p,
sparse = sparse,
burn_in = burn_in,
num_iter = num_iter
)
}
inv_ord <- match(seq_along(ord), ord)
beta_gibbs <- do.call("cbind", res_list[inv_ord])
} else {
if (nrow(grid_param) != 1)
stop2("Only one set of parameters is allowed when using 'return_sampling_betas'.")
# All sampling betas for one LDpred2 model (useful for uncertainty assessment)
beta_gibbs <- ldpred2_gibbs_one_sampling(
corr = corr,
beta_hat = beta_hat,
n_vec = N,
ind_sub = ind.corr - 1L,
h2 = grid_param$h2,
p = grid_param$p,
sparse = grid_param$sparse,
burn_in = burn_in,
num_iter = num_iter
)
}
sweep(beta_gibbs, 1, scale, '*')
}
################################################################################
#' @param vec_p_init Vector of initial values for p. Default is `0.1`.
#' @param h2_init Heritability estimate for initialization.
#' @param sparse In LDpred2-auto, whether to also report a sparse solution by
#' running LDpred2-grid with the estimates of p and h2 from LDpred2-auto, and
#' sparsity enabled. Default is `FALSE`.
#' @param verbose Whether to print "p // h2" estimates at each iteration.
#' Disabled when parallelism is used.
#' @param report_step Step to report sampling betas (after burn-in and before
#' unscaling). Nothing is reported by default. If using `num_iter = 200` and
#' `report_step = 20`, then 10 vectors of sampling betas are reported
#' (as a sparse matrix with 10 columns).
#' @param allow_jump_sign Whether to allow for effects sizes to change sign in
#' consecutive iterations? Default is `TRUE` (normal sampling). You can use
#' `FALSE` to force effects to go through 0 first before changing sign. Setting
#' this parameter to `FALSE` could be useful to prevent instability (oscillation
#' and ultimately divergence) of the Gibbs sampler. This would also be useful
#' for accelerating convergence of chains with a large initial value for p.
#' @param shrink_corr Shrinkage multiplicative coefficient to apply to off-diagonal
#' elements of the correlation matrix. Default is `1` (unchanged).
#' You can use e.g. `0.95` to add a bit of regularization.
#' @param use_MLE Whether to use maximum likelihood estimation (MLE) to estimate
#' alpha and the variance component (since v1.11.4), or assume that alpha is
#' -1 and estimate the variance of (scaled) effects as h2/(m*p), as it was
#' done in earlier versions of LDpred2-auto (e.g. in v1.10.8). Default is `TRUE`,
#' which should provide a better model fit, but might also be less robust.
#' @param p_bounds Boundaries for the estimates of p (the polygenicity).
#' Default is `c(1e-5, 1)`. You can use the same value twice to fix p.
#' @param alpha_bounds Boundaries for the estimates of \eqn{\alpha}.
#' Default is `c(-1.5, 0.5)`. You can use the same value twice to fix \eqn{\alpha}.
#'
#' @return `snp_ldpred2_auto`: A list (over `vec_p_init`) of lists with
#' - `$beta_est`: vector of effect sizes (on the allele scale); note that
#' missing values are returned when strong divergence is detected
#' - `$beta_est_sparse` (only when `sparse = TRUE`): sparse vector of effect sizes
#' - `$postp_est`: vector of posterior probabilities of being causal
#' - `$corr_est`, the "imputed" correlations between variants and phenotypes,
#' which can be used for post-QCing variants by comparing those to
#' `with(df_beta, beta / sqrt(n_eff * beta_se^2 + beta^2))`
#' - `$sample_beta`: sparse matrix of sampling betas (see parameter `report_step`),
#' *not* on the allele scale, for which you need to multiply by
#' `with(df_beta, sqrt(n_eff * beta_se^2 + beta^2))`
#' - `$path_p_est`: full path of p estimates (including burn-in);
#' useful to check convergence of the iterative algorithm
#' - `$path_h2_est`: full path of h2 estimates (including burn-in);
#' useful to check convergence of the iterative algorithm
#' - `$path_alpha_est`: full path of alpha estimates (including burn-in);
#' useful to check convergence of the iterative algorithm
#' - `$h2_est`: estimate of the (SNP) heritability (also see [coef_to_liab])
#' - `$p_est`: estimate of p, the proportion of causal variants
#' - `$alpha_est`: estimate of alpha, the parameter controlling the
#' relationship between allele frequencies and expected effect sizes
#' - `$h2_init` and `$p_init`: input parameters, for convenience
#'
#' @import foreach
#'
#' @export
#'
#' @rdname LDpred2
#'
snp_ldpred2_auto <- function(corr, df_beta, h2_init,
vec_p_init = 0.1,
burn_in = 500,
num_iter = 200,
sparse = FALSE,
verbose = FALSE,
report_step = num_iter + 1L,
allow_jump_sign = TRUE,
shrink_corr = 1,
use_MLE = TRUE,
p_bounds = c(1e-5, 1),
alpha_bounds = c(-1.5, 0.5),
ind.corr = cols_along(corr),
ncores = 1) {
assert_df_with_names(df_beta, c("beta", "beta_se", "n_eff"))
assert_lengths(ind.corr, rows_along(df_beta))
stopifnot(all(ind.corr %in% cols_along(corr)))
assert_pos(df_beta$beta_se, strict = TRUE)
assert_pos(h2_init, strict = TRUE)
N <- df_beta$n_eff
sd <- 1 / sqrt(N * df_beta$beta_se^2 + df_beta$beta^2)
beta_hat <- df_beta$beta * sd
mean_ld <- mean(
ld_scores_sfbm(corr, ind_sub = ind.corr - 1L, ncores = ncores))
ord <- order(-vec_p_init) # large p first
bigparallelr::register_parallel(ncores)
FUNs <- c("ldpred2_gibbs_auto", "ldpred2_gibbs_one")
res_list <- foreach(p_init = vec_p_init[ord], .export = FUNs) %dorng% {
ldpred_auto <- ldpred2_gibbs_auto(
corr = corr,
beta_hat = beta_hat,
n_vec = N,
log_var = 2 * log(sd),
ind_sub = ind.corr - 1L,
p_init = p_init,
h2_init = h2_init,
burn_in = burn_in,
num_iter = num_iter,
verbose = verbose && (ncores == 1),
report_step = report_step,
no_jump_sign = !allow_jump_sign,
shrink_corr = shrink_corr,
use_mle = use_MLE,
p_bounds = p_bounds,
alpha_bounds = alpha_bounds + 1,
mean_ld = mean_ld
)
ldpred_auto$beta_est <- ldpred_auto$beta_est / sd
ldpred_auto$h2_est <- mean(tail(ldpred_auto$path_h2_est, num_iter))
ldpred_auto$p_est <- mean(tail(ldpred_auto$path_p_est, num_iter))
ldpred_auto$alpha_est <- mean(tail(ldpred_auto$path_alpha_est, num_iter))
ldpred_auto$h2_init <- h2_init
ldpred_auto$p_init <- p_init
if (sparse && !is.na(ldpred_auto$h2_est)) {
beta_gibbs <- ldpred2_gibbs_one(
corr = corr,
beta_hat = beta_hat,
n_vec = N,
ind_sub = ind.corr - 1L,
h2 = ldpred_auto$h2_est,
p = ldpred_auto$p_est,
sparse = TRUE,
burn_in = 50,
num_iter = 100
)
ldpred_auto$beta_est_sparse <- beta_gibbs / sd
}
ldpred_auto
}
inv_ord <- match(seq_along(ord), ord)
res_list[inv_ord]
}
################################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.