################################################################################
context("LDPRED2")
################################################################################
test_that("LDpred2 works", {
skip_if(is_cran)
skip_if_offline("raw.githubusercontent.com")
bedfile <- file.path(tempdir(), "tmp-data/public-data3.bed")
if (!file.exists(rdsfile <- sub_bed(bedfile, ".rds"))) {
zip <- tempfile(fileext = ".zip")
download.file(
"https://github.com/privefl/bigsnpr/blob/master/data-raw/public-data3.zip?raw=true",
destfile = zip, mode = "wb")
unzip(zip, exdir = tempdir())
rds <- snp_readBed(bedfile)
expect_identical(normalizePath(rds), normalizePath(rdsfile))
}
obj.bigSNP <- snp_attach(rdsfile)
G <- obj.bigSNP$genotypes
y <- obj.bigSNP$fam$affection
POS2 <- obj.bigSNP$map$genetic.dist + 1000 * obj.bigSNP$map$chromosome
sumstats <- bigreadr::fread2(file.path(tempdir(), "tmp-data/public-data3-sumstats.txt"))
sumstats$n_eff <- sumstats$N
map <- setNames(obj.bigSNP$map[-3], c("chr", "rsid", "pos", "a1", "a0"))
df_beta <- snp_match(sumstats, map, join_by_pos = FALSE)
ind_var <- df_beta$`_NUM_ID_`
corr0 <- snp_cor(G, ind.col = ind_var, size = 3 / 1000,
infos.pos = POS2[ind_var], ncores = 2)
ld <- bigsnpr:::sp_colSumsSq_sym(p = corr0@p, i = corr0@i, x = corr0@x)
corr <- as_SFBM(corr0, compact = sample(c(TRUE, FALSE), 1))
rm(corr0)
# LD score regression
(ldsc <- with(df_beta, snp_ldsc(ld, length(ld), chi2 = (beta / beta_se)^2,
sample_size = n_eff, blocks = NULL)))
(ldsc2 <- snp_ldsc2(corr, df_beta, intercept = NULL))
expect_equal(ldsc2, ldsc)
# LDpred2-inf
beta_inf <- snp_ldpred2_inf(corr, df_beta, h2 = ldsc[["h2"]])
pred_inf <- big_prodVec(G, beta_inf, ind.col = df_beta[["_NUM_ID_"]])
expect_gt(cor(pred_inf, y), 0.2)
# LDpred2-gibbs
p_seq <- signif(seq_log(1e-3, 1, length.out = 7), 1)
params <- expand.grid(p = p_seq, h2 = ldsc[["h2"]], sparse = c(FALSE, TRUE))
expect_equal(dim(params), c(14, 3))
expect_equal(dim(snp_ldpred2_grid(corr, df_beta, params[1, ])), c(nrow(df_beta), 1))
beta_grid <- snp_ldpred2_grid(corr, df_beta, params, ncores = 2)
pred_grid <- big_prodMat(G, beta_grid, ind.col = df_beta[["_NUM_ID_"]])
expect_gt(max(cor(pred_grid, y)), 0.4)
# LDpred2-uncertainty
expect_error(snp_ldpred2_grid(corr, df_beta, params, return_sampling_betas = TRUE),
"Only one set of parameters is allowed")
beta_sample <- snp_ldpred2_grid(corr, df_beta, params[3, ], num_iter = 200,
return_sampling_betas = TRUE)
expect_equal(dim(beta_sample), c(nrow(df_beta), 200))
expect_gt(cor(rowMeans(beta_sample), beta_grid[, 3]), 0.9)
# LDpred2-auto
beta_auto <- snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
burn_in = 200, num_iter = 200, sparse = TRUE,
shrink_corr = runif(1, 0.9, 1))
expect_length(beta_auto, 1)
mod <- beta_auto[[1]]
expect_null(dim(mod$beta_est))
pred_auto <- big_prodVec(G, mod$beta_est, ind.col = df_beta[["_NUM_ID_"]])
expect_gt(cor(pred_auto, y), 0.4)
expect_gt(cor(mod$beta_est_sparse, mod$beta_est), 0.9)
expect_lt(mean(mod$beta_est == 0), 0.001)
expect_gt(mean(mod$beta_est_sparse == 0), 0.5)
expect_equal(mod$h2_init, ldsc[["h2"]])
expect_equal(mod$p_init, 0.1)
expect_equal(mod$h2_est, mean(tail(mod$path_h2_est, 200)))
expect_equal(mod$p_est, mean(tail(mod$path_p_est, 200)))
expect_equal(mod$alpha_est, mean(tail(mod$path_alpha_est, 200)))
expect_length(mod$postp_est, length(mod$beta_est))
expect_null(dim(mod$postp_est))
expect_equal(mean(mod$postp_est), mod$p_est, tolerance = 0.01)
expect_equal(dim(mod$sample_beta), c(ncol(corr), 0))
beta_hat <- with(df_beta, beta / sqrt(n_eff * beta_se^2 + beta^2))
expect_gt(cor(mod$corr_est, beta_hat), 0.3)
beta_auto2 <- snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
burn_in = 200, num_iter = 200,
vec_p_init = 1, allow_jump_sign = FALSE)
expect_gt(cor(beta_auto2[[1]]$beta_est, mod$beta_est), 0.9)
expect_lt(beta_auto2[[1]]$p_est, 0.1)
# Sampling betas
beta_auto <- snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
burn_in = 200, num_iter = 200, report_step = 10)
bsamp <- beta_auto[[1]]$sample_beta
expect_equal(dim(bsamp), c(ncol(corr), 20))
expect_s4_class(bsamp, "dgCMatrix")
h2_est <- apply(bsamp, 2, function(x) crossprod(x, bigsparser::sp_prodVec(corr, x)))
expect_equal(h2_est, beta_auto[[1]]$path_h2_est[seq(210, 400, by = 10)])
# alpha bounds
beta_auto <- snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
burn_in = 200, num_iter = 200,
alpha_bounds = c(-1, -1))
expect_equal(beta_auto[[1]]$path_alpha_est, rep(-1, 400))
beta_auto <- snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
burn_in = 200, num_iter = 200,
alpha_bounds = c(-0.5, 0))
expect_true(all(beta_auto[[1]]$path_alpha_est >= -0.5))
expect_true(all(beta_auto[[1]]$path_alpha_est <= 0))
# Errors
expect_error(snp_ldpred2_inf(corr, df_beta[-6]),
"'df_beta' should have element 'beta'.")
expect_error(snp_ldpred2_grid(corr, df_beta[-6], params),
"'df_beta' should have element 'beta'.")
expect_error(snp_ldpred2_auto(corr, df_beta[-6]),
"'df_beta' should have element 'beta'.")
# Parallelism
all_beta <- replicate(
n = 10, simplify = FALSE,
snp_ldpred2_grid(corr, df_beta, params[c(1, 8), ], ncores = 2))
all_beta <- replicate(
n = 5, simplify = FALSE,
snp_ldpred2_auto(corr, df_beta, h2_init = ldsc[["h2"]],
vec_p_init = seq_log(1e-4, 0.9, 6),
burn_in = 100, num_iter = 100,
sparse = TRUE, ncores = 2))
# Reproducibility
set.seed(1)
grid1 <- snp_ldpred2_grid(corr, df_beta, params, ncores = 2)
grid2 <- snp_ldpred2_grid(corr, df_beta, params, ncores = 2)
expect_false(identical(grid2, grid1))
set.seed(1)
grid3 <- snp_ldpred2_grid(corr, df_beta, params, ncores = 2)
expect_identical(grid3, grid1)
set.seed(1)
beta1 <- snp_ldpred2_grid(corr, df_beta, params[3, ], return_sampling_betas = TRUE)
set.seed(1)
beta2 <- snp_ldpred2_grid(corr, df_beta, params[3, ], return_sampling_betas = TRUE)
expect_identical(beta2, beta1)
set.seed(1)
auto1 <- snp_ldpred2_auto(corr, df_beta,
burn_in = 50, num_iter = 100, sparse = TRUE,
h2_init = 0.3, vec_p_init = seq_log(1e-5, 1, 8),
report_step = 5, ncores = 2)
set.seed(1)
auto2 <- snp_ldpred2_auto(corr, df_beta,
burn_in = 50, num_iter = 100, sparse = TRUE,
h2_init = 0.3, vec_p_init = seq_log(1e-5, 1, 8),
report_step = 5, ncores = 2)
expect_identical(auto2, auto1)
inf1 <- snp_ldpred2_inf(corr, df_beta, h2 = 0.3)
inf2 <- snp_ldpred2_inf(corr, df_beta, h2 = 0.3)
expect_identical(inf2, inf1) # no sampling, so reproducible
})
################################################################################
test_that("MLE_alpha works", {
skip_if(is_cran)
bigsnp <- snp_attachExtdata()
G <- bigsnp$genotypes
FUN <- function(x, log_var, beta2) {
S <- 1 + x[[1]]; sigma2 <- x[[2]]
S * sum(log_var) + length(log_var) * log(sigma2) + sum(beta2 / exp(S * log_var)) / sigma2
}
DER <- function(x, log_var, beta2) {
S <- 1 + x[[1]]; sigma2 <- x[[2]]
res1 <- sum(log_var) - sum(log_var * beta2 / exp(S * log_var)) / sigma2
res2 <- length(log_var) / sigma2 - sum(beta2 / exp(S * log_var)) / sigma2^2
c(res1, res2)
}
alpha <- 0
simu <- snp_simuPheno(G, 0.2, 500, alpha = alpha)
log_var <- log(big_colstats(G, ind.col = simu$set)$var)
beta2 <- simu$effects^2
# without bootstrap
res1 <- optim(par = c(-0.5, 0.2 / 500), fn = FUN, gr = DER, method = "L-BFGS-B",
lower = c(-1.5, 0.2 / 5000), upper = c(0.5, 0.2 / 50),
log_var = log_var, beta2 = beta2)$par
res2 <- bigsnpr:::MLE_alpha(par = c(-0.5, res1[2] * runif(1, 0.7, 1.4)),
log_var = log_var, curr_beta = simu$effects,
ind_causal = 0:499, alpha_bounds = c(-0.5, 1.5))
expect_equal(res2[1:2] - 1:0, res1, tolerance = 1e-4)
# with bootstrap
all_est <- replicate(1000, {
ind <- sample(500, replace = TRUE)
optim(par = c(-0.5, 0.2 / 500), fn = FUN, gr = DER, method = "L-BFGS-B",
lower = c(-1.5, 0.2 / 5000), upper = c(0.5, 0.2 / 50),
log_var = log_var[ind], beta2 = beta2[ind])$par
})
all_est2 <- replicate(1000, {
bigsnpr:::MLE_alpha(par = c(-0.5, mean(all_est[2, ])), ind_causal = 0:499,
log_var = log_var, curr_beta = simu$effects,
alpha_bounds = c(-0.5, 1.5), boot = TRUE)[1:2] - 1:0
})
Q <- ppoints(10)
expect_equal(quantile(all_est[1, ], Q), quantile(all_est2[1, ], Q),
tolerance = 0.1)
expect_equal(quantile(all_est[2, ], Q), quantile(all_est2[2, ], Q),
tolerance = 0.1)
})
################################################################################
test_that("ind.corr works", {
skip_if(is_cran)
skip_if_offline("raw.githubusercontent.com")
bedfile <- file.path(tempdir(), "tmp-data/public-data3.bed")
if (!file.exists(rdsfile <- sub_bed(bedfile, ".rds"))) {
zip <- tempfile(fileext = ".zip")
download.file(
"https://github.com/privefl/bigsnpr/blob/master/data-raw/public-data3.zip?raw=true",
destfile = zip, mode = "wb")
unzip(zip, exdir = tempdir())
rds <- snp_readBed(bedfile)
expect_identical(normalizePath(rds), normalizePath(rdsfile))
}
obj.bigSNP <- snp_attach(rdsfile)
G <- obj.bigSNP$genotypes
y <- obj.bigSNP$fam$affection
POS2 <- obj.bigSNP$map$genetic.dist + 1000 * obj.bigSNP$map$chromosome
sumstats <- bigreadr::fread2(file.path(tempdir(), "tmp-data/public-data3-sumstats.txt"))
sumstats$n_eff <- sumstats$N
map <- setNames(obj.bigSNP$map[-3], c("chr", "rsid", "pos", "a1", "a0"))
df_beta <- snp_match(sumstats, map, join_by_pos = FALSE)
ind_var <- df_beta$`_NUM_ID_`
corr0 <- snp_cor(G, ind.col = ind_var, size = 3 / 1000,
infos.pos = POS2[ind_var], ncores = 2)
ld <- bigsnpr:::sp_colSumsSq_sym(p = corr0@p, i = corr0@i, x = corr0@x)
corr <- as_SFBM(corr0, compact = sample(c(TRUE, FALSE), 1))
rm(corr0)
ind.sub <- sample(ncol(corr), 30e3)
# LDpred2-gibbs
p_seq <- signif(seq_log(1e-3, 1, length.out = 7), 1)
params <- expand.grid(p = p_seq, h2 = 0.3, sparse = c(FALSE, TRUE))
set.seed(1)
system.time(
beta_grid <- snp_ldpred2_grid(as_SFBM(corr[ind.sub, ind.sub]),
df_beta[ind.sub, ], params, ncores = 2)
)
set.seed(1)
system.time(
beta_grid2 <- snp_ldpred2_grid(corr, ind.corr = ind.sub,
df_beta[ind.sub, ], params, ncores = 2)
)
expect_equal(beta_grid2, beta_grid)
# LDpred2-uncertainty
set.seed(1)
beta_grid <- snp_ldpred2_grid(as_SFBM(corr[ind.sub, ind.sub]),
df_beta[ind.sub, ], params[3, ],
return_sampling_betas = TRUE)
set.seed(1)
beta_grid2 <- snp_ldpred2_grid(corr, ind.corr = ind.sub,
df_beta[ind.sub, ], params[3, ],
return_sampling_betas = TRUE)
expect_equal(beta_grid2, beta_grid)
# LDpred2-auto
set.seed(1)
beta_auto <- snp_ldpred2_auto(as_SFBM(corr[ind.sub, ind.sub]), df_beta[ind.sub, ],
h2_init = 0.3, sparse = TRUE, shrink_corr = 0.95,
burn_in = 100, num_iter = 100)
set.seed(1)
beta_auto2 <- snp_ldpred2_auto(corr, ind.corr = ind.sub, df_beta[ind.sub, ],
h2_init = 0.3, sparse = TRUE, shrink_corr = 0.95,
burn_in = 100, num_iter = 100)
expect_equal(beta_auto2, beta_auto)
# LD Score regression
ld <- Matrix::colSums(corr[]^2)
ldsc <- snp_ldsc(ld_score = ld[ind.sub], ld_size = ncol(corr),
chi2 = with(df_beta[ind.sub, ], (beta / beta_se)^2),
sample_size = df_beta$N[ind.sub])
ldsc2 <- snp_ldsc2(corr, ind.beta = ind.sub, df_beta[ind.sub, ],
intercept = NULL, blocks = 200)
expect_equal(ldsc2, ldsc)
})
################################################################################
test_that("p_bounds works in LDpred2-auto", {
skip_if(is_cran)
skip_if_offline("raw.githubusercontent.com")
bedfile <- file.path(tempdir(), "tmp-data/public-data3.bed")
if (!file.exists(rdsfile <- sub_bed(bedfile, ".rds"))) {
zip <- tempfile(fileext = ".zip")
download.file(
"https://github.com/privefl/bigsnpr/blob/master/data-raw/public-data3.zip?raw=true",
destfile = zip, mode = "wb")
unzip(zip, exdir = tempdir())
rds <- snp_readBed(bedfile)
expect_identical(normalizePath(rds), normalizePath(rdsfile))
}
obj.bigSNP <- snp_attach(rdsfile)
G <- obj.bigSNP$genotypes
y <- obj.bigSNP$fam$affection
POS2 <- obj.bigSNP$map$genetic.dist + 1000 * obj.bigSNP$map$chromosome
sumstats <- bigreadr::fread2(file.path(tempdir(), "tmp-data/public-data3-sumstats.txt"))
sumstats$n_eff <- sumstats$N
map <- setNames(obj.bigSNP$map[-3], c("chr", "rsid", "pos", "a1", "a0"))
df_beta <- snp_match(sumstats, map, join_by_pos = FALSE)
ind_var <- df_beta$`_NUM_ID_`
corr0 <- snp_cor(G, ind.col = ind_var, size = 3 / 1000,
infos.pos = POS2[ind_var], ncores = 2)
corr <- as_SFBM(corr0, compact = sample(c(TRUE, FALSE), 1))
rm(corr0)
# LDpred2-auto with bounds for p
p_bounds <- sort(10^runif(2, -5, 0))
beta_auto <- snp_ldpred2_auto(corr, df_beta, p_bounds = p_bounds,
h2_init = 0.3, shrink_corr = 0.95,
burn_in = 100, num_iter = 100)
all_p_est <- beta_auto[[1]]$path_p_est
sapply(all_p_est, function(p_est) expect_gte(p_est, p_bounds[1]))
sapply(all_p_est, function(p_est) expect_lte(p_est, p_bounds[2]))
beta_auto2 <- snp_ldpred2_auto(corr, df_beta, p_bounds = p_bounds[c(1, 1)],
h2_init = 0.3, shrink_corr = 0.95,
burn_in = 100, num_iter = 100)
sapply(beta_auto2[[1]]$path_p_est, function(p_est) expect_equal(p_est, p_bounds[1]))
})
################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.