###### Function to get correction ######
# #' Get correction
# #'
# #' Calculate corrected causal effect estimate (SE) and the covariance between
# #' observed and corrected effects, and test for the difference between the two
# #'
# #' @param IVs xx
# #' @param lambda xx
# #' @param lambda_se xx
# #' @param h2_LDSC xx
# #' @param h2_LDSC_se xx
# #' @param alpha_obs xx
# #' @param alpha_obs_se xx
# #' @param n_exp xx
# #' @param n_out xx
# #' @param M xx
# #'
# #' @inheritParams MRlap
# #' # @export
# NOT EXPORTED
get_correction <- function(IVs, lambda, lambda_se, h2_LDSC, h2_LDSC_se,
alpha_obs, alpha_obs_se, n_exp, n_out, MR_threshold, verbose){
M=1150000 # consider M is a constant! number of independent markers genome-wide
Tr = -stats::qnorm(MR_threshold/2)
lambdaPrime = lambda/sqrt(n_exp*n_out)
if(verbose) cat("> Estimating genetic architecture parameters... \n")
# function for optimisation
get_pi <- function(my_pi, sumbeta2, Tr, n_exp, h2_LDSC, M){
if(0>=my_pi) return(1e6)
sigma = sqrt(h2_LDSC/(my_pi*M))
denominator = (my_pi * (2*(sigma^2 + 1/n_exp) * stats::pnorm( - Tr / sqrt( 1 + n_exp * sigma^2) ) +
2 * Tr *(n_exp * sigma^4 + 2*sigma^2 + 1/n_exp) *exp(-Tr^2 / (2*(n_exp*sigma^2+1)))/( sqrt(2*pi) * ( 1 + n_exp *sigma^2)^(3/2))) +
(1-my_pi)*1/n_exp * (2*stats::pnorm(-Tr) + 2*Tr *stats::dnorm(Tr))) * M
return(abs(denominator-sumbeta2))
}
# get genetic architecture
get_geneticArchitecture<- function(theta, Nexp, M, Tr){
# theta is (effects, h2_LDSC)
h2_LDSC = theta[length(theta)]
effects = theta[-length(theta)]
sumBeta2 = sum(effects^2)
nSP = 5 # seems enough! convergence is very good, for all starting points
Res_SP = data.frame(SP = 1:nSP,
SP_pi = NA_real_,
diff = NA_real_,
pi = NA_real_)
for(i in 1:nSP){
theta = 3 * 10^(stats::runif(1, -7, -1))
res_optim = stats::optimise(get_pi, interval=c(1e-7, 0.3),
sumbeta2=sumBeta2, Tr=Tr, n_exp=n_exp, h2_LDSC=h2_LDSC, M=M, tol = 1e-6)
Res_SP[i, 2:4] = c(theta, res_optim$objective, res_optim$minimum)
}
# take best one
Res_SP %>% dplyr::arrange(diff) %>% dplyr::slice(1) %>% dplyr::pull(pi) -> pi_x
sigma = sqrt(h2_LDSC/(M*pi_x))
return(c(as.numeric(pi_x), as.numeric(sigma)))
}
theta = c(IVs$std_beta.exp,h2_LDSC)
res_genA = get_geneticArchitecture(theta, n_exp, M, Tr)
get_alpha <- function(n_exp, lambdaPrime, pi_x, sigma, alpha_obs, Tr){
A = stats::pnorm( - Tr / sqrt(1 + n_exp * sigma^2) )
B = 2 * Tr * exp(-Tr^2 / (2*(n_exp*sigma^2+1)))/( sqrt(2*pi)* ( 1 + n_exp *sigma^2)^(3/2))
C = (stats::pnorm(-Tr) + Tr *stats::dnorm(Tr))
a = (pi_x * (2*(sigma^2 + 1/n_exp) * A +
B *(n_exp * sigma^4 + 2*sigma^2 + 1/n_exp)))
b = (1-pi_x)*2/n_exp * C
d = a+b
alpha = (alpha_obs * d - (lambdaPrime * pi_x * (2*A + B + sigma^2 * n_exp*B) +
lambdaPrime * (1 - pi_x) * 2 * C)) / ( pi_x * sigma^2 * (2*A + B * n_exp * sigma^2 + B))
return(alpha)
}
if(verbose) cat("> Estimating corrected effect... \n")
alpha_corrected = get_alpha(n_exp, lambdaPrime, res_genA[1], res_genA[2], alpha_obs, Tr)
## get SE and covariance
get_correctedSE <- function(IVs, lambda, lambda_se, h2_LDSC, h2_LDSC_se, alpha_obs, alpha_obs_se, n_exp, n_out, M, Tr, s=1000, sthreshold=0.05, extracheck=T){
get_s <- function(s){
effects = IVs$std_beta.exp
effects_se = IVs$std_SE.exp
# simulate 500 lambda
L = matrix(stats::rnorm(s, lambda, lambda_se), ncol=s)/sqrt(n_exp*n_out)
# simulate 500 "instruments sets" - each column = 1 simulation
E = matrix(stats::rnorm(nrow(IVs)*s, effects, effects_se), ncol= s)
# simulate 500 h2_LDSC
negativeH2 = FALSE
H = matrix(stats::rnorm(s, h2_LDSC, h2_LDSC_se), ncol=s)
if(any(H<0)){
negativeH2 = TRUE
while(!all(H>0)){
H[H<0] = stats::rnorm(length(H[H<0]), h2_LDSC, h2_LDSC_se)
}
}
# effects + h2 are needed to get pi and therefore sigma
D = rbind(E, H)
pis = apply(D, 2, function(x) get_geneticArchitecture(x, n_exp, M, Tr))
# simulate 500 alpha_obs
B = matrix(stats::rnorm(s, alpha_obs, alpha_obs_se), ncol= s)
# get 500 alpha_corrected + sd
all_params = data.frame(pi = pis[1,],
sigma = pis[2,],
alpha = B[1,],
lambda = L[1,])
all_params$corrected = apply(all_params, 1, function(x) get_alpha(n_exp, x[4], x[1], x[2], x[3], Tr))
all_params$warning_negH2 = negativeH2
return(all_params)
}
res = get_s(s)
num_groups=10
res %>%
dplyr::group_by((dplyr::row_number()-1) %/% (dplyr::n()/num_groups)) %>%
tidyr::nest() %>% dplyr::pull(data) -> res_subsets
subsets_var = unlist(lapply(res_subsets, function(x) stats::var(x$corrected)))
subsets_cov = unlist(lapply(res_subsets, function(x) stats::cov(x$corrected, x$alpha)))
# check coeffificent(s) of variation
needmore = F
if(stats::sd(subsets_var) / base::mean(subsets_var) > sthreshold) needmore=T
if(stats::sd(subsets_cov) / base::mean(subsets_cov) > sthreshold) needmore=T
if(extracheck & (alpha_obs_se^2 + stats::sd(res$corrected)^2 - 2* stats::cov(res$alpha, res$corrected))<0) needmore=T
tmp_sd_corrected = stats::sd(res$corrected)
while(needmore){
res = rbind(res,get_s(s))
res %>%
# in some cases with low h2, might get very "bad" corrected effects,
# mostly when low h2 ...
# need to remove them : just use 10 sd cutoff
dplyr::filter(.data$corrected < alpha_corrected + 10*tmp_sd_corrected, .data$corrected > alpha_corrected - 10*tmp_sd_corrected) %>%
dplyr::group_by((dplyr::row_number()-1) %/% (dplyr::n()/num_groups)) %>%
tidyr::nest() %>% dplyr::pull(data) -> res_subsets
subsets_var = unlist(lapply(res_subsets, function(x) stats::var(x$corrected)))
subsets_cov = unlist(lapply(res_subsets, function(x) stats::cov(x$corrected, x$alpha)))
needmore = F
if(stats::sd(subsets_var) / base::mean(subsets_var) > sthreshold) needmore=T
if(stats::sd(subsets_cov) / base::mean(subsets_cov) > sthreshold) needmore=T
if(extracheck & (alpha_obs_se^2 + stats::sd(res$corrected)^2 - 2* stats::cov(res$alpha, res$corrected))<0) needmore=T
}
all_res = c(stats::sd(res$corrected), stats::cov(res$alpha, res$corrected), nrow(res), any(res$warning_negH2))
return(all_res)
}
# get SE corrected effects + COV
se_cov = get_correctedSE(IVs, lambda, lambda_se, h2_LDSC, h2_LDSC_se, alpha_obs, alpha_obs_se, n_exp, n_out, M, Tr)
# sqrt(h2_LDSC/(my_pi * M)) : NaNs produced
if(verbose) cat(" ", "corrected effect:", format(alpha_corrected, digits = 3), "(", format(se_cov[1], digits=3), ")\n")
if(verbose) cat(" ", "covariance between observed and corrected effect:", format(se_cov[2], digits=3), "\n")
if(verbose) cat(" ", se_cov[3], "simulations were used to estimate the variance and the covariance.\n")
if(se_cov[4]) warning("some h2 were negative in the parametric bootstrap, please check that the heritability of the exposure is not too low as this might compromise the results.\n")
if(verbose) cat("> Testing difference between observed and corrected effect... \n")
test_diff = (alpha_obs - alpha_corrected) /
sqrt(alpha_obs_se^2 + se_cov[1]^2 - 2* se_cov[2])
p_diff = 2*stats::pnorm(-abs(test_diff), lower.tail=T)
return(list("alpha_corrected"=alpha_corrected,
"alpha_corrected_se" = se_cov[1],
"cov_obs_corrected" = se_cov[2],
"test_diff"=test_diff,
"p_diff"=p_diff,
"pi_x" = res_genA[1],
"sigma2_x" = res_genA[2]^2))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.