# R/corrected_cov_functions.R
#
# Defines functions corrected_cov corrcov corrcov_bhat corrcov_nvar corrcov_nvar_bhat corrcov_CI corrcov_CI_bhat
#
# Documented in corrcov corrcov_bhat corrcov_CI corrcov_CI_bhat corrcov_nvar corrcov_nvar_bhat corrected_cov

#' Corrected coverage estimate of the causal variant in the credible set
#'
#' Requires an estimate of the true effect at the CV (e.g. use maximum absolute z-score or output from corrcoverage::est_mu function)
#' @rdname corrected_cov
#' @title Corrected coverage estimate of the causal variant in the credible set
#' @param pp0 Posterior probabilities of SNPs
#' @param mu The true effect at the CV (estimate using corrcoverage::est_mu function)
#' @param V Variance of the estimated effect size (can be obtained using coloc::Var.beta.cc function)
#' @param Sigma SNP correlation matrix
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (W=0.2 default)
#' @param nrep Number of posterior probability systems to simulate for each variant considered causal (nrep = 1000 default)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return Corrected coverage estimate
#'
#' @examples
#'
#' set.seed(1)
#' nsnps <- 100
#' N0 <- 5000
#' N1 <- 5000
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#'}
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' ## generate V (variance of estimated effect sizes)
#' varbeta <- Var.data.cc(f = maf, N = N0 + N1, s = N1/(N0+N1))
#'
#' pp <- rnorm(nsnps, 0.2, 0.05)
#' pp <- pp/sum(pp)
#'
#' corrected_cov(pp0 = pp, mu = 4, V = varbeta, Sigma = LD, thr = 0.95, nrep = 100)
#'
#' @export
#' @author Anna Hutchinson
corrected_cov <- function(pp0, mu, V, Sigma, thr, W = 0.2, nrep = 1000, pp0min = 0.001) {

  nsnps <- length(pp0)

  # Only SNPs with pp0 above pp0min are considered as the causal variant (CV)
  # and contribute to the weighted average returned at the end.
  usesnps <- which(pp0 > pp0min)
  if (length(usesnps) == 0) {
    stop("no SNPs have pp0 > pp0min, so there is nothing to average over",
         call. = FALSE)
  }

  # Row i of this diagonal matrix is the vector with mu at position i and
  # 0 elsewhere: one such vector per SNP considered causal.
  temp <- diag(x = mu, nrow = nsnps, ncol = nsnps)
  zj <- lapply(usesnps, function(i) temp[i, ])

  # simulate ERR matrix: nrep multivariate normal draws with covariance Sigma;
  # the same draws are handed to .zj_pp for every SNP considered causal
  ERR <- mvtnorm::rmvnorm(nrep, rep(0, ncol(Sigma)), Sigma)

  # calculate r
  r <- W^2/(W^2 + V)

  # simulate pp systems, one list element per SNP considered causal
  # (each element is presumably an nrep-row matrix of posterior
  # probabilities -- see .zj_pp)
  pps <- mapply(.zj_pp, Zj = zj,
                MoreArgs = list(int.Sigma = Sigma, int.nrep = nrep,
                                int.ERR = ERR, int.r = r),
                SIMPLIFY = FALSE)

  # obtain credible set for each simulation, telling credsetC which SNP was
  # treated as causal in that list element
  d5 <- lapply(seq_along(pps), function(x) {
    credsetC(pps[[x]], CV = rep(usesnps[x], dim(pps[[x]])[1]), thr = thr)
  })

  # average the per-SNP results from prop_cov, weighted by pp0
  propcov <- unlist(lapply(d5, prop_cov))
  sum(propcov * pp0[usesnps])/sum(pp0[usesnps])
}

#' Corrected coverage estimate using Z-scores and mafs
#'
#' This function only requires the marginal summary statistics from GWAS
#' @rdname corrcov
#' @title Corrected coverage estimate using Z-scores and MAFs
#' @param z Marginal Z-scores
#' @param f Minor allele frequencies
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (default 1000)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return Corrected coverage estimate
#'
#' @examples
#'
#' set.seed(1)
#' nsnps = 100
#' N0 = 5000
#' N1 = 5000
#' z_scores <- rnorm(nsnps, 0, 3) # simulate a vector of Z-scores
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#' }
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' corrcov(z = z_scores, f = maf, N0, N1, Sigma = LD, thr = 0.95)
#'
#' @export
#' @author Anna Hutchinson
corrcov <- function(z, f, N0, N1, Sigma, thr, W = 0.2, nrep = 1000, pp0min = 0.001) {

    # variance of the estimated effect sizes, computed from the minor allele
    # frequencies, the total sample size and the proportion of cases
    N <- N0 + N1
    s <- N1/N
    varbeta <- 1/(2 * N * f * (1 - f) * s * (1 - s))

    # Pass the caller's W on to ppfunc. A literal 0.2 here would make the
    # posterior probabilities ignore a non-default W even though
    # corrected_cov() below does use it.
    pp <- ppfunc(z, V = varbeta, W = W)

    # estimate of the effect at the CV: |z| weighted by posterior probability
    muhat <- sum(abs(z) * pp)

    corrected_cov(pp0 = pp, mu = muhat, V = varbeta, Sigma = Sigma, thr = thr,
                  W = W, nrep = nrep, pp0min = pp0min)
}

#' Corrected coverage estimate using estimated effect sizes and their standard errors
#'
#' This function only requires the marginal summary statistics from GWAS
#' @rdname corrcov_bhat
#' @title Corrected coverage estimate using estimated effect sizes and their standard errors
#' @param bhat Estimated effect sizes from single-SNP logistic regressions
#' @param V Variance of estimated effect sizes
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (default 1000)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return Corrected coverage estimate
#'
#' @examples
#'
#' set.seed(1)
#' nsnps <- 100
#' N0 <- 1000 # number of controls
#' N1 <- 1000 # number of cases
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#'}
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' varbeta <- Var.data.cc(f = maf, N = N0 + N1, s = N1/(N0+N1))
#'
#' bhats = rnorm(nsnps, 0, 0.2) # log OR
#'
#' corrcov_bhat(bhat = bhats, V = varbeta, N0, N1, Sigma = LD, thr = 0.95)
#'
#' @export
#' @author Anna Hutchinson
corrcov_bhat <- function(bhat, V, N0, N1, Sigma, thr, W = 0.2, nrep = 1000, pp0min = 0.001) {

    # N0 and N1 are not used in the computation below; they are kept in the
    # signature so that existing calls keep working.

    # marginal Z-scores from effect sizes and their variances
    z <- bhat/sqrt(V)

    # Pass the caller's W on to ppfunc. A literal 0.2 here would make the
    # posterior probabilities ignore a non-default W even though
    # corrected_cov() below does use it.
    pp <- ppfunc(z, V, W = W)

    # estimate of the effect at the CV: |z| weighted by posterior probability
    muhat <- sum(abs(z) * pp)

    corrected_cov(pp0 = pp, mu = muhat, V = V, Sigma = Sigma, thr = thr,
                  W = W, nrep = nrep, pp0min = pp0min)
}

#' Obtain corrected coverage estimate using Z-scores and mafs (limiting simulations used for estimation to those with correct nvar)
#'
#' This function requires the marginal summary statistics from GWAS and an nvar value. It should only be used when nvar is very low (<3) and there is some evidence to suggest that only simulated credible sets with this nvar value should be used to derive the corrected coverage estimate.
#' @rdname corrcov_nvar
#' @title Corrected coverage estimate using Z-scores and MAFs (fixing nvar)
#' @param z Marginal Z-scores
#' @param f Minor allele frequencies
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param nvar The number of variants that simulated credible sets used for estimation should contain
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (nrep = 10000 default due to trimming)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return Corrected coverage estimate
#'
#' @examples
#'
#' set.seed(1)
#' nsnps = 100
#' N0 = 5000
#' N1 = 5000
#' z_scores <- rnorm(nsnps, 0, 3) # simulate a vector of Z-scores
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#'}
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' corrcov_nvar(z = z_scores, f = maf, N0, N1, Sigma = LD, thr = 0.95, nvar = 1, nrep = 100)
#'
#' # note that nrep should be at least the default value (nrep = 10000) but is
#' # lower here for speed of computation
#'
#' @export
#'
#' @author Anna Hutchinson
corrcov_nvar <- function(z, f, N0, N1, Sigma, nvar, thr, W = 0.2, nrep = 10000, pp0min = 0.001) {

  # variance of the estimated effect sizes, computed from the minor allele
  # frequencies, the total sample size and the proportion of cases
  N <- N0 + N1
  s <- N1/N
  varbeta <- 1/(2 * N * f * (1 - f) * s * (1 - s))

  # Pass the caller's W on to ppfunc so that the posterior probabilities and
  # r (below) are based on the same prior.
  pp <- ppfunc(z, V = varbeta, W = W)

  # estimate of the effect at the CV: |z| weighted by posterior probability
  muhat <- sum(abs(z) * pp)

  nsnps <- length(pp)

  #### corrected coverage

  # Only SNPs with pp above pp0min are considered as the causal variant (CV).
  usesnps <- which(pp > pp0min)
  if (length(usesnps) == 0) {
    stop("no SNPs have posterior probability > pp0min, so there is nothing to average over",
         call. = FALSE)
  }

  # Row i of this diagonal matrix is the vector with muhat at position i and
  # 0 elsewhere: one such vector per SNP considered causal.
  temp <- diag(x = muhat, nrow = nsnps, ncol = nsnps)
  zj <- lapply(usesnps, function(i) temp[i, ])

  # simulate ERR matrix: nrep multivariate normal draws with covariance Sigma;
  # the same draws are handed to .zj_pp for every SNP considered causal
  ERR <- mvtnorm::rmvnorm(nrep, rep(0, ncol(Sigma)), Sigma)

  r <- W^2/(W^2 + varbeta)

  # simulate pp systems, one list element per SNP considered causal
  pps <- mapply(.zj_pp, Zj = zj,
                MoreArgs = list(int.Sigma = Sigma, int.nrep = nrep,
                                int.ERR = ERR, int.r = r),
                SIMPLIFY = FALSE)

  # obtain credible set for each simulation
  d5 <- lapply(seq_along(pps), function(x) {
    credsetC(pps[[x]], CV = rep(usesnps[x], dim(pps[[x]])[1]), thr = thr)
  })

  # keep only the simulations whose credible set has exactly nvar variants
  d5_trim <- lapply(d5, function(p) p[which(p$nvar == nvar), ])

  # number of retained simulations per SNP considered causal, and the
  # `covered` column of every retained simulation
  nsims <- unlist(lapply(d5_trim, function(x) dim(x)[1]))
  contained <- unlist(lapply(d5_trim, function(p) p$covered))

  if (sum(nsims) == 0) {
    warning("no simulated credible sets contain exactly nvar variants; the estimate is NaN",
            call. = FALSE)
  }

  # weight every retained simulation by the posterior probability of the SNP
  # that was treated as causal when it was generated
  pp.vec <- rep(pp[usesnps], times = nsims)

  sum(contained * pp.vec)/sum(pp.vec)
}

#' Obtain corrected coverage estimate using estimated effect sizes and their standard errors (limiting simulations used for estimation to those with correct nvar)
#'
#' This function requires the marginal summary statistics from GWAS and an nvar value. It should only be used when nvar is very low (<3) and there is some evidence to suggest that only simulated credible sets with this nvar value should be used to derive the corrected coverage estimate.
#' @rdname corrcov_nvar_bhat
#' @title Corrected coverage estimate using estimated effect sizes and their standard errors (fixing nvar)
#' @param bhat Estimated effect sizes from single-SNP logistic regressions
#' @param V Variance of estimated effect sizes
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param nvar The number of variants that simulated credible sets used for estimation should contain
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (nrep = 10000 default due to trimming)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return Corrected coverage estimate
#'
#' @examples
#'
#' set.seed(1)
#' nsnps <- 100
#' N0 <- 5000 # number of controls
#' N1 <- 5000 # number of cases
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#'}
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' varbeta <- Var.data.cc(f = maf, N = N0 + N1, s = N1/(N0+N1))
#'
#' bhats = rnorm(nsnps,0,0.2) # log OR
#'
#' corrcov_nvar_bhat(bhat = bhats, V = varbeta, N0, N1, Sigma = LD, thr = 0.95, nvar = 1, nrep = 1000)
#'
#' # note that nrep should be at least the default value (nrep = 10000) but is
#' # lower here for speed of computation
#'
#' @export
#'
#' @author Anna Hutchinson
corrcov_nvar_bhat <- function(bhat, V, N0, N1, Sigma, nvar, thr, W = 0.2, nrep = 10000, pp0min = 0.001) {

  # N0 and N1 are not used in the computation below; they are kept in the
  # signature so that existing calls keep working.

  # marginal Z-scores from effect sizes and their variances
  z <- bhat/sqrt(V)

  # Pass the caller's W on to ppfunc so that the posterior probabilities and
  # r (below) are based on the same prior.
  pp <- ppfunc(z, V, W = W)

  # estimate of the effect at the CV: |z| weighted by posterior probability
  muhat <- sum(abs(z) * pp)

  nsnps <- length(pp)

  #### corrected coverage

  # Only SNPs with pp above pp0min are considered as the causal variant (CV).
  usesnps <- which(pp > pp0min)
  if (length(usesnps) == 0) {
    stop("no SNPs have posterior probability > pp0min, so there is nothing to average over",
         call. = FALSE)
  }

  # Row i of this diagonal matrix is the vector with muhat at position i and
  # 0 elsewhere: one such vector per SNP considered causal.
  temp <- diag(x = muhat, nrow = nsnps, ncol = nsnps)
  zj <- lapply(usesnps, function(i) temp[i, ])

  # simulate ERR matrix: nrep multivariate normal draws with covariance Sigma;
  # the same draws are handed to .zj_pp for every SNP considered causal
  ERR <- mvtnorm::rmvnorm(nrep, rep(0, ncol(Sigma)), Sigma)

  r <- W^2/(W^2 + V)

  # simulate pp systems, one list element per SNP considered causal
  pps <- mapply(.zj_pp, Zj = zj,
                MoreArgs = list(int.Sigma = Sigma, int.nrep = nrep,
                                int.ERR = ERR, int.r = r),
                SIMPLIFY = FALSE)

  # obtain credible set for each simulation
  d5 <- lapply(seq_along(pps), function(x) {
    credsetC(pps[[x]], CV = rep(usesnps[x], dim(pps[[x]])[1]), thr = thr)
  })

  # keep only the simulations whose credible set has exactly nvar variants
  d5_trim <- lapply(d5, function(p) p[which(p$nvar == nvar), ])

  # number of retained simulations per SNP considered causal, and the
  # `covered` column of every retained simulation
  nsims <- unlist(lapply(d5_trim, function(x) dim(x)[1]))
  contained <- unlist(lapply(d5_trim, function(p) p$covered))

  if (sum(nsims) == 0) {
    warning("no simulated credible sets contain exactly nvar variants; the estimate is NaN",
            call. = FALSE)
  }

  # weight every retained simulation by the posterior probability of the SNP
  # that was treated as causal when it was generated
  pp.vec <- rep(pp[usesnps], times = nsims)

  sum(contained * pp.vec)/sum(pp.vec)
}

#' Obtain confidence interval for corrected coverage estimate using Z-scores and mafs
#'
#' @rdname corrcov_CI
#' @title Confidence interval for corrected coverage estimate using Z-scores and MAFs
#' @param z Marginal Z-scores
#' @param f Minor allele frequencies
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (nrep = 1000 default)
#' @param CI The size of the confidence interval (as a decimal)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return CI for corrected coverage estimate
#'
#' @examples
#'
#' \donttest{
#'
#'  # this is a long running example
#' set.seed(1)
#' nsnps = 100
#' N0 = 5000
#' N1 = 5000
#' z_scores <- rnorm(nsnps, 0, 3) # simulate a vector of Z-scores
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#' }
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' corrcov_CI(z = z_scores, f = maf, N0, N1, Sigma = LD, thr = 0.95)
#' }
#'
#' @export
#'
#' @author Anna Hutchinson
corrcov_CI <- function(z, f, N0, N1, Sigma, thr, W = 0.2, nrep = 1000, CI = 0.95, pp0min = 0.001){
  # the interval is built from this many independent corrcov() estimates
  n_estimates <- 100

  estimates <- replicate(
    n_estimates,
    corrcov(z = z, f = f, N0 = N0, N1 = N1, Sigma = Sigma, thr = thr,
            W = W, nrep = nrep, pp0min = pp0min)
  )

  # central interval of size CI: equal tail probability on either side
  lower_tail <- (1-CI)/2
  upper_tail <- (CI+1)/2

  stats::quantile(estimates, probs = c(lower_tail, upper_tail))
}

#' Obtain confidence interval for corrected coverage estimate using estimated effect sizes and their standard errors
#'
#' @rdname corrcov_CI_bhat
#' @title Confidence interval for corrected coverage estimate using estimated effect sizes and their standard errors
#' @param bhat Estimated effect sizes from single-SNP logistic regressions
#' @param V Variance of estimated effect sizes
#' @param N0 Number of controls
#' @param N1 Number of cases
#' @param Sigma SNP correlation matrix
#' @param thr Minimum threshold for fine-mapping experiment
#' @param W Prior for the standard deviation of the effect size parameter, beta (default 0.2)
#' @param nrep The number of simulated posterior probability systems to consider for the corrected coverage estimate (nrep = 1000 default)
#' @param CI The size of the confidence interval (as a decimal)
#' @param pp0min Only average over SNPs with pp0 > pp0min
#' @return CI for corrected coverage estimate
#'
#' @examples
#'
#' \donttest{
#'  # this is a long running example
#' set.seed(1)
#' nsnps <- 100
#' N0 <- 5000 # number of controls
#' N1 <- 5000 # number of cases
#'
#' ## generate example LD matrix
#' library(mvtnorm)
#' nsamples = 1000
#'
#' simx <- function(nsnps, nsamples, S, maf=0.1) {
#'     mu <- rep(0,nsnps)
#'     rawvars <- rmvnorm(n=nsamples, mean=mu, sigma=S)
#'     pvars <- pnorm(rawvars)
#'     x <- qbinom(1-pvars, 1, maf)
#'}
#'
#' S <- (1 - (abs(outer(1:nsnps,1:nsnps,`-`))/nsnps))^4
#' X <- simx(nsnps,nsamples,S)
#' LD <- cor2(X)
#' maf <- colMeans(X)
#'
#' varbeta <- Var.data.cc(f = maf, N = N0 + N1, s = N1/(N0+N1))
#'
#' bhats = rnorm(nsnps,0,0.2) # log OR
#'
#' corrcov_CI_bhat(bhat = bhats, V = varbeta, N0, N1, Sigma = LD, thr = 0.95)
#' }
#'
#' @export
#'
#' @author Anna Hutchinson
corrcov_CI_bhat <- function(bhat, V, N0, N1, Sigma, thr, W = 0.2, nrep = 1000, CI = 0.95, pp0min = 0.001){
  # the interval is built from this many independent corrcov_bhat() estimates
  n_estimates <- 100

  estimates <- replicate(
    n_estimates,
    corrcov_bhat(bhat = bhat, V = V, N0 = N0, N1 = N1, Sigma = Sigma,
                 thr = thr, W = W, nrep = nrep, pp0min = pp0min)
  )

  # central interval of size CI: equal tail probability on either side
  lower_tail <- (1-CI)/2
  upper_tail <- (CI+1)/2

  stats::quantile(estimates, probs = c(lower_tail, upper_tail))
}

# Try the corrcoverage package in your browser
#
# Any scripts or data that you put into this service are public.
#
# corrcoverage documentation built on Dec. 7, 2019, 1:07 a.m.