Nothing
##=================================************************************************=================================##
## This file contains functions that are used for summarizing the MCMC data generated to make inference on pleiotropy.
## These functions are also the same for both uncorrelated and correlated versions of CPBayes.
## There is also one function which can be used for making some post summaries for interesting variants
## For example, estimating the direction of associations and posterior mean/median of beta or odds ratios
##=================================************************************************=================================##
##======================********** subset selection using the MCMC sample data ***********==========================##
select_subset <- function( K, Z.data, mcmc.samplesize )
{
#asso.pr = colSums(Z.data)/mcmc.samplesize
z = as.data.frame(Z.data)
Z.summary = aggregate(z, by=z, length)[1:(ncol(z)+1)]
row.no = which.max(Z.summary[,K+1])
select.Z = Z.summary[row.no,]
select.Z = select.Z[-(K+1)]
subset = which(select.Z==1)
#Z.sort.summary.uncor = Z.summary[order(Z.summary[,K+1]),] ;
#data <- list( subset = subset, asso.pr = asso.pr )
return(subset)
}
##======================********** compute log10BF and PPNA using the MCMC sample data ***********==================##
overall_pleio_measure <- function( K, shape1, shape2, sample_probZ_zero )
{
## calculate the Bayes factor, first compue the posterior odds
log_probZ_zero <- log(sample_probZ_zero)
log_probZ_zero <- rowSums(log_probZ_zero)
probZ_zero <- exp(log_probZ_zero)
poste_deno <- mean(probZ_zero)
log_posterior_deno <- log(poste_deno)
log_posterior_nume <- log(1-poste_deno)
log_posterior_odds <- log_posterior_nume - log_posterior_deno
## compute the prior odds
p1 <- shape1/(shape1+shape2)
p0 <- 1-p1; log.p0 <- log(p0); log.nume <- log(1-(p0^K));
log.deno = K*log.p0; log.prior.odds = log.nume - log.deno;
log_BF <- log_posterior_odds-log.prior.odds
log10_BF <- log10(exp(log_BF))
if(log10_BF == Inf) log10_BF <- 300 ## assigning an upper bound if the BF becomes infinity
## compute the PPNA using the Posterior odds computed in the above
a <- exp(log_posterior_odds)
log_deno <- log(1+a)
PPNA <- exp(-log_deno)
if(PPNA == 0) PPNA <- 10^(-300) ## assigning an lower bound if it becomes zero.
data <- list( log10_BF = log10_BF, PPNA = PPNA )
return(data)
}
##===============********** Estimate directions of associations using the MCMC sample data ***********==============##
estimate_directions <- function( K, sim.beta)
{
## estimation of the effect directions
direction <- sign(sim.beta)
direction <- (direction+1)/2 ## convert from +1/-1 scale to 0/1 scale
direction_prob <- colMeans(direction) ## probability of an effect being positive
positive_association <- direction_prob>0.5
effect_direction <- character(K)
effect_direction[which(positive_association==TRUE)] <- "positive"
effect_direction[which(positive_association==FALSE)] <- "negative"
direction <- effect_direction
}
##===========================********** post summaries for a pleiotropy signal ***********==========================##
## (1-level)% credible interval to be computed by CPBayes
## default value of level = 0.05. It also computes posterior summary of beta and odds ratio
##===========================*************************************************************==========================##
## Post processing of the mcmc data genarated by MCMC.
#' Post summary of the MCMC data generated by the uncorrelated or correlated version of CPBayes.
#'
#' Run the \code{\link{post_summaries}} function to summarize the MCMC data produced by
#' \code{\link{cpbayes_uncor}} or \code{\link{cpbayes_cor}} and obtain meaningful insights
#' into an observed pleiotropic signal.
#'
#' @param mcmc_output A list returned by either
#' \code{\link{cpbayes_uncor}} or \code{\link{cpbayes_cor}}. This list
#' contains the primary results and MCMC data produced by \code{\link{cpbayes_uncor}}
#' or \code{\link{cpbayes_cor}}. No default is specified. See the example below.
#' @param level A numeric value. (1-level)\% credible interval (Bayesian analog of the
#' confidence interval) of the unknown true genetic effect (beta/odds ratio)
#' on each trait is computed. Default choice is 0.05.
#' @return The output produced by this function is a list that consists of various components.
#' \item{variantName}{It is the name of the genetic variant provided by the user. If not
#' specified by the user, default name is `Variant'.}
#' \item{log10_BF}{It provides the log10(Bayes factor) produced by CPBayes that measures
#' the evidence of the overall pleiotropic association.}
#' \item{locFDR}{It provides the local false discovery rate (posterior probability of null association) produced by
#' CPBayes (a Bayesian analog of the p-value) which is a measure of the evidence
#' of aggregate-level pleiotropic association. Bayes factor is adjusted for prior odds, but
#' locFDR is solely a function of posterior odds. locFDR can sometimes be significantly small
#' indicating an association, but log10_BF may not. Hence, always check both log10_BF and locFDR.}
#' \item{subset}{A data frame providing the optimal subset of associated/non-null traits
#' along with their trait-specific posterior probability of association (PPAj) and direction
#' of associations. It is NULL if no phenotype is selected by CPBayes.}
#' \item{important_traits}{It provides the traits which yield a trait-specific posterior
#' probability of association (PPAj) > 20\%. Even if a phenotype is not selected in the
#' optimal subset of non-null traits, it can produce a non-negligible value of
#' trait-specific posterior probability of association. We note that `important_traits'
#' is expected to include the traits already contained in `subset'. It provides the
#' name of the important traits and their trait-specific posterior probability of
#' association (PPAj) and the direction of associations. Always check
#' 'important_traits' even if 'subset' contains a single trait.
#' It helps to better explain an observed pleiotropic signal.}
#' \item{traitNames}{It returns the name of all the phenotypes specified by the user.
#' Default is trait1, trait2, ... , traitK.}
#' \item{PPAj}{Data frame providing the trait-specific posterior probability of
#' association for all the phenotypes.}
#' \item{poste_summary_beta}{Data frame providing the posterior summary of the
#' unknown true genetic effect (beta) on each trait. It gives posterior mean,
#' median, standard error, credible interval (lower and upper limits) of the
#' true beta corresponding to each trait.}
#' \item{poste_summary_OR}{Data frame providing the posterior summary of the unknown
#' true genetic effect (odds ratio) on each trait. It gives posterior mean, median,
#' standard error, credible interval (lower and upper limits) of the true odds
#' ratio corresponding to each trait.}
#'
#' @references Majumdar A, Haldar T, Bhattacharya S, Witte JS (2018) An efficient Bayesian meta analysis approach for studying cross-phenotype genetic associations. PLoS Genet 14(2): e1007139.
#'
#' @seealso \code{\link{cpbayes_uncor}}, \code{\link{cpbayes_cor}}
#'
#' @examples
#' data(ExampleDataUncor)
#' BetaHat <- ExampleDataUncor$BetaHat
#' BetaHat
#' SE <- ExampleDataUncor$SE
#' SE
#' traitNames <- paste("Disease", 1:10, sep = "")
#' SNP1 <- "rs1234"
#' result <- cpbayes_uncor(BetaHat, SE, Phenotypes = traitNames, Variant = SNP1)
#' PleioSumm <- post_summaries(result, level = 0.05)
#' str(PleioSumm)
#'
#' @export
post_summaries <- function( mcmc_output, level = 0.05 )
{
genetic_variant <- mcmc_output$variantName
log10_BF <- mcmc_output$log10_BF
PPNA <- mcmc_output$locFDR
subset <- mcmc_output$subset
important_phenos <- mcmc_output$important_traits
MCMC_data <- mcmc_output$auxi_data
traitNames <- MCMC_data$traitNames
K <- MCMC_data$K
mcmc.samplesize <- MCMC_data$mcmc.samplesize
asso.pr <- MCMC_data$PPAj
Z.data <- MCMC_data$Z.data
sim.beta <- MCMC_data$sim.beta
asso_prob = data.frame( traits = traitNames, PPAj = asso.pr, stringsAsFactors = FALSE)
## estimate the direction of associations
alldirection <- estimate_directions( K, sim.beta )
optimal_subset <- data.frame( traits = subset, stringsAsFactors = FALSE)
if( length(subset) > 0 ){
selected <- match(subset, traitNames)
optimal_subset$PPAj <- asso.pr[selected]
optimal_subset$direction <- alldirection[selected]
}
imp_traits <- important_phenos$traits
if( length(imp_traits) > 0 ){
selected <- match(imp_traits, traitNames)
direct <- alldirection[selected]
important_phenos$direction = direct
}
## for summarizing the 'beta' parameters themselves
poste_mean_beta <- 0
poste_median_beta <- 0
poste_se_beta <- 0
CI_l <- 0 ## lower limit of 95% credible interval for beta
CI_u <- 0 ## upper limit of 95% credible interval for beta
poste_mean_OR <- 0 ## for summarizing the odds ratio (OR) = exp(beta)
poste_median_OR <- 0
CI_OR_l <- 0 ## lower limit of 95% credible interval for odds ratios
CI_OR_u <- 0 ## upper limit of 95% credible interval for odds ratios
lev <- level
lev1 <- lev/2
lev2 <- 1-(lev/2)
for( k in 1:K ){
poste_mean_beta[k] <- mean(sim.beta[,k])
poste_se_beta[k] <- sd(sim.beta[,k])
beta_qtls <- quantile( sim.beta[,k], prob = c( lev1, 0.5, lev2 ) )
CI_l[k] <- beta_qtls[1]
poste_median_beta[k] <- beta_qtls[2]
CI_u[k] <- beta_qtls[3]
ORs <- exp(sim.beta[,k])
poste_mean_OR[k] <- mean(ORs)
or_qtls <- quantile( ORs, prob = c( lev1, 0.5, lev2 ) )
CI_OR_l[k] <- or_qtls[1]
poste_median_OR[k] <- or_qtls[2]
CI_OR_u[k] <- or_qtls[3]
}
poste_summary_beta = data.frame( traits = traitNames, poste_mean = poste_mean_beta,
poste_median = poste_median_beta, poste_se = poste_se_beta, lCl = CI_l,
uCl = CI_u, stringsAsFactors = FALSE)
poste_summary_OR = data.frame( traits = traitNames, poste_mean = poste_mean_OR, poste_median = poste_median_OR,
lCl = CI_OR_l, uCl = CI_OR_u, stringsAsFactors = FALSE)
data = list( variantName = genetic_variant, log10_BF = log10_BF, locFDR = PPNA, subset = optimal_subset,
important_traits = important_phenos, traitNames = traitNames, PPAj = asso_prob,
poste_summary_beta = poste_summary_beta, poste_summary_OR = poste_summary_OR )
}
##===========================********** Forest plot for a pleiotropy signal ***********==========================##
## (1-level)% confidence interval of beta parameter is to be plotted in the forest plot. default value of level = 0.05.
##===========================*************************************************************==========================##
## Forest plot presenting pleiotropy result obtained by CPBayes.
#' Forest plot presenting pleiotropy result obtained by CPBayes.
#'
#' Run the \code{\link{forest_cpbayes}} function to create a forest plot that presents the pleiotropy result obtained
#' by \code{\link{cpbayes_uncor}} or \code{\link{cpbayes_cor}}.
#'
#' @param mcmc_output A list returned by either
#' \code{\link{cpbayes_uncor}} or \code{\link{cpbayes_cor}}. This list
#' contains all the primary results and MCMC data produced by \code{\link{cpbayes_uncor}}
#' or \code{\link{cpbayes_cor}}. No default is specified. See the example below.
#' @param level A numeric value. (1-level)\% confidence interval of the unknown true genetic effect (beta/log(odds ratio))
#' on each trait is plotted in the forest plot. Default choice is 0.05.
#' @param PPAj_cutoff A numeric value. It's a user-specified threshold of PPAj (trait-specific posterior probability
#' of association). Only those traits having PPAj values above this cut-off are included in the forest plot. So, the choice of
#' this variable as '0.0' includes all traits in the forest plot. Default is 0.01.
#' @return The output produced by this function is a diagram file in .pdf format. The details of the diagram are as follows:
#' \item{file_name}{The pdf file is named after the genetic variant. So, if the argument `Variant'
#' in \code{\link{cpbayes_uncor}} or \code{\link{cpbayes_cor}} is specified as 'rs1234', the figure file is named as rs1234.pdf.}
#' \item{Column1}{First column in the figure specifies the name of the phenotypes.}
#' \item{Column2}{Second column provides the trait-specific univariate association p-value for a trait.}
#' \item{Column3}{Third column provides the trait-specific posterior probability of association (PPAj) produced by CPBayes.}
#' \item{Column4}{Fourth column states whether a phenotype was selected in the optimal subset of associated/non-null traits
#' detected by CPBayes. If a phenotype was not selected, selected and positively associated, selected and negatively associated,
#' its association status is stated as null, positive and negative, respectively.}
#' \item{Column5}{In the right section of the figure, the primary eatimate and confidence interval of the beta/log odds ratio parameter for
#' a trait is plotted.}
#' @references Majumdar A, Haldar T, Bhattacharya S, Witte JS (2018) An efficient Bayesian meta analysis approach for studying cross-phenotype genetic associations. PLoS Genet 14(2): e1007139.
#'
#' @seealso \code{\link{cpbayes_uncor}}, \code{\link{cpbayes_cor}}
#'
#' @examples
#' data(ExampleDataUncor)
#' BetaHat <- ExampleDataUncor$BetaHat
#' SE <- ExampleDataUncor$SE
#' traitNames <- paste("Disease", 1:10, sep = "")
#' SNP1 <- "rs1234"
#' result <- cpbayes_uncor(BetaHat, SE, Phenotypes = traitNames, Variant = SNP1)
#' \dontrun{forest_cpbayes(result, level = 0.05)}
#'
#' @export
forest_cpbayes <- function(mcmc_output, level = 0.05, PPAj_cutoff = 0.01){
result <- mcmc_output
betahat <- result$auxi_data$betahat
se <- result$auxi_data$se
summ <- post_summaries(result) ## summary of CPBayes results
traits <- summ$traitNames ## phenotypes
K <- length(traits) ## number of traits
selection <- rep("null", K) ## if no trait is selected
if(is.null(summ$subset) == FALSE){
selected_traits <- summ$subset$traits
select_trait_posi <- match(selected_traits, traits)
direction <- summ$subset$direction
selection[select_trait_posi] <- direction
}
upper_alfa <- abs(qnorm( (level/2), 0,1))
shift <- upper_alfa*se
lowCIbeta <- betahat - shift ## lower confidence interval of beta
upCIbeta <- betahat + shift ## upper confidence interval of
pvalues <- pchisq( (betahat/se)^2, df=1, lower.tail=F )
for(j in 1:K){
x <- pvalues[j]
count <- 0
while(x < 1){ x <- 10*x; count <- count+1 }
pvalues[j] <- round(pvalues[j], digits = count) ## or, digits = count
}
PPAj <- summ$PPAj$PPAj
select = which(PPAj > PPAj_cutoff)
PPAj <- round(100*summ$PPAj$PPAj, digits = 1)
PPAj <- paste(as.character(PPAj), "%", sep = "")
labeltext <- data.frame( Trait = traits, Pvalue = as.character(pvalues), PPAj = PPAj, selection = selection, stringsAsFactors = FALSE )
df_names <- data.frame( Trait = "Trait", Pvalue = "pvalue", PPAj = "PPAj", selection = "association", stringsAsFactors=F)
labeltext = rbind(df_names,labeltext)
betahat <- c(NA, betahat)
lowCIbeta <- c(NA, lowCIbeta)
upCIbeta <- c(NA, upCIbeta)
log10BF <- round(summ$log10_BF, digits = 2)
PPNA <- summ$locFDR
x <- PPNA
count <- 0
while(x < 1){ x <- 10*x; count <- count+1 }
PPNA <- round(PPNA, digits = count+1)
if(length(select) > 0){
select <- select+1
select <- c(1,select)
title <- summ$variantName
pdffile <- paste(title, ".pdf", sep = "")
title <- paste("Pleiotropy at ", title, sep = "") ## "PPNA = ", PPNA,
pdf(pdffile)
forestplot(labeltext[select, ], betahat[select], lowCIbeta[select], upCIbeta[select],
zero = 0, lineheight = "auto", boxsize = 0.12, xlab = "Estimate and CI of log(OR)",
col = fpColors(lines="red", box="darkred"), title = title, new_page = FALSE)
# boxsize = 0.12,
dev.off()
}else{
print("Forest plot not created, because no trait has a PPA_j value above the threshold specified.")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.