R/MV.cor.test.R
In MultiABEL: Multi-Trait Genome-Wide Association Analysis

#' Correlation replication test based on multivariate analysis results
#' 
#' This function is developed to implement correlation replication test based on MVA or cMVA results
#' 
#' @param marker The SNP to be analyzed
#' @param gwa.1 GWAS summary statistics for sample 1, includes A1, A2 and two columns for each trait: beta and se
#' @param gwa.2 GWAS summary statistics for sample 2, includes A1, A2 and two columns for each trait: beta and se
#' @param R.1 Phenotypic correlation matrix for sample 1
#' @param R.2 Phenotypic correlation matrix for sample 2
#' @param traits Traits to be analyzed
#' @param nrep The number of Monte Carlo repetitions 
#' @param probs Percentiles of the endpoints of confidence interval
#' @param method The method used for computing correlation coefficient
#' @param plot If the results for making correlation test figure are needed
#' 
#' @return The function returns two lists of \code{res}, which includes 
#' 1) \code{correlation} Estimated correlation computed from original sample; 
#' 2) \code{ci.left} The value at left endpoint of confidence interval; 
#' 3) \code{ci.right} The value at right endpoint of confidence interval (Note: If there are only two traits, 
#' then the ratio of correlation equals to one is provided instead of ci.left and ci.right),
#' and \code{df.plot}, will be provided if \code{plot = TRUE}, includes 
#' 1) \code{traits} The name of traits in analysis;
#' 2) \code{rank.1} The rank of estimated effect sizes in sample 1;
#' 3) \code{rank.2} The rank of estimated effect sizes in sample 2;
#' 4) \code{mean.conc} The mean of concordant pairs in MC generated by the trait;
#' 5) \code{sd.conc} The standard deviation of concordant pairs in MC generated by the trait;
#' 6) \code{se.beta} The standard error of estimated effect sizes computed using inverse-variance weighting.
#' 
#' @author Zheng Ning, Xia Shen
#' 
#' @references 
#' Zheng Ning, Yakov Tsepilov, Sodbo Zh. Sharapov, Alexander K. Grishenko, Masoud Shirali, Peter K. Joshi,
#' James F. Wilson, Yudi Pawitan, Chris S. Haley, Yurii S. Aulchenko, Xia Shen (2018).
#' Multivariate discovery, replication, and interpretation of pleiotropic loci using summary association statistics. \emph{Submitted}.
#' 
#' @seealso 
#' \code{MultiSummary}
#' 
#' @examples 
#' \dontrun{
#' 
#' data(example.MV.cor.test)
#' 
#' ## Six-trait correlation test ##
#' traits <- c("HEIGHT", "BMI", "HIP", "WC", "WHR", "WEIGHT")
#' set.seed(510)
#' MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
#'             R.2 = example.R.2, traits = traits, nrep = 10000)
#' 
#' ## Make correlation correlation test figure ##
#' require(ggplot2)
#' require(cowplot)
#' 
#' set.seed(510)
#' res.mv.cor <- MV.cor.test(marker = "rs905938", gwa.1 = example.gwa.1, gwa.2 = example.gwa.2, R.1 = example.R.1,
#'                           R.2 = example.R.2, traits = traits, nrep = 10000, plot = TRUE)
#' df.plot <- res.mv.cor$df.plot
#' 
#' p1 <- ggplot()+ 
#'   geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits), size=2) + 
#'   geom_point(data=df.plot, mapping=aes(x=rank.1, y=rank.2, color=traits, size = se.beta), alpha = 0.2) + 
#'   stat_smooth(data=df.plot, mapping=aes(x=rank.1, y=rank.2), method = "lm", se=FALSE, color="black", size=0.3, fullrange = TRUE) + 
#'   coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0.5, 6.5)) + xlim(0,200) + 
#'   scale_size_continuous(range = c(3, 10)) +
#'   theme(axis.text=element_text(size=10),
#'         axis.title=element_text(size=14,face="bold"), 
#'         strip.text.x = element_text(size = 16))+ 
#'   theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
#'         axis.ticks.x=element_blank(),axis.title.y=element_blank(), legend.position = c(0.8,0.3), 
#'         legend.background=element_rect(colour='NA', fill='transparent'), legend.key=element_blank(), 
#'         legend.title=element_text(size=14), 
#'         legend.text=element_text(size=12), legend.key.size = unit(1.4, 'lines')) + 
#'   guides(colour = guide_legend(override.aes = list(alpha = 1)), size = FALSE) +
#'   scale_colour_discrete(name = "Traits")
#' 
#' p2 <- ggplot(data=df.plot, aes(x=rank.1,y=mean.conc)) +
#'   coord_cartesian(xlim = c(0.5, 6.5), ylim = c(0, 5.5)) + 
#'   geom_bar(stat = "identity", aes(fill=traits), width = 0.4) + theme(legend.position="none") + theme(
#'     strip.background = element_blank(),
#'     strip.text.x = element_blank()
#'   ) + geom_errorbar(aes(ymin = mean.conc - sd.conc,ymax = mean.conc + sd.conc), width = 0.1)  + 
#'   theme(axis.title.x=element_blank(),axis.text.x=element_blank(),
#'         axis.ticks.x=element_blank(),axis.title.y=element_blank()) + 
#'   theme(plot.margin = unit(c(0, 0, 0, 0), "cm"))
#' 
#' require(cowplot)
#' plot_grid(p1,p2,ncol=1,align = "v", rel_heights = c(2,1))
#' 
#' }
#' @aliases MV.cor.test, mv.cor.test
#' @keywords multivariate, replication, correlation test
#' 

`MV.cor.test` <- function(marker, gwa.1, gwa.2, R.1, R.2, traits, nrep = 10000, probs = c(0.025,0.975), method = "kendall", plot = FALSE){
  
  if((method %in% c("kendall", "pearson", "spearman")) == FALSE){
    stop("The method has to be one of 'kendall', 'pearson' or 'spearman'.")
  }
  
  R.1 <- R.1[traits, traits]
  R.2 <- R.2[traits, traits]
  
  beta.1 <- as.numeric(gwa.1[marker, paste0(traits, ".beta")])
  se.1 <- as.numeric(gwa.1[marker, paste0(traits, ".se")])
  cov.beta.1 <- diag(se.1) %*% R.1 %*% diag(se.1)
  
  
  beta.2 <- as.numeric(gwa.2[marker, paste0(traits, ".beta")])
  se.2 <- as.numeric(gwa.2[marker, paste0(traits, ".se")])
  cov.beta.2 <- diag(se.2) %*% R.2 %*% diag(se.2)
  
  A1.1 <- gwa.1[marker, "A1"]
  A1.2 <- gwa.2[marker, "A1"]
  if(A1.1 != A1.2){
    beta.2 <- -beta.2
  }
  
  cor.sample <- cor(beta.1, beta.2, method = method)
  
  n <- nrep
  beta.1.mc <- rmvnorm(n, mean = beta.1, sigma = cov.beta.1)
  beta.2.mc <- rmvnorm(n, mean = beta.2, sigma = cov.beta.2)
  
  cor.mc <- numeric(n)
  for(i in 1:n){
    cor.mc[i] <- cor(beta.1.mc[i,], beta.2.mc[i,], method = method)
  }
  ci <- quantile(cor.mc, probs = probs)
  
  df.plot <- NULL
  if(plot == TRUE && length(traits) > 2){
    agree <- function(df,i){
      sum(apply(sign(df[,i] - df[,-i]),2, prod) == 1)
    }
    rank.1 <- rank(beta.1, ties.method = "random")
    rank.2 <- rank(beta.2, ties.method = "random")
    concordance <- matrix(0,n,length(traits))
    for(i in 1:n){
      concordance[i,] <- sapply(1:length(traits), FUN = agree, df = rbind(beta.1.mc[i,],beta.2.mc[i,]))
    }
    mean.conc <- colMeans(concordance)
    sd.conc <- apply(concordance, 2, sd)
    
    se.beta <- sqrt(1 / (1/(se.1^2) + 1/(se.2^2)))
    
    df.plot <- cbind(rank.1,rank.2,mean.conc, sd.conc, se.beta)
    df.plot <- data.frame(traits, df.plot)
    colnames(df.plot) <- c("traits", "rank.1","rank.2", "mean.conc","sd.conc", "se.beta")
  }
  
  if(plot == TRUE && length(traits) == 2){
    stop("The number of traits should be greater than two to make the figure meaningful.")
  }
  
  if(length(traits) == 2){
    ratio.1 <- sum(cor.mc) / n
    res <- c(cor.sample,ratio.1)
    names(res) <- c("correlation", "ratio")
  } else{
    res <- c(cor.sample,ci)
    names(res) <- c("correlation", "ci.left", "ci.right")
  }
  if(plot == TRUE){
    return(list(res = res, df.plot = df.plot))
  } else{
    return(res)
  }
}