R/genomic_bayes.R

Defines functions bmm sortedSets neff adjustMapLD splitWithOverlap plotCvs plotBayes computeGRS computeWW computeWy computeB mtbayes mt_sbayes_sparse sbayes_region adjLDregion adjustB gmap sbayes sbayes_sparse sbayes_wy bayes gbayes

Documented in adjustB adjustMapLD gbayes gmap plotBayes splitWithOverlap

####################################################################################################################
#    Module 6: Bayesian models
####################################################################################################################
#'
#' Bayesian linear regression models
#'
#' @description
#' 
#' Bayesian linear regression (BLR) models:
#' 
#'   - unified mapping of genetic variants, estimation of genetic parameters 
#' (e.g. heritability) and prediction of disease risk) 
#' 
#' - handles different genetic architectures (few large, many small effects)
#' 
#' - scale to large data (e.g. sparse LD)
#' 
#' 
#' In the Bayesian multiple regression model the posterior density of the 
#' model parameters depend on the likelihood of the data given 
#' the parameters and a prior probability for the model parameters
#' 
#' The prior density of marker effects defines whether the model will 
#' induce variable selection and shrinkage or shrinkage only. 
#' Also, the choice of prior will define the extent and type of shrinkage induced.
#' Ideally the choice of prior for the marker effect should reflect the genetic 
#' architecture of the trait, and will vary (perhaps a lot) across traits.
#' 
#' 
#' The following prior distributions are provided:
#' 
#' Bayes N: Assigning a Gaussian prior to marker effects implies that the posterior means are the 
#' BLUP estimates (same as Ridge Regression).
#' 
#' Bayes L: Assigning a double-exponential or Laplace prior is the density used in 
#' the Bayesian LASSO
#' 
#' Bayes A: similar to ridge regression but t-distribution prior (rather than Gaussian) 
#' for the marker effects ; variance comes from an inverse-chi-square distribution instead of being fixed. Estimation 
#' via Gibbs sampling. 
#' 
#' Bayes C: uses a “rounded spike” (low-variance Gaussian) at origin many small 
#' effects can contribute to polygenic component, reduces the dimensionality of 
#' the model (makes Gibbs sampling feasible). 
#' 
#' Bayes R: Hierarchical Bayesian mixture model with 4 Gaussian components, with 
#' variances scaled by 0, 0.0001 , 0.001 , and 0.01 . 
#'
#'
#' @param y is a vector or matrix of phenotypes
#' @param X is a matrix of covariates
#' @param W is a matrix of centered and scaled genotypes
#' @param nburn is the number of burnin iterations
#' @param nit is the number of iterations
#' @param nit_global is the number of global iterations
#' @param nit_local is the number of local iterations
#' @param pi is the proportion of markers in each marker variance class (e.g. pi=c(0.999,0.001),used if method="ssvs")
#' @param h2 is the trait heritability
#' @param method specifies the methods used (method="bayesN","bayesA","bayesL","bayesC","bayesR")
#' @param algorithm specifies the algorithm
#' @param tol is tolerance, i.e. convergence criteria used in gbayes
#' @param Glist list of information about genotype matrix stored on disk
#' @param stat dataframe with marker summary statistics
#' @param covs is a list of summary statistics (output from internal cvs function)
#' @param fit is a list of results from gbayes
#' @param trait is an integer used for selection traits in covs object
#' @param chr is the chromosome for which to fit BLR models
#' @param rsids is a character vector of rsids
#' @param b is a vector or matrix of marginal marker effects 
#' @param seb is a vector or matrix of standard error of marginal effects
#' @param mask is a vector or matrix of TRUE/FALSE specifying if marker should be ignored 
#' @param bm is a vector or matrix of adjusted marker effects for the BLR model
#' @param LD is a list with sparse LD matrices
#' @param n is a scalar or vector of number of observations for each trait
#' @param vb is a scalar or matrix of marker (co)variances
#' @param vg is a scalar or matrix of genetic (co)variances
#' @param ve is a scalar or matrix of residual (co)variances
#' @param ssb_prior is a scalar or matrix of prior marker (co)variances
#' @param ssg_prior is a scalar or matrix of prior genetic (co)variances
#' @param sse_prior is a scalar or matrix of prior residual (co)variances
#' @param vg_prior is a scalar or matrix of prior genetic (co)variances
#' @param ve_prior is a scalar or matrix of prior residual (co)variances
#' @param nub is a scalar or vector of prior degrees of freedom for marker (co)variances
#' @param nug is a scalar or vector of prior degrees of freedom for prior genetic (co)variances
#' @param nue is a scalar or vector of prior degrees of freedom for prior residual (co)variances
#' @param updateB is a logical for updating marker (co)variances
#' @param updateG is a logical for updating genetic (co)variances
#' @param updateE is a logical for updating residual (co)variances
#' @param updatePi is a logical for updating pi
#' @param adjustE is a logical for adjusting residual variance
#' @param models is a list structure with models evaluated in bayesC
#' @param formatLD is a character specifying LD format (formatLD="dense" is default)
#' @param verbose is a logical; if TRUE it prints more details during iteration
#' @param scaleY is a logical; if TRUE y is centered and scaled 
#' @param msize number of markers used in compuation of sparseld
#' @param lambda is a vector or matrix of lambda values 
#' @param GRMlist is a list providing information about GRM matrix stored in binary files on disk




#' @return Returns a list structure including
#' \item{b}{vector or matrix (mxt) of posterior means for marker effects}
#' \item{d}{vector or matrix (mxt) of posterior means for marker inclusion probabilities}
#' \item{vb}{scalar or vector (t) of posterior means for marker variances}
#' \item{vg}{scalar or vector (t) of posterior means for genomic variances}
#' \item{ve}{scalar or vector (t) of posterior means for residual variances}
#' \item{rb}{matrix (txt) of posterior means for marker correlations}
#' \item{rg}{matrix (txt) of posterior means for genomic correlations}
#' \item{re}{matrix (txt) of posterior means for residual correlations}
#' \item{pi}{vector (1xnmodels) of posterior probabilities for models}
#' \item{h2}{vector (1xt) of posterior means for model probability}
#' \item{param}{ a list current parameters (same information as item listed above) used for restart of the analysis}
#' \item{stat}{matrix (mxt) of marker information and effects used for genomic risk scoring}


#' @author Peter Sørensen


#' @examples
#'
#'
#' # Simulate data and test functions
#'
#' W <- matrix(rnorm(100000),nrow=1000)
#' set1 <- sample(1:ncol(W),5)
#' set2 <- sample(1:ncol(W),5)
#' sets <- list(set1,set2)
#' g <- rowSums(W[,c(set1,set2)])
#' e <- rnorm(nrow(W),mean=0,sd=1)
#' y <- g + e
#'
#'
#' fitM <- gbayes(y=y, W=W, method="bayesN")
#' fitA <- gbayes(y=y, W=W, method="bayesA")
#' fitL <- gbayes(y=y, W=W, method="bayesL")
#' fitC <- gbayes(y=y, W=W, method="bayesC")
#'


#'
#' @export
#'

gbayes <- function(y=NULL, X=NULL, W=NULL, stat=NULL, covs=NULL, trait=NULL, fit=NULL, Glist=NULL, 
                   chr=NULL, rsids=NULL, b=NULL, bm=NULL, seb=NULL, LD=NULL, n=NULL,formatLD="dense",
                   vg=NULL, vb=NULL, ve=NULL, ssg_prior=NULL, ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=TRUE,
                   h2=NULL, pi=0.001, updateB=TRUE, updateG=TRUE, updateE=TRUE, updatePi=TRUE, adjustE=TRUE, models=NULL,
                   nug=4, nub=4, nue=4, verbose=FALSE,msize=100, mask=NULL,
                   GRMlist=NULL, ve_prior=NULL, vg_prior=NULL,tol=0.001,
                   nit=100, nburn=0, nit_local=NULL,nit_global=NULL,
                   method="mixed", algorithm="mcmc") {
  
  # mask
  mask.rsids <- NULL
  if(!is.null(mask)) mask.rsids <- unique(as.vector(apply(mask,2,function(x){as.vector(rownames(mask)[x])})))

  
  # Check methods
  methods <- c("blup","bayesN","bayesA","bayesL","bayesC","bayesR")
  method <- match(method, methods) - 1
  if( !sum(method%in%c(0:5))== 1 ) stop("Method specified not valid") 

  algorithms <- c("mcmc","em-mcmc")
  algorithm <- match(algorithm, algorithms)
  if(is.na(algorithm)) stop("algorithm argument specified not valid")
  
  if(method==0) {
    # BLUP and we do not estimate parameters
    updateB=FALSE;
    updateE=FALSE;
  }
  
    
  # Determine number of traits
  nt <- 1
  if(!is.null(y)) {
    if(is.list(y)) nt <- length(y)
    if(is.matrix(y)) nt <- ncol(y)
  }
  if(!is.null(stat)) {
    if(!is.data.frame(stat) && is.list(stat)) nt <- ncol(stat$b)
    if( any(sapply(stat[,-c(1:5)],function(x){any(!is.finite(x))}))) stop("Some elements in stat not finite")
    if( any(sapply(stat[,-c(1:5)],function(x){any(is.na(x))}))) stop("Some elements in stat are NA's")
  }
  
  # Define type of analysis
  if(!is.null(GRMlist)) analysis <- "mtmc-mixed"
  
  #if(nt==1 && !is.null(y) && !is.null(W) && algorithm=="default") 
  if(nt==1 && !is.null(y) && !is.null(W) && formatLD=="dense") 
    analysis <- "st-blr-individual-level-default"
  
  #if(nt==1 && !is.null(y) && !is.null(W) && algorithm=="sbayes") 
  if(nt==1 && !is.null(y) && !is.null(W) && formatLD=="sparse") 
    analysis <- "st-blr-individual-level-sbayes"
  
  #if(nt==1 && !is.null(y) && algorithm=="sparse")
  if(nt==1 && !is.null(y) && formatLD=="sparse") 
    analysis <- "st-blr-individual-level-sparse-ld"
  
  #if( nt==1 && !is.null(y) &&  algorithm=="dense") 
  if( nt==1 && !is.null(y) &&  formatLD=="dense") 
    analysis <- "st-blr-individual-level-dense-ld"
  
  if( nt==1 && is.null(y) && !is.null(stat) && !is.null(Glist)) 
    analysis <- "st-blr-sumstat-sparse-ld"
  
  if(nt>1 && !is.null(y) && !is.null(W))
    analysis <- "mt-blr-individual-level-dense-ld"

  #if(nt>1 && !is.null(y) && algorithm=="sparse") 
  if(nt>1 && !is.null(y) && formatLD=="sparse") 
    analysis <- "mt-blr-individual-level-sparse-ld"
  
  #if( nt>1 && !is.null(stat) && !is.null(Glist) && algorithm=="default") 
  #if( nt>1 && !is.null(stat) && !is.null(Glist) && formatLD=="sparse") 
  if( nt>1 && !is.null(stat) && !is.null(Glist)) 
    analysis <- "mt-blr-sumstat-sparse-ld"
  
  # fix this  
  #if( nt>1 && !is.null(stat) && !is.null(Glist) && algorithm=="serial") 
  #  analysis <- "st-blr-sumstat-sparse-ld"
  # end fix this
  
  message(paste("Type of analysis performed:",analysis))  
  
  
  ##############################################################################
  # Different work flows
  ##############################################################################
  
  # Single and multiple trait BLR based on GRMs    
  if(!is.null(GRMlist)) {
    fit <- bmm(y=y, X=X, W=W, GRMlist=GRMlist,
               vg=vg, ve=ve, nug=nug, nue=nue,
               vg_prior=vg_prior, ve_prior=ve_prior,
               updateG=updateB, updateE=updateE,
               nit=nit, nburn=nburn, tol=tol, verbose=verbose) 
  } 
  
  
  # Single trait BLR using y and W   
  #if(nt==1 && !is.null(y) && !is.null(W) && algorithm=="default") {
  if(nt==1 && !is.null(y) && !is.null(W) && formatLD=="dense") {
    
    fit <- bayes(y=y, X=X, W=W, b=b, 
                 bm=bm, seb=seb, LD=LD, n=n,
                 vg=vg, vb=vb, ve=ve, 
                 ssb_prior=ssb_prior, sse_prior=sse_prior, lambda=lambda, scaleY=scaleY,
                 h2=h2, pi=pi, updateB=updateB, updateE=updateE, updatePi=updatePi, models=models,
                 nub=nub, nue=nue, nit=nit, method=method, formatLD=formatLD, algorithm=algorithm)  
  }
  
  
  # Single trait BLR using y and W and sbayes method 
  #if(nt==1 && !is.null(y) && !is.null(W) && algorithm=="sbayes") {
  if(nt==1 && !is.null(y) && !is.null(W) && formatLD=="sparse") {
    
    fit <- sbayes_wy(y=y, X=X, W=W, b=b, bm=bm, seb=seb, LD=LD, n=n,
                  vg=vg, vb=vb, ve=ve, 
                  ssb_prior=ssb_prior, sse_prior=sse_prior, lambda=lambda, scaleY=scaleY,
                  h2=h2, pi=pi, updateB=updateB, updateE=updateE, updatePi=updatePi, models=models,
                  nub=nub, nue=nue, nit=nit, method=method, formatLD=formatLD, algorithm=algorithm)  
  }
  
  # Multiple trait BLR using y and W
  if(nt>1 && !is.null(y) && !is.null(W)) {
    fit <- mtbayes(y=y, X=X, W=W, b=b, bm=bm, seb=seb, LD=LD, n=n,
                   vg=vg, vb=vb, ve=ve, 
                   ssb_prior=ssb_prior, sse_prior=sse_prior, lambda=lambda, scaleY=scaleY,
                   h2=h2, pi=pi, updateB=updateB, updateE=updateE, updatePi=updatePi, models=models,
                   nub=nub, nue=nue, nit=nit, method=method, formatLD=formatLD, algorithm=algorithm, verbose=verbose) 
  }
  
  
  # Single trait BLR using y and sparse LD provided Glist
  #if( nt==1 && !is.null(y) && algorithm=="sparse") {
  if( nt==1 && !is.null(y) && formatLD=="sparse") {
    
    if(is.null(Glist)) stop("Please provide Glist")
    fit <- NULL
    if(is.matrix(y)) ids <- rownames(y)
    if(is.vector(y)) ids <- names(y)
    rws <- match(ids,Glist$ids)
    if(any(is.na(rws))) stop("some elements in names(y) does not match elements in Glist$ids ")       
    n <- length(y)
    
    if(is.null(chr)) chromosomes <- 1:Glist$nchr
    if(!is.null(chr)) chromosomes <- chr
    
    bm <- dm <- fit <- stat <- vector(length=Glist$nchr,mode="list")
    names(bm) <- names(dm) <- names(fit) <- names(stat) <- 1:Glist$nchr
    
    yy <- sum((y-mean(y))**2)
    
    if(is.null(covs)) {
      covs <- vector(length=Glist$nchr,mode="list")
      names(covs) <- 1:Glist$nchr
      for (chr in chromosomes){
        print(paste("Computing summary statistics for chromosome:",chr))
        covs[[chr]] <- cvs(y=y,Glist=Glist,chr=chr)
      }
    } 
    
    if(is.null(nit_local)) nit_local <- nit
    if(is.null(nit_global)) nit_global <- 1
    
    for (it in 1:nit_global) {
      for (chr in chromosomes){
        if(verbose) print(paste("Extract sparse LD matrix for chromosome:",chr))
        LD <- getSparseLD(Glist = Glist, chr = chr, onebased=FALSE)
        LD$values <- lapply(LD$values,function(x){x*n})
        rsidsLD <- names(LD$values)
        clsLD <- match(rsidsLD,Glist$rsids[[chr]])
        wy <- covs[[chr]][rsidsLD,"wy"]
        b <- rep(0,length(wy))
        if(it>1) {
          b <- fit[[chr]]$b
          if(updateB) vb <- fit[[chr]]$param[1]
          if(updateE) ve <- fit[[chr]]$param[2]
          if(updatePi) pi <- fit[[chr]]$param[3]
        }
        stop("Need to add ww and mask to sbayes_sparse - use cvs function to get it")
        if(verbose) print( paste("Fit",methods[method+1] ,"on chromosome:",chr))
        fit[[chr]] <- sbayes_sparse(yy=yy, 
                                    wy=wy,
                                    b=b, 
                                    LDvalues=LD$values, 
                                    LDindices=LD$indices, 
                                    method=method, 
                                    nit=nit_local, 
                                    nburn=nburn, 
                                    n=n, 
                                    pi=pi,
                                    nue=nue, 
                                    nub=nub, 
                                    h2=h2, 
                                    lambda=lambda, 
                                    vb=vb, 
                                    ve=ve, 
                                    updateB=updateB, 
                                    updateE=updateE, 
                                    updatePi=updatePi,
                                    algorithm=algorithm)
        stat[[chr]] <- data.frame(rsids=rsidsLD,chr=rep(chr,length(rsidsLD)),
                                  pos=Glist$pos[[chr]][clsLD], ea=Glist$a1[[chr]][clsLD],
                                  nea=Glist$a2[[chr]][clsLD], eaf=Glist$af[[chr]][clsLD],
                                  bm=fit[[chr]]$bm,dm=fit[[chr]]$dm,stringsAsFactors = FALSE)
        rownames(stat[[chr]]) <- rsidsLD
      }
    }
    stat <- do.call(rbind, stat)
    rownames(stat) <- stat$rsids
    fit$stat <- stat
    fit$stat$vm <- 2*(1-fit$stat$eaf)*fit$stat$eaf*fit$stat$bm^2
    fit$method <- methods[method+1]
    fit$covs <- covs
  }
  
  
  # Single trait BLR using y and dense LD
  #if( nt==1 && !is.null(y) &&  algorithm=="dense") {
  # if( nt==1 && !is.null(y) &&  formatLD=="dense") {
  #   
  #   overlap <- 0
  #   
  #   if(is.null(Glist)) stop("Please provide Glist")
  #   fit <- NULL
  #   if(is.matrix(y)) ids <- rownames(y)
  #   if(is.vector(y)) ids <- names(y)
  #   rws <- match(ids,Glist$ids)
  #   if(any(is.na(rws))) stop("some elements in names(y) does not match elements in Glist$ids ")       
  #   n <- length(y)
  #   
  #   if(is.null(chr)) chromosomes <- 1:Glist$nchr
  #   if(!is.null(chr)) chromosomes <- chr
  #   
  #   rsids <- unlist(Glist$rsidsLD)
  #   cls <- lapply(Glist$rsids,function(x) { 
  #     splitWithOverlap(na.omit(match(rsids,x)),msize,0)})
  #   vblist <- lapply(sapply(cls,length),function(x) 
  #   {vector(length=x, mode="numeric")})
  #   velist <- lapply(sapply(cls,length),function(x) 
  #   {vector(length=x, mode="numeric")})
  #   pilist <- lapply(sapply(cls,length),function(x) 
  #   {vector(length=x, mode="numeric")})
  #   b <- lapply(Glist$mchr,function(x){rep(0,x)})
  #   bm <- lapply(Glist$mchr,function(x){rep(0,x)})
  #   dm <- lapply(Glist$mchr,function(x){rep(0,x)})
  #   
  #   if(is.null(nit_local)) nit_local <- nit
  #   if(is.null(nit_global)) nit_global <- 1
  #   
  #   for (it in 1:nit_global) {
  #     e <- y-mean(y)
  #     yy <- sum(e**2)
  #     for (chr in 1:length(Glist$nchr)) {
  #       for (i in 1:length(cls[[chr]])) {
  #         wy <- computeWy(y=e,Glist=Glist,chr=chr,cls=cls[[chr]][[i]])
  #         WW <- computeWW(Glist=Glist, chr=chr, cls=cls[[chr]][[i]], rws=rws)
  #         if(it>1) {
  #           if(updateB) vb <- vblist[[chr]][i]
  #           if(updateE) ve <- velist[[chr]][i]
  #           if(updatePi) pi <- pilist[[chr]][i]
  #         }
  #         fitS <- computeB(wy=wy, yy=yy, WW=WW, n=n,
  #                          b=b[[chr]][cls[[chr]][[i]]],
  #                          ve=ve, vb=vb, pi=pi,
  #                          nub=nub, nue=nue,
  #                          updateB=updateB, updateE=updateE, updatePi=updatePi,
  #                          nit=nit, nburn=nburn, method=method) 
  #         b[[chr]][cls[[chr]][[i]]] <- fitS$b
  #         bm[[chr]][cls[[chr]][[i]]] <- fitS$bm
  #         dm[[chr]][cls[[chr]][[i]]] <- fitS$dm
  #         vblist[[chr]][i] <- fitS$param[1]
  #         velist[[chr]][i] <- fitS$param[2]
  #         pilist[[chr]][i] <- fitS$param[3]
  #         grs <- computeGRS(Glist = Glist, chr = chr, 
  #                           cls = cls[[chr]][[i]], 
  #                           b=bm[[chr]][cls[[chr]][[i]]])  
  #         e <- e - grs[rws,]
  #       }
  #     }
  #   }   
  #   bm <- unlist(bm)
  #   dm <- unlist(dm)
  #   names(bm) <- names(dm) <- unlist(Glist$rsids)
  #   rsids2rws <- match(rsids,unlist(Glist$rsids))
  #   stat <- data.frame(rsids=rsids,
  #                      chr=unlist(Glist$chr)[rsids2rws],
  #                      pos=unlist(Glist$pos)[rsids2rws], 
  #                      ea=unlist(Glist$a1)[rsids2rws],
  #                      nea=unlist(Glist$a2)[rsids2rws], 
  #                      eaf=unlist(Glist$af)[rsids2rws],
  #                      bm=bm[rsids],
  #                      dm=dm[rsids], stringsAsFactors = FALSE)
  #   fit$stat <- stat
  #   fit$stat$vm <- 2*(1-fit$stat$eaf)*fit$stat$eaf*fit$stat$bm^2
  #   fit$method <- methods[method+1]
  #   
  # }
  
  # Single trait BLR using summary statistics and sparse LD provided in Glist
  if(analysis=="st-blr-sumstat-sparse-ld") {
    
    # single trait summary statistics
    if(is.data.frame(stat)) {
      
      nt <- 1
      rsidsLD <- unlist(Glist$rsidsLD)
      m <- length(rsidsLD)
      b <- wy <- ww <- matrix(0,nrow=length(rsidsLD),ncol=nt)
      mask <- matrix(TRUE,nrow=length(rsidsLD),ncol=nt)
      rownames(b) <- rownames(wy) <- rownames(ww) <- rownames(mask) <- rsidsLD
      trait_names <- "bm"     
      
      stat <- stat[rownames(stat)%in%rsidsLD,]
      if(is.null(stat$ww)) stat$ww <- 1/(stat$seb^2 + stat$b^2/stat$n)
      if(is.null(stat$wy)) stat$wy <- stat$b*stat$ww
      if(!is.null(stat$n)) n <- as.integer(median(stat$n))
      ww[rownames(stat),1] <-  stat$ww
      wy[rownames(stat),1] <- stat$wy
      mask[rownames(stat),1] <- FALSE
      if(!is.null(mask.rsids)) mask[rownames(mask)%in%mask.rsids,1] <- TRUE
      if(any(is.na(wy))) stop("Missing values in wy")
      if(any(is.na(ww))) stop("Missing values in ww")
      
      b2 <- stat$b^2
      seb2 <- stat$seb^2
      #yy <- (b2 + (n-2)*seb2)*ww[rownames(stat),]
      yy <- (b2 + (n-2)*seb2)*stat$ww
      yy <- median(yy)
    }
    
    # multiple trait summary statistics
    if( !is.data.frame(stat) && is.list(stat)) {
      
      nt <- ncol(stat$b)
      trait_names <- colnames(stat$b)
      if(is.null(trait_names)) trait_names <- paste0("T",1:nt)
      rsidsLD <- unlist(Glist$rsidsLD)
      m <- length(rsidsLD)
      b <- wy <- ww <- matrix(0,nrow=length(rsidsLD),ncol=nt)
      mask <- matrix(TRUE,nrow=length(rsidsLD),ncol=nt)
      rownames(b) <- rownames(wy) <- rownames(ww) <- rownames(mask) <- rsidsLD
      colnames(b) <- colnames(wy) <- colnames(ww) <- colnames(mask) <- trait_names
      
      rws <- rownames(stat$b)%in%rsidsLD
      if(is.null(stat$ww)) stat$ww <- 1/(stat$seb^2 + stat$b^2/stat$n)
      if(is.null(stat$wy)) stat$wy <- stat$b*stat$ww
      if(!is.null(stat$n)) n <- as.integer(apply(stat$n[rws,],2,median))
      ww[rownames(stat$ww[rws,]),] <- stat$ww[rws,]
      wy[rownames(stat$wy[rws,]),] <- stat$wy[rws,]
      mask[rownames(stat$wy[rws,]),] <- FALSE
      if(!is.null(mask.rsids)) mask[rownames(mask)%in%mask.rsids,] <- TRUE
      
      if(any(is.na(wy))) stop("Missing values in wy")
      if(any(is.na(ww))) stop("Missing values in ww")
      
      b2 <- (stat$b[rws,])^2
      seb2 <- (stat$seb[rws,])^2
      for (i in 1:nt) {
        seb2[,i] <- (n[i]-2)*seb2[,i]
      }
      yy <- (b2 + seb2)*stat$ww[rws,]
      yy <- apply(yy,2,median)
      
    }

    if(is.null(chr)) chromosomes <- 1:Glist$nchr
    if(!is.null(chr)) chromosomes <- chr
    if(is.null(LD)) LD <- vector(length=Glist$nchr,mode="list")
    
    bm <- dm <- fit <- res <- vector(length=Glist$nchr,mode="list")
    names(bm) <- names(dm) <- names(fit) <- names(res) <- 1:Glist$nchr
    for (chr in chromosomes){
      if(is.null(LD[[chr]])) {
        if(verbose) print(paste("Extract sparse LD matrix for chromosome:",chr))
        LD[[chr]] <- getSparseLD(Glist = Glist, chr = chr, onebased=FALSE)
      } 
      rsidsLD <- names(LD[[chr]]$values)
      clsLD <- match(rsidsLD,Glist$rsids[[chr]])
      bmchr <- dmchr <- NULL
      for (trait in 1:nt) {
        if(verbose) print( paste("Fit",methods[method+1], "on chromosome:",chr,"for trait",trait))
        #LDvalues <- LD[[chr]]$values
        fit[[chr]] <- sbayes_sparse(yy=yy[trait], 
                                    wy=wy[rsidsLD,trait],
                                    ww=ww[rsidsLD,trait],
                                    b=b[rsidsLD,trait], 
                                    LDvalues=LD[[chr]]$values, 
                                    LDindices=LD[[chr]]$indices,
                                    mask=mask[rsidsLD,trait],
                                    method=method, 
                                    nit=nit, 
                                    nburn=nburn, 
                                    n=n[trait],
                                    m=m,
                                    pi=pi,
                                    nue=nue, 
                                    nub=nub, 
                                    h2=h2[trait], 
                                    lambda=lambda, 
                                    vb=vb, 
                                    ve=ve,
                                    ssb_prior=ssb_prior, 
                                    sse_prior=sse_prior,
                                    updateB=updateB, 
                                    updateE=updateE, 
                                    updatePi=updatePi,
                                    updateG=updateG,
                                    adjustE=adjustE,
                                    algorithm=algorithm)
        bmchr <- cbind(bmchr, fit[[chr]]$bm)
        dmchr <- cbind(dmchr, fit[[chr]]$dm)
      }
      colnames(bmchr) <- trait_names
      res[[chr]] <- data.frame(rsids=rsidsLD,chr=rep(chr,length(rsidsLD)),
                               pos=Glist$pos[[chr]][clsLD], ea=Glist$a1[[chr]][clsLD],
                               nea=Glist$a2[[chr]][clsLD], eaf=Glist$af[[chr]][clsLD],
                               bm=bmchr, dm=dmchr, stringsAsFactors = FALSE)
      rownames(res[[chr]]) <- rsidsLD
      LD[[chr]]$values <- NULL
      LD[[chr]]$indices <- NULL
    }
    res <- do.call(rbind, res)
    rownames(res) <- res$rsids
    fit$stat <- res
    fit$stat$vm <- 2*(1-fit$stat$eaf)*fit$stat$eaf*fit$stat$bm^2
    fit$method <- methods[method+1]
    
    #fit$ves <- lapply(fit[chromosomes],function(x){x$ves})
    #fit$vgs <- lapply(fit[chromosomes],function(x){x$vgs})
    #fit$vbs <- lapply(fit[chromosomes],function(x){x$vbs})
    #fit$pis <- lapply(fit[chromosomes],function(x){x$pis})
    #fit$pim <- lapply(fit[chromosomes],function(x){x$pim})
    #fit$param <- lapply(fit[chromosomes],function(x){x$param})
    
    fit$ves <- lapply(fit[1:22],function(x){x$ves})
    fit$vgs <- lapply(fit[1:22],function(x){x$vgs})
    fit$vbs <- lapply(fit[1:22],function(x){x$vbs})
    fit$pis <- lapply(fit[1:22],function(x){x$pis})
    fit$pim <- lapply(fit[1:22],function(x){x$pim})
    fit$param <- lapply(fit[1:22],function(x){x$param})

    fit$mask <- mask
    zve <- sapply(fit$ves[chromosomes],function(x){coda::geweke.diag(x[nburn:length(x)])$z})
    zvg <- sapply(fit$vgs[chromosomes],function(x){coda::geweke.diag (x[nburn:length(x)])$z})
    zvb <- sapply(fit$vbs[chromosomes],function(x){coda::geweke.diag(x[nburn:length(x)])$z})
    zpi <- sapply(fit$pis[chromosomes],function(x){coda::geweke.diag(x[nburn:length(x)])$z})
    ve <- sapply(fit$ves[chromosomes],function(x){mean(x[nburn:length(x)])})
    vg <- sapply(fit$vgs[chromosomes],function(x){mean(x[nburn:length(x)])})
    vb <- sapply(fit$vbs[chromosomes],function(x){mean(x[nburn:length(x)])})
    pi <- sapply(fit$pim[chromosomes],function(x){1-x[1]})
    fit$conv <- data.frame(zve=zve,zvg=zvg, zvb=zvb, zpi=zpi)  
    fit$post <- data.frame(ve=ve,vg=vg, vb=vb,pi=pi)  
    fit$ve <- mean(ve)
    fit$vg <- sum(vg)
    fit[1:22] <- NULL
    
    
    
    
  }
  
  # fit$b vector or matrix (m or mxt)
  # fit$d vector or matrix (m or mxt)
  # fit$vb scalar or vector (t)
  # fit$vg scalar or vector (t)
  # fit$ve scalar or vector (t)
  # fit$rb matrix (txt)
  # fit$rg matrix (txt)
  # fit$re matrix (txt)
  # fit$pi vector (models)
  # fit$h2 scalar or vector (t)
  # $param
  # $stat
  
  # Multi trait BLR using summary statistics and sparse LD provided in Glist
  #  if( nt>1 && is.null(y) && !is.null(stat) && !is.null(Glist)) {
  
  if(analysis=="mt-blr-sumstat-sparse-ld") {
    
    # multiple trait summary statistics
    
    nt <- ncol(stat$b)
    trait_names <- colnames(stat$b)
    if(is.null(trait_names)) trait_names <- paste0("T",1:nt)
    rsidsLD <- unlist(Glist$rsidsLD)
    b <- wy <- ww <- matrix(0,nrow=length(rsidsLD),ncol=nt)
    rownames(b) <- rownames(wy) <- rownames(ww) <- rsidsLD
    colnames(b) <- colnames(wy) <- colnames(ww) <- trait_names
    
    rws <- rownames(stat$b)%in%rsidsLD
    #if(is.null(stat$ww)) stat$ww <- 1/(stat$seb^2 + (stat$b^2)/stat$n)
    #if(is.null(stat$wy)) stat$wy <- stat$b*stat$n
    if(!is.null(stat$n)) n <- as.integer(colMeans(stat$n[rws,]))
    if(!is.null(stat$ww)) ww[rownames(stat$ww[rws,]),] <- stat$ww[rws,]
    if(is.null(stat$ww)) ww[rownames(stat$n[rws,]),] <- stat$n[rws,]
    if(!is.null(stat$wy)) wy[rownames(stat$wy[rws,]),] <- stat$wy[rws,]
    if(is.null(stat$wy)) wy[rownames(stat$b[rws,]),] <- stat$b[rws,]*stat$n[rws,]
    if(any(is.na(wy))) stop("Missing values in wy")
    if(any(is.na(ww))) stop("Missing values in ww")
    
    b2 <- (stat$b[rws,])^2
    seb2 <- (stat$seb[rws,])^2
    for (i in 1:nt) {
      seb2[,i] <- (n[i]-2)*seb2[,i]
    }
    yy <- (b2 + seb2)*stat$ww[rws,]
    yy <- colMeans(yy)
    
    if(is.null(chr)) chromosomes <- 1:Glist$nchr
    if(!is.null(chr)) chromosomes <- chr
    if(is.null(LD)) LD <- vector(length=Glist$nchr,mode="list")
    
    bm <- dm <- fit <- res <- vector(length=Glist$nchr,mode="list")
    names(bm) <- names(dm) <- names(fit) <- names(res) <- 1:Glist$nchr
    for (chr in chromosomes){
      if(is.null(LD[[chr]])) {
        if(verbose) print(paste("Extract sparse LD matrix for chromosome:",chr))
        LD[[chr]] <- getSparseLD(Glist = Glist, chr = chr, onebased=FALSE)
      }
      rsidsLD <- names(LD[[chr]]$values)
      clsLD <- match(rsidsLD,Glist$rsids[[chr]])
      bmchr <- NULL
      fit[[chr]] <- mt_sbayes_sparse(yy=yy,
                                     ww=ww[rsidsLD,],
                                     wy=wy[rsidsLD,],
                                     b=b[rsidsLD,],
                                     LDvalues=LD[[chr]]$values,
                                     LDindices=LD[[chr]]$indices,
                                     n=n,
                                     nit=nit,
                                     pi=pi,
                                     nue=nue,
                                     nub=nub,
                                     h2=h2,
                                     vb=vb,
                                     ve=ve,
                                     ssb_prior=ssb_prior,
                                     sse_prior=sse_prior,
                                     updateB=updateB,
                                     updateE=updateE,
                                     updatePi=updatePi,
                                     models=models,
                                     method=method,
                                     verbose=verbose)
      res[[chr]] <- data.frame(rsids=rsidsLD,chr=rep(chr,length(rsidsLD)),
                               pos=Glist$pos[[chr]][clsLD], ea=Glist$a1[[chr]][clsLD],
                               nea=Glist$a2[[chr]][clsLD], eaf=Glist$af[[chr]][clsLD],
                               bm=fit[[chr]]$bm, dm=fit[[chr]]$dm,  stringsAsFactors = FALSE)
      rownames(res[[chr]]) <- rsidsLD
      LD[[chr]]$values <- NULL
      LD[[chr]]$indices <- NULL
    }
    res <- do.call(rbind, res)
    rownames(res) <- res$rsids
    fit$stat <- res
    fit$method <- methods[method+1]
    fit$stat$vm <- 2*(1-fit$stat$eaf)*fit$stat$eaf*fit$stat$bm^2
    
  }
  return(fit)
  
}

##############################################################################
# Core functions used in work flows
##############################################################################


# Single trait BLR based on individual level data 
bayes <- function(y=NULL, X=NULL, W=NULL, b=NULL, bm=NULL, seb=NULL, LD=NULL, n=NULL,
                  vg=NULL, vb=NULL, ve=NULL, 
                  ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=NULL,
                  h2=NULL, pi=NULL, updateB=NULL, updateE=NULL, updatePi=NULL, models=NULL,
                  nub=NULL, nue=NULL, nit=NULL, method=NULL, formatLD=NULL, algorithm=NULL) {
  ids <- NULL
  if(is.matrix(y)) ids <- rownames(y)
  if(is.vector(y)) ids <- names(y)
  if(scaleY) y <- as.vector(scale(y)) 
  n <- nrow(W)
  m <- ncol(W)
  
  #if(is.null(ids)) warning("No names/rownames provided for y")
  #if(is.null(rownames(W))) warning("No names/rownames provided for W")
  if(!is.null(ids) & !is.null(rownames(W))) {
    if(any(is.na(match(ids,rownames(W))))) stop("Names/rownames for y does match rownames for W")
  }
  vy <- var(y)
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- vy*(1-h2)
  if(is.null(vg)) vg <- vy*h2
  if(method<4 && is.null(vb)) vb <- vg/m
  if(method>=4 && is.null(vb)) vb <- vg/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/(m*pi))
  if(is.null(sse_prior)) sse_prior <- ((nue-2.0)/nue)*ve
  if(is.null(b)) b <- rep(0,m)

  pi <- c(1-pi,pi)
  gamma <- c(0,1.0)
  if(method==5) pi <- c(0.95,0.02,0.02,0.01)
  if(method==5) gamma <- c(0,0.01,0.1,1.0)
  
  #print(h2)
  #print(vy)
  #print(vb)
  #print(vg)
  #print(ve)
  #print(ssb_prior)
  #print(sse_prior)
  #print(pi)
  
  #if(algorithm=="default") {
  if(formatLD=="dense") {
    fit <- .Call("_qgg_bayes",
                 y=y, 
                 W=split(W, rep(1:ncol(W), each = nrow(W))), 
                 b=b,
                 lambda = lambda,
                 pi = pi,
                 gamma = gamma,
                 vg = vg,
                 vb = vb,
                 ve = ve,
                 ssb_prior=ssb_prior,
                 sse_prior=sse_prior,
                 nub=nub,
                 nue=nue,
                 updateB = updateB,
                 updateE = updateE,
                 updatePi = updatePi,
                 nit=nit,
                 method=as.integer(method)) 
    ids <- rownames(W)
    names(fit[[1]]) <- names(fit[[2]]) <- names(fit[[10]]) <- colnames(W)
    #fit[[7]] <- crossprod(t(W),fit[[10]])[,1]
    #names(fit[[7]]) <- names(fit[[8]]) <- ids
    names(fit[[8]]) <- ids
    #names(fit) <- c("bm","dm","coef","vb","ve","pi","g","e","param","b")
    #names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pi","g","param","b")
    names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pim","g","b","d","param")
    
  } 
  return(fit)
}

# Single trait BLR based on individual level data based on fast algorithm  
sbayes_wy <- function(y=NULL, X=NULL, W=NULL, b=NULL, bm=NULL, seb=NULL, LD=NULL, n=NULL,
                   vg=NULL, vb=NULL, ve=NULL, 
                   ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=NULL,
                   h2=NULL, pi=NULL, updateB=NULL, updateE=NULL, updatePi=NULL, models=NULL,
                   nub=NULL, nue=NULL, nit=NULL, method=NULL, formatLD=NULL, algorithm=NULL) {
  
  ids <- NULL
  if(is.matrix(y)) ids <- rownames(y)
  if(is.vector(y)) ids <- names(y)
  
  if(scaleY) y <- as.vector(scale(y)) 
  
  wy <- as.vector(crossprod(W,y))           
  yy <- sum((y-mean(y))**2)
  
  n <-nrow(W)       
  
  if(!is.null(W) && is.null(LD)) {
    n <- nrow(W)
    LD <- crossprod(W)
  }    
  m <- ncol(LD)
  
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  
  if(is.null(ve)) ve <- 1
  if(method<4 && is.null(vb)) vb <- (ve*h2)/m
  if(method>=4 && is.null(vb)) vb <- (ve*h2)/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(is.null(vg)) vg <- ve*h2
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m*pi)
  if(is.null(sse_prior)) sse_prior <- ((nue-2.0)/nue)*ve
  if(is.null(b)) b <- rep(0,m)
  
  fit <- .Call("_qgg_sbayes",
               wy=wy, 
               LD=split(LD, rep(1:ncol(LD), each = nrow(LD))), 
               b = b,
               lambda = lambda,
               yy = yy,
               pi = pi,
               vg = vg,
               vb = vb,
               ve = ve,
               ssb_prior=ssb_prior,
               sse_prior=sse_prior,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               n=n,
               nit=nit,
               method=as.integer(method))
  names(fit[[1]]) <- rownames(LD)
  if(!is.null(W)) fit[[7]] <- crossprod(t(W),fit[[10]])[,1]
  names(fit[[7]]) <- ids
  stop("check fit names")
  names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pi","r","param","b")
  
  return(fit)
  
}


# Single trait BLR using summary statistics and sparse LD provided in Glist 
sbayes_sparse <- function(yy=NULL, wy=NULL, ww=NULL, b=NULL, bm=NULL, seb=NULL, 
                          LDvalues=NULL,LDindices=NULL, n=NULL, m=NULL, mask=NULL,
                          vg=NULL, vb=NULL, ve=NULL, 
                          ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=NULL,
                          h2=NULL, pi=NULL, updateB=NULL, updateE=NULL, updatePi=NULL, 
                          updateG=NULL, adjustE=NULL, models=NULL,
                          nub=NULL, nue=NULL, nit=NULL, nburn=NULL, method=NULL, algorithm=NULL, verbose=NULL) {

  if(is.null(m)) m <- length(LDvalues)
  vy <- yy/(n-1)
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- vy*(1-h2)
  if(is.null(vg)) vg <- vy*h2
  if(method<4 && is.null(vb)) vb <- vg/m
  if(method>=4 && is.null(vb)) vb <- vg/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/(m*pi))
  if(is.null(sse_prior)) sse_prior <- ((nue-2.0)/nue)*ve
  if(is.null(b)) b <- rep(0,m)
  
  pi <- c(1-pi,pi)
  gamma <- c(0,1.0)
  if(method==5) pi <- c(0.95,0.02,0.02,0.01)
  if(method==5) gamma <- c(0,0.01,0.1,1.0)

  seed <- sample.int(.Machine$integer.max, 1)
  
  fit <- .Call("_qgg_sbayes_spa",
               wy=wy, 
               ww=ww, 
               LDvalues=LDvalues, 
               LDindices=LDindices, 
               b = b,
               lambda = lambda,
               mask=mask,
               yy = yy,
               pi = pi,
               gamma = gamma,
               vg = vg,
               vb = vb,
               ve = ve,
               ssb_prior=ssb_prior,
               sse_prior=sse_prior,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               updateG = updateG,
               adjustE = adjustE,
               n=n,
               nit=nit,
               nburn=nburn,
               method=as.integer(method),
               algo=as.integer(algorithm),
               seed=seed)
  names(fit[[1]]) <- names(LDvalues)
  names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pis","pim","r","b","param")
  return(fit)
}

# Single trait BLR using summary statistics and sparse LD provided in Glist
sbayes <- function(stat=NULL, b=NULL, seb=NULL, n=NULL,
                   LD=NULL, LDvalues=NULL,LDindices=NULL,
                   mask=NULL, lambda=NULL,
                   vg=NULL, vb=NULL, ve=NULL, h2=NULL, pi=NULL,
                   ssb_prior=NULL, sse_prior=NULL, nub=4, nue=4,
                   updateB=TRUE, updateE=TRUE, updatePi=TRUE, updateG=TRUE,
                   adjustE=TRUE, models=NULL,
                   nit=500, nburn=100, method="bayesC", algorithm=1, verbose=FALSE) {
  
  # Check method
  methods <- c("blup","bayesN","bayesA","bayesL","bayesC","bayesR")
  method <- match(method, methods) - 1
  if( !sum(method%in%c(0:5))== 1 ) stop("Method specified not valid")
  
  # Prepare summary statistics input
  if( is.null(stat) ) stop("Please provide summary statistics")
  m <- nrow(stat)
  if(is.null(mask)) mask <- rep(FALSE, m)
  if(is.null(stat$n)) stat$n <- stat$dfe
  if( is.null(stat$n) ) stop("Please provide summary statistics that include n")
  
  n <- as.integer(median(stat$n))
  ww <- 1/(stat$seb^2 + stat$b^2/stat$n)
  wy <- stat$b*ww
  if(!is.null(stat$ww)) ww <- stat$ww
  if(!is.null(stat$wy)) wy <- stat$wy
  
  b2 <- stat$b^2
  seb2 <- stat$seb^2
  yy <- (b2 + (stat$n-2)*seb2)*ww
  yy <- median(yy)
  
  # Prepare sparse LD matrix
  if( is.null(LD) ) stop("Please provide LD matrix")
  LDvalues <- split(LD, rep(1:ncol(LD), each = nrow(LD)))
  LDindices <- lapply(1:ncol(LD),function(x) { (1:ncol(LD))-1 } )
  
  # Prepare starting parameters
  vy <- yy/(n-1)
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- vy*(1-h2)
  if(is.null(vg)) vg <- vy*h2
  if(method<4 && is.null(vb)) vb <- vg/m
  if(method>=4 && is.null(vb)) vb <- vg/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/(m*pi))
  if(is.null(sse_prior)) sse_prior <- ((nue-2.0)/nue)*ve
  if(is.null(b)) b <- rep(0,m)
  
  pi <- c(1-pi,pi)
  gamma <- c(0,1.0)
  if(method==5) pi <- c(0.95,0.02,0.02,0.01)
  if(method==5) gamma <- c(0,0.01,0.1,1.0)
  
  seed <- sample.int(.Machine$integer.max, 1)
  
  fit <- .Call("_qgg_sbayes_spa",
               wy=wy,
               ww=ww,
               LDvalues=LDvalues,
               LDindices=LDindices,
               b = b,
               lambda = lambda,
               mask=mask,
               yy = yy,
               pi = pi,
               gamma = gamma,
               vg = vg,
               vb = vb,
               ve = ve,
               ssb_prior=ssb_prior,
               sse_prior=sse_prior,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               updateG = updateG,
               adjustE = adjustE,
               n=n,
               nit=nit,
               nburn=nburn,
               method=as.integer(method),
               algo=as.integer(algorithm),
               seed=seed)
  names(fit[[1]]) <- names(LDvalues)
  names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pis","pim","r","b","param")
  return(fit)
}

################################################################################
# Single trait fine-mapping BLR using summary statistics and sparse LD provided in Glist 
# gmap full version

#' Finemapping using Bayesian Linear Regression Models
#'
#' This function implements Bayesian linear regression models to provide unified mapping of 
#' genetic variants, estimate genetic parameters (e.g. heritability), and predict disease risk.
#' It is designed to handle various genetic architectures and scale efficiently with large datasets.
#'
#' @description
#' In the Bayesian multiple regression model, the posterior density of the model parameters depends
#' on the likelihood of the data given the parameters and a prior probability for the model parameters.
#' The choice of the prior for marker effects can influence the type and extent of shrinkage induced in the model.
#'
#' @param y A vector or matrix of phenotypes.
#' @param X A matrix of covariates.
#' @param W A matrix of centered and scaled genotypes.
#' @param nburn Number of burnin iterations.
#' @param nit Number of iterations.
#' @param nit_global Number of global iterations.
#' @param nit_local Number of local iterations.
#' @param pi Proportion of markers in each marker variance class.
#' @param h2 Trait heritability.
#' @param method Method used (e.g. "bayesN","bayesA","bayesL","bayesC","bayesR").
#' @param algorithm Specifies the algorithm.
#' @param tol Convergence criteria used in gbayes.
#' @param Glist List of information about genotype matrix stored on disk.
#' @param stat Dataframe with marker summary statistics.
#' @param fit List of results from gbayes.
#' @param trait Integer used for selection traits in covs object.
#' @param chr Chromosome for which to fit BLR models.
#' @param rsids Character vector of rsids.
#' @param b Vector or matrix of marginal marker effects.
#' @param seb Vector or matrix of standard error of marginal effects.
#' @param mask Vector or matrix specifying if marker should be ignored.
#' @param bm Vector or matrix of adjusted marker effects for the BLR model.
#' @param lambda Vector or matrix of lambda values 
#' @param LD List with sparse LD matrices.
#' @param n Scalar or vector of number of observations for each trait.
#' @param vb Scalar or matrix of marker (co)variances.
#' @param vg Scalar or matrix of genetic (co)variances.
#' @param ve Scalar or matrix of residual (co)variances.
#' @param ssb_prior Scalar or matrix of prior marker (co)variances.
#' @param ssg_prior Scalar or matrix of prior genetic (co)variances.
#' @param sse_prior Scalar or matrix of prior residual (co)variances.
#' @param vg_prior Scalar or matrix of prior genetic (co)variances.
#' @param ve_prior Scalar or matrix of prior residual (co)variances.
#' @param nub Scalar or vector of prior degrees of freedom for marker (co)variances.
#' @param nug Scalar or vector of prior degrees of freedom for genetic (co)variances.
#' @param nue Scalar or vector of prior degrees of freedom for residual (co)variances.
#' @param updateB Logical indicating if marker (co)variances should be updated.
#' @param updateG Logical indicating if genetic (co)variances should be updated.
#' @param updateE Logical indicating if residual (co)variances should be updated.
#' @param updatePi Logical indicating if pi should be updated.
#' @param adjustE Logical indicating if residual variance should be adjusted.
#' @param models List structure with models evaluated in bayesC.
#' @param formatLD Character specifying LD format (default is "dense").
#' @param verbose Logical; if TRUE, it prints more details during iteration.
#' @param sets A list of character vectors where each vector represents a set of items. If the names
#'   of the sets are not provided, they are named as "Set1", "Set2", etc.
#' @param ids vector of individuals used in the study
#' @param scaleY Logical indicating if y should be scaled.
#' @param shrinkLD Logical indicating if LD should be shrunk.
#' @param shrinkCor Logical indicating if cor should be shrunk.
#' @param pruneLD Logical indicating if LD pruning should be applied.
#' @param r2 Scalar providing value for r2 threshold used in pruning
#' @param checkLD Logical indicating if LD matches summary statistics.
#' @param checkConvergence Logical indicating if convergences should be checked.
#' @param checkLD Logical indicating if LD matches summary statistics.
#' @param critVe Scalar providing value for z-score threshold used in checking convergence for Ve
#' @param critVg Scalar providing value for z-score threshold used in checking convergence for Vg
#' @param critVb Scalar providing value for z-score threshold used in checking convergence for Vg
#' @param critPi Scalar providing value for z-score threshold used in checking convergence for Pi
#' @param ntrial Integer providing number of trials used if convergence is not obtaines
#' @param msize Integer providing number of markers used in computation of sparseld
#' @param threshold Scalar providing value for threshold used in adjustment of B
#'
#'
#' @return 
#' A list containing:
#' \itemize{
#'   \item{\code{bm}}{Vector or matrix of posterior means for marker effects.}
#'   \item{\code{dm}}{Vector or matrix of posterior means for marker inclusion probabilities.}
#'   \item{\code{vb}}{Scalar or vector of posterior means for marker variances.}
#'   \item{\code{vg}}{Scalar or vector of posterior means for genomic variances.}
#'   \item{\code{ve}}{Scalar or vector of posterior means for residual variances.}
#'   \item{\code{rb}}{Matrix of posterior means for marker correlations.}
#'   \item{\code{rg}}{Matrix of posterior means for genomic correlations.}
#'   \item{\code{re}}{Matrix of posterior means for residual correlations.}
#'   \item{\code{pi}}{Vector of posterior probabilities for models.}
#'   \item{\code{h2}}{Vector of posterior means for model probability.}
#'   \item{\code{param}}{List of current parameters used for restarting the analysis.}
#'   \item{\code{stat}}{Matrix of marker information and effects used for genomic risk scoring.}
#' }
#'
#' @author Peter Sørensen
#'
#' @examples
#' 
#' # Plink bed/bim/fam files
#' bedfiles <- system.file("extdata", paste0("sample_chr",1:2,".bed"), package = "qgg")
#' bimfiles <- system.file("extdata", paste0("sample_chr",1:2,".bim"), package = "qgg")
#' famfiles <- system.file("extdata", paste0("sample_chr",1:2,".fam"), package = "qgg")
#' 
#' # Prepare Glist
#' Glist <- gprep(study="Example", bedfiles=bedfiles, bimfiles=bimfiles, famfiles=famfiles)
#' 
#' # Simulate phenotype
#' sim <- gsim(Glist=Glist, chr=1, nt=1)
#' 
#' # Compute single marker summary statistics
#' stat <- glma(y=sim$y, Glist=Glist, scale=FALSE)
#' str(stat)
#' 
#' # Define fine-mapping regions 
#' sets <- Glist$rsids
#' Glist$chr[[1]] <- gsub("21","1",Glist$chr[[1]]) 
#' Glist$chr[[2]] <- gsub("22","2",Glist$chr[[2]]) 
#' 
#' # Fine map
#' fit <- gmap(Glist=Glist, stat=stat, sets=sets, verbose=FALSE, 
#'             method="bayesC", nit=1500, nburn=500, pi=0.001)
#'             
#' fit$post  # Posterior inference for every fine-mapped region
#' fit$conv  # Convergence statistics for every fine-mapped region
#' 
#' # Posterior inference for marker effect
#' head(fit$stat)             
#'


#' @author Peter Sørensen


#'
#' @export
#'

gmap <- function(y=NULL, X=NULL, W=NULL, stat=NULL, trait=NULL, sets=NULL, fit=NULL, Glist=NULL,
                 chr=NULL, rsids=NULL, ids=NULL, b=NULL, bm=NULL, seb=NULL, mask=NULL, LD=NULL, n=NULL,
                 vg=NULL, vb=NULL, ve=NULL, ssg_prior=NULL, ssb_prior=NULL, sse_prior=NULL,
                 lambda=NULL, scaleY=TRUE, shrinkLD=FALSE, shrinkCor=FALSE, formatLD="dense", pruneLD=TRUE, 
                 r2=0.05, checkLD=TRUE,
                 h2=NULL, pi=0.001, updateB=TRUE, updateG=TRUE, updateE=TRUE, updatePi=TRUE,
                 adjustE=TRUE, models=NULL,
                 checkConvergence=FALSE, critVe=3, critVg=5, critVb=5, critPi=3, ntrial=1,
                 nug=4, nub=4, nue=4, verbose=FALSE,msize=100,threshold=NULL,
                 ve_prior=NULL, vg_prior=NULL,tol=0.001,
                 nit=100, nburn=50, nit_local=NULL,nit_global=NULL,
                 method="bayesC", algorithm="mcmc") {


  # Check methods and parameter settings
  methods <- c("blup","bayesN","bayesA","bayesL","bayesC","bayesR")
  method <- match(method, methods) - 1
  if( !sum(method%in%c(0:5))== 1 ) stop("method argument specified not valid")
  algorithms <- c("mcmc","em-mcmc")
  algorithm <- match(algorithm, algorithms)
  if(is.na(algorithm)) stop("algorithm argument specified not valid")
  if(shrinkLD) {
    if(is.null(Glist$map)) {
      warning("No map information in Glist - LD matrix shrinkage turned off")
      shrinkLD <- FALSE
    }
  } 
  
  # check this again
  if(is.data.frame(stat)) {
    if( any(sapply(stat[,-c(1:5)],function(x){any(!is.finite(x))}))) stop("Some elements in stat not finite")
    if( any(sapply(stat[,-c(1:5)],function(x){any(is.na(x))}))) stop("Some elements in stat NA")
    nt <- 1
    rsids <- stat$rsids
    m <- sum(rsids%in%unlist(Glist$rsids))
    if(!is.null(Glist$rsidsLD)) m <- sum(rsids%in%unlist(Glist$rsidsLD))
    stat$b <- as.matrix(stat$b)
    stat$seb <- as.matrix(stat$seb)
    stat$n <- as.matrix(stat$n)
    stat$p <- as.matrix(stat$p)
    rownames(stat$b) <- rownames(stat$seb) <- rsids
    rownames(stat$n) <- rownames(stat$p) <- rsids
    if(!is.null(stat[["ww"]])) {
      stat$ww <- as.matrix(stat$ww)
      rownames(stat$ww) <- rsids
    }
    if(!is.null(stat[["wy"]])) {
      stat$wy <- as.matrix(stat$wy)
      rownames(stat$wy) <- rsids
    }
  }
  if(!is.data.frame(stat) && is.list(stat)) {
    nt <- ncol(stat$b)
    rsids <- rownames(stat$b)
    m <- sum(rsids%in%unlist(Glist$rsids))
    if(!is.null(Glist$rsidsLD)) m <- sum(rsids%in%unlist(Glist$rsidsLD))
  }
  

  # Prepare summary statistics
  if(is.null(stat[["ww"]])) stat$ww <- 1/(stat$seb^2 + stat$b^2/stat$n)
  if(is.null(stat[["wy"]])) stat$wy <- stat$b*stat$ww
  if(nt==1) {
    yy <- median((stat$b^2 + (stat$n-2)*stat$seb^2)*stat$ww)
    n <- median(stat$n)
  }
  if(nt>1) {
    yy <- (stat$b^2 + (stat$n-2)*stat$seb^2)*stat$ww
    yy <- apply(yy,2,median)
    n <- apply(stat$n,2,median)
  }

  # Prepare input
  b <- matrix(0, nrow=length(rsids), ncol=nt)
  if(is.null(mask)) mask <- matrix(FALSE, nrow=length(rsids), ncol=nt)
  rownames(b) <- rownames(mask) <- rsids
  
  vy <- yy/(n-1)
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- vy*(1-h2)
  if(is.null(vg)) vg <- vy*h2
  mc <- min(c(5000,m))
  if(method>=4 && is.null(vb)) vb <- vg/(mc*pi)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <- ((nub-2.0)/nub)*(vg/(mc*pi))

  if(is.null(trait)) trait <- 1
  message(paste("Processing trait:",trait))
  
  
  if(!is.null(sets))  { 
    
    sets <- mapSets(sets=sets, rsids=stat$rsids, index=FALSE)
    if(any(sapply(sets,function(x){any(is.na(x))}))) stop("NAs in sets detected - please remove these")

    chr <- unlist(Glist$chr)
    chrSets <- mapSets(sets=sets, Glist=Glist, index=TRUE)
    chrSets <- sapply(chrSets,function(x){as.numeric(unique(chr[x]))})
    lsets <- sapply(chrSets,length)
    sets <- sets[lsets==1]
    if(any(lsets>1)) {
      warning(paste("Marker sets mapped to multiple chromosome:",paste(which(lsets>1),collapse=",")))
    }
    if(any(lsets==0)) {
      warning(paste("Marker sets mapped to multiple chromosome:",paste(which(lsets==0),collapse=",")))
    }

    # Prepare output
    bm <- dm <- vector(mode="list",length=length(sets))
    ves <- vgs <- vbs <- pis <- bs <- ds <- vector(mode="list",length=length(sets))
    pim <- vector(mode="list",length=length(sets))
    names(bm) <- names(dm) <- names(ves) <- names(vgs) <- names(pis) <- names(bs)  <- names(ds) <- names(sets)     
    names(pim) <- names(bs)  <- names(ds) <- names(sets)     
    attempts <- rep(0, length=length(sets))
    

    if(is.null(ids)) ids <- Glist$idsLD
    if(is.null(ids)) ids <- Glist$ids
    
    #message(paste("Processing chromosome:",chr))
    if(formatLD=="sparse") {
      sparseLD <- getSparseLD(Glist=Glist,chr=chr)
    }
    # BLR model for each set
    for (i in 1:length(sets)) {
      
      chr <- chrSets[[i]]
      rsids <- sets[[i]]
      message(paste("Processing region:",i,"on chromosome:",chr))
      
      pos <- getPos(Glist=Glist, chr=chr, rsids=rsids)
      message(paste("Region size in Mb:",round((max(pos)-min(pos))/1000000,2)))
      if(!is.null(Glist$map)) map <- getMap(Glist=Glist, chr=chr, rsids=rsids)
      if(!is.null(Glist$map)) message(paste("Region size in cM:",round(max(map)-min(map),2)))
      
      if(formatLD=="dense") {
        W <- getG(Glist=Glist, chr=chr, rsids=rsids, ids=ids, scale=TRUE)
        B <- crossprod(scale(W))/(length(ids)-1)
        if(shrinkCor) B <- corpcor::cor.shrink(W)
        if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
        LD <- NULL
        LD$values <- split(B, rep(1:ncol(B), each = nrow(B)))
        LD$indices <- lapply(1:ncol(B),function(x) { (1:ncol(B))-1 } )
        rsids <- colnames(B)
        names(LD$values) <- rsids
        names(LD$indices) <- rsids
        msize_set <- length(rsids)
      }
      
      
      if(formatLD=="sparse") {
        B <- regionLD(sparseLD = sparseLD, onebased=FALSE, rsids=rsids, format="dense")
        if(shrinkCor) B <- corpcor::cor.shrink(W)
        if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
        LD <- NULL
        LD$values <- split(B, rep(1:ncol(B), each = nrow(B)))
        LD$indices <- lapply(1:ncol(B),function(x) { (1:ncol(B))-1 } )
        rsids <- colnames(B)
        names(LD$values) <- rsids
        names(LD$indices) <- rsids
        msize_set <- length(rsids)
      }
      
      #ntrial <- 5
      converged <- FALSE
      
      updateB_reg <- updateB
      updatePi_reg <- updatePi
      pi_reg <- pi
      
      for (trial in 1:ntrial) {
        
        if (!converged) {
          
          attempts[i] <- trial
          
          fit <- sbayes_region(yy=yy[trait],
                                     wy=stat$wy[rsids,trait],
                                     ww=stat$ww[rsids,trait],
                                     b=b[rsids,trait],
                                     mask=mask[rsids,trait],
                                     LDvalues=LD$values,
                                     LDindices=LD$indices,
                                     method=method,
                                     algorithm=algorithm,
                                     nit=nit,
                                     nburn=nburn,
                                     n=n[trait],
                                     m=msize_set,
                                     pi=pi_reg,
                                     nue=nue,
                                     nub=nub,
                                     ssb_prior=ssb_prior,
                                     updateB=updateB_reg,
                                     updateE=updateE,
                                     updatePi=updatePi_reg,
                                     updateG=updateG,
                                     adjustE=adjustE)
          
          # Check convergence            
          critve <- critvg <- critvb <- critpi <- FALSE
          if(!updateE) critve <- TRUE
          if(!updateG) critvg <- TRUE
          if(!updateB) critvb <- TRUE
          if(!updatePi) critpi <- TRUE
          zve <- coda::geweke.diag(fit$ves[nburn:length(fit$ves)])$z
          zvg <- coda::geweke.diag(fit$vgs[nburn:length(fit$vgs)])$z
          zvb <- coda::geweke.diag(fit$vbs[nburn:length(fit$vbs)])$z
          zpi <- coda::geweke.diag(fit$pis[nburn:length(fit$pis)])$z
          if(!is.na(zve)) critve <- abs(zve)<critVe
          if(!is.na(zvg)) critvg <- abs(zvg)<critVg
          if(!is.na(zvb)) critvb <- abs(zvb)<critVb
          if(!is.na(zpi)) critpi <- abs(zpi)<critPi
          
          critb1 <- fit$dm>0.01 & fit$bm>0 & fit$bm>stat$b[rsids,trait]
          critb2 <- fit$dm>0.01 & fit$bm<0 & fit$bm<stat$b[rsids,trait]
          critb <- !any(critb1 | critb2)
          converged <- critve & critvg & critvb & critpi & critb
          
          
          if (!converged & checkConvergence) {
            message("")
            message(paste("Region not converged in attempt:",trial))
            if(!critve) message(paste("Zve:",zve))
            if(!critvg) message(paste("Zvg:",zvg))
            if(!critvb) message(paste("Zvb:",zvb))
            if(!critpi) message(paste("Zpi:",zpi))
            W <- getG(Glist=Glist, chr=chr, rsids=rsids, ids=ids, scale=TRUE)
            B <- crossprod(scale(W))/(length(ids)-1)
            if(shrinkCor) B <- corpcor::cor.shrink(W)
            if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
            if(checkLD & trial==1) { 
              message("Adjust summary statistics using imputation")
              badj <- adjustB(b=stat$b[rsids,trait], LD = B, 
                                    msize=500, overlap=100, shrink=0.001, threshold=1e-8) 
              z <- (badj-stat$b[rsids,trait])/stat$seb[rsids,trait]
              outliers <- names(z[abs(z)>1.96])
              #mask[outliers,trait] <- TRUE
              stat$b[outliers,trait] <- badj[abs(z)>1.96]
              stat$ww[outliers,trait] <- 1/(stat$seb[outliers,trait]^2 + stat$b[outliers,trait]^2/stat$n[outliers,trait])
              stat$wy[outliers,trait] <- stat$b[outliers,trait]*stat$ww[outliers,trait]
            }
            #if(pruneLD & trial==2) {
            if(pruneLD) {
              message("Adjust summary statistics using pruning")
              pruned <- adjLDregion(LD=B, p=stat$p[rsids,trait], r2=r2, thold=1) 
              mask[pruned,trait] <- TRUE
            }
            if(trial==3) {
              message("Set updateB and updatePi to FALSE")
              updateB_reg <- FALSE 
              updatePi_reg <- FALSE 
            }
            if(trial>3) {
              message("Decrease Pi by a factor 10")
              updateB_reg <- FALSE 
              updatePi_reg <- FALSE
              pi_reg <- pi_reg*0.1
            }
          }
        }
      }
      
      # Make plots to monitor convergence
      if(verbose) {
        layout(matrix(1:4,ncol=2))
        pipsets <- splitWithOverlap(1:length(rsids),100,99)
        pip <- fit$dm
        plot(pip, ylim=c(0,max(pip)), ylab="PIP",xlab="Position", frame.plot=FALSE)
        plot(-log10(stat$p[rsids,trait]), ylab="-log10(P)",xlab="Position", frame.plot=FALSE)
        hist(fit$ves, main="Ve", xlab="")
        plot(y=fit$bm, x=stat$b[rsids,trait], ylab="Adjusted",xlab="Marginal", frame.plot=FALSE)
        abline(h=0,v=0, lwd=2, col=2, lty=2)
      }
      
      # Save results
      bm[[i]] <- fit$bm
      dm[[i]] <- fit$dm
      pim[[i]] <- fit$pim
      ves[[i]] <- fit$ves
      vbs[[i]] <- fit$vbs
      vgs[[i]] <- fit$vgs
      pis[[i]] <- fit$pis
      selected <- NULL
      if(!is.null(threshold)) selected <- fit$dm>=threshold
      if(any(selected)) {
        bs[[i]] <- matrix(fit$bs,nrow=length(rsids))
        ds[[i]] <- matrix(fit$ds,nrow=length(rsids))
        rownames(bs[[i]]) <- rownames(ds[[i]]) <- rsids
        colnames(bs[[i]]) <- colnames(ds[[i]]) <- 1:(nit+nburn)
        bs[[i]] <- bs[[i]][selected,]
        ds[[i]] <- ds[[i]][selected,]
      }
      names(bm[[i]]) <- names(dm[[i]]) <- rsids
    }
    fit <- NULL
    fit$bm <- bm
    fit$dm <- dm
    fit$pim <- pim
    fit$ves <- ves
    fit$vbs <- vbs
    fit$vgs <- vgs
    fit$pis <- pis
    fit$attempts <- attempts
    if(!is.null(threshold)) fit$bs <- bs
    if(!is.null(threshold)) fit$ds <- ds
    
  }  
  
  if(is.null(sets))  { 
    
    
    # Prepare output
    bm <- dm <- vector(mode="list",length=22)
    ves <- vgs <- vbs <- pis <- bs <- ds <- vector(mode="list",length=22)
    pim <- attempts <- vector(mode="list",length=22)
    
    chromosomes <- 1:22
    if(!is.null(chr)) chromosomes <- chr 

    if(is.null(ids)) ids <- Glist$idsLD
    if(is.null(ids)) ids <- Glist$ids
    
    for (chr in chromosomes) {
      
      message(paste("Processing chromosome:",chr))
      rsidsLD <- Glist$rsidsLD[[chr]]
      rsidsLD <- rsidsLD[rsidsLD%in%rownames(b)]
      sets <- split(rsidsLD, ceiling(seq_along(rsidsLD) / msize))
      #if(is.null(ssb_prior)) {
      #  h2 <- 0.5
      #  pi <- 0.001
      #  vy <- 1
      #  vg <- h2*vy
      #  nub <- 4
      #  ww <- 1/(stat$seb^2 + stat$b/stat$n)
      #  mx <- sum(ww/mean(stat$n))
      #  ssb_prior <- vy*h2*(nub+2)/mx/pi
      #}
      
      if(formatLD=="sparse") {
        sparseLD <- getSparseLD(Glist=Glist,chr=chr)
      }
      
      # BLR model for each set
      for (i in 1:length(sets)) {
        
        message(paste("Processing region:",i))
        rsids <- sets[[i]]
        pos <- getPos(Glist=Glist, chr=chr, rsids=rsids)
        message(paste("Region size in Mb:",round((max(pos)-min(pos))/1000000,2)))
        if(!is.null(Glist$map)) map <- getMap(Glist=Glist, chr=chr, rsids=rsids)
        if(!is.null(Glist$map)) message(paste("Region size in cM:",round(max(map)-min(map),2)))
        
        if(formatLD=="dense") {
          W <- getG(Glist=Glist, chr=chr, rsids=rsids, ids=ids, scale=TRUE)
          B <- crossprod(scale(W))/(length(ids)-1)
          if(shrinkCor) B <- corpcor::cor.shrink(W)
          if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
          LD <- NULL
          LD$values <- split(B, rep(1:ncol(B), each = nrow(B)))
          LD$indices <- lapply(1:ncol(B),function(x) { (1:ncol(B))-1 } )
          rsids <- colnames(B)
          names(LD$values) <- rsids
          names(LD$indices) <- rsids
          msize_set <- length(rsids)
        }
        
        
        
        if(formatLD=="sparse") {
          B <- regionLD(sparseLD = sparseLD, onebased=FALSE, rsids=rsids, format="dense")
          if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
          LD <- NULL
          LD$values <- split(B, rep(1:ncol(B), each = nrow(B)))
          LD$indices <- lapply(1:ncol(B),function(x) { (1:ncol(B))-1 } )
          rsids <- colnames(B)
          names(LD$values) <- rsids
          names(LD$indices) <- rsids
          msize_set <- length(rsids)
          #LD <- regionLD(sparseLD = sparseLD, onebased=FALSE, rsids=rsids, format="sparse")
          #rsids <- LD$rsids
          #msize <- length(rsids)
        }
        
        #ntrial <- 5
        converged <- FALSE

        updateB_reg <- updateB
        updatePi_reg <- updatePi
        pi_reg <- pi

        for (trial in 1:ntrial) {
          
          if (!converged) {
            
            attempts[[chr]][[i]] <- trial
            
            
            fit <- sbayes_region(yy=yy[trait],
                                       wy=stat$wy[rsids,trait],
                                       ww=stat$ww[rsids,trait],
                                       b=b[rsids,trait],
                                       mask=mask[rsids,trait],
                                       LDvalues=LD$values,
                                       LDindices=LD$indices,
                                       method=method,
                                       algorithm=algorithm,
                                       nit=nit,
                                       nburn=nburn,
                                       n=n[trait],
                                       m=msize_set,
                                       pi=pi,
                                       nue=nue,
                                       nub=nub,
                                       ssb_prior=ssb_prior,
                                       updateB=updateB_reg,
                                       updateE=updateE,
                                       updatePi=updatePi_reg,
                                       updateG=updateG,
                                       adjustE=adjustE)
            #   }
            
            # Check convergence            
            critve <- critvg <- critvb <- critpi <- FALSE
            if(!updateE) critve <- TRUE
            if(!updateG) critvg <- TRUE
            if(!updateB) critvb <- TRUE
            if(!updatePi) critpi <- TRUE
            zve <- coda::geweke.diag(fit$ves[nburn:length(fit$ves)])$z
            zvg <- coda::geweke.diag(fit$vgs[nburn:length(fit$vgs)])$z
            zvb <- coda::geweke.diag(fit$vbs[nburn:length(fit$vbs)])$z
            zpi <- coda::geweke.diag(fit$pis[nburn:length(fit$pis)])$z
            if(!is.na(zve)) critve <- abs(zve)<critVe
            if(!is.na(zvg)) critvg <- abs(zvg)<critVg
            if(!is.na(zvb)) critvb <- abs(zvb)<critVb
            if(!is.na(zpi)) critpi <- abs(zpi)<critPi

            critb1 <- fit$dm>0.01 & fit$bm>0 & fit$bm>stat$b[rsids,trait]
            critb2 <- fit$dm>0.01 & fit$bm<0 & fit$bm<stat$b[rsids,trait]
            critb <- !any(critb1 | critb2)
            converged <- critve & critvg & critvb & critpi & critb
            
            if (!converged & checkConvergence) {
              message("")
              message(paste("Region not converged in attempt:",trial))
              if(!critve) message(paste("Zve:",zve))
              if(!critvg) message(paste("Zvg:",zvg))
              if(!critvb) message(paste("Zvb:",zvb))
              if(!critpi) message(paste("Zpi:",zpi))
              W <- getG(Glist=Glist, chr=chr, rsids=rsids, ids=ids, scale=TRUE)
              B <- crossprod(scale(W))/(length(ids)-1)
              if(shrinkCor) B <- corpcor::cor.shrink(W)
              if(shrinkLD) B <- adjustMapLD(LD = B, map=map)
              if(checkLD & trial==1) { 
                message("Adjust summary statistics using imputation")
                badj <- adjustB(b=stat$b[rsids,trait], LD = B, 
                                      msize=500, overlap=100, shrink=0.001, threshold=1e-8) 
                z <- (badj-stat$b[rsids,trait])/stat$seb[rsids,trait]
                outliers <- names(z[abs(z)>1.96])
                #mask[outliers,trait] <- TRUE
                stat$b[outliers,trait] <- badj[abs(z)>1.96]
                stat$ww[outliers,trait] <- 1/(stat$seb[outliers,trait]^2 + stat$b[outliers,trait]^2/stat$n[outliers,trait])
                stat$wy[outliers,trait] <- stat$b[outliers,trait]*stat$ww[outliers,trait]
              }
              if(pruneLD) {
                #if(pruneLD & trial==2) {
                message("Adjust summary statistics using pruning")
                pruned <- adjLDregion(LD=B, p=stat$p[rsids,trait], r2=r2, thold=1) 
                mask[pruned,trait] <- TRUE
              }
              if(trial==3) {
                message("Set updateB and updatePi to FALSE")
                updateB_reg <- FALSE 
                updatePi_reg <- FALSE 
              }
              if(trial>3) {
                message("Decrease Pi by a factor 10")
                updateB_reg <- FALSE 
                updatePi_reg <- FALSE
                pi_reg <- pi_reg*0.1
              }
            }
          }
        }
        
        # Make plots to monitor convergence
        if(verbose) {
          layout(matrix(1:4,ncol=2))
          pipsets <- splitWithOverlap(1:length(rsids),100,99)
          #pip <- sapply(pipsets,function(x){sum(fit$dm[x])})
          pip <- fit$dm
          plot(pip, ylim=c(0,max(pip)), ylab="PIP",xlab="Position", frame.plot=FALSE)
          plot(-log10(stat$p[rsids,trait]), ylab="-log10(P)",xlab="Position", frame.plot=FALSE)
          hist(fit$ves, main="Ve", xlab="")
          plot(y=fit$bm, x=stat$b[rsids,trait], ylab="Adjusted",xlab="Marginal", frame.plot=FALSE)
          abline(h=0,v=0, lwd=2, col=2, lty=2)
        }

        # Save results
        bm[[chr]][[i]] <- fit$bm
        dm[[chr]][[i]] <- fit$dm
        pim[[chr]][[i]] <- fit$pim
        ves[[chr]][[i]] <- fit$ves
        vbs[[chr]][[i]] <- fit$vbs
        vgs[[chr]][[i]] <- fit$vgs
        pis[[chr]][[i]] <- fit$pis
        #bs[[chr]][[i]] <- matrix(fit$bs,nrow=length(rsids))
        #ds[[chr]][[i]] <- matrix(fit$ds,nrow=length(rsids))
        #rownames(bs[[chr]][[i]]) <- rownames(ds[[chr]][[i]]) <- rsids
        #colnames(bs[[chr]][[i]]) <- colnames(ds[[chr]][[i]]) <- 1:nit
        selected <- NULL
        if(!is.null(threshold)) selected <- fit$dm>=threshold
        if(any(selected)) {
          bs[[chr]][[i]] <- matrix(fit$bs,nrow=length(rsids))
          ds[[chr]][[i]] <- matrix(fit$ds,nrow=length(rsids))
          rownames(bs[[chr]][[i]]) <- rownames(ds[[chr]][[i]]) <- rsids
          colnames(bs[[chr]][[i]]) <- colnames(ds[[chr]][[i]]) <- 1:(nit+nburn)
          bs[[chr]][[i]] <- bs[[chr]][[i]][selected,]
          ds[[chr]][[i]] <- ds[[chr]][[i]][selected,]
        }
        names(bm[[chr]][[i]]) <- names(dm[[chr]][[i]]) <- rsids
      }
    }
    
    fit <- NULL
    fit$bm <- unlist(bm, recursive=FALSE)
    fit$dm <- unlist(dm, recursive=FALSE)
    fit$pim <- unlist(pim, recursive=FALSE)
    fit$ves <- unlist(ves, recursive=FALSE)
    fit$vbs <- unlist(vbs, recursive=FALSE)
    fit$vgs <- unlist(vgs, recursive=FALSE)
    fit$pis <- unlist(pis, recursive=FALSE)
    fit$attempts <- unlist(attempts, recursive=TRUE)
    if(!is.null(threshold)) fit$bs <- unlist(bs, recursive=FALSE)
    if(!is.null(threshold)) fit$ds <- unlist(ds, recursive=FALSE)
  }


  pip <- sapply(fit$dm,sum)
  minb <- sapply(fit$bm,min)
  maxb <- sapply(fit$bm,max)
  m <- sapply(fit$bm,length)
  
  bm <- unlist(unname(fit$bm))
  dm <- unlist(unname(fit$dm))
  #selected <- dm>0
  #bm <- bm[selected]
  #dm <- dm[selected]
  marker <- data.frame(rsids=unlist(Glist$rsids),
                       chr=unlist(Glist$chr), pos=unlist(Glist$pos), 
                       ea=unlist(Glist$a1), nea=unlist(Glist$a2),
                       eaf=unlist(Glist$af),stringsAsFactors = FALSE)
  marker <- marker[marker$rsids%in%names(bm),]
  fit$stat <- data.frame(marker,bm=bm[marker$rsids],
                         dm=dm[marker$rsids], stringsAsFactors = FALSE)
  fit$stat$vm <- 2*(1-fit$stat$eaf)*fit$stat$eaf*fit$stat$bm^2
  fit$method <- methods[method+1]
  fit$mask <- mask
  zve <- sapply(fit$ves,function(x){coda::geweke.diag(x[nburn:length(x)])$z})
  zvg <- sapply(fit$vgs,function(x){coda::geweke.diag(x[nburn:length(x)])$z})
  zvb <- sapply(fit$vbs,function(x){coda::geweke.diag(x[nburn:length(x)])$z})
  zpi <- sapply(fit$pis,function(x){coda::geweke.diag(x[nburn:length(x)])$z})
  ve <- sapply(fit$ves,function(x){mean(x[nburn:length(x)])})
  vg <- sapply(fit$vgs,function(x){mean(x[nburn:length(x)])})
  vb <- sapply(fit$vbs,function(x){mean(x[nburn:length(x)])})
  pi <- sapply(fit$pim,function(x){1-x[1]})
  
  if(!is.null(Glist$map)) map <- unlist(Glist$map)
  pos <- unlist(Glist$pos)
  sets <- lapply(fit$bm,names)
  setsindex <- mapSets(sets=sets, rsids=unlist(Glist$rsids))
  if(!is.null(Glist$map)) cm <- sapply(setsindex, function(x){ max(map[x])-min(map[x]) })
  mb <- sapply(setsindex, function(x){ (max(pos[x])-min(pos[x]))/1000000 })
  minmb <- sapply(setsindex, function(x){ min(pos[x]) })
  maxmb <- sapply(setsindex, function(x){ max(pos[x]) })
  
  chr <- unlist(Glist$chr)
  chr <- sapply(setsindex,function(x){as.numeric(unique(chr[x]))})
  
  b <- stat[fit$stat$rsids,"b"]
  
  #fit$region <-  NULL
  fit$conv <- data.frame(zve=zve,zvg=zvg, zvb=zvb, zpi=zpi)  
  if(is.null(Glist$map)) fit$post <- data.frame(ve=ve,vg=vg, vb=vb, pi=pi, pip=pip, minb=minb, maxb=maxb, m=m, mb=mb, chr=chr, minmb=minmb, maxmb=maxmb)  
  if(!is.null(Glist$map)) fit$post <- data.frame(ve=ve,vg=vg, vb=vb, pi=pi, pip=pip, minb=minb, maxb=maxb, m=m, mb=mb, cm=cm, chr=chr, minmb=minmb, maxmb=maxmb)  
  fit$ve <- mean(ve)
  fit$vg <- sum(vg)
  fit$b <- b
  return(fit)
}

adjustB <- function(b=NULL, LD = NULL, msize=NULL, overlap=NULL, shrink=0.001, threshold=1e-8) {
  m <- length(b)
  badj <- rep(0,m)
  sets <- splitWithOverlap(1:m,msize,overlap)
  for( i in 1:length(sets) ) {
    mset <- length(sets[[i]])
    bset <- b[sets[[i]]]
    B <- LD[sets[[i]],sets[[i]]]
    for (j in 1:mset) {
      Bi <- chol2inv(chol(B[-j,-j]+diag(shrink,mset-1)))
      badj[sets[[i]][j]] <- sum(B[j,-j]*Bi%*%bset[-j])
    }
    plot(y=badj[sets[[i]]],x=b[sets[[i]]],ylab="Predicted", xlab="Observed",  frame.plot=FALSE)
    abline(0,1, lwd=2, col=2, lty=2)
  }
  return(b=badj)
}

adjLDregion <- function(LD=NULL, p=NULL, r2=0.5, thold=1) {
  rsids <- colnames(LD)
  o <- order(p, decreasing = FALSE)
  m <- length(rsids)
  indx1 <- rep(T, m)
  indx2 <- rep(F, m)
  message(paste("Pruning using r2 threshold:",r2))
  message(paste("Pruning using p-value threshold:",thold))
  ldSets <- apply(LD,1,function(x){colnames(LD)[x>r2]})
  ldSets <- mapSets(sets = ldSets, rsids = rsids)
  for (j in o) {
    if (p[j] <= thold) {
      if (indx1[j]) {
        rws <- ldSets[[j]]
        indx1[rws] <- F
        indx2[j] <- T
      }
    }
  }
  return(rsids[!indx2])
}

# Single trait fine-mapping BLR using summary statistics and sparse LD provided in Glist 
sbayes_region <- function(yy=NULL, wy=NULL, ww=NULL, b=NULL, bm=NULL, mask=NULL, seb=NULL, 
                          LDvalues=NULL,LDindices=NULL, n=NULL, m=NULL,
                          vg=NULL, vb=NULL, ve=NULL, 
                          ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=NULL,
                          h2=NULL, pi=NULL, updateB=NULL, updateE=NULL, updatePi=NULL, 
                          updateG=NULL, adjustE=NULL, models=NULL,
                          nub=NULL, nue=NULL, nit=NULL, nburn=NULL, method=NULL, algorithm=NULL, verbose=NULL) {
  
  if(is.null(m)) m <- length(LDvalues)
  vy <- yy/(n-1)
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- vy*(1-h2)
  if(is.null(vg)) vg <- vy*h2
  if(method<4 && is.null(vb)) vb <- vg/m
  if(method>=4 && is.null(vb)) vb <- vg/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/(m*pi))
  if(is.null(sse_prior)) sse_prior <- ((nue-2.0)/nue)*ve
  if(is.null(b)) b <- rep(0,m)
  
  pi <- c(1-pi,pi)
  gamma <- c(0,1.0)
  if(method==5) pi <- c(0.95,0.02,0.02,0.01)
  if(method==5) gamma <- c(0,0.01,0.1,1.0)
  if(is.null(algorithm)) algorithm <- 0
  
  fit <- .Call("_qgg_sbayes_reg",
               wy=wy, 
               ww=ww, 
               LDvalues=LDvalues, 
               LDindices=LDindices, 
               b = b,
               lambda = lambda,
               mask = mask,
               yy = yy,
               pi = pi,
               gamma = gamma,
               vg = vg,
               vb = vb,
               ve = ve,
               ssb_prior=ssb_prior,
               sse_prior=sse_prior,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               updateG = updateG,
               adjustE = adjustE,
               n=n,
               nit=nit,
               nburn=nburn,
               method=as.integer(method),
               algo=as.integer(algorithm))
  names(fit[[1]]) <- names(LDvalues)
  names(fit) <- c("bm","dm","coef","vbs","vgs","ves","pis","pim","r","b","param","bs","ds")
  return(fit)
}


# Multiple trait BLR using summary statistics and sparse LD provided in Glist 
mt_sbayes_sparse <- function(yy=NULL, ww=NULL, wy=NULL, b=NULL, 
                             LDvalues=NULL,LDindices=NULL, n=NULL,
                             vg=NULL, vb=NULL, ve=NULL, 
                             ssb_prior=NULL, sse_prior=NULL, 
                             h2=NULL, pi=NULL, updateB=NULL, 
                             updateE=NULL, updatePi=NULL, models=NULL,
                             nub=NULL, nue=NULL, nit=NULL, method=NULL, verbose=NULL) {
  
  nt <- length(yy)
  m <- length(LDvalues)
  
  if(method==0) {
    # BLUP and we do not estimate parameters
    updateB=FALSE;
    updateE=FALSE;
  }
  
  if(method==2) stop("Multiple trait not yet implemented for Bayes A") 
  if(method==3) stop("Multiple trait not yet implemented for Bayesian Lasso")
  
  if(is.null(b)) b <- lapply(1:nt,function(x){rep(0,m)}) 
  if(is.matrix(b)) b <- split(b, rep(1:ncol(b), each = nrow(b)))
  
  if(is.null(models)) {
    models <- rep(list(0:1), nt)
    models <- t(do.call(expand.grid, models))
    models <- split(models, rep(1:ncol(models), each = nrow(models)))
    pi <- c(0.999,rep(0.001,length(models)-1)) 
  } 
  if(is.character(models)) {
    if(models=="restrictive") {
      models <- list(rep(0,nt),rep(1,nt))
      pi <- c(0.999,0.001)
    }
  }
  
  vy <- diag(yy/(n-1),nt)
  if(is.null(h2)) h2 <- 0.5
  if(is.null(vg)) vg <- diag(diag(vy)*h2)
  if(is.null(ve)) ve <- diag(diag(vy)*(1-h2))
  if(method<4 && is.null(vb)) vb <- diag((diag(vy)*h2)/m)
  if(method>=4 && is.null(vb)) vb <- diag((diag(vy)*h2)/(m*pi[length(models)]))
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  diag(((nub-2.0)/nub)*(diag(vg)/m))
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  diag(((nub-2.0)/nub)*(diag(vg)/(m*pi[length(models)])))
  if(is.null(sse_prior)) sse_prior <- diag(((nue-2.0)/nue)*diag(ve))

  fit <- .Call("_qgg_mtsbayes",
               wy=split(wy, rep(1:ncol(wy), each = nrow(wy))),
               ww=split(ww, rep(1:ncol(ww), each = nrow(ww))),
               yy=yy,
               b = b,
               LDvalues=LDvalues, 
               LDindices=LDindices, 
               B = vb,
               E = ve,
               ssb_prior=split(ssb_prior, rep(1:ncol(ssb_prior), each = nrow(ssb_prior))),
               sse_prior=split(sse_prior, rep(1:ncol(sse_prior), each = nrow(sse_prior))),
               models=models,
               pi=pi,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               n=n,
               nit=nit,
               method=as.integer(method))
  
  fit[[6]] <- matrix(unlist(fit[[6]]), ncol = nt, byrow = TRUE)
  fit[[7]] <- matrix(unlist(fit[[7]]), ncol = nt, byrow = TRUE)
  trait_names <- names(yy)
  if(is.null(trait_names)) trait_names <- paste0("T",1:nt)
  colnames(fit[[6]]) <- rownames(fit[[6]]) <- trait_names
  colnames(fit[[7]]) <- rownames(fit[[7]]) <- trait_names
  fit[[11]] <- matrix(unlist(fit[[11]]), ncol = nt, byrow = TRUE)
  fit[[12]] <- matrix(unlist(fit[[12]]), ncol = nt, byrow = TRUE)
  colnames(fit[[11]]) <- rownames(fit[[11]]) <- trait_names
  colnames(fit[[12]]) <- rownames(fit[[12]]) <- trait_names
  # add colnames/rownames e, g and gm
  # add colnames/rownames rg and covg
  fit[[13]] <- fit[[13]][[1]]
  fit[[14]] <- fit[[14]][[1]]
  fit[[15]] <- fit[[15]][[1]]
  fit[[16]] <- fit[[6]]
  fit[[17]] <- fit[[7]]
  if(sum(diag(fit[[16]]))>0) fit[[16]] <- cov2cor(fit[[16]])
  if(sum(diag(fit[[17]]))>0)  fit[[17]] <- cov2cor(fit[[17]])
  for(i in 1:nt){
    names(fit[[1]][[i]]) <- names(LDvalues)
    names(fit[[2]][[i]]) <- names(LDvalues)
    names(fit[[10]][[i]]) <- names(LDvalues)
  }
  names(fit[[1]]) <- trait_names
  names(fit[[2]]) <- trait_names
  names(fit[[3]]) <- trait_names
  names(fit[[4]]) <- trait_names
  names(fit[[5]]) <- trait_names
  names(fit[[8]]) <- trait_names
  names(fit[[9]]) <- trait_names
  names(fit[[13]]) <- sapply(models,paste,collapse="_")
  names(fit[[14]]) <- sapply(models,paste,collapse="_")
  
  names(fit) <- c("bm","dm","coef","vbs","ves","covb","cove",
                  "wy","r","b","vb","ve","pi","pim","order",
                  "rb","re")
  fit$bm <- as.matrix(as.data.frame(fit$bm))
  fit$dm <- as.matrix(as.data.frame(fit$dm))
  fit$b <- as.matrix(as.data.frame(fit$b))
  return(fit)
}


# Multiple trait BLR based on individual level data based on fast algorithm  
mtbayes <- function(y=NULL, X=NULL, W=NULL, b=NULL, bm=NULL, seb=NULL, LD=NULL, n=NULL,
                    vg=NULL, vb=NULL, ve=NULL, formatLD=NULL,
                    ssb_prior=NULL, sse_prior=NULL, lambda=NULL, scaleY=NULL,
                    h2=NULL, pi=NULL, updateB=NULL, updateE=NULL, updatePi=NULL, models=NULL,
                    nub=NULL, nue=NULL, nit=NULL, method=NULL, algorithm=NULL, verbose=NULL) {
  
  if(is.list(y)) nt <- length(y)
  if(!is.matrix(y)) stop("This is not a multiple trait analysis")
  
  if(method==0) {
    # BLUP and we do not estimate parameters
    updateB=FALSE;
    updateE=FALSE;
  }
  
  if(method==2) stop("Multiple trait not yet implemented for Bayes A") 
  if(method==3) stop("Multiple trait not yet implemented for Bayesian Lasso")
  
  n <- nrow(W)
  m <- ncol(W)
  
  if(!is.matrix(y)) stop("y should be a matrix")
  y <- split(y, rep(1:ncol(y), each = nrow(y)))
  
  if(scaleY) y <- lapply(y,function(x){as.vector(scale(x))})
  if(!scaleY) y <- lapply(y,function(x){x-mean(x) })
  
  if(is.null(b)) b <- lapply(1:nt,function(x){rep(0,m)}) 
  if(is.matrix(b)) b <- split(b, rep(1:ncol(b), each = nrow(b)))
  
  if(is.null(models)) {
    models <- rep(list(0:1), nt)
    models <- t(do.call(expand.grid, models))
    models <- split(models, rep(1:ncol(models), each = nrow(models)))
    pi <- c(0.999,rep(0.001,length(models)-1)) 
  } 
  if(is.character(models)) {
    if(models=="restrictive") {
      models <- list(rep(0,nt),rep(1,nt))
      pi <- c(0.999,0.001)
    }
  }
  
  vy <- diag(sapply(y,var))
  if(is.null(h2)) h2 <- 0.5
  if(is.null(ve)) ve <- diag(diag(vy)*(1-h2))
  if(method<4 && is.null(vb)) vb <- diag((diag(vy)*h2)/m)
  if(method>=4 && is.null(vb)) vb <- diag((diag(vy)*h2)/(m*pi[length(models)])) 
  if(is.null(vg)) vg <- diag(diag(vy)*h2)
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  diag(((nub-2.0)/nub)*(diag(vg)/m))
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  diag(((nub-2.0)/nub)*(diag(vg)/(m*pi[length(models)])))
  if(is.null(sse_prior)) sse_prior <- diag(((nue-2.0)/nue)*diag(ve))

  fit <- .Call("_qgg_mtbayes",
               y=y, 
               W=split(W, rep(1:ncol(W), each = nrow(W))), 
               b=b,
               B = vb,
               E = ve,
               ssb_prior=split(ssb_prior, rep(1:ncol(ssb_prior), each = nrow(ssb_prior))),
               sse_prior=split(sse_prior, rep(1:ncol(sse_prior), each = nrow(sse_prior))),
               models=models,
               pi=pi,
               nub=nub,
               nue=nue,
               updateB=updateB,
               updateE=updateE,
               updatePi=updatePi,
               nit=nit,
               method=as.integer(method))
  fit[[6]] <- matrix(unlist(fit[[6]]), ncol = nt, byrow = TRUE)
  fit[[7]] <- matrix(unlist(fit[[7]]), ncol = nt, byrow = TRUE)
  trait_names <- names(y)
  ids <- rownames(W)
  if(is.null(trait_names)) trait_names <- paste0("T",1:nt)
  colnames(fit[[6]]) <- rownames(fit[[6]]) <- trait_names
  colnames(fit[[7]]) <- rownames(fit[[7]]) <- trait_names
  fit[[11]] <- matrix(unlist(fit[[11]]), ncol = nt, byrow = TRUE)
  fit[[12]] <- matrix(unlist(fit[[12]]), ncol = nt, byrow = TRUE)
  colnames(fit[[11]]) <- rownames(fit[[11]]) <- trait_names
  colnames(fit[[12]]) <- rownames(fit[[12]]) <- trait_names
  # add colnames/rownames e, g and gm
  # add colnames/rownames rg and covg
  fit[[13]] <- fit[[13]][[1]]
  fit[[14]] <- fit[[14]][[1]]
  fit[[15]] <- fit[[15]][[1]]
  fit[[16]] <- crossprod(t(W),matrix(unlist(fit[[1]]), ncol=nt))
  fit[[17]] <- cov2cor(fit[[6]])
  fit[[18]] <- cov2cor(fit[[7]])
  fit[[19]] <- cov(fit[[16]])
  fit[[20]] <- cov2cor(fit[[19]])
  colnames(fit[[19]]) <- rownames(fit[[19]]) <- trait_names
  colnames(fit[[20]]) <- rownames(fit[[20]]) <- trait_names
  for(i in 1:nt){
    names(fit[[1]][[i]]) <- colnames(W)
    names(fit[[2]][[i]]) <- colnames(W)
    names(fit[[10]][[i]]) <- colnames(W)
    names(fit[[8]][[i]]) <- ids
    names(fit[[9]][[i]]) <- names(y[[i]])
  }
  names(fit[[1]]) <- trait_names
  names(fit[[2]]) <- trait_names
  names(fit[[3]]) <- trait_names
  names(fit[[4]]) <- trait_names
  names(fit[[5]]) <- trait_names
  names(fit[[8]]) <- trait_names
  names(fit[[9]]) <- trait_names
  rownames(fit[[16]]) <- ids
  colnames(fit[[16]]) <- trait_names
  names(fit[[13]]) <- sapply(models,paste,collapse="_")
  names(fit[[14]]) <- sapply(models,paste,collapse="_")
  names(fit) <- c("bm","dm","mus","vbs","ves","covb","cove",
                  "g","e","b","vb","ve","pi","pim","order",
                  "gm","rb","re","covg","rg")
  fit$bm <- as.matrix(as.data.frame(fit$bm))
  fit$dm <- as.matrix(as.data.frame(fit$dm))
  fit$mus <- as.data.frame(fit$mus)
  fit$vbs <- as.data.frame(fit$vbs)
  fit$ves <- as.data.frame(fit$ves)
  fit$g <- as.data.frame(fit$g)
  fit$e <- as.data.frame(fit$e)
  fit$b <- as.data.frame(fit$b)
  fit$gm <- as.data.frame(fit$gm)
  return(fit)
  
}


computeB <- function(wy=NULL, yy=NULL, b=NULL, WW=NULL, n=NULL,
                     vb=NULL, vg=NULL, ve=NULL, lambda=NULL, 
                     ssb_prior=NULL, sse_prior=NULL, 
                     nub=NULL, nue=NULL, 
                     h2=NULL, pi=NULL, 
                     updateB=NULL, updateE=NULL, updatePi=NULL,
                     nit=NULL, nburn=NULL, method=NULL) {
  
  m <- ncol(WW)
  
  if(is.null(pi)) pi <- 0.001
  if(is.null(h2)) h2 <- 0.5
  
  if(is.null(ve)) ve <- 1
  if(method<4 && is.null(vb)) vb <- (ve*h2)/m
  if(method>=4 && is.null(vb)) vb <- (ve*h2)/(m*pi)
  if(is.null(lambda)) lambda <- rep(ve/vb,m)
  if(is.null(vg)) vg <- ve*h2
  if(method<4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m)
  if(method>=4 && is.null(ssb_prior))  ssb_prior <-  ((nub-2.0)/nub)*(vg/m*pi)
  if(is.null(sse_prior)) sse_prior <- nue*ve
  if(is.null(b)) b <- rep(0,m)
  
  fit <- .Call("_qgg_sbayes",
               wy=wy, 
               LD=split(WW, rep(1:ncol(WW), each = nrow(WW))), 
               b = b,
               lambda = lambda,
               yy = yy,
               pi = pi,
               vg = vg,
               vb = vb,
               ve = ve,
               ssb_prior=ssb_prior,
               sse_prior=sse_prior,
               nub=nub,
               nue=nue,
               updateB = updateB,
               updateE = updateE,
               updatePi = updatePi,
               n=n,
               nit=nit,
               method=as.integer(method))
  
  names(fit[[1]]) <- rownames(WW)
  stop("Check names of fit again")
  names(fit) <- c("bm","dm","mus","vbs","ves","pis","wy","r","param","b")
  
  return(fit)
  
}

computeWy <- function(y=NULL, Glist = NULL, chr = NULL, cls = NULL) {
  wy <- cvs(y=y,Glist=Glist,chr=chr,cls=cls)$wy
  return(wy)
}

computeWW <- function(Glist = NULL, chr = NULL, cls = NULL, rws=NULL, scale=TRUE) { 
  W <- getG(Glist=Glist, chr=chr, cls=cls, scale=scale)
  WW <- crossprod(W[rws,])
  return(WW)
}

computeGRS <- function(Glist = NULL, chr = NULL, cls = NULL, b=NULL, scale=TRUE) { 
  af <- Glist$af[[chr]][cls]  
  grs <- .Call("_qgg_mtgrsbed", Glist$bedfiles[chr], 
               Glist$n, cls=cls, af=af, scale=scale, Slist=list(b))
  return(as.matrix(as.data.frame(grs)))
}




#' Plot fit from gbayes
#'
#' @description
#' Summary plots from gbayes fit

#' @param fit object from gbayes
#' @param causal indices for "causal" markers 
#' @param chr chromosome to plot 
#' @param what character fro what to plot (e.g. "trace", "bpm", "ve", "vg", "vb") 
#' @keywords internal

#'
#' @export
#'

plotBayes <- function(fit=NULL, causal=NULL, what="bm", chr=1) {
  # if(!is.list(fit[[1]])) {
  #   layout(matrix(1:6,nrow=3,ncol=2))
  #   plot(fit[[1]],ylab="Posterior", xlab="Marker", main="Marker effect", frame.plot=FALSE)  
  #   if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="red", pch=4, cex=2, lwd=3 )
  #   #plot(fit[[2]],ylab="Posterior mean", xlab="Marker", pch="✈",,main="Marker indicator", frame.plot=FALSE)  
  #   plot(fit[[2]],ylab="Posterior mean", xlab="Marker", main="Marker indicator", ylim=c(0,1), frame.plot=FALSE)  
  #   if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="red", pch=4, cex=2, lwd=3 )
  #   #if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="red", pch="✈", cex=2, lwd=3 )
  #   hist(fit[[6]],xlab="Posterior", main="Pi")  
  #   hist(fit[[4]],xlab="Posterior", main="Marker variance")  
  #   hist(fit[[5]],xlab="Posterior", main="Residual variance")  
  #   plot(fit[[6]],xlab="Sample", ylab="Pi", frame.plot=FALSE)  
  # } 
  # if(is.list(fit[[1]])) {
  #   layout(matrix(1:4,2,2))
  #   matplot(as.data.frame(fit[[1]]),ylab="Marker effect", frame.plot=FALSE)  
  #   if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="green", pch=4, cex=2, lwd=3 )
  #   matplot(as.data.frame(fit[[2]]),ylab="Marker indicator", frame.plot=FALSE)  
  #   if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="green", pch=4, cex=2, lwd=3 )
  #   matplot(as.data.frame(fit[[4]]),ylab="Marker variance", frame.plot=FALSE)  
  #   matplot(as.data.frame(fit[[5]]),ylab="Residual variance", frame.plot=FALSE)  
  # } 
  if(length(chr)==1) {
    if(what=="bm") plot(fit[[chr]]$bm,ylab="Posterior Mean Effect", xlab="Marker", main="Adjusted Marker Effect", frame.plot=FALSE)  
    
    if(what=="dm") {
      if(fit$method=="bayesC") {
        plot(fit[[chr]]$dm, ylab="PIP", xlab="Marker", main="Posterior Inclusion Probability", ylim=c(0,1), frame.plot=FALSE)
      }
      if(fit$method=="bayesR") {
        plot(fit[[chr]]$dm, 
             xlab="Marker", 
             ylab="Marker Class",
             frame.plot=FALSE, ylim=c(0,3), main="Posterior Mean Marker Class")
        abline(h=1, lty=2, col=2, lwd=2)
        abline(h=2, lty=2, col=2, lwd=2)
        abline(h=3, lty=2, col=2, lwd=2)
      } 

    }
    if(what=="ve") hist(fit[[chr]]$ves,xlab="Posterior", main="Residual Variance")
    if(what=="vb") hist(fit[[chr]]$vbs,xlab="Posterior", main="Marker Variance")
    if(what=="vg") hist(fit[[chr]]$vgs,xlab="Posterior", main="Genetic Variance")
    fit[[chr]]$h2 <- fit[[chr]]$vgs/(fit[[chr]]$vgs+fit[[chr]]$ves)
    if(what=="h2") hist(fit[[chr]]$h2,xlab="Posterior", main="Heritability")  
  }
  if(length(chr)==1 & what=="trace") {
    layout(matrix(1:4,ncol=2))
    plot(fit[[chr]]$ves, ylab="Ve", xlab="Iteration", main="Residual Variance")
    plot(fit[[chr]]$vbs, ylab="Vb", xlab="Iteration", main="Marker Variance")
    plot(fit[[chr]]$vgs, ylab="Vg", xlab="Iteration", main="Genetic Variance")
    fit[[chr]]$h2 <- fit[[chr]]$vgs/(fit[[chr]]$vgs+fit[[chr]]$ves)
    plot(fit[[chr]]$h2, ylab="h2", xlab="Iteration", main="Heritability")  
  }
  
  if(length(chr)>1) {
    if(what=="Ve") {
      x <- sapply(fit[chr], function(x) {mean(x$ves)})
      sd <- sapply(fit[chr], function(x) {sd(x$ves)})
      main <- "Residual Variance"
    }
    if(what=="Vg") {
      x <- sapply(fit[chr], function(x) {mean(x$vgs)})
      sd <- sapply(fit[chr], function(x) {sd(x$vgs)})
      main <- "Genetic Variance"
    }
    if(what=="Vb") {
      x <- sapply(fit[chr], function(x) {mean(x$vbs)})
      sd <- sapply(fit[chr], function(x) {sd(x$vbs)})
      main <- "Marker Variance"
    }
    if(what=="h2") {
      x <- sapply(fit[chr], function(x) {mean(x$vgs/(x$vgs+x$ves))})
      sd <- sapply(fit[chr], function(x) {sd(x$vgs/(x$vgs+x$ves))})
      main <- "Heritability"
    }
    names(x) <- names(sd) <- paste("Chr",chr)
    plotForest(x=x,sd=sd, reorder=FALSE, xlab=what, main=main) 
  }
  
}

plotCvs <- function(fit=NULL, causal=NULL) {
  
  layout(matrix(1:length(fit$p),ncol=1))
  for (i in 1:length(fit$p)) {
    plot(-log10(fit$p[[i]]),ylab="mlogP", xlab="Marker", main="Marker effect", frame.plot=FALSE)  
    if(!is.null(causal)) points(x=causal,y=rep(0,length(causal)),col="red", pch=4, cex=2, lwd=3 )
  }
}


#' Split Vector with Overlapping Segments
#'
#' Splits a vector into segments of a specified length with a specified overlap.
#' The function returns a list where each element contains a segment of the vector.
#'
#' @param vec A numeric or character vector to be split.
#' @param seg.length An integer specifying the length of each segment.
#' @param overlap An integer specifying the number of overlapping elements between consecutive segments.
#'
#' @return A list where each element is a segment of the input vector. The segments can overlap based on the specified overlap.
#'
#' @keywords internal
#' @export
splitWithOverlap <- function(vec, seg.length, overlap) {
  starts = seq(1, length(vec), by=seg.length-overlap)
  ends   = starts + seg.length - 1
  ends[ends > length(vec)] = length(vec)
  lapply(1:length(starts), function(i) vec[starts[i]:ends[i]])
}

#' Adjust Linkage Disequilibrium (LD) Using Map Information
#'
#' Adjusts the linkage disequilibrium (LD) values based on map information, effective population size,
#' and sample size used for map construction.
#'
#' @param LD A matrix representing the linkage disequilibrium (LD) structure.
#' @param map A numeric vector containing the map information.
#' @param neff An integer representing the effective population size. Default is 11600.
#' @param nmap An integer representing the sample size used for map construction. Default is 186.
#' @param threshold A numeric value specifying the threshold for setting LD to zero. Currently unused in the function. Default is 0.001.
#'
#' @return A matrix where each value is the adjusted LD based on the input map, effective population size, and map construction sample size.
#'
#' @keywords internal
#' @export
adjustMapLD <- function(LD = NULL, map=NULL, neff=11600, nmap=186, threshold=0.001) {
  # neff: effective population size
  # nmap: sample size used for map contruction
  # threshold: used for setting LD to zero
  rho <- sapply(map,function(x){map-x})
  rho <- 4*neff*abs(rho)
  shrink <- exp(-rho/(2*nmap))
  return(LD*shrink)
}

neff <- function(seb=NULL,af=NULL,Vy=1) {
  seb2 <- seb**2
  vaf <- 2*af*(1-af)
  neff <- round(median(Vy/(vaf*seb2)))
  return(neff)
}

sortedSets <- function(o = NULL, msize = 500) {
  m <- length(o)
  sets <- vector(length=m,mode="list") 
  for (i in 1:m){
    sets[[i]] <- c((i-msize):(i-1),i:(i+msize))
    sets[[i]] <- sets[[i]][sets[[i]]>0]
    sets[[i]] <- sets[[i]][sets[[i]]<(m+1)]
  }
  
  indices <- 1:m
  keep <- rep(TRUE,m)
  osets <- NULL
  for (i in 1:m) {
    if(keep[o[i]]){
      keep[sets[[ o[i] ]]] <- FALSE
      osets <- c(osets,o[i])
    }
  }
  if(any(!indices%in%unique(unlist(sets[osets])))) stop("Some markers not in sets")
  return(sets[osets])
}



################################################################################
# Note on how to obtain a pre-specified prior mode
################################################################################
#
# Inverse Wishart
#
# mode = S/(df+nt+1)	(mode of prior variance estimates)
# => S = mode*(df+nt+1)	(prior S to given prior mode)
# nt = number of traits
# df = prior degrees of freedom
# S = prior scale parameter 
#
# Inverse Chi-square
#
# mode = (S/(df+2))/df		(mode of prior variance estimates)
# => S = (mode*(df+2))/df	(prior S to given prior mode)
# df = prior degrees of freedom
# S = prior scale parameter 
################################################################################

bmm <- function(y=NULL, X=NULL, W=NULL, GRMlist=NULL,
                vg=NULL, ve=NULL, nug=NULL, nue=NULL,
                vg_prior=NULL, ve_prior=NULL,
                updateG=TRUE, updateE=TRUE,
                nit=500, nburn=0, tol=0.001, verbose=FALSE) {
  if(is.vector(y)) y <- as.matrix(y)
  n <- nrow(y)                            # number of observation
  nt <- ncol(y)                           # number of traits
  tnames <- colnames(y)
  if(is.null(tnames)) tnames <- paste0("T",1:nt)
  e <- matrix(0,nrow=n,ncol=nt)
  mu <- matrix(0,nrow=n,ncol=nt)
  mus <- matrix(0,nrow=n,ncol=nt)
  psum <- matrix(0,nrow=n,ncol=nt)
  

  nset <- length(GRMlist)                   # number of sets
  setnames <- names(GRMlist)
  if(is.null(setnames)) setnames <- paste0("Set",1:nset) 
  g <- gm <- NULL
  for ( i in 1:nt) {                        # genetic values (n*nset)
    g[[i]] <- matrix(0,nrow=n,ncol=nset)         
    gm[[i]] <- matrix(0,nrow=n,ncol=nset)
    rownames(g[[i]]) <- rownames(y) 
    colnames(g[[i]]) <- setnames
    rownames(gm[[i]]) <- rownames(y) 
    colnames(gm[[i]]) <- setnames
  }
  names(gm) <- names(g) <- tnames
  
  vgm <- lapply(1:nset,function(x){matrix(0,nt,nt)})
  names(vgm) <- setnames
  
  vem <- matrix(0,nt,nt)
  
  if(is.null(vg_prior)) vg_prior <- lapply(1:nset,function(x){diag(0.01,nt)})
  if(is.null(ve_prior)) ve_prior <- diag(0.01,nt)
  if(is.null(nug)) nug <- rep(4,nset)
  if(is.null(nue)) nue <- 4
  
  if(!is.null(ve)) ve <- as.matrix(ve)
  if(is.null(ve)) ve <- ve_prior
  if(is.null(vg)) vg <- vg_prior
  
  ves <- matrix(0,nrow=nit,ncol=(nt*(nt+1))/2)
  
  idsG <- rownames(GRMlist[[1]])
  idsT <- rownames(y)
  idsV <- idsG[!idsG%in%idsT]
  train <- match(idsT,idsG)

  if(any(!idsT%in%idsG)) stop("Some ids in y not in GRM")
  
  # eigen value decomposition of the GRM5 matrices
  U <- D <- vector(length=nset,mode="list")
  vgs <- vector(length=nset,mode="list")
  for ( i in 1:nset ){
    eg <- eigen(GRMlist[[i]][train,train])                    
    ev <- eg$values
    U[[i]] <- eg$vectors[,ev>tol]            # keep eigen vector if ev>tol
    D[[i]] <- eg$values[ev>tol]              # keep eigen value if ev>tol
    vgs[[i]] <- matrix(NA,nrow=nit,ncol=(nt*(nt+1))/2)
  }
  
  for ( i in 1:nit) {                     
    for ( j in 1:nset ) {                   
      rhs <- NULL
      for (t in 1:nt) {
        yadj <- y[,t]-mu[,t]-rowSums(as.matrix(g[[t]][,-j]))
        rhst <- crossprod( U[[j]],yadj ) 
        rhs <- cbind(rhs,rhst)
      }
      Vi <- solve(vg[[j]])
      a <- matrix(0,nrow=nrow(rhs),ncol=nt)
      for (k in 1:nrow(rhs)) {
        iC <- solve( diag(1,nt) + (ve%*%Vi)/D[[j]][k] )      
        ahat <- iC%*%rhs[k,]
        a[k,] <- MASS::mvrnorm(n=1,mu=ahat,Sigma=iC%*%ve) 
      }
      for (t in 1:nt) { 
        g[[t]][,j] <- U[[j]]%*%a[,t] 
      }
      if(i>nburn){
        for (t in 1:nt) { 
          gm[[t]][,j] <- gm[[t]][,j] + g[[t]][,j]  
        } 
      }
      
      # Sample variance components
      df <- nrow(a) + nug[j]
      
      # inverse chisquare
      if (nt==1) {
        scg <- sum((1/D[[j]])*a**2) + (vg_prior[[j]]*(nug[j]+2))/nug[j]	# => S = (mode*(df+2))/df         
        #scg <- sum((1/D[[j]])*a**2) + (vg[j]*(nug[j]+2))/nug[j]	# => S = (mode*(df+2))/df         
        vg[j] <- scg/rchisq(n=1, df=df, ncp=0)    
        vgs[[j]][i,] <- vg[[j]]
      }
      
      # inverse wishart
      if (nt>1) {
        S <- t(a*(1/D[[j]]))%*%a + vg_prior[[j]]*(nug[j]+nt+1)		# => S = mode*(df+nt+1)
        #S <- t(a*(1/D[[j]]))%*%a + vg[[j]]*(nug[j]+nt+1)		# => S = mode*(df+nt+1)
        vg[[j]] <- MCMCpack::riwish(df,S)
        vgs[[j]][i,] <- vg[[j]][as.vector(lower.tri(vg[[j]], diag=TRUE))]
      }
    }
    
    # Sample mu
    if(is.null(X)) {
      for (t in 1:nt) {
        yadj <- y[,t]-rowSums(as.matrix(g[[t]])) 
        rhs <- sum(yadj)
        lhs <- (n+ve[t,t]/100000)
        mu[,t] <- rnorm(1,mean=rhs/lhs,sd=1/sqrt(lhs))
        if(nit>nburn) mus[,t] <- mus[,t] + mu[,t]
      }  
    }
    if(!is.null(X)) {
      for (t in 1:nt) {
        yadj <- y[,t]-rowSums(as.matrix(g[[t]])) 
        mu[,t] <- X%*%solve(t(X)%*%X)%*%t(X)%*%yadj
        if(nit>nburn) mus[,t] <- mus[,t] + mu[,t]
      }  
    }
    
    for (t in 1:nt) {
      e[,t] <- y[,t]-mu[,t]-rowSums(g[[t]])
      p <-  (1/sqrt(2*pi))*exp(-0.5*(e[,t]**2)) 
      if(i>nburn) psum[,t] <- psum[,t] + 1/p
    }
    Se <- t(e)%*%e + ve_prior*(mean(nue)+nt+1)
    dfe <- nrow(y) + mean(nue)+nt+1
    ve <- MCMCpack::riwish(dfe,Se)
    ves[i,] <- ve[as.vector(lower.tri(ve, diag=TRUE))]
    
    if(nit>nburn) {
      for ( j in 1:nset ) {
        vgm[[j]] <- vgm[[j]] + vg[[j]]  
      }                   
      vem <- vem + ve  
    }
    if(verbose) message(paste("Iteration:",i))
    
  }
  
  for ( j in 1:nset ) {                   
    vgm[[j]] <- vgm[[j]]/(nit-nburn)
    colnames(vgm[[j]]) <- rownames(vgm[[j]]) <- tnames
    for (t in 1:nt) { 
      if(nit>nburn) gm[[t]][,j] <- gm[[t]][,j]/(nit-nburn) 
    } 
  }
  mus <- mus/(nit-nburn)
  vem <- vem/(nit-nburn)  
  colnames(vem) <- rownames(vem) <- tnames
  
  # summary of model fit
  logCPO <- NULL
  for (t in 1:nt) {
    logCPO[t] <- sum(log((nit-nburn)*(1/psum[,t])))
  }
  fit <- list(vgs=vgs,ves=ves,vgm=vgm,vem=vem,logCPO=logCPO, gm=gm, g=g,e=e, nit=nit, nburn=nburn)
  vgm <- Reduce(`+`, fit$vgm)
  vem <- fit$vem
  vpm <- vgm + vem
  fit$vpm <- vpm
  fit$h2 <- diag(vgm/vpm)
  fit$h2set <- lapply(fit$vgm,function(x) {diag(x/vpm)})
  fit$rp <- cov2cor(vpm)
  fit$re <- cov2cor(vem)
  fit$rg <- cov2cor(vgm)
  fit$rgset <- lapply(fit$vgm,function(x) {cov2cor(x)})
  
  gset <- NULL
  for ( i in 1:nset) {                        
    gset[[i]] <- matrix(0,nrow=length(idsG),ncol=nt)         
    rownames(gset[[i]]) <- idsG 
    colnames(gset[[i]]) <- tnames
  }
  names(gset) <- setnames
  
  for ( set in 1:nset ){
    for (t1 in 1:nt) {
      isDups <- duplicated(idsT)
      ghat <- gm[[t1]][!isDups,set]
      names(ghat) <- idsT[!isDups]
      gset[[set]][names(ghat),t1] <- ghat
      #gset[[set]][idsT,t1] <- gm[[t1]][,set]
      if(length(idsV)>0) {
        for (t2 in 1:nt) {
          gset[[set]][idsV,t1] <- gset[[set]][idsV,t1] + (GRMlist[[set]][idsV,idsT]*vgm[t1,t2])%*%gm[[t2]][,set]
        }
      }
    }
  }
  fit$gset <- gset
  fit$g <- Reduce(`+`, fit$gset)
  fit$mu <- colMeans(mus) 
  return(fit) # return posterior samples of sigma 
}

Try the qgg package in your browser

Any scripts or data that you put into this service are public.

qgg documentation built on Sept. 8, 2023, 5:52 p.m.