application/meta_pd_1.R

shotgun.abfModel<-function(betas,ses,prior.sigma=0.3,prior.cor="indep",prior.rho=NA,cryptic.cor=NA,log=FALSE,log10=FALSE,na.rm=FALSE,tolerance=1e-1000,n.iter=50,B=5){
  #If betas and ses are data frames, this checks if they can be turned into numeric vectors. Stops the calculation if this is not the case.

  library(MASS)
  library(dplyr)

  if(!class(betas) %in% c("data.frame","numeric")){
    stop("betas should be a numeric vector.")
  }
  if(!class(ses) %in% c("data.frame","numeric")){
    stop("ses should be a numeric vector.")
  }

  if(class(betas)=="data.frame"){
    if(dim(betas)[1]>1){
      stop("betas should be a vector, not multiple rows from a matrix or a data frame.")
    }
    if(all(apply(betas,2,class)=="numeric")){
      betas<-as.numeric(betas)
    } else {
      stop("betas is not numeric.")
    }
  }
  if(class(ses)=="data.frame"){
    if(dim(ses)[1]>1){
      stop("ses should be a vector, not multiple rows from a matrix or a data frame.")
    }
    if(all(apply(ses,2,class)=="numeric")){
      ses<-as.numeric(ses)
    } else {
      stop("ses is not numeric.")
    }
  }
  if(class(prior.sigma)=="data.frame"){
    if(dim(prior.sigma)[1]>1){
      stop("prior.sigma should be a vector or a single value, not multiple rows from a matrix or a data frame.")
    }
    if(all(apply(prior.sigma,2,class)=="numeric")){
      prior.sigma<-as.numeric(prior.sigma)
    } else {
      stop("prior.sigma is not numeric.")
    }
  }
  if(log && log10){
    stop("can give the approximate Bayes factor in log space or log_10 space, but not both at the same time.")
  }
  if(tolerance>sqrt(.Machine$double.eps)){
    warning(paste0("Your tolerance might be too high. The standard value for the internal functions is ",sqrt(.Machine$double.eps),"."))
  }

  if(length(betas)!=length(ses)){
    stop("betas and ses should be the same length.")
  }
  nstudies<-length(betas)


  ##Check prior.sigma isn't erroneous.
  if(!length(prior.sigma) %in% c(1,nstudies)){
    stop("prior.sigma should either be a single value or a vector whose length is equal to the number of studies.")
  }
  if(any(prior.sigma<=0)){
    stop("all values of prior.sigma must be > 0.")
  }
  if(length(prior.sigma)==1){
    prior.sigma<-rep(prior.sigma,nstudies)
  }

  ##Get the prior correlation matrix.
  if(class(prior.cor)=="character"){
    if(prior.cor=="correlated"){
      if(is.na(prior.rho)){
        stop("prior.rho must be set or prior.cor must be changed to something other than \"correlated.\"")
      }
      if(!is.numeric(prior.rho)){
        stop("prior.rho must be numeric.")
      }
      if(!length(prior.rho) %in% c(1,choose(nstudies,2))){
        stop("prior.rho should be either a flat value or the full upper triangle of the desired correlation matrix.")
      }
      if(length(prior.cor)==1){
        prior.cor.mat<-matrix(prior.rho,nrow=nstudies,ncol=nstudies)
        diag(prior.cor.mat)<-1
      } else {
        prior.cor.mat<-diag(nstudies)
        prior.cor.mat[upper.tri(prior.cor.mat,diag=FALSE)]<-prior.cor
        prior.cor.mat[lower.tri(prior.cor.mat)]<-t(prior.cor.mat)[lower.tri(prior.cor.mat)]
      }
    } else if(prior.cor=="indep"){
      prior.cor.mat<-diag(nstudies)
    } else if(prior.cor=="fixed"){
      prior.cor.mat<-matrix(1,nrow=nstudies,ncol=nstudies)
    }
  } else if(class(prior.cor)!="matrix"){
    stop("prior.cor should be a matrix, or one of the following values: \"indep\", \"fixed\", or \"correlated\"")
  } else {
    if(dim(prior.cor)[1]!=dim(prior.cor)[2]){
      stop("prior.cor is not a square matrix.")
    }
    if(dim(prior.cor)[1]!=nstudies){
      stop("the dimensions of prior.cor do not match the number of studies in the meta-analysis.")
    }
    if(!isSymmetric.matrix(prior.cor)){
      stop("prior.cor is not a symmetric matrix.")
    }
    if(any(svd(prior.cor)$d<0)){
      warning("prior.cor is not positive semidefinite. Errors may arise due to this.")
    }
    if(!all(diag(prior.cor) %in% c(0,1))){
      stop("the diagonal of prior.cor should be 1 for all studies with true effects and 0 elsewhere.")
    }
    if(any(diag(prior.cor)==0)){
      if(all(prior.cor[which(diag(prior.cor==0)),]!=0) || all(prior.cor[,which(diag(prior.cor==0))]!=0)){
        if(length(which(diag(prior.cor==0)))==1){
          stop(paste0("Row ",which(diag(prior.cor==0))," and column ",which(diag(prior.cor==0))," of prior.cor should all be 0, or else prior.cor[",which(diag(prior.cor==0)),",",which(diag(prior.cor==0)),"] should be 1."))
        } else {
          stop(paste0("Rows ",paste(which(diag(prior.cor==0)),collapse=",")," and columns ",paste(which(diag(prior.cor==0)),collapse=",")," of prior.cor should all be 0, or else prior.cor[c(",paste(which(diag(prior.cor==0)),collapse=","),"), c(",paste(which(diag(prior.cor==0)),collapse=","),")] should all be 1."))
        }
      }
    }
    prior.cor.mat<-prior.cor
  }

  ##Get the cryptic correlation matrix
  if(all(is.na(cryptic.cor)) && length(cryptic.cor)==1){
    cryptic.cor.mat<-diag(nstudies)
  } else if(class(cryptic.cor)!="matrix"){
    stop("cryptic.cor should be a square matrix.")
  } else if(dim(cryptic.cor)[1]!=dim(cryptic.cor)[2]){
    stop("cryptic.cor should be a square matrix.")
  } else if(dim(cryptic.cor)[1]!=nstudies){
    stop("the dimensions of cryptic.cor do not match the number of studies in the meta-analysis.")
  } else if(!isSymmetric.matrix(cryptic.cor)){
    stop("cryptic.cor is not a symmetric matrix.")
  } else if(any(eigen(cryptic.cor)$values<0)){
    stop("cryptic.cor is not positive definite.")
  } else if(any(diag(cryptic.cor)!=1)){
    stop("the diagonal of cryptic.cor should be 1 uniformly.")
  } else if(length(cryptic.cor)>1 && any(is.na(cryptic.cor))){
    stop("If cryptic.cor is defined, then no NAs are permitted.")
  } else {
    cryptic.cor.mat<-cryptic.cor
  }

  #Calculate V and the null once.
  ind<-intersect(which(!is.na(betas)),which(!is.na(ses)))
  nind<-union(which(is.na(betas)),which(is.na(ses)))
  n<-length(ind)
  if(n<=1){
    return(NA)
  }

  b<-betas[ind]
  se<-ses[ind]
  ccm<-cryptic.cor.mat[ind,ind]
  prior.V.gen<-prior.cor.mat[ind,ind]*matrix(prior.sigma[ind],nrow=n,ncol=n)*t(matrix(prior.sigma[ind],nrow=n,ncol=n))

  V<-diag(se^2)
  for(i in 1:(nrow(V)-1)){
    for(j in (i+1):ncol(V)){
      V[i,j]<-sqrt(V[i,i])*sqrt(V[j,j])*ccm[i,j]
      V[j,i]<-V[i,j]
    }
  }

  null.calc<-as.numeric(determinant(V,logarithm=TRUE)$modulus)

  getModel<-function(ind){
    x <- rep(0,n)
    x[ind] <- 1
    return(x)
  }

  findNeighborPlus<-function(Curr,Non){
    NeighborP <- matrix(nrow=length(Non),ncol=n)
    for(i in 1:length(Non)){
      plu <- Non[i]
      indP <- c(Curr,plu)
      NeighborP[i,] <- getModel(indP)
    }
    return(NeighborP)
  }

  findNeighborMinus<-function(Curr){
    NeighborM <- matrix(nrow=length(Curr),ncol=n)
    for(i in 1:length(Curr)){
      min <- Curr[i]
      indP <- setdiff(Curr,min)
      NeighborM[i,] <- getModel(indP)
    }
    return(NeighborM)
  }

  findNeighborO<-function(Curr,Non){
    NeighborO <- matrix(nrow=length(Curr)*length(Non),ncol=n)
    for(i in 1:length(Curr)){
      used <- Curr[i]
      indTep <- setdiff(Curr,used)
      for(j in 1:length(Non)){
        new <- Non[j]
        indO <- c(indTep,new)
        NeighborO[(i-1)*length(Non)+j,] <- getModel(indO)
      }
    }
    return(NeighborO)
  }

  findNeighbor<-function(x){
    Curr <- which(x==1)
    Non <- setdiff(seq(1,n),Curr)
    if(length(Curr)==0){
      neighbors <- list(vector(),findNeighborPlus(Curr,Non),vector())
    }else if(length(Non)==0){
      neighbors <- list(vector(),vector(),findNeighborMinus(Curr))
    }else{
      neighbors <- list(findNeighborO(Curr,Non),findNeighborPlus(Curr,Non),findNeighborMinus(Curr))
    }
    return(neighbors)
  }

  getABF<-function(x){
    prior.V<-matrix(x,nrow=n,ncol=n)*t(matrix(x,nrow=n,ncol=n))*prior.V.gen
    A<-prior.V+V
    invA<-ginv(A,tol=tolerance)
    quad.form<-t(b) %*% (ginv(V,tol=tolerance) - invA) %*% b
    lbf<-(-0.5*(as.numeric(determinant(A,logarithm=TRUE)$modulus)-null.calc))
    ABF<-(lbf + 0.5 * quad.form)
    if(ABF<=0){
      ABF<-10^(-20)
    }
    return(ABF)
  }

  modelToABF<-function(models){
    abf<-apply(models,1,getABF)
    return(abf)
  }

  modelToString<-function(models){
    return(apply(models,1,paste,sep="",collapse=""))
  }

  k <- sample(1:n,1)
  xind <- sample(1:n,k)
  x <- getModel(xind)
  df1 <- data.frame(model=paste(x,sep="",collapse=""),abf=getABF(x))
  df1$model <- as.character(df1$model)
  dflist <- list(df1)

  for(i in 1:n.iter){
    neighbors <- findNeighbor(x)
    neighborO <- neighbors[[1]]
    if(length(neighborO)==0){
      dfO <- data.frame()
      IO <- NA
    }else if(nrow(neighborO)==1){
      dfO <- data.frame(model=modelToString(neighborO),abf=modelToABF(neighborO))
      dfO$model <- as.character(dfO$model)
      IO <- 1
    }else{
      dfO <- data.frame(model=modelToString(neighborO),abf=modelToABF(neighborO))
      dfO$model <- as.character(dfO$model)
      IO <- sample(nrow(dfO),1,prob=dfO$abf/sum(dfO$abf),replace=TRUE)
    }

    neighborPlus <- neighbors[[2]]
    if(length(neighborPlus)==0){
      dfPlus <- data.frame()
      IPlus <- NA
    }else if(nrow(neighborPlus)==1){
      dfPlus <- data.frame(model=modelToString(neighborPlus),abf=modelToABF(neighborPlus))
      dfPlus$model <- as.character(dfPlus$model)
      IPlus <- 1
    }else{
      dfPlus <- data.frame(model=modelToString(neighborPlus),abf=modelToABF(neighborPlus))
      dfPlus$model <- as.character(dfPlus$model)
      IPlus <- sample(nrow(dfPlus),1,prob=dfPlus$abf/sum(dfPlus$abf),replace=TRUE)
    }

    neighborMinus <- neighbors[[3]]
    if(length(neighborMinus)==0){
      dfMinus <- data.frame()
      IMinus <- NA
    }else if(nrow(neighborMinus)==1){
      dfMinus <- data.frame(model=modelToString(neighborMinus),abf=modelToABF(neighborMinus))
      dfMinus$model <- as.character(dfMinus$model)
      IMinus <- 1
    }else{
      dfMinus <- data.frame(model=modelToString(neighborMinus),abf=modelToABF(neighborMinus))
      dfMinus$model <- as.character(dfMinus$model)
      IMinus <- sample(nrow(dfMinus),1,prob=dfMinus$abf/sum(dfMinus$abf),replace=TRUE)
    }

    df1 <- rbind(df1,dfO,dfMinus,dfPlus)
    df1$model <- as.character(df1$model)
    df1 <- df1[!duplicated(df1),]
    df1 <- arrange(df1,abf)
    if(nrow(df1)>B){
      df1<-df1[-c(1:(nrow(df1)-B)),]
    }
    dflist <- c(dflist,list(df1))

    mod3 <- c(dfO[IO,1],dfPlus[IPlus,1],dfMinus[IMinus,1])
    mod3 <- na.omit(mod3)
    abf3 <- c(dfO[IO,2],dfPlus[IPlus,2],dfMinus[IMinus,2])
    abf3 <- na.omit(abf3)
    if(length(mod3)==0){
      break
    }else if(length(mod3)==1){
      xstr <- mod3[1]
      x <- as.numeric(strsplit(xstr,split="")[[1]])
    }else{
      k <- sample(length(mod3),1,prob=abf3/sum(abf3),replace=TRUE)
      xstr <- mod3[k]
      x <- as.numeric(strsplit(xstr,split="")[[1]])
    }
  }

  modelFinall <- df1[nrow(df1),1]
  ABFfinall<-df1[nrow(df1),2]

  if(log10){
    ABFfinall<-ABFfinall/log(10)
  }
  if(!log && !log10){
    ABFfinall<-exp(ABFfinall)
  }

  return(list(ABF=ABFfinall,model=modelFinall))
}

dfuse <- read.csv("dfuse_pd_1.csv",stringsAsFactors=FALSE)

nstud <- 29
vbetas <- paste("beta",seq(1,nstud),sep="")
vses <- paste("se",seq(1,nstud),sep="")

abf <- function(i){
  SNP <- dfuse[i,"SNP"]
  nstudies <- dfuse[i,"counts"]
  betas <- dfuse[i,vbetas]
  studiesUsed <- paste(1-as.numeric(is.na(betas)),collapse="")
  ses <- dfuse[i,vses]
  abfL <- shotgun.abfModel(betas,ses,prior.sigma=0.3,prior.cor="correlated",prior.rho=0.3)
  abf <- abfL$ABF
  sModel <- abfL$model
  print(i)
  return(c(SNP,abf,sModel,nstudies,studiesUsed))
}

timestart <- Sys.time()
re <- sapply(seq(1,nrow(dfuse)),abf)
dfre <- data.frame(SNP=re[1,],ABF=re[2,],model=re[3,],n_studies=re[4,],studies_involved=re[5,],stringsAsFactors=FALSE)
dfre$ABF <- as.character(dfre$ABF)
dfre$ABF <- as.numeric(dfre$ABF)
dfre$n_studies <- as.character(dfre$n_studies)
dfre$n_studies <- as.numeric(dfre$n_studies)
dfre <- arrange(dfre,desc(ABF),desc(n_studies))
timeend <- Sys.time()
runningtime<-timeend-timestart
print(runningtime)
head(dfre)
nrow(dfre)
write.csv(dfre,"dfre_pd_1.csv")
sjl-sjtu/GWAS_meta documentation built on May 7, 2024, 5:04 p.m.