R/HDGENE.pairDetection.R

`HDGENE.pairDetection` <- function(CV = NULL,GD = NULL,GDD = NULL,GM = NULL,Y = NULL,p.threshold = NULL,seqQTN = NULL,position.pruning=NULL,LD = 0.7,model = NULL){
  # Objects: doing G by G using GLM, full screen
  # inputs:
  #		CV: n by p , p fixed effects
  #		GD: m by n , big matrix, genotype matrix 1
  #   GDD: m by n, big matrix, genotype matrix 2
  #		Y: n by 2 matrix
  #   seqQTN: significant QTNs from Additive model
  #   GM: m by 3 matrix
  #   p.threshold: threshold for calculating interaction
  # Outputs:
  #		GWAS: 5 columns, (SNP1, SNP2, Pvalue_1, Pvalue_2, Pvalue_1*2)
  # Author: Yao Zhou
  # Last Update: July 19, 2017

  ## prepare input
    index.y = which(!is.na(Y[,2]))
    NeedDo = FALSE
    myGWAS = NULL
    GDA = NULL
    CV.BIC = CV
    CV.BIC = CV.BIC[index.y,]
    if(!is.null(position.pruning)){
      GD1 = deepcopy(GD, rows = position.pruning)
      GDD1 = deepcopy(GDD,rows = position.pruning)
      GD = GD1
      GDD = GDD1
      GM = GM[position.pruning,]
    }

    GD1 = deepcopy(GD,cols = index.y)
    GDD1 = deepcopy(GDD,cols = index.y)
    n = length(index.y)
    m = nrow(GM)

    if(is.null(p.threshold)) p.threshold = 0.01/nrow(GM)
    if(is.null(CV.BIC)){
      k = 3
    }else{
      if(is.null(ncol(CV.BIC))|length(seqQTN)==1){
        k = 4
      }else {
        k = ncol(CV.BIC) + 3 ## degree of freedom
      }
    }
    Eps = FALSE
    index.AA = NULL
    indicator.s = 0
    iter = 0
    CV1 = CV
    t1 = proc.time()
    print("Testing interaction....")
    if(m > 10e4){
      aa_cor = HDGENE.cor2.new(Y = Y[index.y,2], GD = GD1, w = CV.BIC, ms = 100000,model = model)
      aa_order = order(aa_cor,decreasing = T,na.last = T)
      aa_length = sum(!is.na(aa_cor))
      aa_log = n/log(n)
      aa_lim = min(aa_log,aa_length)
      index.aa = aa_order[1:aa_lim]
    }else{
      aa_cor = HDGENE.cor2(Y = Y[index.y,2], GD = GD1, w = CV.BIC, ms = 100000,model = model)
      p = Blink.rtop(r=aa_cor,df = n - k)
      index.aa = which(p < p.threshold)
    }
    t2 = proc.time()
    t3 = t2 - t1
    cat("Interaction testing finished in",round(as.numeric(t3)[3],2), "....","\n")

    ## transform correlation to pvalue
    cat(length(index.aa),"interactions passed the p value threshold...","\n")
    if(length(index.aa)>0){
      cor_order = index.aa[order(aa_cor[index.aa],decreasing = T,na.last = T)]
      SNP2 = cor_order%%m
      SNP2[SNP2==0]=m
      SNP1 = trunc(cor_order/m)
      SNP1[SNP2!=m] = SNP1[SNP2!=m] +1 ## max correlation is at aa_cor[SNP2[1],SNP1[1]]
      index.snp1 = SNP1
      index.snp2 = SNP2
      GD2 = GDD[index.snp2,]
      if(length(index.snp2)==1){
        GD2 = as.matrix(GD2)
        GD1 = as.matrix(GD[index.snp1,])
      }else{
        GD2 = t(as.matrix(GD2))
        GD1 = t(as.matrix(GD[index.snp1,]))
      }
      GDI = GD1 * GD2
      cat(ncol(GDI),"QTNs for LDRemove","\n")
      if(ncol(GDI)>1){
        seqQTN.index = Blink.LDRemove(GDneo=t(GDI),Porder = seq(1:ncol(GDI)),LD = LD,orientation = "row",LD.num = 100)
        cat(length(seqQTN.index),"QTNs for BIC selection","\n")
        GDI = GDI[,seqQTN.index]
        SNP1 = SNP1[seqQTN.index]
        SNP2 = SNP2[seqQTN.index]
        if(length(seqQTN.index)>1){
          myBIC = Blink.BICselection(CV = CV.BIC, Psort=seq(1:length(seqQTN.index)),GD=GDI[index.y,],Y=Y[index.y,],orientation="col",BIC.method="naive")
          seqQTN.index = myBIC$seqQTN
          cat(length(seqQTN.index),"QTNs detected with interactions!","\n")
          GDI = GDI[,seqQTN.index]
          SNP1 = SNP1[seqQTN.index]
          SNP2 = SNP2[seqQTN.index]
        }
      }
      GDAI = cbind(GDA,GDI)
      GMAI = matrix(NA,ncol(GDAI),3)
      GMAI[,1]=paste("rs",seq(1:ncol(GDAI)),sep="")
      print(paste(ncol(GDAI),"markers are fitted in the EM-BLASSO model ..."))
      if(ncol(GDAI)>1){
        myEM = EM_LASSO(CV = CV1,GD = GDAI[index.y,],y = Y[index.y,2],GM = GMAI)
        GWAS = myEM$GWAS
        index.s = match(myEM$GWAS[,1],GMAI[,1])
      }else{
        index.s = c(1)
        GWAS = data.frame(rs=c("rs1"),chr=c(1),pos=c(1),u=c(1),LOD=c(4),Variance=c(10))
      }

      SNPa = index.s[which(index.s<=length(seqQTN))]
      SNPaa = setdiff(index.s,SNPa)
      if(is.null(ncol(GDA))){
        NGDA = 0
      }else{
        NGDA = ncol(GDA)
      }
      if(length(SNPaa)>0){
        SNPaa1 = SNP1[SNPaa-NGDA]
        SNPaa2 = SNP2[SNPaa-NGDA]
      }
      myGWAS = matrix(NA,length(index.s),5)
      myGWAS = as.data.frame(myGWAS)
      if(length(SNPa)>0){
        SNPa.c = seqQTN[SNPa]
        myGWAS[1:length(SNPa),1] = as.character(GM[SNPa.c,1])
      }
      if(length(SNPaa)>0){
        myGWAS[(length(SNPa)+1):nrow(myGWAS),1] = as.character(GM[SNPaa1,1])
        myGWAS[(length(SNPa)+1):nrow(myGWAS),2] = as.character(GM[SNPaa2,1])
      }

      myGWAS[,3] = as.numeric(GWAS[,4])
      myGWAS[,4] = as.numeric(GWAS[,5])
      myGWAS[,5] = as.numeric(GWAS[,6])
      colnames(myGWAS) = c("SNP1","SNP2","u","LOD","Rsquare%")
    }else{
      print("No epistatic effects detected!")
      myGWAS = NULL
    }

  ##
  return(GWAS = myGWAS)

}
YaoZhou89/HDGENE documentation built on May 14, 2019, 7:42 a.m.