R/GAPIT.Main.R

Defines functions `GAPIT.Main`

#'
#' GAPIT.Main
#' 
#' @description 
#' GAPIT.Main
#'
#' @param Y data.frame of phenotype data, samples in rows, traits in columns
#' @param G data.frame of genotypic data, HAPMAP format
#' @param GD data.frame of genotypic data
#' @param GM genetic map for GD
#' @param KI param
#' @param Z param
#' @param CV param
#' @param CV.Extragenetic param
#' @param SNP.P3D param
#' @param GP param
#' @param GK param
#' @param group.from param
#' @param group.to param
#' @param group.by param
#' @param kinship.cluster param
#' @param kinship.group param
#'
#' @param kinship.algorithm param
#' @param DPP param
#' @param ngrid param
#' @param llin param
#' @param ulim param
#' @param esp param
#' @param GAPIT3.output param
#' @param file.path param
#' @param file.from param
#' @param file.to param
#' @param file.total param
#' @param file.fragment param
#' @param file.G param
#' @param file.Ext.G param
#' @param file.GD param
#' @param file.GM param
#' @param file.Ext.GD param
#' @param file.Ext.GM param
#' @param SNP.MAF param
#' @param FDR.Rate param
#' @param SNP.FDR param
#' @param SNP.effect param
#' @param SNP.impute param
#' @param PCA.total param
#' @param GAPIT.Version param
#' @param name.of.trait param
#' @param GT param
#' @param SNP.fraction param
#' @param seed param
#' @param BINS param
#' @param SNP.test param
#' @param SNP.robust param
#' @param LD.chromosome param
#' @param LD.location param
#' @param LD.range param
#' @param model param
#' @param bin.from param
#' @param bin.to param
#' @param bin.by param
#' @param inclosure.from param
#' @param inclosure.to param
#' @param inclosure.by param
#' @param SNP.permutation param
#' @param SNP.CV param
#' @param NJtree.group param
#' @param NJtree.type param
#' @param plot.bin param
#' @param genoFormat param
#' @param hasGenotype param
#' @param byFile param
#' @param fullGD param
#' @param PC param
#' @param GI param
#' @param Timmer param
#' @param Memory param
#' @param sangwich.top param
#' @param sangwich.bottom param
#' @param QC param
#' @param GTindex param
#' @param LD param
#' @param file.output param
#' @param cutOff param
#' @param Model.selection param
#' @param Create.indicator param
#' @param Major.allele.zero param
#' @param QTN.position param
#' @param SUPER_GD param
#' @param SUPER_GS param
#' @param plot.style param
#' @param CG param
#' @param chor_taxa param
#'
#' @return 
#' A list
#'
#' @author Zhiwu Zhang and Jiabo Wang
#'
#' @export
`GAPIT.Main` <-
function(Y,
         G=NULL,
         GD=NULL,
         allGD=NULL,
         allCV=NULL,
         GM=NULL,
         KI=NULL,
         Z=NULL,
         CV=NULL,
         CV.Extragenetic=NULL,
         SNP.P3D=TRUE,
         GP=NULL,
         GK=NULL,
         group.from=1000000,
         group.to=1,
         group.by=10,
         kinship.cluster="average",
         kinship.group='Mean',
         kinship.algorithm=NULL,
         DPP=50000,
         ngrid = 100, 
         llin = -10, 
         ulim = 10, 
         esp = 1e-10,
         GAPIT3.output=TRUE,
         file.path=NULL,
         file.from=NULL, 
         file.to=NULL, 
         file.total=NULL, 
         file.fragment = 512, 
         file.G=NULL, 
         file.Ext.G=NULL,
         file.GD=NULL, 
         file.GM=NULL, 
         file.Ext.GD=NULL,
         file.Ext.GM=NULL,
         SNP.MAF=0,
         FDR.Rate=1,
         SNP.FDR=1,
         SNP.effect="Add",
         SNP.impute="Middle",
         PCA.total=0,  
         GAPIT.Version=GAPIT.Version,
         name.of.trait, 
         GT = NULL, 
         SNP.fraction = 1, 
         seed = 123, 
         BINS = 20,
         SNP.test=TRUE,
         SNP.robust="FaST",
         LD.chromosome=NULL,
         LD.location=NULL,
         LD.range=NULL,
         model=model,
         bin.from=10000,
         bin.to=5000000,
         bin.by=1000,
         inclosure.from=10,
         inclosure.to=1000,
         inclosure.by=10,
         SNP.permutation=FALSE,
         SNP.CV=NULL,
         NJtree.group=NJtree.group,
         NJtree.type=NJtree.type,
         plot.bin=plot.bin,
         genoFormat=NULL,
         hasGenotype=NULL,
         byFile=NULL,
         fullGD=NULL,
         PC=NULL,
         GI=NULL, 
         Timmer = NULL, 
         Memory = NULL,
         sangwich.top=NULL,
         sangwich.bottom=NULL,
         QC=TRUE,
         QTN.gs=NULL,
         GTindex=NULL,
         LD=0.05,
         file.output=TRUE,
         cutOff=0.05, 
         Model.selection = FALSE, 
         Create.indicator = FALSE,
				 # QTN=NULL, 
				 # QTN.round=1,
				 # QTN.limit=0, 
				 # QTN.update=TRUE, 
				 # QTN.method="Penalty", 
				 Major.allele.zero = FALSE,
         QTN.position=NULL,
				 SUPER_GD=NULL,
				 SUPER_GS=SUPER_GS,
				 plot.style="Beach",
				 CG=CG,
				 chor_taxa=chor_taxa){
#Object: To perform GWAS and GPS (Genomic Prediction or Selection)
#Output: GWAS table (text file), QQ plot (PDF), Manhattan plot (PDF), genomic prediction (text file), and
#        genetic and residual variance components
#Authors: Zhiwu Zhang
# Last update: Oct 23, 2015  by Jiabo Wang add REML threshold and SUPER GD KI
##############################################################################################

#Initial p3d and h2.opt temporaryly
  h2.opt=NULL
  p3d=list(
    ps=NULL,
    REMLs=NULL,
    stats=NULL,
    effect.est=NULL,
    rsquare_base=NULL,
    rsquare=NULL,
    dfs=NULL,
    df=NULL,
    tvalue=NULL,
    stderr=NULL,
    maf=NULL,
    nobs=NULL,
    Timmer=NULL,
    Memory=NULL,
    vgs=NULL,
    ves=NULL,
    BLUP=NULL,
    BLUP_Plus_Mean=NULL,
    PEV=NULL,
    BLUE=NULL,
    logLM=NULL,
    effect.snp=NULL,
    effect.cv=NULL
  )
  
  
  if (SUPER_GS){
    Compression=NULL
    kinship.optimum=NULL
    kinship=NULL
    PC=PC
    REMLs=NULL
    GWAS=NULL
    QTN=NULL
    Timmer=GAPIT.Timmer(Infor="GAPIT.SUPER.GS")
    Memory=GAPIT.Memory(Infor="GAPIT.SUPER.GS")
    #print(model)
    SUPER_GS_GAPIT = GAPIT.SUPER.GS(Y=Y,
                                    GD=GD,
                                    allGD=allGD,
                                    GM=GM,
                                    KI=KI,
                                    Z=Z,
                                    CV=CV,
                                    allCV=allCV,
                                    GK=GK,
                                    kinship.algorithm=kinship.algorithm,
                                    bin.from=bin.from,
                                    bin.to=bin.to,
                                    bin.by=bin.by,
                                    inclosure.from=inclosure.from,
                                    inclosure.to=inclosure.to,
                                    inclosure.by=inclosure.by,
                                    group.from=group.from,
                                    group.to=group.to,
                                    group.by=group.by,
                                    kinship.cluster=kinship.cluster,
                                    kinship.group=kinship.group,
                                    PCA.total=PCA.total,
                                    GT=GT,
                                    PC=PC,
                                    GI=GI,
                                    Timmer = Timmer, 
                                    Memory = Memory,
                                    model=model,
                                    sangwich.top=sangwich.top,
                                    sangwich.bottom=sangwich.bottom,
                                    QC=QC,
                                    QTN.gs=QTN.gs,
                                    # GTindex=GTindex,
                                    LD=LD,
                                    file.output=FALSE,
                                    GAPIT3.output=GAPIT3.output,
                                    cutOff=cutOff,
                                    CV.Extragenetic=CV.Extragenetic
                        )
# Compression=as.matrix(SUPER_GS_GAPIT$Compression)
# opt=
	  print("SUPER.GS function Done!!")	
	  return (list(Compression=SUPER_GS_GAPIT$Compression,
	               kinship.optimum=SUPER_GS_GAPIT$SUPER_kinship,
	               kinship=SUPER_GS_GAPIT$kinship, 
	               PC=SUPER_GS_GAPIT$PC,
	               GWAS=GWAS, 
                 GPS=SUPER_GS_GAPIT$GPS,
	               Pred=SUPER_GS_GAPIT$Pred,
	               Timmer=Timmer,
	               Memory=Memory,
	               h2=SUPER_GS_GAPIT$h2,
	               SUPER_GD=SUPER_GS_GAPIT$SUPER_GD,
	               GWAS=NULL,
	               QTN=NULL)
	          )
					
  }else{
  #print("@@@@@@@")
  #print(group.from)

#Handler of SNP.test=F
#Iniciate with two by seven NA matrix
#The seventh is for p values of SNP
    DTS=rbind(rep(NA,7),rep(NA,7) )
  
  
#End imediatly in one of these situtiona
    shortcut=FALSE
    LL.save=1e10
#In case of null Y and null GP, sent back genotype only  
    thisY=Y[,2]
    thisY=thisY[!is.na(thisY)]
    if(length(thisY) <3){
      shortcut=TRUE
    }else{
      if(stats::var(thisY) ==0) shortcut=TRUE
    }
        
    if(shortcut){
      print(paste("Y is empty. No GWAS/GS performed for ",name.of.trait,sep=""))
      return (list(compression=NULL,
                   kinship.optimum=NULL, 
                   kinship=KI,
                   PC=PC,
                   GWAS=NULL, 
                   GPS=NULL,
                   Pred=NULL, 
                   REMLs=NULL,
                   Timmer=Timmer,
                   Memory=Memory,
                   h2=NULL))
    }

#QC
    print("------------Examining data (QC)------------------------------------------")
    if(is.null(Y)) stop ("GAPIT says: Phenotypes must exist.")
    if(is.null(KI)&missing(GD) & kinship.algorithm!="SUPER") stop ("GAPIT says: Kinship is required. As genotype is not provided, kinship can not be created.")

#When GT and GD are missing, force to have fake ones (creating them from Y),GI is not required in this case
    if(is.null(GD) & is.null(GT)) {
	    GT=as.matrix(Y[,1])
	    GD=matrix(1,nrow(Y),1)	
      GI=as.data.frame(matrix(0,1,3) )
      colnames(GI)=c("SNP","Chromosome","Position")
    }

    if(is.null(GT)) {
      GT=as.character(CV[,1])
    }
#print("@@@@@@@@")
#print(GD)
#merge CV with PC: Put CV infront of PC
    # if(PCA.total>0&!is.null(CV))CV=GAPIT.CVMergePC(CV,PC)
    # if(PCA.total>0&is.null(CV))CV=PC
    #for GS merge CV with GD name
    # if (is.null(CV)){
    #   my_allCV=CV
    # }else{
    #   taxa_GD=rownames(GD)
      my_allCV=allCV
      # my_allCV=my_allCV[my_allCV[,1]%in%taxa_GD,]
    #print(dim(my_allCV))
    # }

    #Handler of CV.Extragenetic
    # if(is.null(CV) & !is.null(CV.Extragenetic)){
    #   stop ("GAPIT says: CV.Extragenetic is more than avaiables.")
    # }

    if(!is.null(CV)& !is.null(CV.Extragenetic)){  
      if(CV.Extragenetic>(ncol(CV)-1)){
        stop ("GAPIT says: CV.Extragenetic is more than avaiables.")
      }
    }

    #Create Z as identity matrix from Y if it is not provided
    if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & is.null(Z)){
      taxa=as.character(CV[,1]) #this part will make GS without CV not present all prediction
      Z=as.data.frame(diag(1,nrow(CV)))
#taxa=as.character(KI[,1])
#Z=as.data.frame(diag(1,nrow(KI)))
      Z=rbind(taxa,Z)
      taxa=c('Taxa',as.character(taxa))
      Z=cbind(taxa,Z)
    }
    ZI=Z

    #Add the part of non proportion in Z matrix
    if(kinship.algorithm!="None" & kinship.algorithm!="SUPER" & !is.null(Z))
    {
      if(nrow(Z)-1<nrow(Y)) Z=GAPIT.ZmatrixFormation(Z=Z,Y=Y)
    }

    #Create CV with all 1's if it is not provided
    # noCV=FALSE
    # if(is.null(CV)){
    #   noCV=TRUE
    #   CV=Y[,1:2]
    #   CV[,2]=1
    #   colnames(CV)=c("taxa","overall")
    # }

    #Remove duplicat and integragation of data
    # print("QC is in process...")

    CVI <- CV

# print(dim(Z))
# print("!!!!!")
# if(QC) ###### from 413 to 423 codes were shield by jiabo 2022.8.8
# {
#   qc <- GAPIT.QC(Y=Y,KI=KI, GT=GT,CV=CV,Z=Z,GK=GK)
#   GTindex=qc$GTindex
#   Y=qc$Y  # here make twice qc and chaos with numeric taxa, Thanks for Dennis. 20210913
#   KI=qc$KI
#   CV=qc$CV
#   Z=qc$Z
#   GK=qc$GK
#   # if(noCV)CVI=qc$CV #this part will make GS without CV not present all prediction
# }

# print(length(GK))
    # GTindex=match(as.character(KI[,1]),as.character(Y[,1]))
    # GTindex=GTindex[!is.na(GTindex)]
    # KI=KI[GTindex,GTindex+1]
    # my_taxa=as.character(KI[,1])
    # CV=CV[as.character(CV[,1])%in%as.character(Y[,1]),]
    #Output phenotype
    # colnames(Y)=c("Taxa",name.of.trait)
    # if(file.output){
    #   try(utils::write.table(Y, paste("GAPIT.", name.of.trait,".phenotype.csv" ,sep = ""),
    #                          quote = FALSE, sep = ",", 
    #                          row.names = FALSE,
    #                          col.names = TRUE))
    # }
    
    # Default kinship.algorithm = "VanRaden".
    # This if() may be seldom used.
    #TDP
#     if( kinship.algorithm =="None" ){
#       if(min(CV[,2])==max(CV[,2])) CV=NULL
	
#       # GAPIT.TDP() does not appear to exist.
#       # theTDP = GAPIT.TDP(Y=Y,
#       #                    CV=CV,
#       #                    SNP = as.data.frame(cbind(GT[GTindex],as.matrix(as.data.frame(GD[GTindex,])))),
#       #                    QTN=QTN,
#       #                    Round=QTN.round,
#       #                    QTN.limit=QTN.limit, 
#       #                    QTN.update=QTN.update, 
#       #                    Method=QTN.method
#       #                    )
# #print(dim(GM))
# #print(length(theTDP$p))

#       #theGWAS=cbind(GM,theTDP$p,NA,NA,NA)	
#       theGWAS=cbind(GM,NA,NA,NA,NA)	
      
#       return (list(Compression = NULL,
#                    kinship.optimum = NULL, 
#                    kinship = NULL,
#                    PC = NULL,
#                    GWAS = theGWAS, 
#                    GPS = NULL,
#                    Pred = NULL,
#                    REMLs = NULL,
#                    #QTN = theTDP$QTN,
#                    QTN = NULL,
#                    Timmer = Timmer,
#                    Memory = Memory,
#                    h2 = NULL))
#     }

#rm(qc)
    # gc()

    # Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QC")
    # Memory=GAPIT.Memory(Memory=Memory,Infor="QC")

    #Get indicator of sangwich top and bottom
    byPass.top=FALSE
    byPass=FALSE
    NOBLUP=FALSE
    if(group.from<2&group.to<2) NOBLUP=TRUE
    #if(!is.null(sangwich.bottom)) byPass=((sangwich.bottom=="FaST" | sangwich.bottom=="SUPER" | sangwich.bottom=="DC" )& is.null(GP)   )
    if(!is.null(sangwich.top)){
      byPass.top=((sangwich.top=="FaST" | sangwich.top=="SUPER" | sangwich.top=="DC" ) )
    }
    if(!is.null(sangwich.bottom)){
      byPass=((sangwich.bottom=="FaST" | sangwich.bottom=="SUPER" | sangwich.bottom=="DC" ) )
    }
    
    print("Try to group from and to were set to 1")

    if(byPass){
      print("group from and to were set to 1")
      group.from=1
      group.to=1
    }

    print("------------Examining data (QC) done-------------------------------------")

    #Sagnwich top bun: To gep GP if it is not provided
    if(!is.null(sangwich.top) & is.null(GP)){
      print("-------------------Sandwich top bun-----------------------------------")
#print(dim(GD))
#print(GD[1:5,1:5])

      #Create GK if not provided
      if(is.null(GK))
      {
#    set.seed(1)
        nY=floor(nrow(Y)*.9)
        nG=ncol(GD)
        if(nG>nY)
        {
          snpsam=sample(1:nG,nY)
        }else{
            snpsam=1:nG
        }
        GK=GD
    # print(dim(GK))
    # print(GK[270:279,1:5])
        SNPVar=apply(as.matrix(GK), 2, stats::var)
    # print(SNPVar)
        GK=GK[,SNPVar>0]
        GK=cbind(as.data.frame(GT),as.data.frame(GK)) #add taxa
      }
  
  #myGD=cbind(as.data.frame(GT),as.data.frame(GD)) 
      # print(tail(Y[,1]))
      # print(tail(CV[,1]))
      # print(tail(GT))
      # print(dim(cbind(as.data.frame(GT[GTindex]),as.data.frame(GD[GTindex,]))))
      GP=GAPIT.Bread(Y=Y,CV=CV,Z=Z,KI=KI,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.top,LD=LD,file.output=FALSE)$GWAS
      # file.output=file.output.temp
  
  
      GK=NULL
  
      print("-------------------Sagnwich top bun: done-----------------------------")  

    } 

    Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="SagnwichTop")
    Memory=GAPIT.Memory(Memory=Memory,Infor="SagnwichTop")

    #Sandwich burger and dressing
    print("-------------------Sandwich burger and dressing------------------------")

    #Handler of group boundry
    if(group.from>group.to) stop("GAPIT says: group.to should  be larger than group.from. Please correct them!")

    if(is.null(CV) | (!is.null(CV) & group.to<(ncol(CV)+1))) {
      #The minimum of group is 1 + number of columns in CV
      group.from=1
      group.to=1
      #warning("The upper bound of groups (group.to) is not sufficient. both boundries were set to a and GLM is performed!")
      message("The upper bound of groups (group.to) is not sufficient. both boundries were set to a and GLM is performed!")
    }

    if(!is.null(CV)& group.from<1) {
      group.from=1 #minimum of group is number of columns in CV
      #warning("The lower bound of groups should be 1 at least. It was set to 1!")
      message("The lower bound of groups should be 1 at least. It was set to 1!")
    }
 
    nk=1000000000
    if(!is.null(KI)) nk=min(nk,nrow(KI))
    if(!is.null(GK)) nk=min(nk,nrow(GK))

    if(!is.null(KI)){
      if(group.to>nk) {
        #group.to=min(nrow(KI),length(GTindex)) #maximum of group is number of rows in KI
        group.to=nk #maximum of group is number of rows in KI
        #warning("The upper bound of groups is too high. It was set to the size of kinship!") 
        # warnings = errors during testing, so this warning will cause a failure.
        message("The upper bound of groups is too high. It was set to the size of kinship!") 
      }
      if(group.from>nk){ 
        group.from=nk
        #warning("The lower bound of groups is too high. It was set to the size of kinship!") 
        # warnings = errors during testing, so this warning will cause a failure.
        message("The lower bound of groups is too high. It was set to the size of kinship!")
      } 
    }

    if(!is.null(CV)){
      if(group.to<=ncol(CV)+1) {
        #The minimum of group is number of columns in CV
        #group.from=ncol(CV)+2
        #group.to=ncol(CV)+2
        #warning("The upper bound of groups (group.to) is not sufficient. both boundries were set to their minimum and GLM is performed!")
        message("The upper bound of groups (group.to) is not sufficient. both boundries were set to their minimum and GLM is performed!")
      }
    }

#bin.fold=ceiling(log2(bin.to/bin.from))
#bin.seq=0:bin.fold
#bin.level=bin.from*2^bin.seq

    #Set upper bound for inclosure.to
    if(inclosure.to>nrow(Y))inclosure.to=nrow(Y)-1

    #set inclosure loop levels
    bin.level=seq(bin.from,bin.to,by=bin.by)
    inclosure=seq(inclosure.from,inclosure.to,by=inclosure.by)

    #Optimization for group number, cluster algorithm and kinship type
    GROUP=seq(group.to,group.from,by=-group.by)#The reverse order is to make sure to include full model
    if(missing("kinship.cluster")) kinship.cluster=c("ward", "single", "complete", "average", "mcquitty", "median", "centroid")
    if(missing("kinship.group")) kinship.group=c("Mean", "Max", "Min", "Median")
    numSetting=length(GROUP)*length(kinship.cluster)*length(kinship.group)*length(bin.level)*length(inclosure)

    #Reform Y, GD and CV into EMMA format
    ys=as.matrix(Y[,2])
    X0=as.matrix(CV[,-1])
    CV.taxa=CVI[,1]
    #print(length(ys))
    #Initial
    count=0
    Compression=matrix(,numSetting,6)
    colnames(Compression)=c("Type","Cluster","Group","REML","VA","VE")

    #add indicator of overall mean
    if(min(X0[,1])!=max(X0[,1])) X0 <- cbind(1, X0) #do not add overall mean if X0 has it already at first column


    Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="DataProcessing")
    Memory=GAPIT.Memory(Memory=Memory,Infor="DataProcessing")

    print("-------------------------Iteration in process--------------------------")
    print(paste("Total iterations: ",numSetting,sep=""))

    #Loop to optimize cluster algorithm, group number and kinship type
    # print(hasGenotype)
    for (bin in bin.level){
      for (inc in inclosure){       
        
        for (ca in kinship.cluster){
          for (group in GROUP){
            for (kt in kinship.group){
              #Do not screen SNP unless existing genotype and one combination
              if(numSetting==1 & hasGenotype){
                optOnly=FALSE
              }else{
                optOnly=TRUE
              }
              if(!SNP.test) optOnly=TRUE

              if(optOnly | Model.selection){
                colInclude=1
                optOnly = TRUE
              }else{
                colInclude=c(1:ncol(GD))
              }

              if(!optOnly) 
              {
                print("Compressing and Genome screening..." )
              }
                count=count+1
#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 1")
#Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 1")
                if(!byPass)
                {
                  if(count==1)print("-------Mixed model with Kinship-----------------------------")
                  if(group<ncol(X0)+1) group=1 # the emma function (emma.delta.REML.dLL.w.Z) does not allow K has dim less then CV. turn to GLM (group=1)
                  cp <- GAPIT.Compress(KI=KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory)
                  Timmer=cp$Timmer
                  Memory=cp$Memory
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")
#print("BK...")
                  bk <- GAPIT.Block(Z=Z,GA=cp$GA,KG=cp$KG)
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")
#print("ZC...")
                  zc <- GAPIT.ZmatrixCompress(Z=Z,GAU =bk$GA)
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")
#print("wraping...")
#Reform KW and Z into EMMA format
                  zrow=nrow(zc$Z)
                  zcol=ncol(zc$Z)-1
#Z1=matrix(as.numeric(as.matrix(zc$Z[,-1])),nrow=zrow,ncol=zcol)
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Prio PreP3D")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="Prio PreP3D")  
                # print(colInclude)   
                # print(head(CVI))           
                  p3d <- GAPIT.EMMAxP3D(ys=ys,
                                      xs=as.matrix(as.data.frame(GD[,colInclude])),
                                      K = as.matrix(bk$KW),
                                      Z=matrix(as.numeric(as.matrix(zc$Z[,-1])),nrow=zrow,ncol=zcol),
                                      X0=X0,
                                      CVI=CVI,
                                      CV.Extragenetic=CV.Extragenetic,
                                      GI=GI,
                                      SNP.P3D=SNP.P3D,
                                      Timmer=Timmer,
                                      Memory=Memory,
                                      fullGD=fullGD,
                                      SNP.permutation=SNP.permutation,
                                      GP=GP,
                                      SNP.fraction=SNP.fraction,
			                                file.path=file.path,
			                                file.from=file.from,
			                                file.to=file.to,
			                                file.total=file.total, 
			                                file.fragment = file.fragment, 
			                                byFile=byFile, 
			                                file.G=file.G,
			                                file.Ext.G=file.Ext.G,
			                                file.GD=file.GD, 
			                                file.GM=file.GM, 
			                                file.Ext.GD=file.Ext.GD,
			                                file.Ext.GM=file.Ext.GM,
			                                # GTindex=GTindex,
			                                genoFormat=genoFormat,
			                                optOnly=optOnly,
			                                SNP.effect=SNP.effect,
			                                SNP.impute=SNP.impute,
			                                name.of.trait=name.of.trait, 
			                                Create.indicator = Create.indicator, 
			                                Major.allele.zero = Major.allele.zero
			                                )
                  Timmer=p3d$Timmer
                  Memory=p3d$Memory
                # print(head(p3d$BLUE))
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Post PreP3D")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="Post PreP3D")
#print("Cluster algorithm, kinship type, groups, VG, Ve and REML:")
                  print(paste(count, "of",numSetting,"--","Vg=",round(p3d$vgs,4), "VE=",round(p3d$ves,4),"-2LL=",round(p3d$REMLs,2), "  Clustering=",ca,"  Group number=", group ,"  Group kinship=",kt,sep = " "))
#print(table(GTindex))
                #Recoding the optimum KI
                  if(count==1)
                  {
                    KI.save=KI
                    LL.save=p3d$REMLs
                  }else{
                    if(p3d$REMLs<LL.save)
                    {
                      KI.save=KI
                      LL.save=p3d$REMLs
                    }
                  }
#print(paste("CA is ",ca))
#print(paste("group is ",group))
#print(paste("kt is ",kt))
                #recording Compression profile on array
                  Compression[count,1]=kt
                  Compression[count,2]=ca
                  Compression[count,3]=group
                  Compression[count,4]=p3d$REMLs
                  Compression[count,5]=p3d$vgs
                  Compression[count,6]=p3d$ves
                }else{# end of if(!byPass)
                #Set QTNs
                if(count==1)print("-------The burger is SNP-----------------------------------")
  #bin.size=bin
  #inclosure.size=inc
                #@@@This section is not useful
                if(!is.null(GP))
                {
  #print("Being specific...")
                  myGenotype <- GAPIT.Genotype(
                    G=NULL,
                    GD=NULL,
                    GM=GI,
                    KI=NULL,
                    kinship.algorithm="SUPER",
                    PCA.total=0,
                    SNP.fraction=SNP.fraction,
                    SNP.test=SNP.test,
                    file.path=file.path,
                    file.from=file.from, 
                    file.to=file.to, 
                    file.total=file.total, 
                    file.fragment = file.fragment, 
                    file.G=file.G, 
                    file.Ext.G=file.Ext.G,
                    file.GD=file.GD, 
                    file.GM=file.GM, 
                    file.Ext.GD=file.Ext.GD,
                    file.Ext.GM=file.Ext.GM,
                    SNP.MAF=SNP.MAF,
                    FDR.Rate = FDR.Rate,
                    SNP.FDR=SNP.FDR,
                    SNP.effect=SNP.effect,
                    SNP.impute=SNP.impute,
                    LD.chromosome=LD.chromosome,
                    LD.location=LD.location,
                    LD.range=LD.range,
                    kinship.cluster=kinship.cluster,#NJtree.group=NJtree.group,NJtree.type=NJtree.type,
                    GP=GP,
                    GK=NULL,
                    bin.size=bin,
                    inclosure.size=inc,
                    SNP.CV=SNP.CV,
                    GTindex=GTindex,
                    sangwich.top=NULL,
                    sangwich.bottom=sangwich.bottom,
                    Timmer = Timmer, 
                    Memory = Memory,
                    file.output=file.output, 
                    Create.indicator = Create.indicator, 
                    Major.allele.zero = Major.allele.zero
                    )
    
                  Timmer=myGenotype$Timmer
                  Memory=myGenotype$Memory
  
                  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Genotype for burger")
                  Memory=GAPIT.Memory(Memory=Memory,Infor="Genotype for burger")
  
                  print(paste("bin---",bin,"---inc---",inc,sep=""))
                  print(which(myGenotype$SNP.QTN==TRUE))
                  GK=GD[,myGenotype$SNP.QTN,drop=FALSE]
                  SUPER_GD=GD[,myGenotype$SNP.QTN,drop=FALSE]
                  SNPVar=apply(as.matrix(GK), 2, stats::var)
                  GK=GK[,SNPVar>0,drop=FALSE]
                  SUPER_GD=SUPER_GD[,SNPVar>0,drop=FALSE]
                  GK=cbind(as.data.frame(GT),as.data.frame(GK)) #add taxa
  # print(length(GT))
  # print(dim(SUPER_GD))
                  SUPER_GD=cbind(as.data.frame(GT),as.data.frame(SUPER_GD)) #add taxa
# print(dim(GK))
  #GP=NULL
                }# end of if(is.null(GK)) 

# print(is.null(GK))
if(numSetting>1)
{
print("-------Calculating likelihood-----------------------------------")
  myBurger=GAPIT.Burger(Y=Y,CV=CV,GK=GK)
    # myBurger=GAPIT.Burger(Y=Y,CV=NULL,GK=GK)   #########modified by Jiabo Wang
  myREML=myBurger$REMLs
  myVG=myBurger$vg
  myVE=myBurger$ve
}else{
  myREML=NA
  myVG=NA
  myVE=NA
}

#Recoding the optimum GK
if(count==1)
{
  GK.save=GK
  LL.save=myREML
  SUPER_optimum_GD=SUPER_GD     ########### get SUPER GD
}else{
  if(myREML<LL.save)
  {
    GK.save=GK
    LL.save=myREML
	  SUPER_optimum_GD=SUPER_GD     ########### get SUPER GD
  }
}
  
#Put to storage
Compression[count,1]=1
Compression[count,2]=bin
Compression[count,3]=inc
Compression[count,4]=myREML
Compression[count,5]=myVG
Compression[count,6]=myVE
print(Compression[count,]) 

#print("---------------SUPER 2nd stage: calculating LL ------------------------")


}   # end of if(byPass)

}#end of for (ca in kinship.cluster)

#Skip the rest group in case group 1 is finished
if(group==1) break #To skip the rest group interations

}#end of for (group in GROUP)
}#end of for (kt in kinship.group)

  
}#end of for (inc in inclosure)
}#end of for (bin in bin.level)


if(Model.selection == TRUE){ 

  print("------------------------Model selection for optimal number of PCs and Covariates-------------------------------------------------")
  #update KI with the best likelihood
  KI=KI.save
  if(numSetting>1){
  Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),]  #sort on REML
  kt=Compression[1,1]
  ca=Compression[1,2]
  group=Compression[1,3]
  }

  cp <- GAPIT.Compress(KI=KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory)
  Timmer=cp$Timmer
  Memory=cp$Memory

  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")
  
  bk <- GAPIT.Block(Z=Z,GA=cp$GA,KG=cp$KG)
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")

  zc <- GAPIT.ZmatrixCompress(Z=Z,GAU =bk$GA)

  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")

  z0=as.matrix(zc$Z[,-1])
  Z1=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))


  
  BIC <- rep(NA,ncol(X0))
  LogLike <- rep(NA, ncol(X0))
  for(i in 1:ncol(X0)){#1 because the first column of X0 is the intercept

    X0.test <- as.matrix(X0[,1:i]) 
    
    #print("The dim of bk$KW is ")
    #print(dim(bk$KW))
    #print(dim(X0.test))
    #print(dim(CVI))

    p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[,1])),K = as.matrix(bk$KW) ,Z=Z1,X0=X0.test,CVI=CVI,CV.Extragenetic=CV.Extragenetic,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
            SNP.permutation=SNP.permutation, GP=GP,
			      file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
            genoFormat=genoFormat,optOnly=TRUE,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)

    
    
    k.num.param <- 2+i
    #k is (i-1) because we have the following parameters in the likelihood function:
    #  intercept
    #  (i-1) covariates
    #  sigma_g
    #  delta
    
    #print(paste("The value of round(p3d$REMLs,5) is ", round(p3d$REMLs,5), sep = ""))
    #print(paste("The value of log(GTindex) is ", log(GTindex), sep = ""))
    #print(paste("The value of 0.5*k.num.param*log(GTindex) is ", 0.5*k.num.param*log(nrow(Z1)), sep = ""))
    
    LogLike[i] <- p3d$logLM
    BIC[i] <- p3d$logLM -(0.5*k.num.param*log(nrow(Z1)))
    
    #print("The value of k.num.param  is: ")
    #print(k.num.param)
    
    #print(paste("The value of nrow(Z1) is ", nrow(Z1), sep = ""))  
    
    }   
    Optimum.from.BIC <- which(BIC == max(BIC))
    
    print(paste("-----------------------The optimal number of PCs/covariates is ", (Optimum.from.BIC-1)," -------------------------", sep = ""))
    
    BIC.Vector <- cbind(as.matrix(rep(0:(ncol(X0)-1))), as.matrix(BIC), as.matrix(LogLike))

           
    #print(seq(0:ncol(X0)))
    
       #print(BIC.Vector)
 
    colnames(BIC.Vector) <- c("Number of PCs/Covariates", "BIC (larger is better) - Schwarz 1978", "log Likelihood Function Value")
    
    utils::write.table(BIC.Vector, paste("GAPIT.", name.of.trait, ".BIC.Model.Selection.Results.csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
    
    #print(BIC.Vector)
    
    X0 <- X0[,1:(Optimum.from.BIC)]
    
    if(Optimum.from.BIC == 1){
    X0 <- as.matrix(X0)
    }
    print("The dimension of X0 after model selection is:")
    print(dim(X0))
    
    print("The head of X0 after model selection is")
    print(utils::head(X0))
    

} # where does it start: 522

print("---------------------Sandwich bottom bun-------------------------------")
# print("Compression") 
# print(Compression)

#Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression")
#Memory=GAPIT.Memory(Memory=Memory,Infor="Copmression")

if(numSetting==1)
{
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")
}
  
#Perform GWAS with the optimum setting
#This section is omited if there is only one setting
if((numSetting>1)| (!is.null(sangwich.bottom)&!byPass) | Model.selection) {
  print("Genomic screening..." )
  
optOnly=FALSE  #set default to false and change it to TRUE in these situations:
if(!hasGenotype) optOnly=TRUE
if(!SNP.test) optOnly=TRUE

if(optOnly){
 colInclude=1
}else{
 colInclude=c(1:ncol(GD))
}

if(numSetting>1){
#Find the best ca,kt and group
#print(paste(as.numeric(Compression[1,4]))) ###added by Jiabo Wang 2015.7.20
#print(paste(min(as.numeric(Compression[,4]),rm.na=TRUE)))
adjust_value=as.numeric(Compression[1,4])-min(as.numeric(Compression[,4]),rm.na=TRUE)
# nocompress_value=as.numeric(Compression[1,4])
# REML_storage=as.numeric(Compression[,4])

adjust_sq=sqrt(stats::var(as.numeric(Compression[,4])))
# threshold=adjust_mean*0.1       
if(which.min(as.numeric(Compression[,4]))!=1)     ###added by Jiabo Wang 2015.7.20
{
if(which.min(as.numeric(Compression[,4]))==which.max(as.numeric(Compression[,5])))
{
  kt=Compression[which.min(as.numeric(Compression[,4])),1]
  ca=Compression[which.min(as.numeric(Compression[,4])),2]
  group=Compression[which.min(as.numeric(Compression[,4])),3]
  va=Compression[which.min(as.numeric(Compression[,4])),5]
  ve=Compression[which.min(as.numeric(Compression[,4])),6]
}else{
  # Compression0=Compression
  cnn=which.min(as.numeric(Compression[,4]))
  if(cnn-which.min(as.numeric(Compression[-cnn,4]))<2)
    {
      kt=Compression[which.min(as.numeric(Compression[,4])),1]
      ca=Compression[which.min(as.numeric(Compression[,4])),2]
      group=Compression[which.min(as.numeric(Compression[,4])),3]
      va=Compression[which.min(as.numeric(Compression[,4])),5]
      ve=Compression[which.min(as.numeric(Compression[,4])),6]
    }else{
      kt=Compression[1,1]
      ca=Compression[1,2]
      group=Compression[1,3]
      va=Compression[1,5]
      ve=Compression[1,6]
      print("The difference of compression is not enough!!")
    }
  
}



# Compression=Compression0

print(paste("Compress Optimum: ",ca,kt,group,va,va,ve,sep = " "))
}else{
Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),]  #sort on REML

kt=Compression[1,1]
ca=Compression[1,2]
group=Compression[1,3]
print(paste("Optimum: ",Compression[1,2],Compression[1,1],Compression[1,3],Compression[1,5],Compression[1,6],Compression[1,4],sep = " "))
}
}#end  if(numSetting>1)
Compression=Compression[order(as.numeric(Compression[,4]),decreasing = FALSE),]
print(Compression)


print("--------------  Sandwich bottom ------------------------") 

if(!byPass) 
{ 
print("--------------  Sandwich bottom with raw burger------------------------") 

 if(Model.selection == FALSE){
  #update KI with the best likelihood
  if(is.null(sangwich.bottom)) KI=KI.save

  cp <- GAPIT.Compress(KI=KI,kinship.cluster=ca,kinship.group=kt,GN=group,Timmer=Timmer,Memory=Memory)
  Timmer=cp$Timmer
  Memory=cp$Memory
  
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_cp")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2_cp")
  
  bk <- GAPIT.Block(Z=Z,GA=cp$GA,KG=cp$KG)
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_bk")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 bk")
  
  zc <- GAPIT.ZmatrixCompress(Z=Z,GAU =bk$GA)
  
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="PreP3D 2_zc")
  Memory=GAPIT.Memory(Memory=Memory,Infor="PreP3D 2 zc")
  
  #Reform KW and Z into EMMA format
  
  z0=as.matrix(zc$Z[,-1])   
  Z1=matrix(as.numeric(z0),nrow=nrow(z0),ncol=ncol(z0))
 }
 
 print("--------------EMMAxP3D with the optimum setting-----------------------") 
 #print(dim(ys))
 #print(dim(as.matrix(as.data.frame(GD[GTindex,colInclude]))))
  p3d <- GAPIT.EMMAxP3D(ys=ys,xs=as.matrix(as.data.frame(GD[,colInclude]))   ,K = as.matrix(bk$KW) ,Z=Z1,X0=as.matrix(X0),CVI=CVI, CV.Extragenetic=CV.Extragenetic,GI=GI,SNP.P3D=SNP.P3D,Timmer=Timmer,Memory=Memory,fullGD=fullGD,
          SNP.permutation=SNP.permutation, GP=GP,
    			 file.path=file.path,file.from=file.from,file.to=file.to,file.total=file.total, file.fragment = file.fragment, byFile=byFile, file.G=file.G,file.Ext.G=file.Ext.G,file.GD=file.GD, file.GM=file.GM, file.Ext.GD=file.Ext.GD,file.Ext.GM=file.Ext.GM,
           genoFormat=genoFormat,optOnly=optOnly,SNP.effect=SNP.effect,SNP.impute=SNP.impute,name.of.trait=name.of.trait, Create.indicator = Create.indicator, Major.allele.zero = Major.allele.zero)  
    
  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")  
 print("--------------EMMAxP3D with the optimum setting done------------------") 
  
}#end of if(!byPass) 
}#end of if(numSetting>1 & hasGenotype & !SNP.test)  

#print("Screening wiht the optimum setting done") 

if(byPass)
{
print("---------------Sandwich bottom with grilled burger---------------------") 
print("---------------Sandwich bottom: reload bins ---------------------------")

#SUPER: Final screening
  GK=GK.save
  # print(GK)
  # print(dim(GK))
  myBread=GAPIT.Bread(Y=Y,CV=CV,Z=Z,GK=GK,GD=cbind(as.data.frame(GT),as.data.frame(GD)),GM=GI,method=sangwich.bottom,LD=LD,file.output=FALSE)
  
  print("SUPER saving results...")

  Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GWAS")
  Memory=GAPIT.Memory(Memory=Memory,Infor="GWAS")  

   
}   #end of if(byPass)

print("--------------------Final results presentations------------------------")



#Plotting optimum group kinship
if(!byPass) {
  if(length(bk$KW)>1 &length(bk$KW)<length(KI) & length(bk$KW)<1000 & GAPIT3.output){
#    if( file.output == TRUE ){
        grDevices::pdf(paste("GAPIT.Genotype.Kin_Optimum.",name.of.trait,".pdf",sep=""), width = 12, height = 12)
        graphics::par(mar = c(25,25,25,25))
        gplots::heatmap.2(as.matrix(bk$KW),  cexRow =.2, cexCol = 0.2, col=rev(grDevices::heat.colors(256)), scale="none", symkey=FALSE, trace="none")
        grDevices::dev.off()
#    }
  }
}


#Merge GWAS resultss from files to update ps,maf and nobs in p3d
if(byFile&!fullGD)
{
print("Loading GWAS results from file...")
for (file in file.from:file.to)
{

#Initicalization
frag=1
numSNP=file.fragment

while(numSNP==file.fragment) {     #this is problematic if the read end at the last line  

#Initicalization GI to detect reading empty line
#theGI=NULL
#theP=NULL
#theMAF=NULL
#thenobs=NULL

 
#reload results from files
print(paste("Current file ",file,"Fragment: ",frag))

theGI <- try(utils::read.table(paste("GAPIT.TMP.GI.",name.of.trait,file,".",frag,".txt",sep=""), head = TRUE)   ,silent=TRUE)
theP <- try(utils::read.table(paste("GAPIT.TMP.ps.",name.of.trait,file,".",frag,".txt",sep=""), head = FALSE)   ,silent=TRUE)
theMAF <- try(utils::read.table(paste("GAPIT.TMP.maf.",name.of.trait,file,".",frag,".txt",sep=""), head = FALSE),silent=TRUE)
thenobs <- try(utils::read.table(paste("GAPIT.TMP.nobs.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
thersquare_base <- try(utils::read.table(paste("GAPIT.TMP.rsquare.base.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
thersquare <- try(utils::read.table(paste("GAPIT.TMP.rsquare.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
          thedf  <- try(utils::read.table(paste("GAPIT.TMP.df.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
          thetvalue  <- try(utils::read.table(paste("GAPIT.TMP.tvalue.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
          thestderr  <- try(utils::read.table(paste("GAPIT.TMP.stderr.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)
theeffect.est <- try(utils::read.table(paste("GAPIT.TMP.effect.est.",name.of.trait,file,".",frag,".txt",sep=""),head= FALSE),silent=TRUE)

if(inherits(theGI, "try-error"))  {
#if(nrow(theGI)<1){
  numSNP=0
  #print("This fragment is empty.")
}else{



#print("Records loaded for this fragment.")
  numSNP=nrow(theGI)  
  colnames(theP)="P"
  colnames(theMAF )="MAF"
  colnames(thenobs )="nobs"
  colnames(thersquare_base) = "Base.Model.R.square"  
  colnames(thersquare) = "Model.R.square"
            colnames(thedf) = "Model.DF"
            colnames(thetvalue) = "Model.tvalue"
            colnames(thestderr) = "Model.stderr"
  colnames(theeffect.est) = "Effect.Est"    
  colnames(theGI) = colnames(GI)
 



#Merge results  
  if(file==file.from & frag==1){

    GI=theGI  
    #print(dim(GI))
    allP=theP
    #print(head(theP))
    allMAF=theMAF
    allnobs=thenobs
    allrsquare_base=thersquare_base
    allrsquare=thersquare
              alldf=thedf
              alltvalue=thetvalue
              allstderr=thestderr
    alleffect.est=theeffect.est

  }else{
    allP=as.data.frame(rbind(as.matrix(allP),as.matrix(theP))  )
    allMAF=as.data.frame(rbind(as.matrix(allMAF),as.matrix(theMAF)) )
    allnobs=as.data.frame(rbind(as.matrix(allnobs),as.matrix(thenobs)))
    allrsquare_base=as.data.frame(rbind(as.matrix(allrsquare_base),as.matrix(thersquare_base)))
    allrsquare=as.data.frame(rbind(as.matrix(allrsquare),as.matrix(thersquare)))
              alldf=as.data.frame(rbind(as.matrix(alldf),as.matrix(thedf)))
              alltvalue=as.data.frame(rbind(as.matrix(alltvalue),as.matrix(thetvalue)))
              allstderr=as.data.frame(rbind(as.matrix(allstderr),as.matrix(thestderr)))
    alleffect.est=as.data.frame(rbind(as.matrix(alleffect.est),as.matrix(theeffect.est)))
    #print("!!!!!!!!!!!!!!!")
    #print(dim(GI))
    #print(dim(theGI))
    GI=as.data.frame(rbind(as.matrix(GI),as.matrix(theGI)))
  }

}#end of  if(inherits(theGI, "try-error")) (else section)

#setup for next fragment
frag=frag+1   #Progress to next fragment 

}#end of loop on fragment: while(numSNP==file.fragment)
}#end of loop on file

#update p3d with components from files

  p3d$ps=allP
  p3d$maf=allMAF
  p3d$nobs=allnobs
  p3d$rsquare_base=allrsquare_base
  p3d$rsquare=allrsquare
      p3d$df=alldf
      p3d$tvalue=alltvalue
      p3d$stderr=allstderr
  p3d$effect.est=alleffect.est
  
#Delete all the GAPIT.TMP files
theFile=paste("GAPIT.TMP.",name.of.trait,".*")
  system('cmd /c del "GAPIT.TMP*.*"') 
  system('cmd /c del "GAPIT.TMP*.*"') 
  print("GWAS results loaded from all files succesfully!")
} #end of if(byFile)

#--------------------------------------------------------------------------------------------------------------------#
#Final report   
print("Generating summary" )
GWAS=NULL
GPS=NULL
#rm(zc)
gc()

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Final")
Memory=GAPIT.Memory(Memory=Memory,Infor="Final")

#genomic prediction
print("Genomic Breeding Values (GBV) ..." )
#print(p3d$BLUP)
Group=1:nrow(CVI)
RefInf=rep(1,nrow(CVI))
# print(table(index))
# RefInf[index]=1
ID=1:nrow(CVI)
BLUP=rep(0,nrow(CVI))
PEV=rep(0,nrow(CVI))
gs=NULL
gs$BLUP=cbind(as.data.frame(CVI[,1]),Group,RefInf,ID,BLUP,PEV)
# print(head(gs$BLUP))
colnames(gs$BLUP)=c("Taxa","Group","RefInf","ID","BLUP","PEV")
if(!byPass) 
{

if(length(bk$KW)>ncol(X0)) {
    gs <- GAPIT.GS(KW=bk$KW,KO=bk$KO,KWO=bk$KWO,GAU=bk$GAU,UW=cbind(p3d$BLUP,p3d$PEV))
}

print("Writing GBV and Acc..." )

GPS=NULL
if(length(bk$KW)>ncol(X0)) GPS=gs$BLUP
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GPS")
Memory=GAPIT.Memory(Memory=Memory,Infor="GPS")

#Make heatmap for distribution of BLUP and PEV
print("GBV and accuracy distribution..." )
if(length(bk$KW)>ncol(X0) &file.output) {
  GAPIT.GS.Visualization(gsBLUP = gs$BLUP, BINS=BINS,name.of.trait = name.of.trait)
}

#Make a plot Summarzing the Compression Results, if more than one "compression level" has been assessed
print("Compression portfolios..." )
#print(Compression)
if(file.output){
  GAPIT.Compression.Visualization(Compression = Compression, 
                                  name.of.trait = name.of.trait,file.output=file.output)
}
print("Compression Visualization done")

if(length(Compression)<1){
  h2.opt= NULL
}else{
  print(Compression)
if(length(Compression)<6) Compression=t(as.matrix(Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),]))
if(length(Compression)==6) Compression=matrix(Compression,1,6) 
if(length(Compression)>6) Compression=Compression[which(Compression[,4]!="NULL" | Compression[,4]!="NaN"),]
Compression.best=Compression[1,] 
variance=as.numeric(Compression.best[5:6])
varp=variance/sum(variance)
h2.opt= varp[1]
}

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Compression.Visualization")
Memory=GAPIT.Memory(Memory=Memory,Infor="Compression.Visualization")
# print("$$$$$")
# print(str(p3d))

ps=p3d$ps
nobs=p3d$nobs
maf=p3d$maf
rsquare_base=p3d$rsquare_base
rsquare=p3d$rsquare
df=p3d$df
tvalue=p3d$tvalue
stderr=p3d$stderr
effect.est=p3d$effect.est
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract p3d results")
Memory=GAPIT.Memory(Memory=Memory,Infor="Extract p3d results")
print("p3d objects transfered")  

#where does it start: 936
}else{  #byPass
    #print("The head of myBread$GWAS is")
  #print(head(myBread$GWAS))
  GPS=myBread$BLUP
  ps=myBread$GWAS[,4]
  nobs=myBread$GWAS[,6]
  #print(dim(GI))
  #print(head())
  Bread_index=match(as.character(myBread$GWAS[,1]),as.character(GI[,1]))
  #print(GD[1:5,1:5])
  Bread_X=GD[,Bread_index]
  #print(dim(Bread_X))
  maf=apply(Bread_X,2,function(one) abs(1-sum(one)/(2*nrow(Bread_X))))
  maf[maf>0.5]=1-maf[maf>0.5]
  rsquare_base=rep(NA,length(ps))
  rsquare=rep(NA,length(ps))
  df=rep(nrow(Bread_X),length(ps))
  tvalue=myBread$GWAS[,5]
  stderr=rep(NA,length(nobs))
  effect.est=myBread$GWAS[,7]
  
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract bread results")
Memory=GAPIT.Memory(Memory=Memory,Infor="Extract bread results")
 
}
print("Merge BLUP and BLUE")
#print(head(ps))
#Merge BLUP and BLUE
Pred=NULL
if((!byPass)&(!Model.selection)){
 print("GAPIT before BLUP and BLUE")
 #print(dim(p3d$BLUE))
 BLUE=data.frame(cbind(data.frame(CV.taxa),data.frame(p3d$BLUE)))
 # print(head(BLUE))
 colnames(BLUE)=c("Taxa","BLUE.N","BLUE.I")
 QTNs=rep(0,nrow(BLUE))
 #Initial BLUP as BLUe and add additional columns
 BLUE=cbind(BLUE,QTNs)
 BB= merge( BLUE,gs$BLUP, by.x = "Taxa", by.y = "Taxa")
 # if (is.null(my_allCV))
 #   {
 #     my_allX=matrix(1,length(my_taxa),1)
 #   }else{
 #     # my_allX=as.matrix(my_allCV[,-1])
 #     # my_allX=cbind(1,as.matrix(my_allCV[,-1]))
 #     my_allX=cbind(rep(1, times = nrow(my_allCV)),as.matrix(my_allCV[,-1]))
 #   } 
 # print(head(BB))
  gBreedingValue=BB[,3]+BB[,4]+BB[,8]
  Prediction=BB[,2]+BB[,3]+BB[,4]+BB[,8]

 Pred=data.frame(cbind(BB,data.frame(gBreedingValue)),data.frame(Prediction))
 # if(NOBLUP)Pred=NA
 colnames(Pred)=c("Taxa","BLUE.N","BLUE.I","QTNs","Group","RefInf","ID","BLUP","PEV","gBreedingValue","Prediction")
 
 print("GAPIT after BLUP and BLUE")
}

#Export BLUP and PEV
if(!byPass &GAPIT3.output) 
{
print("Exporting BLUP and Pred")
  #try(write.table(gs$BLUP, paste("GAPIT.", name.of.trait,".BLUP.csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE))
  try(utils::write.table(Pred, paste("GAPIT.Association.Prediction_results.", name.of.trait,".csv" ,sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE))
}

if(byPass) 
{
  theK.back=NULL
}else{
  theK.back=cp$KG
}
if(byPass)Compression[1,4]=0 #create a fake value to aloow output of SUPER 

#Export GWAS results
PWI.Filtered=NULL
if(hasGenotype &SNP.test &!is.na(Compression[1,4]))     #require not NA REML 
{
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract GWAS start")
Memory=GAPIT.Memory(Memory=Memory,Infor="Extract GWAS start")


  #print("Filtering SNPs with MAF..." )
	#index=maf>=SNP.MAF	   
  
	PWI.Filtered=cbind(GI,ps,maf,nobs,rsquare_base,rsquare,effect.est)#[index,]
	#print(dim(PWI.Filtered))
	colnames(PWI.Filtered)=c("SNP","Chromosome","Position ","P.value", "maf", "nobs", "Rsquare.of.Model.without.SNP","Rsquare.of.Model.with.SNP","effect")

if(!byPass){  
   if(Create.indicator){
    #Add a counter column for GI
    GI.counter <- cbind(GI, seq(1:nrow(GI))) 
    
    #Turn GI and effect.est into data frames
    GI.counter.data.frame <- data.frame(GI.counter)
    colnames(GI.counter.data.frame) <- c("X1", "X2", "X3", "X4")
    
    effect.est.data.frame <- data.frame(effect.est)
    colnames(effect.est.data.frame) <- c("X1", "X2", "X3")
    print(utils::head(GI.counter.data.frame))
    print(utils::head(effect.est.data.frame))
    #Do a merge statement
    GWAS.2 <- merge(GI.counter.data.frame, effect.est.data.frame, by.x = "X4", by.y = "X1")
    
    #Remove the counter column
    GWAS.2 <- GWAS.2[,-1]
    
    #Add column names
    colnames(GWAS.2) <- c("SNP","Chromosome","Position ", "Genotype", "Allelic Effect Estimate")
    
    
   }
   if(!Create.indicator){ 
    GWAS.2 <- PWI.Filtered[,c(1:3,9)]
    colnames(GWAS.2) <- c("SNP","Chromosome","Position ", "Allelic Effect Estimate")
   } 
}
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="MAF filtered")
Memory=GAPIT.Memory(Memory=Memory,Infor="MAF filtered")
		     
  #print("SNPs filtered with MAF")
   
  
  if(!is.null(PWI.Filtered))
  {

  #Run the BH multiple correction procedure of the results
  #Create PWIP, which is a table of SNP Names, Chromosome, bp Position, Raw P-values, FDR Adjusted P-values
  #print("Calculating FDR..." )

  PWIP <- GAPIT.Perform.BH.FDR.Multiple.Correction.Procedure(PWI = PWI.Filtered, FDR.Rate = FDR.Rate, FDR.Procedure = "BH")
  
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Multiple Correction")
Memory=GAPIT.Memory(Memory=Memory,Infor="Multiple Correction")


  #QQ plots
  #print("QQ plot..." )
  # if(file.output) GAPIT.QQ(P.values = PWIP$PWIP[,4], name.of.trait = name.of.trait,DPP=DPP)


Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="QQ plot")
Memory=GAPIT.Memory(Memory=Memory,Infor="QQ plot")


  #Manhattan Plots
  
  
   #print("Manhattan plot (Genomewise)..." )
#  if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff)
#  if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=QTN.position)  #QTN does not work with sorted P
 # if(file.output) GAPIT.Manhattan(GI.MP = PWI.Filtered[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Genomewise",cutOff=cutOff,seqQTN=QTN.position,plot.style=plot.style,plot.bin=plot.bin,chor_taxa=chor_taxa)

 #print("Manhattan plot (Chromosomewise)..." )
 
  #if(file.output) GAPIT.Manhattan(GI.MP = PWIP$PWIP[,2:4], name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff)
 # if(file.output&SNP.fraction==1) GAPIT.Manhattan(GI.MP = PWI.Filtered[,2:4],GD=GD,CG=CG, name.of.trait = name.of.trait, DPP=DPP, plot.type = "Chromosomewise",cutOff=cutOff,plot.bin=plot.bin,chor_taxa=chor_taxa)

Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Manhattan plot")
Memory=GAPIT.Memory(Memory=Memory,Infor="Manhattan plot")


  #Association Table
  #print("Association table..." )
  #print(dim(PWIP$PWIP))
  #GAPIT.Table(final.table = PWIP$PWIP, name.of.trait = name.of.trait,SNP.FDR=SNP.FDR)
  #print(head(PWIP$PWIP))
  GWAS=PWIP$PWIP[PWIP$PWIP[,9]<=SNP.FDR,]
  #print("Joining tvalue and stderr" )
  
        DTS=cbind(GI,df,tvalue,stderr,effect.est)
        colnames(DTS)=c("SNP","Chromosome","Position","DF","t Value","std Error","effect")	

  #print("Creating ROC table and plot" )
	# if(file.output) myROC=GAPIT.ROC(t=tvalue,se=stderr,Vp=stats::var(ys),trait=name.of.trait)
  #print("ROC table and plot created" )

  #MAF plots
  #print("MAF plot..." )
   # if(file.output) myMAF1=GAPIT.MAF(MAF=GWAS[,5],P=GWAS[,4],E=NULL,trait=name.of.trait)


  #print(dim(GWAS))

  # if(file.output){
  #   utils::write.table(GWAS, paste("GAPIT.Association.GWAS_Results.", name.of.trait, ".csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
  #  utils::write.table(DTS, paste("GAPIT.Association.Df_tValue_StdErr.", name.of.trait, ".csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
  #  if(!byPass) utils::write.table(GWAS.2, paste("GAPIT.Genotype.Allelic_Effect_Estimates.", name.of.trait, ".csv", sep = ""), quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
  # }


  
  } #end of if(!is.null(PWI.Filtered))
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Extract GWAS end")
Memory=GAPIT.Memory(Memory=Memory,Infor="Extract GWAS end")

  
} #end of if(hasGenotype )

#Log
# if(GAPIT3.output) log=GAPIT.Log(Y=Y,KI=KI,Z=Z,CV=CV,SNP.P3D=SNP.P3D,
				# group.from = group.from ,group.to =group.to ,group.by = group.by ,kinship.cluster = kinship.cluster, kinship.group= kinship.group,
                      	# ngrid = ngrid , llin = llin , ulim = ulim , esp = esp ,name.of.trait = name.of.trait)
#Memory usage
#GAPIT.Memory.Object(name.of.trait=name.of.trait)

#Timming
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="Report")
Memory=GAPIT.Memory(Memory=Memory,Infor="Report")
# if(file.output){
# file=paste("GAPIT.", name.of.trait,".Timming.csv" ,sep = "")
# utils::write.table(Timmer, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)

# file=paste("GAPIT.", name.of.trait,".Memory.Stage.csv" ,sep = "")
# utils::write.table(Memory, file, quote = FALSE, sep = ",", row.names = FALSE,col.names = TRUE)
# }
print(paste(name.of.trait, "has been analyzed successfully!") )
print(paste("The results are saved in the directory of ", getwd()) )



#print("==========================================================================================")
TV<-list()
TV$ps=ps
TV$nobs=nobs
TV$maf=maf
TV$rsquare_base=rsquare_base
TV$rsquare=rsquare
TV$df=df
TV$tvalue=tvalue
TV$stderr=stderr
TV$effect.est=effect.est
#print("!!!!!!!!!!!!!")
#print(head(effect.est))
#print(head(DTS[,7]))
#print(ys)
if(byPass | Model.selection) Pred <- NA
print("before ending GAPIT.Main")
#print(dim(Compression))
return (list(Timmer=Timmer,Compression=Compression,kinship.optimum=theK.back, kinship=KI,PC=PC,GWAS=PWI.Filtered, GPS=GPS,Pred=Pred,REMLs=Compression[count,4],Timmer=Timmer,Memory=Memory,SUPER_GD=SUPER_GD,P=ps,effect.snp=DTS[,7],effect.cv=p3d$effect.cv,h2= h2.opt,TV=TV))
} #end if non-SUPER.GS situation, this is a long if statement, structure needs improvement
}#The function GAPIT.Main ends here
#=============================================================================================
jiabowang/GAPIT3 documentation built on March 6, 2025, 2:21 a.m.