R/rrBLUP_GAGS_VS_UT.R

Defines functions rrBLUP_GAGS_VS_UT

rrBLUP_GAGS_VS_UT <- function(train_genotypes, train_phenotype,train_GM=NULL,train_GD=NULL,train_PCA=NULL,test_genotypes,test_phenotype,test_PCA=NULL,Kernel="Markers",Sparse=FALSE,m=NULL,degree=NULL, nL=NULL,GWAS="BLINK",alpha=0.05,threshold=NULL, QTN=10,markers=NULL,PCA.total=3,transformation=NULL)
{
  # Make the CV list
  if(Kernel=="Markers"){
    if(!is.null(markers)){
      samp=sample(2:ncol(train_GD), markers)
      train_GD=train_GD[,samp]
      train_GM<-train_GM[samp,]

      myGD_train <- train_genotypes[,samp]
      myGD_test <- test_genotypes[,samp]
      genotypes=rbind(train=myGD_train,test=myGD_test)
    }else{
      myGD_train <- train_genotypes
      myGD_test <- test_genotypes
      genotypes=rbind(train=myGD_train,test=myGD_test)
    }
    # Calculate the GS model using rrBLUP
    myGD_train=apply(myGD_train,2,function(x) recode(x,"0"="-1","1"="0","2"="1"))
    myGD_train=apply(myGD_train,2,as.numeric)
    myGD_test=apply(myGD_test,2,function(x) recode(x,"0"="-1","1"="0","2"="1"))
    myGD_test=apply(myGD_test,2,as.numeric)
  }else{
    genotypes=rbind(train=train_genotypes,test=test_genotypes)
    maf <- calc_maf_apply(genotypes, encoding = c(0, 1, 2))
    mono_indices <- which(maf ==0)
    if(length(mono_indices)!=0){
      genotypes = genotypes[,-mono_indices]
    }

    if(Sparse==TRUE){
      X=as.matrix(genotypes)
      X=apply(genotypes,2,as.numeric)
      #sum(rowSums(is.na(X)))
      XS<-scale(X,center=TRUE,scale=TRUE)
      #sum(rowSums(is.na(XS)))
      library(caret)
      nzv <- nearZeroVar(XS)
      XSZV <- XS[, -nzv]
      #sum(rowSums(is.na(XSZV)))
      X=XSZV
      K=Sparse_Kernel_Construction(m=m,X=X,name=Kernel, degree=degree,nL=nL)
    }else{
      X=as.matrix(genotypes)
      X=apply(genotypes,2,as.numeric)
      #sum(rowSums(is.na(X)))
      XS<-scale(X,center=TRUE,scale=TRUE)
      #sum(rowSums(is.na(XS)))
      library(caret)
      nzv <- nearZeroVar(XS)
      XSZV <- XS[, -nzv]
      #sum(rowSums(is.na(XSZV)))
      X=XSZV
      K=Kernel_computation(X=X,name=Kernel,degree=degree, nL=nL)
    }

  }
  #if(!is.null(markers)){
    #samp=sample(2:ncol(train_GD), markers)
    #m_samp=train_GD[,samp]
    #myGD_train <- m_samp
    #myGD_test <- m_samp
  #}else{
    #myGD_train <- train_GD
    #myGD_test <- train_GD
  #}



  if(transformation=="sqrt"){
    train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] < 0, 0)
    train_phenotype[,2] <-sqrt(train_phenotype[,2])
    myY_train <- train_phenotype[,2]
    pheno_test=test_phenotype
    pheno_test[,2]<-NA
    pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
  }

  if(transformation=="log"){
    train_phenotype[,2]=replace(train_phenotype[,2], train_phenotype[,2] <= 0, 0.000001)
    train_phenotype[,2] <-log(train_phenotype[,2])
    myY_train <- train_phenotype[,2]
    pheno_test=test_phenotype
    pheno_test[,2]<-NA
    pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])

  }

  if(transformation=="boxcox"){
    train_phenotype[,2] <-boxcox_t(train_phenotype[,2])
    myY_train <- train_phenotype[,2]
    pheno_test=test_phenotype
    pheno_test[,2]<-NA
    pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
  }

  if(transformation=="none"){
    myY_train <- train_phenotype[,2]
    pheno_test=test_phenotype
    pheno_test[,2]<-NA
    pheno_train <- c(train=train_phenotype[,2],test=pheno_test[,2])
  }

  GWASR<- GAPIT(Y = train_phenotype,
                GD = train_GD,
                GM = train_GM,
                PCA.total=PCA.total,
                model = GWAS,
                file.output=F)

  if(threshold=="Bonferonni"){

    GWASSM <- which(GWASR$GWAS$P.value <= alpha/length(GWASR$GWAS$P.value))
    if(length(GWASSM)==0){
      prediction=data.frame(pheno_test,GEBV=NA,RE=NA,FE=NA)


    }else{
      sm=as.character(GWASR$GWAS[GWASSM,]$SNP)

      myCV_train <- myGD_train[,sm]
      myCV_test <- myGD_test[,sm]
      myCV<-as.matrix(genotypes[,sm])

      if(!is.null(train_PCA)){

        if(Kernel=="Markers"){
          myPCA_train <- train_PCA
          myPCA_test <- test_PCA
          PCA<-rbind(train_PCA,test_PCA)
          fix_train_PC <- as.matrix(cbind(myCV_train,myPCA_train))
          fix_test_PC  <- as.matrix(cbind(myCV_test,myPCA_test))
          #fix_PC=as.matrix(cbind(myCV,PCA))
          #p <- ncol(fix_train_PC)
          #XtX <- crossprod(fix_train_PC, fix_train_PC)
          #rank.X <- qr(XtX)$rank
          #if (rank.X < p) {
          #sm2=findLinearCombos(fix_train_PC)$remove
          #fix_train_PC=fix_train_PC[,-sm2]
          #fix_test_PC=fix_test_PC[,-sm2]}

          fix_train_PC=make_full_rank(fix_train_PC)
          fix_test_PC=fix_test_PC[,colnames(fix_train_PC)]
          if(ncol(data.frame(fix_test_PC))==1){
            fix_test_PC=matrix(fix_test_PC)
          }
          rrBLUP_model_PC <- mixed.solve(y = myY_train,
                                         Z = myGD_train,
                                         X = fix_train_PC)
          pred_effects_PC <- myGD_test %*% rrBLUP_model_PC$u
          fix_effects_PC <- fix_test_PC  %*% rrBLUP_model_PC$beta
          predictions <- c(pred_effects_PC) + c(fix_effects_PC)
        }else{
          fix_PC=as.matrix(cbind(myCV,PCA))
          #p <- ncol(fix_PC)
          #XtX <- crossprod(fix_PC, fix_PC)
          #rank.X <- qr(XtX)$rank
          #if (rank.X < p) {
          #sm2=findLinearCombos(fix_PC)$remove
          #fix_PC=fix_PC[,-sm2]}

          fix_PC=make_full_rank(fix_PC)
          gBLUP_model <- mixed.solve(y = pheno_train,
                                     K = K,
                                     X=fix_PC)
          pred_effects_PC <- gBLUP_model$u[-c(1:length(train_phenotype[,2]))]
          fix_effects_PC <- as.matrix(fix_PC[-c(1:length(train_phenotype[,2])),])  %*% gBLUP_model$beta
          predictions <- c(pred_effects_PC) + c(fix_effects_PC)
        }
        prediction=data.frame(pheno_test,GEBV=predictions,RE=pred_effects,FE=fix_effects)


      }else{

        if(Kernel=="Markers"){
          fix_train <- as.matrix(myCV_train)
          fix_test  <- as.matrix(myCV_test)
          #p <- ncol(fix_train)
          #XtX <- crossprod(fix_train, fix_train)
          #rank.X <- qr(XtX)$rank
          #if (rank.X < p) {
          #sm2=findLinearCombos(fix_train)$remove
          #fix_train=fix_train[,-sm2]
          #fix_test=fix_test[,-sm2]}

          #fix_myCV  <- as.matrix(myCV)
          #p <- ncol(fix_myCV)
          #XtX <- crossprod(fix_myCV, fix_myCV)
          #rank.X <- qr(XtX)$rank
          #if (rank.X < p) {
          #sm2=findLinearCombos(fix_myCV)$remove
          #myCV=myCV[,-sm2]}

          fix_train=make_full_rank(fix_train)
          fix_test=fix_test[,colnames(fix_train)]
          if(ncol(data.frame(fix_test))==1){
            fix_test=matrix(fix_test)
          }
          rrBLUP_model <- mixed.solve(y = myY_train,
                                      Z = myGD_train,
                                      X = fix_train)

          pred_effects <- myGD_test %*% rrBLUP_model$u
          fix_effects <- fix_test  %*% rrBLUP_model$beta
          predictions <- c(pred_effects) + c(fix_effects)
        }else{
          fix_myCV  <- as.matrix(myCV)
          fix_myCV=make_full_rank(fix_myCV)
          gBLUP_model <- mixed.solve(y = pheno_train,
                                     K = K,
                                     X=fix_myCV)
          pred_effects <- gBLUP_model$u[-c(1:length(train_phenotype[,2]))]
          fix_effects <- as.matrix(fix_myCV[-c(1:length(train_phenotype[,2])),])  %*% gBLUP_model$beta
          predictions <- c(pred_effects) + c(fix_effects)
        }
        prediction=data.frame(pheno_test,GEBV=predictions,RE=pred_effects,FE=fix_effects)

      }
    }



  }else{
    top10=GWASR$GWAS[order(GWASR$GWAS$P.value),]
    GWASSM=top10[1:QTN,]$SNP
    myCV_train <- myGD_train[,GWASSM]
    myCV_test <- myGD_test[,GWASSM]
    myCV<-as.matrix(genotypes[,GWASSM])



    if(!is.null(train_PCA)){

      gc()
      if(Kernel=="Markers"){
        myPCA_train <- train_PCA
        myPCA_test <- test_PCA
        PCA<-rbind(train_PCA,test_PCA)
        fix_train_PC <- as.matrix(cbind(myCV_train,myPCA_train))
        fix_test_PC  <- as.matrix(cbind(myCV_test,myPCA_test))
        #fix_PC=as.matrix(cbind(myCV,PCA))

        #p <- ncol(fix_train_PC)
        #XtX <- crossprod(fix_train_PC, fix_train_PC)
        #rank.X <- qr(XtX)$rank
        #if (rank.X < p) {
        #sm2=findLinearCombos(fix_train_PC)$remove
        #fix_train_PC=fix_train_PC[,-sm2]
        #fix_test_PC=fix_test_PC[,-sm2]}
        fix_train_PC=make_full_rank(fix_train_PC)
        fix_test_PC=fix_test_PC[,colnames(fix_train_PC)]
        if(ncol(data.frame(fix_test_PC))==1){
          fix_test_PC=matrix(fix_test_PC)
        }
        rrBLUP_model_PC <- mixed.solve(y = myY_train,
                                       Z = myGD_train,
                                       X = fix_train_PC)

        pred_effects_PC <- myGD_test %*% rrBLUP_model_PC$u
        fix_effects_PC <- fix_test_PC  %*% rrBLUP_model_PC$beta
        predictions <- c(pred_effects_PC) + c(fix_effects_PC)
      }else{
        fix_PC=as.matrix(cbind(myCV,PCA))
        #p <- ncol(fix_PC)
        #XtX <- crossprod(fix_PC, fix_PC)
        #rank.X <- qr(XtX)$rank
        #if (rank.X < p) {
        #sm2=findLinearCombos(fix_PC)$remove
        #fix_PC=fix_PC[,-sm2]}
        fix_PC=make_full_rank(fix_PC)
        gBLUP_model <- mixed.solve(y = pheno_train,
                                   K = K,
                                   X=fix_PC)
        pred_effects <- gBLUP_model$u[-c(1:length(train_phenotype[,2]))]
        fix_effects <- as.matrix(fix_PC[-c(1:length(train_phenotype[,2])),])  %*% gBLUP_model$beta
        predictions <- c(pred_effects) + c(fix_effects)
      }
      prediction=data.frame(pheno_test,GEBV=predictions,RE=pred_effects,FE=fix_effects)
    }else{


      if(Kernel=="Markers"){

        fix_train <- as.matrix(myCV_train)
        fix_test  <- as.matrix(myCV_test)
        #p <- ncol(fix_train)
        #XtX <- crossprod(fix_train, fix_train)
        #rank.X <- qr(XtX)$rank
        #if (rank.X < p) {
        #sm2=findLinearCombos(fix_train)$remove
        #fix_train=fix_train[,-sm2]
        #fix_test=fix_test[,-sm2]}
        fix_train=make_full_rank(fix_train)
        fix_test=fix_test[,colnames(fix_train)]
        if(ncol(data.frame(fix_test))==1){
          fix_test=matrix(fix_test)
        }
        #myCV_fix  <- as.matrix(myCV)
        #p <- ncol(myCV_fix)
        #XtX <- crossprod(myCV_fix, myCV_fix)
        #rank.X <- qr(XtX)$rank
        #if (rank.X < p) {
        #sm2=findLinearCombos(myCV_fix)$remove
        #myCV=myCV[,-sm2]}
        rrBLUP_model <- mixed.solve(y = myY_train,
                                    Z = myGD_train,
                                    X = fix_train)

        pred_effects <- myGD_test %*% rrBLUP_model$u
        fix_effects <- fix_test  %*% rrBLUP_model$beta
        predictions <- c(pred_effects) + c(fix_effects)
      }else{
        myCV_fix  <- as.matrix(myCV)
        myCV_fix=make_full_rank(myCV_fix)

        gBLUP_model <- mixed.solve(y = pheno_train,
                                   K = K,
                                   X=myCV_fix)
        pred_effects <- gBLUP_model$u[-c(1:length(train_phenotype[,2]))]
        fix_effects <- as.matrix(myCV_fix[-c(1:length(train_phenotype[,2])),])  %*% gBLUP_model$beta
        predictions <- c(pred_effects) + c(fix_effects)
      }
      prediction=data.frame(pheno_test,GEBV=predictions,RE=pred_effects,FE=fix_effects)

    }

  }

  results_ALL=list(Predictions=prediction)
  return(results_ALL)
}
lfmerrick21/WhEATBreeders documentation built on Jan. 1, 2023, 7:12 a.m.