R/eKernels.R

Defines functions envKernel envK envMarker getEnrichedKernel getEenriched getGEernriched getReactionNorm

Documented in envKernel envMarker

#' #####################################################################################################
#' Bayesian prediction models for genomic selection with ecophysiological information
#' Author: Germano MFC Neto   date 19/06/19 , version BETA 0.03 (first version from oct 2018)
#' implementation in BGLR or BGGE packages
#' #####################################################################################################
#
#' ===============================================================================
#'  Environmental kernels
#' ===============================================================================
envKernel <-function(df.cov, is.scaled=T, sd.tol = 1,tol=.001){
  if(!is.matrix(df.cov)){stop('df.cov must be a matrix')}
  if(!isTRUE(is.scaled)){
    Amean <- df.cov-apply(df.cov,2,mean)+tol
    sdA   <- apply(Amean,2,sd)
    A. <- Amean/sdA
    removed <- names(sdA[sdA < sd.tol])
    df.cov <- A.[,!colnames(A.) %in% removed]
    t <- ncol(df.cov)
    r <- length(removed)
    cat(paste0('------------------------------------------------','\n'))
    cat(paste0(' Removed envirotype markers:','\n'))
    cat(paste0(r,' from ',t,'\n'))
    cat(paste0(removed,'\n'))
    cat(paste0('------------------------------------------------','\n'))
    
  }
  O <- tcrossprod(df.cov)/ncol(df.cov)  # env kernel from covariates
  H <- crossprod(df.cov)/nrow(df.cov)   # omega kernel for covariates
  O <- O + diag(1E-4,nrow=nrow(O),ncol=ncol(O))
  H <- H + diag(1E-4,nrow=nrow(H),ncol=ncol(H))
  return(list(H=H,O=O))
}


envK = function(df.cov,df.p,skip=3){
  df.p <-data.frame(df.p)
  df.cov <-data.frame(df.cov)
  df.cov$env <- as.factor(rownames(df.cov))
  W <- as.matrix(merge(df.p,df.cov, by="env")[,-c(1:skip)])
  return(W)
}

envMarker = function(df.cov,digits=0, format=c("101","rel-01","PCA"), ncp=2){
  #' df.cov : dataframe of environmental covariates
  #' digits: number of output digits
  #' format: environmental marker (012), 
  #'         centering and scaled to mean 0 and variance 1 (rel-01)
  #'         principal component analysis (PCA)
  #' ncp   : number of principal components used (valid only for format="PCA")
  
  require(scales)
  require(FactoMineR)
  
  COVARIATES <- df.cov
  
  if(format == "rel-01"){
    COVARIATES <- as.matrix(round(scale(COVARIATES,scale = T,center = T),digits))
    return(W=COVARIATES)
  }
  
  if(format == "101"){
    COVARIATES<-round(apply(COVARIATES,2,function(X) scales::rescale(x=X,to=c(-1,1))),digits)
    return(W=COVARIATES)}
  
  if(format == "PCA"){
    res.pca <- PCA(COVARIATES,graph=FALSE)
    COVARIATESp <- res.pca$svd$U[,1:ncp]
    rownames(COVARIATESp) <- row.names(COVARIATES)
    return(list(W=COVARIATESp,eig=res.pca$eig))}
}
#' ===============================================================================
#'  Getting Ecophysiological-enriched Kernel models
#' ===============================================================================

getEnrichedKernel = function( Ke = NULL,                    #' environmental kernel see envKernel()
                              Kw = NULL,                    #' covariate kernel
                              W = NULL,                     #' covariate matrix
                              Kg,                           #' genotypic kernel (p x p genotypes)
                              Y,                            #' phenotypic dataframe
                              X = NULL,                     #' fixed effects
                              skip = 3,                     #' for envK function
                              model = "MM",                 #' family model : model = c("MM","MDs","RN")
                              type =  "E",                  #' environmental structure:  type = c("EGE","E")
                              react.norm = "WGW",           #' reaction norm structure
                              intercept.random = FALSE      #' insert genomic random intercept
){
  
  #'-------------------------------------------------------------------------------------#
  #' 1 Introduction step
  #'-------------------------------------------------------------------------------------#
  
  if (!require(BGGE)) {install.packages("BGGE");require(BGGE);}
  
  cat("----------------------------------------------------- \n")
  cat("Getting Ecophysiological-enriched Kernel models\n")
  cat("See BGGE package vignette for more details \n")
  cat("----------------------------------------------------- \n")
  
  if(!any(model %in% c("MM","MDs","RN"))){stop("Model not specified. Choose between MM or MDs")}
  
  #'-------------------------------------------------------------------------------------#
  #' 2 Creating Kernels
  #'-------------------------------------------------------------------------------------#
  
  #' 2.1 Reaction norm model from Jarquin et al. 2014
  #' see getReactionNorm() funciton
  #' for W build, see envMarker() function
  #'-------------------------------------------------------------------------------------#
  
  if(model == "RN"){ 
    cat(paste("Building kernel-based reaction norm model \n"),sep="");
    K = getReactionNorm(W = W,Kg = Kg,Y = Y,X = X, react.norm = react.norm,
                        intercept.random = intercept.random);
  }else{
    
  #' 2.2 our new models
  #'-------------------------------------------------------------------------------------#

    if(isTRUE(type %in% c("EGE","E"))){
      
      #' 2.2.1 Ecophysiological enriched environmental plus GE structure (E+GE)
      #' see getGEenriched() function
      #'-------------------------------------------------------------------------------------#
      
      if(type == "EGE"){ 
        
        cat(paste("Building ",model," with ecophysioligical enriched effects  \n"),sep="");
        K = getGEernriched(Ke = Ke,W = W,Kg = Kg,Y = Y,
                           X = X,model = model,
                           type = type,
                           intercept.random = intercept.random);
      }
      
      #' 2.2.2 Ecophysiological enriched environmental structure (E)
      #' see getEenriched() function
       #'-------------------------------------------------------------------------------------#
      if(type == "E"){ 
        cat(paste("Building ",model," with random environmental kernel (E) \n"),sep="");
        K = getEenriched(Ke = Ke,W = W,Kg = Kg,Y = Y,
                         X = X,model = model,
                         type = type,
                         intercept.random = intercept.random);
      }
      
    }else{
      
  #' Current BGGE default (benchmark model)
  #'-------------------------------------------------------------------------------------#
      cat(paste("Building ",model," with fixed environmental effects \n"),sep="");
      n = length(Kg);
      K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
      
    }
    if(model == "MDs" & type == "EGE"){
      cat("----------------------------------------------------- \n");    
      cat("ATTENTION!!: Model with GxE plus reaction norm. 
          This might not be the best option because 
          the model may be over-parameterized \n");
      cat("----------------------------------------------------- \n");}
    
    
  }
  
  #'-------------------------------------------------------------------------------------#
  #' Ending step: return New K (if covariate is null, return benchmark K)
  #'-------------------------------------------------------------------------------------#
  cat(paste("Kernel:",names(K),"\n",sep=" "));
  cat("----------------------------------------------------- \n");
  return(K)
  }

#' ===============================================================================
#' getting environmental enriched kernels for genomic prediciton
#' ===============================================================================
getEenriched = function(Ke = NULL,                    #' environmental kernel see envKernel function
                        W = NULL,                     #' covariate matrix
                        Kg,                           #' genotypic kernel (p x p genotypes)
                        Y,                            #' phenotypic dataframe
                        X = NULL,                     #' fixed effects
                        model = c("MM","MDs"),        #' family model
                        type = c("EGE","E"),          #' environmental structure
                        intercept.random = FALSE      #' insert genomic random intercept)
                        ){
  n = length(Kg);
  K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
  if(is.null(Ke)){
    
    # if Ke is missing, use envKernel function to produce Ke from W
    if(!is.null(W)){
      if(nrow(W) == nrow(Y)){Ke  = envKernel(df.cov = as.matrix(W))$O}
      if(nrow(W) != nrow(Y)){
        W   =  envK(df.cov = W,   df.p =  Y,  skip=3)
        Ke  = envKernel(df.cov = as.matrix(W))$O
      }
    }
    if(is.null(W)){cat("Missing environmental covariate effects \n")}
  }
  K$E = list(Kernel = Ke, Type = "D");
  return(K)
}
#' ===============================================================================
#' getting environmental plus GE enriched kernels for genomic prediciton
#' ===============================================================================
getGEernriched = function(Ke = NULL,                    #' environmental kernel
                          W = NULL,                     #' covariate matrix
                          Kg,                           #' genotypic kernel (p x p genotypes)
                          Y,                            #' phenotypic dataframe
                          X = NULL,                     #' fixed effects
                          model = c("MM","MDs","RN"),   #' family model
                          type = c("EGE","E"),          #' environmental structure
                          intercept.random = FALSE      #' insert genomic random intercept)
                          ){
  n = length(Kg);
  K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
  if(is.null(Ke)){
    
    # if Ke is missing, use envKernel function to produce Ke from W
    if(!is.null(W)){
      if(nrow(W) == nrow(Y)){Ke  = envKernel(df.cov = as.matrix(W))$O}
      if(nrow(W) != nrow(Y)){
        W   =  envK(df.cov = W,   df.p =  Y,  skip=3)
        Ke  = envKernel(df.cov = as.matrix(W))$O
      }
    }
    if(is.null(W)){cat("Missing environmental covariate effects \n")}
  }
  K$E = list(Kernel = Ke, Type = "D");
  j = length(K);
  
  for(i in 1:n){
    K[[j+i]] = list(Kernel = Ke*K[[i]]$Kernel,Type="D");
    names(K)[j+i] = paste("W",names(K)[i],sep="_");}
  return(K)
}
#' ===============================================================================
#' getting reaction norm model
#' ===============================================================================
getReactionNorm =  function(
                   W = NULL,                     #' covariate matrix
                   Kg,                           #' genotypic kernel (p x p genotypes)
                   Y,                            #' phenotypic dataframe
                   X = NULL,                     #' fixed effects
                   react.norm = NULL     ,       #' react.norm = c("WGE","WGW","WGEGW","GWGE"),
                   intercept.random = FALSE      #' insert genomic random intercept)
                   ){
  
  if(is.null(react.norm)){react.norm = "WGW"}
  n = length(Kg);

  if(!is.null(W)){
    
    Kw  =  envKernel(df.cov = as.matrix(W))$H;
    Kw  = tcrossprod(W %*% t(chol(Kw)));
    if(is.null(W)){cat("Missing environmental covariate effects \n")}
  }
 
  if(react.norm %in% c("WGW","GWW")){
    K = getK(Y = Y, X = X, setKernel = Kg, model = "MM",intercept.random = intercept.random);
    j = length(K);
    K$W = list(Kernel = Kw, Type = "D");
    
    for(i in 1:n){
      K[[j+i]] = list(Kernel = Kw*K[[i]]$Kernel,Type="D");
      names(K)[j+i] = paste("W",names(K)[i],sep="_");
      K$W = list(Kernel = Kw, Type = "D")}
    return(K)
  }
  
  if(react.norm %in% c("WGE","GEW")){
    K = getK(Y = Y, X = X, setKernel = Kg, model = "MDs",intercept.random = intercept.random);
    K$W = list(Kernel = Kw, Type = "D");
    return(K)
  }
  
  if(react.norm %in% c("WGEGW","WGWGE","GEGWW","GEWGW")){
    K = getK(Y = Y, X = X, setKernel = Kg, model = "MM",intercept.random = intercept.random);
    j = length(K);
   
    for(i in 1:n){
      K[[j+i]] = list(Kernel = Kw*K[[i]]$Kernel,Type="D");
      names(K)[j+i] = paste("W",names(K)[i],sep="_");}
    K0 = getK(Y = Y, X = X, setKernel = Kg, model = "MDs",intercept.random = intercept.random);
    K=c(K,K0[!names(K0) %in% names(K)]);
    if(!react.norm %in% c("GEGW","GWGE")){K$W = list(Kernel = Kw, Type = "D");}
    return(K)

  }
}
#' #####################################################################################################
gcostaneto/envirotype documentation built on Feb. 19, 2020, 10:36 p.m.