#' #####################################################################################################
#' Bayesian prediction models for genomic selection with ecophysiological information
#' Author: Germano MFC Neto date 19/06/19 , version BETA 0.03 (first version from oct 2018)
#' implementation in BGLR or BGGE packages
#' #####################################################################################################
#
#' ===============================================================================
#' Environmental kernels
#' ===============================================================================
envKernel <-function(df.cov, is.scaled=T, sd.tol = 1,tol=.001){
if(!is.matrix(df.cov)){stop('df.cov must be a matrix')}
if(!isTRUE(is.scaled)){
Amean <- df.cov-apply(df.cov,2,mean)+tol
sdA <- apply(Amean,2,sd)
A. <- Amean/sdA
removed <- names(sdA[sdA < sd.tol])
df.cov <- A.[,!colnames(A.) %in% removed]
t <- ncol(df.cov)
r <- length(removed)
cat(paste0('------------------------------------------------','\n'))
cat(paste0(' Removed envirotype markers:','\n'))
cat(paste0(r,' from ',t,'\n'))
cat(paste0(removed,'\n'))
cat(paste0('------------------------------------------------','\n'))
}
O <- tcrossprod(df.cov)/ncol(df.cov) # env kernel from covariates
H <- crossprod(df.cov)/nrow(df.cov) # omega kernel for covariates
O <- O + diag(1E-4,nrow=nrow(O),ncol=ncol(O))
H <- H + diag(1E-4,nrow=nrow(H),ncol=ncol(H))
return(list(H=H,O=O))
}
envK = function(df.cov,df.p,skip=3){
df.p <-data.frame(df.p)
df.cov <-data.frame(df.cov)
df.cov$env <- as.factor(rownames(df.cov))
W <- as.matrix(merge(df.p,df.cov, by="env")[,-c(1:skip)])
return(W)
}
envMarker = function(df.cov,digits=0, format=c("101","rel-01","PCA"), ncp=2){
#' df.cov : dataframe of environmental covariates
#' digits: number of output digits
#' format: environmental marker (012),
#' centering and scaled to mean 0 and variance 1 (rel-01)
#' principal component analysis (PCA)
#' ncp : number of principal components used (valid only for format="PCA")
require(scales)
require(FactoMineR)
COVARIATES <- df.cov
if(format == "rel-01"){
COVARIATES <- as.matrix(round(scale(COVARIATES,scale = T,center = T),digits))
return(W=COVARIATES)
}
if(format == "101"){
COVARIATES<-round(apply(COVARIATES,2,function(X) scales::rescale(x=X,to=c(-1,1))),digits)
return(W=COVARIATES)}
if(format == "PCA"){
res.pca <- PCA(COVARIATES,graph=FALSE)
COVARIATESp <- res.pca$svd$U[,1:ncp]
rownames(COVARIATESp) <- row.names(COVARIATES)
return(list(W=COVARIATESp,eig=res.pca$eig))}
}
#' ===============================================================================
#' Getting Ecophysiological-enriched Kernel models
#' ===============================================================================
getEnrichedKernel = function( Ke = NULL, #' environmental kernel see envKernel()
Kw = NULL, #' covariate kernel
W = NULL, #' covariate matrix
Kg, #' genotypic kernel (p x p genotypes)
Y, #' phenotypic dataframe
X = NULL, #' fixed effects
skip = 3, #' for envK function
model = "MM", #' family model : model = c("MM","MDs","RN")
type = "E", #' environmental structure: type = c("EGE","E")
react.norm = "WGW", #' reaction norm structure
intercept.random = FALSE #' insert genomic random intercept
){
#'-------------------------------------------------------------------------------------#
#' 1 Introduction step
#'-------------------------------------------------------------------------------------#
if (!require(BGGE)) {install.packages("BGGE");require(BGGE);}
cat("----------------------------------------------------- \n")
cat("Getting Ecophysiological-enriched Kernel models\n")
cat("See BGGE package vignette for more details \n")
cat("----------------------------------------------------- \n")
if(!any(model %in% c("MM","MDs","RN"))){stop("Model not specified. Choose between MM or MDs")}
#'-------------------------------------------------------------------------------------#
#' 2 Creating Kernels
#'-------------------------------------------------------------------------------------#
#' 2.1 Reaction norm model from Jarquin et al. 2014
#' see getReactionNorm() funciton
#' for W build, see envMarker() function
#'-------------------------------------------------------------------------------------#
if(model == "RN"){
cat(paste("Building kernel-based reaction norm model \n"),sep="");
K = getReactionNorm(W = W,Kg = Kg,Y = Y,X = X, react.norm = react.norm,
intercept.random = intercept.random);
}else{
#' 2.2 our new models
#'-------------------------------------------------------------------------------------#
if(isTRUE(type %in% c("EGE","E"))){
#' 2.2.1 Ecophysiological enriched environmental plus GE structure (E+GE)
#' see getGEenriched() function
#'-------------------------------------------------------------------------------------#
if(type == "EGE"){
cat(paste("Building ",model," with ecophysioligical enriched effects \n"),sep="");
K = getGEernriched(Ke = Ke,W = W,Kg = Kg,Y = Y,
X = X,model = model,
type = type,
intercept.random = intercept.random);
}
#' 2.2.2 Ecophysiological enriched environmental structure (E)
#' see getEenriched() function
#'-------------------------------------------------------------------------------------#
if(type == "E"){
cat(paste("Building ",model," with random environmental kernel (E) \n"),sep="");
K = getEenriched(Ke = Ke,W = W,Kg = Kg,Y = Y,
X = X,model = model,
type = type,
intercept.random = intercept.random);
}
}else{
#' Current BGGE default (benchmark model)
#'-------------------------------------------------------------------------------------#
cat(paste("Building ",model," with fixed environmental effects \n"),sep="");
n = length(Kg);
K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
}
if(model == "MDs" & type == "EGE"){
cat("----------------------------------------------------- \n");
cat("ATTENTION!!: Model with GxE plus reaction norm.
This might not be the best option because
the model may be over-parameterized \n");
cat("----------------------------------------------------- \n");}
}
#'-------------------------------------------------------------------------------------#
#' Ending step: return New K (if covariate is null, return benchmark K)
#'-------------------------------------------------------------------------------------#
cat(paste("Kernel:",names(K),"\n",sep=" "));
cat("----------------------------------------------------- \n");
return(K)
}
#' ===============================================================================
#' getting environmental enriched kernels for genomic prediciton
#' ===============================================================================
getEenriched = function(Ke = NULL, #' environmental kernel see envKernel function
W = NULL, #' covariate matrix
Kg, #' genotypic kernel (p x p genotypes)
Y, #' phenotypic dataframe
X = NULL, #' fixed effects
model = c("MM","MDs"), #' family model
type = c("EGE","E"), #' environmental structure
intercept.random = FALSE #' insert genomic random intercept)
){
n = length(Kg);
K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
if(is.null(Ke)){
# if Ke is missing, use envKernel function to produce Ke from W
if(!is.null(W)){
if(nrow(W) == nrow(Y)){Ke = envKernel(df.cov = as.matrix(W))$O}
if(nrow(W) != nrow(Y)){
W = envK(df.cov = W, df.p = Y, skip=3)
Ke = envKernel(df.cov = as.matrix(W))$O
}
}
if(is.null(W)){cat("Missing environmental covariate effects \n")}
}
K$E = list(Kernel = Ke, Type = "D");
return(K)
}
#' ===============================================================================
#' getting environmental plus GE enriched kernels for genomic prediciton
#' ===============================================================================
getGEernriched = function(Ke = NULL, #' environmental kernel
W = NULL, #' covariate matrix
Kg, #' genotypic kernel (p x p genotypes)
Y, #' phenotypic dataframe
X = NULL, #' fixed effects
model = c("MM","MDs","RN"), #' family model
type = c("EGE","E"), #' environmental structure
intercept.random = FALSE #' insert genomic random intercept)
){
n = length(Kg);
K = getK(Y = Y, X = X, setKernel = Kg, model = model,intercept.random = intercept.random);
if(is.null(Ke)){
# if Ke is missing, use envKernel function to produce Ke from W
if(!is.null(W)){
if(nrow(W) == nrow(Y)){Ke = envKernel(df.cov = as.matrix(W))$O}
if(nrow(W) != nrow(Y)){
W = envK(df.cov = W, df.p = Y, skip=3)
Ke = envKernel(df.cov = as.matrix(W))$O
}
}
if(is.null(W)){cat("Missing environmental covariate effects \n")}
}
K$E = list(Kernel = Ke, Type = "D");
j = length(K);
for(i in 1:n){
K[[j+i]] = list(Kernel = Ke*K[[i]]$Kernel,Type="D");
names(K)[j+i] = paste("W",names(K)[i],sep="_");}
return(K)
}
#' ===============================================================================
#' getting reaction norm model
#' ===============================================================================
getReactionNorm = function(
W = NULL, #' covariate matrix
Kg, #' genotypic kernel (p x p genotypes)
Y, #' phenotypic dataframe
X = NULL, #' fixed effects
react.norm = NULL , #' react.norm = c("WGE","WGW","WGEGW","GWGE"),
intercept.random = FALSE #' insert genomic random intercept)
){
if(is.null(react.norm)){react.norm = "WGW"}
n = length(Kg);
if(!is.null(W)){
Kw = envKernel(df.cov = as.matrix(W))$H;
Kw = tcrossprod(W %*% t(chol(Kw)));
if(is.null(W)){cat("Missing environmental covariate effects \n")}
}
if(react.norm %in% c("WGW","GWW")){
K = getK(Y = Y, X = X, setKernel = Kg, model = "MM",intercept.random = intercept.random);
j = length(K);
K$W = list(Kernel = Kw, Type = "D");
for(i in 1:n){
K[[j+i]] = list(Kernel = Kw*K[[i]]$Kernel,Type="D");
names(K)[j+i] = paste("W",names(K)[i],sep="_");
K$W = list(Kernel = Kw, Type = "D")}
return(K)
}
if(react.norm %in% c("WGE","GEW")){
K = getK(Y = Y, X = X, setKernel = Kg, model = "MDs",intercept.random = intercept.random);
K$W = list(Kernel = Kw, Type = "D");
return(K)
}
if(react.norm %in% c("WGEGW","WGWGE","GEGWW","GEWGW")){
K = getK(Y = Y, X = X, setKernel = Kg, model = "MM",intercept.random = intercept.random);
j = length(K);
for(i in 1:n){
K[[j+i]] = list(Kernel = Kw*K[[i]]$Kernel,Type="D");
names(K)[j+i] = paste("W",names(K)[i],sep="_");}
K0 = getK(Y = Y, X = X, setKernel = Kg, model = "MDs",intercept.random = intercept.random);
K=c(K,K0[!names(K0) %in% names(K)]);
if(!react.norm %in% c("GEGW","GWGE")){K$W = list(Kernel = Kw, Type = "D");}
return(K)
}
}
#' #####################################################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.