Nothing
(0) Generating a testing set
library(BGLR)
data(wheat)
X=scale(wheat.X)
y=wheat.Y[,2]
n=length(y)
nTST=150
tst=sample(1:n,size=nTST)
(1) Setting the entries of the testing set in the response (y) to NA
yNA=y
yNA[tst]=NA
fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR')),nIter=6000,burnIn=1000)
yHat_1=fm$yHat[tst]
(2) Splitting both X and y into training and testing sets
yTRN=y[-tst] ; yTST=y[tst]
X.TRN=X[-tst,] ; X.TST=X[tst,]
fm=BGLR(y=yTRN,ETA=list(list(X=X.TRN,model='BRR')),nIter=6000,burnIn=1000)
yHat_2=fm$mu+as.vector(X.TST%*%fm$ETA[[1]]$b)
(3) Using G-matrix (only valid for GBLUP)
G=tcrossprod(X)/ncol(X)
G11=G[-tst,-tst] # genomic relationships in the training data
G21=G[tst,-tst]
fm=BGLR(y=yTRN,ETA=list(list(K=G11,model='RKHS')),nIter=6000,burnIn=1000)
yHat_3=fm$mu+as.vector(G21%*%solve(G11)%*%fm$ETA[[1]]$u)
cor(cbind(yHat_1,yHat_2,yHat_3))
(4) Cross-validation using a loop
folds=sample(1:5,size=n,replace=T)
yHatCV=rep(NA,n)
timeIn=proc.time()[3]
for(i in 1:max(folds)){
tst=which(folds==i)
yNA=y
yNA[tst]=NA
fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR')),nIter=6000,burnIn=1000)
yHatCV[tst]=fm$yHat[tst]
}
proc.time()[3]-timeIn
(5) Cross-validation in parallel at multiple cores
# First create a function that fits one fold
fitFold=function(y,W,folds,fold){
tst=which(folds==fold)
yNA=y
yNA[tst]=NA
fm=BGLR(y=yNA,ETA=list(list(X=W,model='BRR')),nIter=6000,burnIn=1000,verbose=F)
tmp=fm$yHat[tst]
names(tmp)=tst
return(tmp)
}
library(parallel)
system.time( tmp<-mclapply(FUN=fitFold,y=y,W=X,folds=folds,X=1:5,mc.cores=3))
yHatCV2=unlist(tmp)
yHatCV2=yHatCV2[order(as.integer(names(yHatCV2)))] # need to order the vector
cor(yHatCV,yHatCV2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.