gmm = function(y,gen,dta=NULL,it=75,bi=25,th=1,model="BRR",...){
# models: BRR - BayesA - GBLUP - RKHS
# spline: dta must include a column called "ID Block Row Col"
if(is.null(dta)&nrow(gen)==length(y)){
n = length(y)
ids = sort(gsub('\\..\\.','',paste('geno',round(1:n/n,5)+0.000001,sep='.')))
dta = data.frame("ID"=ids)
rownames(gen) = ids
rm(n,ids)
}
# Dealing with Genotypic information and Missing loci
gen = data.matrix(gen)
if(anyNA(gen)){
cat("Removing markers with more than 80% missing \n")
m = apply(gen,2,function(x) mean(is.na(x)))
if(any(m>0.8)) gen = gen[,-which(m>0.8)]
cat("Imputing missing markers with expectations \n")
expectation = function(x){
if(anyNA(x)) x[is.na(x)] = mean(x)
return(x)}
E = apply(gen,2,expectation)
dim(E) = dim(gen)
gen = E; rm(E)
}
# Checking if the model is acceptable
if(! model %in% c("BRR","BayesA","GBLUP","RKHS")) stop("Model must be either 'BRR', 'BayesA','GBLUP' or'RKHS' ")
if(model=="GBLUP"|model=="RKHS"){
KERN = TRUE
if(model=="GBLUP"){
idnames = rownames(gen)
gen = GRM(gen)
cat("Eigendecomposing GRM for GBLUP \n")
gen = eigen(gen,TRUE)
V = gen$values
gen = gen$vectors
rownames(gen) = idnames
colnames(gen) = paste('PC',1:ncol(gen),sep='')
}
if(model=="RKHS"){
idnames = rownames(gen)
gen = GAU(gen)
cat("Eigendecomposing Gaussian Kernel for RKHS \n")
gen = eigen(gen,TRUE)
V = gen$values
gen = gen$vectors
rownames(gen) = idnames
colnames(gen) = paste('PC',1:ncol(gen),sep='')
}
# Reduction of dimensionality
ww = which(cumsum(V)/length(V)<.98);gen = gen[,ww];V = V[ww];rm(ww)
}else{
KERN = FALSE
}
cat("Checking for missing and setting up parameters \n")
# Dealing with missing information
if(anyNA(y)){
w = which(is.na(y))
y = y[-w]
dtanm = names(dta)
dta = data.frame(dta[-w,])
names(dta) = dtanm; rm(dtanm)
}
n = length(y)
# Isolating spatial information
if(sum(c("Block","Row","Col")%in%names(dta))==3){
spline = TRUE
spdta = dta[,c("Block","Row","Col")]
}else{
spline = FALSE
cat("No spline: 'dta' does not have columns Block-Row-Col \n")
}
# Isolating genotypic information
id = dta[,'ID']
# Isolating of fixed effects
if(any(!names(dta)%in%c("Block","Row","Col","ID"))){
fixed = TRUE
dta2 = dta[,-which(names(dta)%in%c("Block","Row","Col","ID"))]
dta2 = data.frame(dta2)
X = model.matrix(~.-1,data=dta2)
}else{
fixed = FALSE
cat("No additional effects other than splines and genomics \n")
}
# Search for NN plots
NNsrc = function(sp=NULL,rho=1,dist=3){
# OCTREE search function
NN = function(a,sp){
# shared environment
s0 = which(sp[,1]==a[1])
# row neighbors
s1 = s0[which(abs(sp[s0,2]-a[2])<(rho))]
s2 = s1[which(abs(sp[s1,3]-a[3])<=(rho*dist))]
# col neighbors
s3 = s0[which(abs(sp[s0,3]-a[3])<(rho*dist*0.5))]
s4 = s3[which(abs(sp[s3,2]-a[2])<=(rho))]
# Neighbors only
s5 = union(s2,s4)
wo = which(sp[s5,1]==a[1]&sp[s5,2]==a[2]&sp[s5,3]==a[3])
s5 = s5[-wo]
return(s5)}
# Set the data format
rownames(sp) = 1:nrow(sp)
if(is.data.frame(sp)) sp = data.matrix(sp)
# Search for neighbors within environment
Local_NN = apply(sp,1,NN,sp=sp)
cat(paste('Average n# of components:',round(mean(sapply(Local_NN,length)),2)),'\n')
# RETURN
return(Local_NN)
}
# Create covariate from neighbor plots
SPcov = function(NN,y) sapply(X = NN,FUN = function(x,y) mean(y[x],na.rm=TRUE),y = y,USE.NAMES = FALSE)
if(isTRUE(spline)){
# Preparing kNN matrix
cat("Preparing sparse mapping matrix (spline) \n")
R = NNsrc(spdta,...)
}
# Preparing genotypic mapping matrix
cat("Generating the incidence matrix of genotypes \n")
Z = model.matrix(~id-1)
colnames(Z) = gsub('^id','',colnames(Z))
Weight = colSums(Z)
# Dealing with unphenotyped material
if(any(Weight==0)){
Z = Z[,-which(Weight==0)]
Weight = Weight[-which(Weight==0)]
}
# Dealing with ungenotyped material
if(any(!colnames(Z)%in%rownames(gen))){
w = which(!colnames(Z)%in%rownames(gen))
if(fixed){
X = cbind(X,Z[,w])
Z = Z[,-w]
Weight = Weight[-w]
}else{
fixed = TRUE
X = Z[,w]
Z = Z[,-w]
Weight = Weight[-w]
}
}
# Getting the first round fitted
# (a) Fixed
fitSM = function(y,X,rdg=1){
XX = crossprod(X)
diag(XX) = diag(XX)+rdg
Xy = crossprod(X,y)
b = solve(XX,Xy)
hat = tcrossprod(t(b),X)
e = y - as.vector(hat)
ret = list('b'=as.vector(b),'e'=e,'y'=hat)
}
# intercept
mu = mean(y)
e = y-mu
# other "fixed" effects with minimal ridge
if(isTRUE(fixed)){
LE = fitSM(e,X)
e = LE$e
b = LE$b
}
# (b) Preparing genotypic data
gen0 = gen
gen = gen[colnames(Z),]
MAP = function(e,Z,Weight) as.vector(crossprod(Z,e))/Weight
cat("Computing priors and genomic parameters \n")
xx = apply(gen,2,crossprod)
p = ncol(gen)
g = rep(0,p)
L = rep(1,p)
Ve = 1
# (c) Running first round
# KERNEL AND REGRESSION METHODS
d = rep(1,ncol(gen))
E = MAP(e,Z,Weight)
update = KMUP(X=gen,b=g,d=d,xx=xx,e=E,L=L,Ve=Ve,pi=0)
# First round of WGR: Setting priors
if(KERN){
R2 = 0.5
df_prior = 10
Sk_prior = R2 * var(y, na.rm = T) * (df_prior + 2)
}else{
R2 = 0.5
df_prior = 10
MSx = sum(apply(gen, 2, var, na.rm = T))
S_prior = R2 * var(y, na.rm = T) * (df_prior + 2)/MSx
shape_prior = 1.1
rate_prior = (shape_prior - 1)/S_prior
}
Se_prior = (1-R2) * var(y, na.rm = T) * (df_prior + 2)
# BV
g = update$b
bv = tcrossprod(g,gen)
bvs = as.vector(tcrossprod(bv,Z))
e = e-bvs
if(KERN){
Vb = (sum(g^2/V) + Sk_prior)/rchisq(1, df_prior + p)
Ve = (crossprod(e)+Se_prior)/rchisq(1,n+df_prior)
L = c(Ve)/(Vb*V)
}else{
# Update BayesA variance components
S_conj = rgamma(1, p * df_prior/2 + shape_prior, sum(1/(g^2))/2 + rate_prior)
# For every round
Vb = (S_conj + g^2)/rchisq(p, df_prior + 1)
S_conj = rgamma(1, p * df_prior/2 + shape_prior,sum(1/Vb)/2 + rate_prior)
# Update Ve and Lambda
Ve = c(crossprod(e)+Se_prior)/rchisq(1,n+df_prior)
L = c(Ve)/c(Vb)
}
# (d) Adding Splines to X
if(isTRUE(spline)){
spEff = SPcov(R,e)
Q = qr.solve(spEff,e)*spEff
e = e - Q
sp = Q
}
# What to STORE
MCMC = seq(bi,it,th)
mc = length(MCMC)
G = VE = Mu = 0
VB = rep(0,p)
if(isTRUE(spline)) SP = rep(0,n)
if(isTRUE(fixed)) B = rep(0,ncol(X))
# LOOP STARTS HERE
cat("Fitting the model \n")
pb = txtProgressBar(style = 3)
for(i in 1:it){
# (a) Intercept
muE = mean(e)
mu = mu+muE
e = e-muE
# (b) Other fixed
if(isTRUE(fixed)){
LE = fitSM(e,X)
b = LE$b + b
e = LE$e
}
# (c) Update genetic components
bvs0 = bvs
E = MAP(e,Z,Weight)
update = KMUP(X=gen,b=g,d=d,xx=xx,e=E,L=L,Ve=Ve,pi=0)
g = update$b
bv = tcrossprod(g,gen)
bvs = as.vector(tcrossprod(Z,bv))
e = e+bvs0-bvs
# (d) Update VC
if(model=="BRR"){
Va = (sum(g^2) + S_prior)/rchisq(1, df_prior + p)
Vm = rep(Va,p)
Ve = c(crossprod(e)+Se_prior)/rchisq(1,n+df_prior)
L = c(Ve)/c(Vm)
}
if(model=="BayesA"){
Vm = (S_conj + g^2)/rchisq(p, df_prior + 1)
S_conj = rgamma(1, p * df_prior/2 + shape_prior,sum(1/Vb)/2 + rate_prior)
Ve = (crossprod(e)+Se_prior)/rchisq(1,n+df_prior)
L = c(Ve)/c(Vm)
}
if(KERN){
Va = (sum(g^2/V) + Sk_prior)/rchisq(1, df_prior + p)
Vm = rep(Va,p)
Ve = (crossprod(e)+Se_prior)/rchisq(1,n+df_prior)
L = c(Ve)/c(Vm*V)
}
# (e) Update Splines
if(isTRUE(spline)){
spEff = SPcov(R,e)
Q = qr.solve(spEff,e)*spEff
e = e - Q
sp = Q + sp
}
# (f) Storage
if(i %in% MCMC){
Mu = Mu+mu
G = G+g
VB = VB+Vm
VE = VE+Ve
if(isTRUE(fixed)) B = B+b
if(isTRUE(spline)) SP = SP+sp
}
# (g) Update status bar
setTxtProgressBar(pb, i/it)
}
close(pb)
# Posterior means
Mu = Mu/mc
G = G/mc
VB = VB/mc
VE = VE/mc
if(model=="BRR"|KERN) VB=mean(VB)
# Fitted
RND = as.vector(tcrossprod(Z,tcrossprod(G,gen)))
names(RND) = rownames(gen)
GEBV = as.vector(tcrossprod(G,gen0))
names(GEBV) = rownames(gen0)
# Hat
hat = Mu + RND
# Adding terms
if(isTRUE(spline)){
SP = SP/mc
hat = hat + SP
}
if(isTRUE(fixed)){
B = B/mc; names(B)=colnames(X)
FIX = as.vector(tcrossprod(B,X))
hat = hat + FIX
}
FINAL = list('hat'=hat,'obs'=y,
"mu"=Mu,"Z"=Z,"g"=G,
"Vg"=VB,"Ve"=VE,
"EBV"=GEBV)
if(model=="BRR"|KERN){
FINAL = c(FINAL,list('cxx'=mean(xx)))
}
if(isTRUE(spline)){
FINAL = c(FINAL,list("sp"=SP))
}
if(isTRUE(fixed)){
FINAL = c(FINAL,list("X"=X,"b"=B))
}
class(FINAL) = "SSM"
return(FINAL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.