`GAPIT.SS` <-
function(DP=NULL,IC=NULL,buspred=FALSE,lmpred=TRUE){
#Object: To Sufficient Statistics (SS) for GWAS and GS
#Designed by Zhiwu Zhang
#Writen by Jiabo Wang
#Last update: Novenber 3, 2016
##############################################################################################
print("GAPIT.SS in process...")
#Define the funcitno here
Timmer=GAPIT.Timmer(Infor="GAPIT.SS")
Memory=GAPIT.Memory(Infor="GAPIT.SS")
GR=list()
GR$GVs=NULL
if(DP$SNP.test)
{
print("GAPIT will be into GWAS approach...")
ic_GD=IC$GD
ic_GM=IC$GM
ic_Y=IC$Y
ic_KI=IC$K
ic_PCA=IC$PCA
Z=DP$Z
taxa_Y=as.character(ic_Y[,1])
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GAPIT.QC")
Memory=GAPIT.Memory(Memory=Memory,Infor="GAPIT.QC")
if(DP$kinship.algorithm!="None" & DP$kinship.algorithm!="SUPER" & is.null(Z))
{
Z=as.data.frame(diag(1,nrow(ic_Y)))
Z=rbind(taxa_Y,Z)
taxa=c('Taxa',as.character(taxa_Y))
Z=cbind(taxa,Z)
}
# print(head(ic_PCA))
# print(dim(DP$CV))
# print(head(DP$PC))
if(max(ic_PCA[,2])==min(ic_PCA[,2]))ic_PCA=NULL
#print(head(ic_PCA))
# print("@@@@@")
print(DP$kinship.algorithm)
if(DP$kinship.algorithm%in%c("FarmCPU","BLINK","MLMM","BLINKC"))
{
print("The GAPIT would go into Bus...")
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GAPIT.FarmCPU")
Memory=GAPIT.Memory(Memory=Memory,Infor="GAPIT.FarmCPU")
myBus=GAPIT.Bus(Y=ic_Y,CV=ic_PCA,Z=NULL,GK=NULL,KI=ic_KI,GD=ic_GD,GM=ic_GM,GT=IC$GT,name.of.trait=DP$name.of.trait,DP=DP,
method=DP$kinship.algorithm,GTindex=DP$GTindex,LD=DP$LD,opt=DP$opt,N.sig=DP$N.sig,
bin.size=DP$bin.size,bin.selection=DP$bin.selection,alpha=DP$alpha,WS=DP$WS,
cutOff=DP$cutOff,p.threshold=DP$p.threshold,QTN.threshold=DP$QTN.threshold,FDRcut=DP$FDRcut,
maf.threshold=DP$maf.threshold,method.GLM=DP$method.GLM,method.sub=DP$method.sub,seq.cutoff=DP$seq.cutoff,
method.sub.final=DP$method.sub.final,method.bin=DP$method.bin,Random.model=DP$Random.model,
DPP=DP$DPP,file.output=DP$file.output,Multi_iter=DP$Multi_iter,num_regwas=DP$num_regwas )
GWAS=myBus$GWAS
Pred=myBus$Pred
p=GWAS[,4]
# print(head(GWAS))
# print(head(ic_GM))
myBus$seqQTN=match(as.character(GWAS[p<(DP$cutOff/length(p)),1]),as.character(ic_GM[,1]))
myBus$seqQTN=myBus$seqQTN[!is.na(myBus$seqQTN)]
# BUS Prediction with gBLUP
# lmpred=TRUE
print(myBus$seqQTN)
if(buspred)
{
X=IC$myallGD[,-1]
for(l in lmpred)
{
memo=ifelse(l,"MAS","ABLUP")
if(length(myBus$seqQTN)>DP$seq.num)
{
myBus$seqQTN=order(p)[1:DP$seq.num]
}
if(l)
{
print("MAS to Predict phenotype !!")
# colnames(busCV)[1]=c("Taxa")
# print(length(IC$GT))
index=as.character(IC$myallGD[,1])%in%as.character(ic_Y[,1])
# print(cbind(ic_Y,IC$PCA))
if(!is.null(myBus$seqQTN))
{
# busCV=cbind(IC$myallCV,X[,myBus$seqQTN])
GD1 = as.matrix(X[index,myBus$seqQTN])
GD2 = as.matrix(X[,myBus$seqQTN])
}else{
numMarker=nrow(GWAS)
sp=sort(GWAS$P.value)
spd=abs(DP$cutOff-sp)
index_fdr=grep(min(spd),spd)[1]
FDRcutoff=DP$cutOff*index_fdr/numMarker
seqQTN=as.numeric(rownames(GWAS[GWAS$P.value<FDRcutoff,]))
# busCV=cbind(IC$myallCV,X[,seqQTN])
GD1 = as.matrix(X[index,seqQTN])
GD2 = as.matrix(X[,seqQTN])
}
if(!is.null(IC$myallCV))
{
CV1=as.matrix(IC$myallCV[index,-1])
CV2=as.matrix(IC$myallCV[,-1])
Group=1:nrow(IC$myallGD)
RefInf=rep(2,nrow(IC$myallGD))
# print(table(index))
RefInf[index]=1
ID=1:nrow(IC$myallCV)
BLUP=rep(0,nrow(IC$myallGD))
PEV=rep(0,nrow(IC$myallGD))
# BLUE=rep(NA,nrow(IC$myallGD))
print("The dimension of CV in lm model :")
print(dim(CV1))
# print(dim(GD1))
# print(head(CV1))
# print(dim(GD1))
XCV1=cbind(CV1, GD1)
cv.licols=GAPIT.Licols(X=XCV1)
XCV1=cv.licols$Xsub
mylm = stats::lm(ic_Y[,2] ~XCV1)
# {
# kk=1:2
# }else{
# kk=1
# }
lm.coeff=mylm$coefficients
if(stats::var(CV2[,1])==0)lm.coeff=lm.coeff[-2]
print(lm.coeff)
XCV=cbind(CV2, GD2)
XCV=XCV[,cv.licols$idx]
XCV=cbind(1,as.matrix(XCV))
#CV.Extragenetic specified
QTN.gs=ncol(GD2)
CV.Extragenetic=DP$CV.Extragenetic
if(ncol(XCV)>1)XCVI=XCV[,c((2+CV.Extragenetic):(ncol(XCV)-QTN.gs))]
XCVN=XCV[,c(1:(1+CV.Extragenetic)),drop=FALSE]
if(QTN.gs!=0)XCVqtn=XCV[,c((ncol(XCV)-QTN.gs):ncol(XCV))]
if(ncol(XCV)>1)beta.I=lm.coeff[c((2+CV.Extragenetic):(ncol(XCV)-QTN.gs))]
beta.N=lm.coeff[c(1:(1+CV.Extragenetic))]
if(QTN.gs!=0)beta.QTN=lm.coeff[c((ncol(XCV)-QTN.gs):ncol(XCV))]
BLUE.N=XCVN%*%beta.N
BLUE.QTN=rep(0,length(BLUE.N))
if(QTN.gs!=0)BLUE.QTN=XCVqtn%*%beta.QTN
BLUE.I=rep(0,length(BLUE.N))
if(ncol(XCV)>1)BLUE.I=XCVI%*%beta.I
BLUE=cbind(BLUE.N,BLUE.I,BLUE.QTN)
BLUE=data.frame(cbind(data.frame(IC$myallGD[,1]),data.frame(BLUE)))
colnames(BLUE)=c("Taxa","BLUE.N","BLUE.I","QTNs")
BB= cbind(BLUE,Group,RefInf,ID,BLUP,PEV)
gBreedingValue=BB[,3]+BB[,4]+BB[,8]
Prediction=BB[,2]+BB[,3]+BB[,4]+BB[,8]
Pred=cbind(BB,gBreedingValue,Prediction)
colnames(Pred)=c("Taxa","BLUE.N","BLUE.I","QTNs","Group","RefInf","ID","BLUP","PEV","gBreedingValue","Prediction")
}else{
busCV=cbind(as.data.frame(DP$GD[,1]),X[,myBus$seqQTN])
CV1=NULL
Group=1:nrow(IC$myallCV)
RefInf=rep(2,nrow(IC$myallCV))
RefInf[index]=1
ID=1:nrow(IC$myallCV)
BLUP=NA
PEV=NA
BLUE=NA
print("The dimension of CV in lm model :")
print(dim(CV1))
# print(dim(GD1))
mylm = stats::lm(ic_Y[!is.na(ic_Y[,2]),2] ~GD1)
# print(stats::predict(mylm,as.data.frame(cbind(IC$myallCV[,-1],GD2))))
Pred = cbind(as.character(DP$GD[,1]),Group,RefInf,ID,BLUP,PEV,BLUE,stats::predict(mylm,as.data.frame(cbind(IC$myallCV[,-1],GD2))))
colnames(Pred)=c("Taxa","Group","RefInf","ID","BLUP","PEV","BLUE","Prediction")
}
# print(dim(CV1))
# print(table(index))
print("Linear Regression to Predict phenotype Done !!")
}else{
print("ABLUP to Predict phenotype !!")
if(!is.null(IC$myallCV))
{
com.taxa=intersect(as.character(IC$myallCV[,1]),as.character(DP$GD[,1]))
CV1 = IC$myallCV
ablup.GD=IC$myallGD
ablup.X=ablup.GD[,-1]
# CV1=as.matrix(CV1[match(com.taxa,as.character(CV1[,1])),])
# print(dim(CV1))
# print(dim(ablup.GD))
# print(dim(ablup.X))
if(!is.null(myBus$seqQTN))
{
busCV=cbind(CV1,ablup.X[,myBus$seqQTN])
}else{
busCV=CV1
}
}else{
ablup.GD=DP$GD
ablup.X=ablup.GD[,-1]
busCV=cbind(as.data.frame(ablup.GD[,1]),ablup.X[,myBus$seqQTN])
}
# print(myBus$seqQTN)
pv=GWAS$P.value
noneff=as.numeric(rownames(GWAS[GWAS$P.value>DP$cutOff,]))
licols=FALSE
if(ncol(X)<50000)licols=TRUE
if(licols)
{
gene.licols=GAPIT.Licols(X=ablup.X)
ablup.X=ablup.X[,gene.licols$idx]
}
if(is.null(DP$KI))
{
KI= GAPIT.kinship.VanRaden(snps=as.matrix(ablup.X))
colnames(KI)=as.character(ablup.GD[,1])
busKI=cbind(as.data.frame(ablup.GD[,1]),KI)
colnames(busKI)[1]=c("Taxa")
}else{
busKI=DP$KI
}
# cv.licols=GAPIT.Licols(X=busCV[,-1])
# geneGD=cv.licols$Xsub
# print(dim(busCV))
# print(head(busCV))
# busCV=as.data.frame(busCV[,cv.licols$idx])
busCV=cbind(as.data.frame(busCV[,1]),matrix(as.numeric(as.matrix(busCV[,-1])),nrow(busCV),ncol(busCV)-1))
print("The dimension of CV and phenotype in ABLUP model :")
print(dim(busCV))
# print(head(busCV))
# print(apply(busCV[,-1],2,sum))
# print(dim(ic_Y))
busGAPIT=GAPIT(
Y=ic_Y,
KI=busKI,
CV=busCV,
CV.Extragenetic=DP$CV.Extragenetic,
QTN.gs=ncol(busCV)-ncol(CV1),
model="gBLUP",
file.output=F)
Pred=busGAPIT$Pred
print("ABLUP Predict phenotype Done!!")
}#if lmpred
if(DP$file.output)
{
utils::write.csv(Pred,paste("GAPIT.Association.Prediction_results.",DP$name.of.trait,".",memo,".csv",sep=""), row.names = FALSE)
}
}#lmpred0
}#buspred
va=myBus$vg
ve=myBus$ve
h2=va/(va+ve)
mc=NULL
bc=NULL
mp=NULL
TV=NULL
Compression=NULL
GVs=myBus$GVs
}else{
print("The GAPIT would go into Main...")
Timmer=GAPIT.Timmer(Timmer=Timmer,Infor="GAPIT.Main")
Memory=GAPIT.Memory(Memory=Memory,Infor="GAPIT.Main")
GT=as.matrix(ic_GD[,1])
if(DP$PCA.total==0) ic_PCA=NULL
gapitMain <- GAPIT.Main(Y=ic_Y,
GD=IC$GD[,-1],
allGD=IC$myallGD[,-1],
allCV=IC$myallCV,
GM=DP$GM,
KI=ic_KI,
CV=IC$PCA,
CV.Extragenetic=DP$CV.Extragenetic,
GP=DP$GP,
GK=DP$GK,
SNP.P3D=DP$SNP.P3D,
kinship.algorithm=DP$kinship.algorithm,
bin.from=DP$bin.from,
bin.to=DP$bin.to,
bin.by=DP$bin.by,
inclosure.from=DP$inclosure.from,
inclosure.to=DP$inclosure.to,
inclosure.by=DP$inclosure.by,
group.from=DP$group.from,
group.to=DP$group.to,
group.by=DP$group.by,
kinship.cluster=DP$kinship.cluster,
kinship.group=DP$kinship.group,
name.of.trait=DP$name.of.trait,
file.path=DP$file.path,
file.from=DP$file.from,
file.to=DP$file.to,
file.total=DP$file.total,
file.fragment = DP$file.fragment,
file.G=DP$file.G,
file.Ext.G=DP$file.Ext.G,
file.GD=DP$file.GD,
file.GM=DP$file.GM,
file.Ext.GD=DP$file.Ext.GD,
file.Ext.GM=DP$file.Ext.GM,
SNP.MAF= DP$SNP.MAF,
FDR.Rate = DP$FDR.Rate,
SNP.FDR=DP$SNP.FDR,
SNP.effect=DP$SNP.effect,
SNP.impute=DP$SNP.impute,
PCA.total=DP$PCA.total,
#GAPIT.Version=GAPIT.Version,
GT=IC$GT,
SNP.fraction = DP$SNP.fraction,
seed =DP$seed,
BINS = DP$BINS,
SNP.test=DP$SNP.test,DPP=DP$DPP,
SNP.permutation=DP$SNP.permutation,
LD.chromosome=DP$LD.chromosome,
# LD.location=LD.location,
# LD.range=LD.range,
# SNP.CV=SNP.CV,
SNP.robust=DP$SNP.robust,
model=DP$model,
genoFormat="EMMA",
hasGenotype=TRUE,
byFile=FALSE,
fullGD=TRUE,
PC=DP$PC,
GI=ic_GM,
Timmer = DP$Timmer,
Memory = DP$Memory,
sangwich.top=DP$sangwich.top,
sangwich.bottom=DP$sangwich.bottom,
QC=DP$QC,
GTindex=DP$GTindex,
LD=DP$LD,
file.output=FALSE,
cutOff=DP$cutOff,
GAPIT3.output=DP$file.output,
Model.selection = DP$Model.selection,
Create.indicator = DP$Create.indicator,
# QTN=DP$QTN,
# QTN.round=DP$QTN.round,
# QTN.limit=DP$QTN.limit,
#QTN.update=QTN.update,
# QTN.method=DP$QTN.method,
Major.allele.zero=DP$Major.allele.zero,
NJtree.group=DP$NJtree.group,
NJtree.type=DP$NJtree.type,
plot.bin=DP$plot.bin,
QTN.position=DP$QTN.position,
plot.style=DP$plot.style,
SUPER_GS=DP$SUPER_GS)
GWAS=gapitMain$GWAS
if(DP$Random.model&DP$file.output)GR=GAPIT.RandomModel(Y=ic_Y,X=IC$GD[,-1],GWAS=GWAS,CV=gapitMain$PC,cutOff=DP$cutOff,name.of.trait=DP$name.of.trait,N.sig=DP$N.sig,GT=IC$GT)
Pred=gapitMain$Pred
va=NA#gapitMain$vg
ve=NA#gapitMain$ve
h2=gapitMain$h2
mc=gapitMain$effect.snp
bc=gapitMain$effect.cv
mp=gapitMain$P
TV=gapitMain$TV
Compression=gapitMain$Compression
GVs=GR$GVs
}#!DP$kinship.algorithm%in%c("FarmCPU","MLMM","BLINK","BLINKC")
myPower=NULL
if(!is.null(GWAS))myPower=GAPIT.Power(WS=DP$WS, alpha=DP$alpha, maxOut=DP$maxOut,seqQTN=DP$QTN.position,GM=DP$GM,GWAS=GWAS)
return (list(GWAS=GWAS,Pred=Pred,FDR=myPower$FDR,Power=myPower$Power,
Power.Alpha=myPower$Power.Alpha,alpha=myPower$alpha,h2=h2,va=va,ve=ve,
mc=mc,bc=bc,mp=mp,TV=TV,Compression=Compression,
Timmer=Timmer,Memory=Memory,GVs=GVs))
}else{
# Here is Genomic Prediction function
print("GAPIT will be into GS approach...")
gapitMain <- GAPIT.Main(Y=IC$Y,
GD=IC$GD[,-1],
allGD=IC$allGD[,-1],
GM=DP$GM,
KI=IC$KI,
Z=DP$Z,
CV=IC$PCA,
allCV=IC$myallCV,
CV.Extragenetic=DP$CV.Extragenetic,
GP=DP$GP,
GK=DP$GK,
SNP.P3D=DP$SNP.P3D,
kinship.algorithm=DP$kinship.algorithm,
bin.from=DP$bin.from,
bin.to=DP$bin.to,
bin.by=DP$bin.by,
inclosure.from=DP$inclosure.from,
inclosure.to=DP$inclosure.to,
inclosure.by=DP$inclosure.by,
group.from=DP$group.from,
group.to=DP$group.to,
group.by=DP$group.by,
kinship.cluster=DP$kinship.cluster,
kinship.group=DP$kinship.group,
name.of.trait=DP$name.of.trait,
file.path=DP$file.path,
file.from=DP$file.from,
file.to=DP$file.to,
file.total=DP$file.total,
file.fragment = DP$file.fragment,
file.G=DP$file.G,
file.Ext.G=DP$file.Ext.G,
file.GD=DP$file.GD,
file.GM=DP$file.GM,
file.Ext.GD=DP$file.Ext.GD,
file.Ext.GM=DP$file.Ext.GM,
SNP.MAF= DP$SNP.MAF,
FDR.Rate = DP$FDR.Rate,
SNP.FDR=DP$SNP.FDR,
SNP.effect=DP$SNP.effect,
SNP.impute=DP$SNP.impute,
PCA.total=DP$PCA.total,
#GAPIT.Version=GAPIT.Version,
GT=DP$GT,
SNP.fraction = DP$SNP.fraction,
seed =DP$ seed,
BINS = DP$BINS,
SNP.test=DP$SNP.test,
DPP=DP$DPP,
SNP.permutation=DP$SNP.permutation,
LD.chromosome=DP$LD.chromosome,
QTN.gs=DP$QTN.gs,
#LD.location=LD.location,
#LD.range=LD.range,
#SNP.CV=SNP.CV,
SNP.robust=DP$SNP.robust,
model=DP$model,
genoFormat="EMMA",
hasGenotype=TRUE,
byFile=FALSE,
fullGD=TRUE,
PC=DP$PC,
GI=DP$GI,
Timmer = DP$Timmer,
Memory = DP$Memory,
GAPIT3.output=DP$file.output,
sangwich.top=DP$sangwich.top,
sangwich.bottom=DP$sangwich.bottom,
QC=DP$QC,GTindex=DP$GTindex,
LD=DP$LD,file.output=FALSE,
cutOff=DP$cutOff,
Model.selection = DP$Model.selection,
Create.indicator = DP$Create.indicator,
# QTN=DP$QTN,
# QTN.round=DP$QTN.round,
# QTN.limit=DP$QTN.limit,
#QTN.update=QTN.update,
# QTN.method=DP$QTN.method,
Major.allele.zero=DP$Major.allele.zero,
NJtree.group=DP$NJtree.group,
NJtree.type=DP$NJtree.type,
plot.bin=DP$plot.bin,
QTN.position=DP$QTN.position,
plot.style=DP$plot.style,
SUPER_GS=TRUE
)
#print(str(gapitMain))
GWAS=gapitMain$GWAS
Pred=gapitMain$Pred
#print(head(Pred))
va=NA#gapitMain$vg
ve=NA#gapitMain$ve
h2=gapitMain$h2
mc=gapitMain$effect.snp
bc=gapitMain$effect.cv
mp=gapitMain$P
Compression=gapitMain$Compression
GAPIT.Compression.Visualization(Compression = Compression, name.of.trait = DP$name.of.trait,file.output=DP$file.output)
# # print(list(GWAS=GWAS,Pred=Pred,FDR=NULL,Power=NULL,
# Power.Alpha=NULL,alpha=NULL,h2=h2,va=va,ve=ve,Compression=Compression,
# mc=mc,bc=bc,mp=mp,TV=gapitMain$TV,
# Timmer=Timmer,Memory=Memory))
return (list(GWAS=GWAS,Pred=Pred,FDR=NULL,Power=NULL,
Power.Alpha=NULL,alpha=NULL,h2=h2,va=va,ve=ve,Compression=Compression,
mc=mc,bc=bc,mp=mp,TV=gapitMain$TV,
Timmer=Timmer,Memory=Memory))
}#end of SNP.TEST
} #end of GAPIT.SS function
#=============================================================================================
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.