R/GAPIT.cross_validation.compare.R

Defines functions `GAPIT.cross_validation.compare`

`GAPIT.cross_validation.compare` <-function(GD=NULL,Y=NULL, nrep=NULL,tc=NULL){
# Object: GAPIT.cross validation compare to different folders by replicate Times,result:a pdf of the scree barplot and .cvs
# myGD:numeric SNP
# Y: phenotype with columns of taxa,Y1,Y2...
# rel:replications
# tc:comparation folds number and value
# Authors: You Tang,Jiabo Wang and You Zhou
# Last update: December 31, 2014 
##############################################################################################
if(is.null(GD)||is.null(Y)){stop("Validation Invalid. Please select read valid flies !")}
if(is.null(nrep))
  {
	nrep=10  #not input rel value,default replications number is 10
  }

if(nrep<2){stop("Validation Invalid. Please select replications >1 !")}
#rel<-2 ##replications
#t<-2
Y<-Y[!is.na(Y[,2]),] 
Y<-Y[,c(1,2)]
y<- stats::na.omit(Y)
#############
commonGeno <- unique(as.character(y[,1]))[unique(as.character(y[,1])) %in% myGD[,1]]
cG<-data.frame(commonGeno)
names(cG)<-"Taxa"
colnames(y)<-c("Taxa","pheno")
y2<-merge(y,cG,all.x=FALSE, all.y=TRUE, by = c("Taxa"))
GD=GD[match(y2$Taxa,GD[,1]),]
y<-y2
##############
X<-GD[,-1]
k1<-as.matrix(X)
k2=GAPIT.kinship.VanRaden(snps=k1)
myKI<-as.data.frame(k2)
myKI<-cbind(GD[,1],myKI)
# utils::write.table(y,"Y.txt",quote=F,sep="\t",row.names=F,col.names=T)
# utils::write.table(myKI,"K.txt",quote=F,row.names=F,col.names=F,sep="\t")
gc()
# myK<- utils::read.table("K.txt",head=F)
# y = utils::read.table("Y.txt",head=T)
myK=myKI
y <- stats::na.omit(y)
y=y[(y[,1] %in% myK[,1]),]
m=nrow(y)
if(is.null(tc))
	tc<-c(2,5,10,20,50)  ##default compare to folders num
tc1<-as.matrix(tc)
	allstorage.ref=matrix(0,nrep,nrow(tc1))
	allstorage.inf=matrix(0,nrep,nrow(tc1))
for(w in 1:nrow(tc1)){
num<-tc1[w,]
m.sample=floor(m/num)
	storage.ref=matrix(0,nrep,num)
	storage.inf=matrix(0,nrep,num)
	#storage.REML=matrix(0,rel,num)
for(k in 1:nrep)
{
   #################Rand group method 1############
 sets=sample(cut(1:nrow(y),num,labels=FALSE),nrow(y))
 sets = as.data.frame(sets)
 ynew <- cbind(sets,y)

	#i=sample(1:num, size = 1)
for(i in 1:num){
	
	 #use only genotypes that were genotyped and phenotyped
    ref <- y$Taxa[!ynew$sets==i]
      
     lines.cali<- ref     
   # ycali<- y[match(ref,y$Taxa),]
    #use only genotypes that were genotyped and phenotyped

    test <- y$Taxa[ynew$sets==i]
    lines.vali<-test 
    #yvali<- y[match(test,y$Taxa),]  	
 
 #################end Rand group method############

	 #use only genotypes that were genotyped and phenotyped
	 commonGeno_v <- lines.vali[lines.vali %in% myK[,1]]	               
	 yvali<- y[match(commonGeno_v,y[,1]),]    

	 #use only genotypes that were genotyped and phenotyped
	 commonGeno_c <- lines.cali[lines.cali %in% myK[,1]]
	 ycali<- y[match(commonGeno_c,y[,1]),]               
	
	Y.raw=ycali[,c(1,2)]#choos a trait

	myY=Y.raw
	myKI=myK
	max.groups=m

#Run GAPIT
#############################################
	# print(dim(myKI))
	# print(dim(myY))
	myGAPIT <- GAPIT(
	Y=myY,
	KI=myKI,
	# #group.from=max.groups,
	# group.from=max.groups,
	# group.to=max.groups,
	model="gBLUP",
	#group.by=10,
	# PCA.total=3,
	SNP.test=FALSE,
	file.output=FALSE
	)

prediction=myGAPIT$Pred

prediction.ref<-prediction[match(commonGeno_c,prediction$Taxa),]
prediction.inf<-prediction[match(commonGeno_v,prediction$Taxa),]

YP.ref <- merge(y, prediction.ref, by.x = 1, by.y = "Taxa")
YP.inf <- merge(y, prediction.inf, by.x = 1, by.y = "Taxa")

#Calculate correlation and store them
r.ref=stats::cor(as.numeric(as.vector(YP.ref[,2])),as.numeric(as.vector(YP.ref[,6]) ))
r.inf=stats::cor(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,6]) ))

if(r.inf<0){
#r.inf=cor(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,2]+YP.inf[,6])))
combine_output=cbind(as.numeric(as.vector(YP.inf[,2])),as.numeric(as.vector(YP.inf[,6]) ))

utils::write.csv(combine_output, paste("Accuracy_folders",num,k,i,rel,".csv",sep=""))
#stop("...........")
}
storage.ref[k,i]=r.ref
storage.inf[k,i]=r.inf

print(paste(" rel= ", rel, " k= ",k," i= ",i,sep = ""))
}
print(paste("finish  replications k= ",k," folders= ",num,sep = ""))
}
#Find missing position-->0.0
index=is.na(storage.inf)
storage.inf[index]=0


allstorage.inf[,w]=as.matrix(rowMeans(storage.inf))
allstorage.ref[,w]=as.matrix(rowMeans(storage.ref))
#as.matrix(rowMeans(storage.ref))

##output rel times and accuracy for every folders 

combine_output=cbind(storage.inf,allstorage.inf[,w])
combine_output1=cbind(storage.ref,allstorage.ref[,w])
colnames(combine_output)=c(paste("folders",c(1:num),sep=""),"mean")
utils::write.csv(combine_output, paste("Accuracy_folders",num,"by CMLM,rel_",rel,".csv",sep=""))
utils::write.csv(combine_output1, paste("Accuracy_folders  ref",num,"by CMLM,rel_",rel,".csv",sep=""))

}	
sr<-nrow(tc1)
##output means accuracy by rel for every folders 
colnames(allstorage.inf)=c(paste(tc1[c(1:sr),]," folders",sep=""))
utils::write.csv(allstorage.inf, paste("Accuracy_folders",nrow(tc1),"by CMLM,rel_",rel,".compare to means",".csv",sep=""))
utils::write.csv(allstorage.ref, paste("Accuracy_folders  ref",nrow(tc1),"by CMLM,rel_",rel,".compare to means",".csv",sep=""))

	name.of.trait=noquote(names(Y.raw)[2])
#rrel=round(rel/2)
#ppp<-matrix(0,sr,2)
ppp<-matrix(0,sr,2)

#if(rrel!=1){
#	aarm<-colMeans(allstorage.inf[1:rrel,])
#	}else{
#	aarm<-allstorage.inf[1,]	
#	}
#aam<-colMeans(allstorage.inf)
aam<-allstorage.inf
aam<-data.frame(aam)
bbm<-allstorage.ref
bbm<-data.frame(bbm)
for(b in 1:sr){
#ppp[b,]<-as.matrix(c(aarm[b],aam[b]))
ppp[b,1]<-as.matrix(mean(aam[,b]))
#colnames(ppp)<-c(rrel,rel)
}
for(c in 1:sr){
ppp[c,2]<-as.matrix(mean(bbm[,c]))
}
ppp<-as.matrix(cbind(ppp,tc1))
#colnames(ppp)<-c(rel)
sj <- stats::runif(1, 0, 1)
#name.of.trait="qqq"
grDevices::pdf(paste("GAPIT.cross_validation ", name.of.trait,sj,".compare to different folders.", ".pdf", sep = ""),width = 4.5, height = 4,pointsize=9)
graphics::par(mar = c(5,6,5,3))
grDevices::palette(c("blue","red",grDevices::rainbow(2)))
plot(ppp[,3],ppp[,2],xaxt="n",ylim=c(0,1.04),xlim=c(min(tc1)-1,max(tc1)+1),bg="lightgray",xlab="Number of folds",ylab="Correlation",type="o",pch=1,col=1,cex=1.0,cex.lab=1.7, cex.axis=1.3, lwd=3,las=1,lty =2)
	graphics::axis(side=1,at=tc1,labels=tc1,cex.lab=1.7)
        graphics::lines(ppp[,1]~ppp[,3], lwd=3,type="o",pch=19,col=2,lty =1)
	graphics::legend("bottomright",horiz = FALSE,c("Reference","Inference"),pch = c(1,19), lty =c(2,1),col=c(1:2),lwd=2,cex=1.2,bty="n")
grDevices::dev.off()
print(paste("GAPIT.cross validation ", name.of.trait,".compare to different folders.","successfully!" ,sep = ""))
return(list(allstorage.inf))
}#end GAPIT.cross validation compare to different folders by replicate Times
#=============================================================================================
jiabowang/GAPIT3 documentation built on March 6, 2025, 2:21 a.m.