if (!file.exists("GAPIT_Tutorial_Data.zip")) { download.file("http://zzlab.net/GAPIT/GAPIT_Tutorial_Data.zip", destfile = "GAPIT_Tutorial_Data.zip") unzip("GAPIT_Tutorial_Data.zip") } download.file("http://zzlab.net/GAPIT/data/CROP545_Covariates.txt", destfile = "CROPS545_Covariates.txt") download.file("http://zzlab.net/GAPIT/data/CROP545_Phenotype.txt", destfile = "CROPS545_Phenotype.txt") # Import the GAPIT demo data genotypes gt_scan <- data.frame(read.table("GAPIT_Tutorial_Data/mdp_numeric.txt", header = T, stringsAsFactors = F, sep = "\t", nrows = 1)) classes <- sapply(gt_scan, class) genotypes <- data.frame(read.table("GAPIT_Tutorial_Data/mdp_numeric.txt", header = T, row.names = 1, colClasses = classes, stringsAsFactors = F, sep = "\t")) GM <- read.table("GAPIT_Tutorial_Data/mdp_SNP_information.txt", header = T, stringsAsFactors = F, sep = "\t") CV <- read.table("CROPS545_Covariates.txt", header = T, stringsAsFactors = F, sep = "\t") phenotypes <- read.table("CROPS545_Phenotype.txt", header = T, stringsAsFactors = F, sep = "\t") #Load dependencies if (!require("pacman")) install.packages("pacman") pacman::p_load(ggplot2,knitr,gridExtra,kableExtra,data.table) #source("R/Function_qqplot.R") #source("R/Function_manhattan_QTN.R") #source("R/Function_manhattan.R") #source("R/FUNCTION_powerFDRtype1error.R")
#Create QTNs n=nrow(genotypes) m=ncol(genotypes) NQTN=10 #Show the positions of the QTN QTN.position=sample(m,NQTN,replace=F) phenotypes_n_1=phenotypes[,2] test=GWASapply(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3,QTN.position,plots=TRUE,messages=TRUE) hist(test$power.fdr.type1error.res$power)
phenotypes_n_1=phenotypes[,2] loop_start <- proc.time() test=GWASapply_rapid(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3) loop_end <- proc.time() loop_elapsed <- loop_end[3] - loop_start[3] timet=c() garep=replicate(30,{ loop_start <- proc.time() test=GWASapply_rapid(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3) loop_end <- proc.time() loop_elapsed <- loop_end[3] - loop_start[3] timet=c(time,loop_elapsed) }) mean(garep) manhattan_plot(GM,test$P.value.res,cutoff=NULL, trait = "unknown",messages=TRUE)
phenotypes_n_1=phenotypes[,2] test=GWASapply(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3,plots=FALSE,messages=TRUE)
phenotypes_n_1=phenotypes[,2] test=GWASapply(phenotypes_n_1,genotypes,GM=GM,PCA.M = 3,plot=FALSE,messages=TRUE)
phenotypes_n_1=phenotypes[,2] test=GWASapply(phenotypes_n_1,genotypes,GM=GM,PCA.M = 3,plot=FALSE,messages=TRUE)
phenotypes_n_1=phenotypes[,2] test=GWASapply(mySim$y,genotypes,CV,GM,PCA.M = 3,mySim$QTN.position,plot=TRUE,messages=TRUE,print=TRUE,trait="height") test$power.fdr.type1error.res
phenotypes_n_1=phenotypes[,2] mySim=G2P(genotypes, h2, alpha, NQTN, distribution) test=GWASapply(mySim$y,genotypes,CV,GM,PCA.M = 3,mySim$QTN.position,plot=TRUE,messages=TRUE,print=FALSE,cutoff = NULL) test$power.fdr.type1error.res$FP.fdr.power test$power.fdr.type1error.res$TP.fdr.power hist(test$power.fdr.type1error.res$power)
source("http://www.zzlab.net/StaGen/2020/R/G2P.R") source("http://www.zzlab.net/StaGen/2020/R/GWASbyCor.R") NQTN = 10 #specify number of QTN h2 = 0.75 #heritability alpha = 0.6 #alpha value distribution = "normal" #specify distribution #set.seed(1337) #plotting mySim=G2P(genotypes, h2, alpha, NQTN, distribution) #phenotype simulation p= GWASbyCor(X=genotypes,y=mySim$y)#GWAS by using the correlation methods p=data.frame(p) pres=power.fdr(p, QTN.position=mySim$QTN.position,cutoff=NULL) gcm=manhattan_plot(GM,p,cutoff=NULL,mySim$QTN.position,FP=pres$FP.fdr.power,TP=pres$TP.fdr.power, trait = "unknown",messages=TRUE) gcq=qq_plot(GM,p,mySim$QTN.position, trait = "unknown") test=GWASapply(mySim$y,genotypes,CV,GM,PCA.M = 3,QTN.position=mySim$QTN.position) gtm=manhattan_plot(GM,test$P.value.res,cutoff=NULL,mySim$QTN.position,FP=test$False.Positive,TP=test$True.Positive, trait = "unknown",messages=TRUE) gtq=qq_plot(GM,test$P.value.res,mySim$QTN.position, trait = "unknown") grid.arrange(gcm, gtm, nrow = 2, ncol = 1, top = "Variance of Principal Components") grid.arrange(gcq,gtq, nrow = 2, ncol = 1, top = "Variance of Principal Components")
gcrep=replicate (30,{ mySim=G2P(genotypes, h2, alpha, NQTN, distribution) #phenotype simulation p= GWASbyCor(X=genotypes,y=mySim$y)#GWAS by using the correlation methods p=data.frame(p) pres=power.fdr(p, QTN.position=mySim$QTN.position,cutoff=NULL) FalP=length(pres$FP.fdr.power) TruP=length(pres$TP.fdr.power) num_output=c() num_output=c(FalP,TruP) }) View(gcrep) print(rowMeans(gcrep)) # 5.16 (# QTN in top 10 p-value SNPs) 211.59 (# QTN with p-value lower than 7th highest) print(apply(gcrep,1,sd))
garep=replicate(30,{ mySim=G2P(genotypes, h2, alpha, NQTN, distribution) test_rep=GWASapply(mySim$y,genotypes,CV,GM,PCA.M = 3,QTN.position=mySim$QTN.position) FalP=length(test_rep$False.Positive) TruP=length(test_rep$True.Positive) num_output=c() num_output=c(FalP,TruP) }) print(rowMeans(garep)) # 5.16 (# QTN in top 10 p-value SNPs) 211.59 (# QTN with p-value lower than 7th highest) print(apply(garep,1,sd))
h2 <- c(0.25, 0.5, 0.75, 1) NQTN <- c(2, 10, 100) rept <- 100 #replicate time nsnp <- length(genotypes[1,]) for(i in 1:length(h2)){ for(j in 1:length(NQTN)){ assign(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep=""), matrix(NA, nrow=nsnp, ncol=0)) assign(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep=""), matrix(NA, nrow=nsnp, ncol=0)) for(r in 1:rept){ mySim <- G2P(X=X1to5, h2=h2[i], alpha=1, NQTN=NQTN[j], distribution="normal") ###GWAS by GLM myGLM <- GWASapply(mySim$y,genotypes,CV,GM,PCA.M = 3,QTN.position=mySim$QTN.position) GLM.power <- get(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep="")) GLM.power <- cbind(GLM.power, as.vector(myGLM$power.fdr.type1error$power)) GLM.fdr <- get(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep="")) GLM.fdr <- cbind(GLM.fdr, myGLM$power.fdr.type1error$fdr) assign(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep=""), GLM.power) assign(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep=""), GLM.fdr) ###GWASbyCor myP <- GWASbyCor(genotypes, mySim$y) order.SNP <- order(myP) Cor.pwrfdr <- power.fdr(order.SNP, mySim$QTN.position) Cor.power <- get(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep="")) Cor.power <- cbind(Cor.power, Cor.pwrfdr$power) Cor.fdr <- get(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep="")) Cor.fdr <- cbind(Cor.fdr, Cor.pwrfdr$fdr) assign(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep=""), Cor.power) assign(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep=""), Cor.fdr) } } } par(mfrow=c(4, 3)) for(i in 1:length(h2)){ for(j in 1:length(NQTN)){ GLM.power <- get(paste("h2.", i, ".NQTN.", j, ".GLM.power", sep="")) GLM.fdr <- get(paste("h2.", i, ".NQTN.", j, ".GLM.fdr", sep="")) GLM.Y <- rowMeans(GLM.power) GLM.X <- rowMeans(GLM.fdr) Cor.power <- get(paste("h2.", i, ".NQTN.", j, ".Cor.power", sep="")) Cor.fdr <- get(paste("h2.", i, ".NQTN.", j, ".Cor.fdr", sep="")) Cor.Y <- rowMeans(Cor.power) Cor.X <- rowMeans(Cor.fdr) plot(GLM.X, GLM.Y, type="b", pch=1, col="red", xlim=c(0,1), ylim=c(0,1), xlab="fdr", ylab="power", main=paste("h2=", h2[i], " NQTN=", NQTN[j], sep="")) lines(Cor.Y~Cor.X, type="b", pch=2, col="blue") legend("topleft", c("GLM", "Cor"), col=c("red", "blue"), lty=1, pch=c(1, 2), cex=0.8, bty="n") } }
source("http://www.zzlab.net/StaGen/2020/R/G2P.R") source("http://www.zzlab.net/StaGen/2020/R/GWASbyCor.R") NQTN = 10 #specify number of QTN h2 = 0.75 #heritability alpha = 0.6 #alpha value distribution = "normal" #specify distribution #set.seed(1337) #plotting mySim=G2P(genotypes, h2, alpha, NQTN, distribution) #phenotype simulation p= as.vector(GWASbyCor(X=genotypes,y=phenotypes_n_1)) p= GWASbyCor(X=genotypes,y=phenotypes_n_1)#GWAS by using the correlation methods p=data.frame(p) pres=power.fdr(p, QTN.position=QTN.position,cutoff=NULL) manhattan_plot(GM,p,cutoff=NULL,QTN.position,FP=pres$FP.fdr.power,TP=pres$TP.fdr.power, trait = "unknown",messages=TRUE) qq_plot(GM,p,QTN.position, trait = "unknown") #Replicating gcrep=replicate (30,{ mySim=G2P(genotypes, h2, alpha, NQTN, distribution) #phenotype simulation p= as.vector(GWASbyCor(X=genotypes,y=phenotypes_n_1)) p= GWASbyCor(X=genotypes,y=phenotypes_n_1)#GWAS by using the correlation methods p=data.frame(p) pres=power.fdr(p, QTN.position=QTN.position,cutoff=NULL) FalP=length(pres$FP.fdr.power) TruP=length(pres$TP.fdr.power) num_output=c() num_output=c(FalP,TruP) }) num_output=c() for(i in 1:30){ mySim=G2P(genotypes, h2, alpha, NQTN, distribution) #phenotype simulation p= as.vector(GWASbyCor(X=genotypes,y=phenotypes_n_1)) p= GWASbyCor(X=genotypes,y=phenotypes_n_1)#GWAS by using the correlation methods p=data.frame(p) pres=power.fdr(p, QTN.position=QTN.position,cutoff=NULL) FalP=length(pres$FP.fdr.power) TruP=length(pres$TP.fdr.power) num_output=rbind(num_output,FalP,TruP) } View(gcrep) print(rowMeans(gcrep)) # 5.16 (# QTN in top 10 p-value SNPs) 211.59 (# QTN with p-value lower than 7th highest) print(apply(gcrep,1,sd)) num_output View(num_output) View(gcrep) means=mean(gcrep) stdevs=sd(gcrep) out4 <- rbind(means,stdevs) out4 <- as.data.frame(out4) colnames(out4) <- c("QTNs in the Top 10") rownames(out4)<-c("Averages","Standard Deviations") kable(out4)
NQTN = 10 #specify number of QTN h2 = 0.75 #heritability alpha = 0.6 #alpha value distribution = "normal" #specify distribution set.seed(1337) n=nrow(genotypes) m=ncol(genotypes) NQTN=10 #Show the positions of the QTN QTN.position=sample(m,NQTN,replace=F) garep=replicate(30,{ mySim=G2P(genotypes, h2, alpha, NQTN, distribution) test_rep=GWASapply(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3,QTN.position=QTN.position) FalP=length(test_rep$False.Positive) TruP=length(test_rep$True.Positive) num_output=c() num_output=c(FalP,TruP) }) print(rowMeans(garep)) # 5.16 (# QTN in top 10 p-value SNPs) 211.59 (# QTN with p-value lower than 7th highest) print(apply(garep,1,sd)) means=mean(garep) stdevs=sd(garep) out5 <- rbind(means,stdevs) out5 <- as.data.frame(out5) colnames(out5) <- c("True Positives") rownames(out5)<-c("Averages","Standard Deviations") kable(out5) T10r=index[1:10] detected=intersect(T10r,mySim$QTN.position)#find the position of top10 detected QTN N_detected= length(detected) # the number of QTNs among top ten associated SNPs N_output=c() N_output=c(N_detected)#combine the output into matrix mean(test_rep$power.fdr.type1error.res$power) mean(test_rep$power.fdr.type1error.res$fdr) mean(test_rep$power.fdr.type1error.res$type1error) str(test_rep$P.value.res) test_c=GWASapply(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3,QTN.position=QTN.position, plot=FALSE,cutoff = 0.05) test_c$True.Positive test_c$False.Positive test_rep$False.Positive pdr=power.fdr(test_rep$order.SNP.res,QTN.position) tes=fdr(test$P.value.res) tes mean(test$power.fdr.type1error.res$fdr) fal.neg(test_rep$P.value.res,QTN.position,test_rep$cutoff.final.res)
#Time BLINK loop_start <- proc.time() test=GWAStest(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3) loop_end <- proc.time() loop_elapsed <- loop_end[3] - loop_start[3] #Time our function apply_start <- proc.time() test=GWASapply(phenotypes_n_1,genotypes,CV,GM,PCA.M = 3,QTN.position,plot=FALSE,messages=FALSE,print=FALSE,trait="height") apply_end <- proc.time() apply_elapsed <- apply_end[3] - apply_start[3] apply_elapsed # Consolidate times into a neat table time_table <- data.frame(c("loop", "apply"), c(loop_elapsed, apply_elapsed)) names(time_table) <- c("Method", "Time(s)") kable(time_table) %>% kable_styling(bootstrap_options = "striped", full_width = F) # Calculate the percent improvement perc_diff <- round((loop_elapsed - apply_elapsed) / loop_elapsed * 100,2) print(paste("Apply is ", perc_diff , "% faster!"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.