## GENERIC SIM TESTING FUNCTION ##
#############
## simTest ##
#############
#
# ## CHECKING SNP.SIM.Q vs. SNP.SIM: ##
# #####################################
# ## load treeWAS:
# require(treeWAS)
# ## load data:
# data(tree)
# data(phen)
# data(dist_0)
# data(dist_0.01)
# data(dist_0.05)
# data(dist_0.1)
#
# ## get phen loci:
# n.phen.subs <- 15
# grp.min <- 0.25
# seed <- 1
# phen.list <- phen.sim(tree, n.subs = n.phen.subs, grp.min = grp.min, seed = seed)
#
# ## set args:
# n.snps <- 10000
# n.subs <- dist_0.01
# n.snps.assoc <- 10
# assoc.prob <- 90
# tree <- tree
# phen.loci <- phen.list$phen.loci
# heatmap <- FALSE
# reconstruct <- FALSE
# dist.dna.model <- "JC69"
# row.names <- NULL
# set <- 1
# # seed <- 1
#
# #############
# ## SNP.SIM ##
# #############
# system.time(
# snps.list <- snp.sim(n.snps=n.snps,
# n.subs=n.subs,
# n.snps.assoc=n.snps.assoc,
# assoc.prob=assoc.prob,
# tree=tree,
# phen.loci=phen.loci,
# heatmap=heatmap,
# reconstruct=reconstruct,
# dist.dna.model=dist.dna.model,
# row.names = NULL,
# set=set,
# seed=seed)
# ) # 0.764 (w n.snps.assoc = 10), or 0.256 (w NO snps.assoc)
#
# str(snps.list)
# snps.list$snps[1:10,snps.list$snps.assoc]
#
# ###############
# ## SNP.SIM.Q ##
# ###############
# ## args:
# # Q.mat <- matrix(c(2, 0.75, 0.75, 1, 3, 0.5,
# # 0.25, 3, 3, 0.25, 0.5, 3, 1, 0.75, 0.75, 2), nrow = 4,
# # byrow = T, dimnames = rep(list(c("0|0", "0|1", "1|0", "1|1")), 2))
# s <- 20 # n.subs
# af <- 10 # association factor
# s <- s/sum(tree$edge.length)
# Q.mat <- matrix(c(NA, 1*s, 1*s, 0,
# 1*af*s, NA, 0, 1*af*s,
# 1*af*s, 0, NA, 1*af*s,
# 0, 1*s, 1*s, NA),
# nrow=4, byrow=T, dimnames=rep(list(c("0|0", "0|1", "1|0", "1|1")), 2))
# diag(Q.mat) <- sapply(c(1:nrow(Q.mat)), function(e) -sum(Q.mat[e, c(1:ncol(Q.mat))[-e]]))
#
#
# n.phen.subs <- 15
# grp.min <- 0.25
# set <- 3
#
#
# ## run snp.sim.Q:
# system.time(
# snps.list.Q <- snp.sim.Q(n.snps=n.snps,
# n.subs=n.subs,
# snp.root = NULL,
# n.snps.assoc=n.snps.assoc,
# assoc.prob=assoc.prob,
# Q = Q.mat,
# tree=tree,
# n.phen.subs = n.phen.subs,
# phen.loci=NULL,
# heatmap=heatmap,
# reconstruct=reconstruct,
# dist.dna.model=dist.dna.model,
# grp.min = grp.min,
# row.names = NULL,
# set=set,
# seed=seed)
# ) # 1.132 (w n.snps.assoc = 10), or 0.256 (w NO snps.assoc)
#
# str(snps.list.Q)
# snps.list.Q$snps[1:10,snps.list.Q$snps.assoc]
###################
## R PROFILING?? ##
###################
## profiling of time (and memory??)
## COALESCENT TREE (w n.snps.sim = 10000)
# Rprof("E:/treeWAS_Sims/Rprof_simTest", memory.profiling=T)
# foo <- simTest()
# Rprof(NULL)
# summaryRprof("E:/treeWAS_Sims/Rprof_simTest") # , memory="both"
## R-TREE (w n.snps.sim = 10000)
# Rprof("E:/treeWAS_Sims/Rprof_simTest_rtree", memory.profiling=T)
# # foo <- simTest()
# Rprof(NULL)
# summaryRprof("E:/treeWAS_Sims/Rprof_simTest_rtree") # , memory="both"
###################
## PCA/DAPC error?
# Error in weights * y : non-numeric argument to binary operator
## set 3 error:
# Error in text.default(x = (max(h.null$breaks) * 3/4), y = (max(h.null$counts) * :
# object 'myCol' not found
# In addition: Warning message:
# In matrix(as.numeric(snps), nrow = nrow(snps.ori), ncol = ncol(snps.ori)) :
# data length [47700] is not a sub-multiple or multiple of the number of columns [10000]
# ## rtree error:
# Error in if (snp.node[[edges[e, 1]]] == "0" & phen.node[[edges[e, 1]]] == :
# missing value where TRUE/FALSE needed
# HYP -- problem in snp.sim.Q snps.assc sim -- edges not going in ordered pairs
### SOLN? --> need to add which(edge == e) as in snps.sim instead of just using snp.node[[edges[e,1]]] to get index...
#################
## PARALLELIZE ##
#################
# ## Load libraries:
# library(foreach)
# library(doParallel)
#
# ## Calculate the number of cores:
# no_cores <- detectCores() - 1 # 3
#
# ## Initiate implicit cluster:
# registerDoParallel(no_cores)
################################################################################################################################
################################################################################################################################
################################################################################################################################
################################################################################################################################
#######################
## SET 2 (ACE / ML!) ##
#######################
# out <- foreach(n.reps=rep(1, 20), file.n=c(1:20), .packages="treeWAS") %dopar%
# simTest(
# ## simTest args:
# set.number = 2,
# n.reps = n.reps,
# set.seed.as = "file.number",
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims",
#
# ## data from file args:
# from.file = FALSE,
# file.n = file.n,
# Windows=TRUE,
#
# ## coalescent.sim args:
# n.ind = 100,
# n.snps = 10000, # gen.size
# # sim.by = "locus",
# n.subs = dist_0, # 15, # theta (*2)
# n.phen.subs = 15, # theta_p = NULL # 15
# n.snps.assoc = 10, #
# # assoc.option = "all",
# assoc.prob = NULL, #100, # 90, #
# grp.min = 0.25,
# s = 20,
# af = 10,
# coaltree = TRUE,
#
# ## treeWAS args:
# ## RUNNING ALL OF THESE OPTIONS (FOR NOW):
# p.value = 0.01, # REQUIRED FOR FISHER TEST
# # p.value.correct = c("bonf", "fdr", FALSE), #mt.correct = FALSE
# # p.value.by = c("count", "density"),
# sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL ###################### CAREFUL (10,000) !!
# treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
# snps.reconstruction = "ace",
# phen.reconstruction = "ace"
# )
# ###############################################################################################################################
#
#
#
# out <- foreach(n.reps=rep(1, 20), file.n=c(21:40), .packages="treeWAS") %dopar%
# simTest(
# ## simTest args:
# set.number = 2,
# n.reps = n.reps,
# set.seed.as = "file.number",
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims",
#
# ## data from file args:
# from.file = FALSE,
# file.n = file.n,
# Windows=TRUE,
#
# ## coalescent.sim args:
# n.ind = 100,
# n.snps = 10000, # gen.size
# # sim.by = "locus",
# n.subs = dist_0.01, # 15, # theta (*2)
# n.phen.subs = 15, # theta_p = NULL # 15
# n.snps.assoc = 10, #
# # assoc.option = "all",
# assoc.prob = NULL, #100, # 90, #
# grp.min = 0.25,
# s = 20,
# af = 10,
# coaltree = TRUE,
#
# ## treeWAS args:
# ## RUNNING ALL OF THESE OPTIONS (FOR NOW):
# p.value = 0.01, # REQUIRED FOR FISHER TEST
# # p.value.correct = c("bonf", "fdr", FALSE), #mt.correct = FALSE
# # p.value.by = c("count", "density"),
# sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL ###################### CAREFUL (10,000) !!
# treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
# snps.reconstruction = "ace",
# phen.reconstruction = "ace"
# )
# ###############################################################################################################################
#
#
# out <- foreach(n.reps=rep(1, 20), file.n=c(41:60), .packages="treeWAS") %dopar%
# simTest(
# ## simTest args:
# set.number = 2,
# n.reps = n.reps,
# set.seed.as = "file.number",
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims",
#
# ## data from file args:
# from.file = FALSE,
# file.n = file.n,
# Windows=TRUE,
#
# ## coalescent.sim args:
# n.ind = 100,
# n.snps = 10000, # gen.size
# # sim.by = "locus",
# n.subs = dist_0.05, # 15, # theta (*2)
# n.phen.subs = 15, # theta_p = NULL # 15
# n.snps.assoc = 10, #
# # assoc.option = "all",
# assoc.prob = NULL, #100, # 90, #
# grp.min = 0.25,
# s = 20,
# af = 10,
# coaltree = TRUE,
#
# ## treeWAS args:
# ## RUNNING ALL OF THESE OPTIONS (FOR NOW):
# p.value = 0.01, # REQUIRED FOR FISHER TEST
# # p.value.correct = c("bonf", "fdr", FALSE), #mt.correct = FALSE
# # p.value.by = c("count", "density"),
# sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL ###################### CAREFUL (10,000) !!
# treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
# snps.reconstruction = "ace",
# phen.reconstruction = "ace"
# )
# ###############################################################################################################################
#
#
# out <- foreach(n.reps=rep(1, 20), file.n=c(61:80), .packages="treeWAS") %dopar%
# simTest(
# ## simTest args:
# set.number = 2,
# n.reps = n.reps,
# set.seed.as = "file.number",
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims",
#
# ## data from file args:
# from.file = FALSE,
# file.n = file.n,
# Windows=TRUE,
#
# ## coalescent.sim args:
# n.ind = 100,
# n.snps = 10000, # gen.size
# # sim.by = "locus",
# n.subs = dist_0.1, # 15, # theta (*2)
# n.phen.subs = 15, # theta_p = NULL # 15
# n.snps.assoc = 10, #
# # assoc.option = "all",
# assoc.prob = NULL, #100, # 90, #
# grp.min = 0.25,
# s = 20,
# af = 10,
# coaltree = TRUE,
#
# ## treeWAS args:
# ## RUNNING ALL OF THESE OPTIONS (FOR NOW):
# p.value = 0.01, # REQUIRED FOR FISHER TEST
# # p.value.correct = c("bonf", "fdr", FALSE), #mt.correct = FALSE
# # p.value.by = c("count", "density"),
# sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL ###################### CAREFUL (10,000) !!
# treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
# snps.reconstruction = "ace",
# phen.reconstruction = "ace"
# )
# ###############################################################################################################################
################################################################################################################################
################################################################################################################################
## AT THE END, RUN:
# stopImplicitCluster()
## PROBLEM -- WHY WERE THERE SO FEW UNIQUE COLUMNS BEFORE AND NOW SO MANY? (just bc dist_0 -> dist_0.01???)
###############################################################################################################################################
###############################################################################################################################################
# out <- simTest(
# ## simTest args:
# set.number = 1,
# n.reps = 1,
# set.seed.as = "file.number",
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims",
#
# ## data from file args:
# from.file = FALSE,
# file.n = 21,
# Windows=TRUE,
#
# ## coalescent.sim args:
# n.ind = 100,
# n.snps = 10000, # gen.size
# # sim.by = "locus",
# n.subs = dist_0.01, # 15, # theta (*2)
# n.phen.subs = 15, # theta_p = NULL # 15
# n.snps.assoc = 10, #
# # assoc.option = "all",
# assoc.prob = NULL, #100, # 90, #
# grp.min = 0.25,
# s = 20,
# af = 10,
# coaltree = TRUE,
#
# ## treeWAS args:
# ## RUNNING ALL OF THESE OPTIONS (FOR NOW):
# p.value = 0.01, # REQUIRED FOR FISHER TEST
# # p.value.correct = c("bonf", "fdr", FALSE), #mt.correct = FALSE
# # p.value.by = c("count", "density"),
# sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL ###################### CAREFUL (10,000) !!
# treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
# snps.reconstruction = "ace",
# phen.reconstruction = "ace"
# )
###############################################################################################################################################
#
# # simTest args:
# set.number = 3
# n.reps = 80
# set.seed.as = "file.number"
# working.dir = "/media/caitiecollins/88CC9BCECC9BB4C2/Cait 2016/Work/Xavier/Sims"
#
# ## data from file args:
# from.file = FALSE
# file.n = c(1:80) # Need to update this if not starting from 1!
# Windows = TRUE
# cluster = TRUE
#
# ## coalescent.sim args:
# # n.ind = sample(round(seq(50, 200, length.out = 80), 0), 80, replace = FALSE) # 100
# # n.snps = sample(round(seq(10000, 100000, length.out = 80), 0), 80, replace = FALSE) # 10000
# # n.subs = dist_0.01 # 1 #theta (*2)
# # n.phen.subs = 15 # 5 #theta_p = NULL ###
# # n.snps.assoc = round((n.snps/1000), 0) # = 0
# n.ind = 100
# n.snps = 2000
# n.subs <- c(rep(list(dist_0), 20), rep(list(dist_0.01), 20), rep(list(dist_0.05), 20), rep(list(dist_0.1), 20))
# n.phen.subs = 15 # 5 #theta_p = NULL ###
# n.snps.assoc = 10
# assoc.prob = 100
# grp.min = 0.25
# s = 20
# af = 10
# coaltree = TRUE
#
# ## treeWAS args:
# p.value = 0.01
# p.value.correct = "bonf"
# p.value.by = "count"
# sim.n.snps = n.snps*10 # 100000 # 10*n.snps #sim.gen.size = NULL
# treeWAS.test = c("terminal", "simultaneous", "subsequent") # "score"
# snps.reconstruction = "parsimony" # "ace" #
# phen.reconstruction = "parsimony" # "ace" #
########################################################################
###################
## DOCUMENTATION ##
###################
#' Simulation Testing.
#'
#' Generic simulation-testing function used to validate treeWAS performance on simulated datasets. Not designed for public use!
#'
#' @param test A character string or vector containing one or more of the following available tests of association:
#' "terminal", "simultaneous", "subsequent", "cor", "fisher". By default, the first three tests are run.
#' See details for more information on what these tests do and when they may be appropriate.
#'
#' @author Caitlin Collins \email{caitiecollins@@gmail.com}
#' @export
#'
#' @import adegenet
#' @rawNamespace import(ape, except = zoom)
#' @importFrom Hmisc all.is.numeric
#' @importFrom ade4 dudi.pca
#'
#' @export
########################################################################
simTest <- function(
## simTest args:
set.number = 3,
n.reps = 1,
set.seed.as = "file.number",
working.dir = "~/",
## data from file args:
from.file = FALSE,
file.n = NULL,
Windows = FALSE,
cluster = FALSE,
## coalescent.sim args:
n.ind = 100,
n.snps = 10000, # gen.size
# n.subs = 1, # theta (*2)
n.subs = treeWAS::dist_0.01, # theta (*2)
n.phen.subs = 15, # theta_p = NULL
n.snps.assoc = 10, # = 0
assoc.prob = 90, # 100 (set2)
grp.min = 0.25,
s = 20,
af = 10,
coaltree = TRUE,
## treeWAS args:
## RUNNING ALL OF THESE OPTIONS (FOR NOW):
p.value = 0.01, # REQUIRED FOR FISHER TEST
p.value.correct = "bonf", # c("bonf", "fdr", FALSE), #mt.correct = FALSE
p.value.by = "count", # c("count", "density"),
sim.n.snps = 100000, # 10*n.snps #sim.gen.size = NULL
treeWAS.test = c("terminal", "simultaneous", "subsequent"), # "score"
snps.reconstruction = "parsimony",
phen.reconstruction = "parsimony"
){
###############################################
## make lists in which to store all ###########
## data and output from each of n.reps runs: ##
###############################################
SNPS <- PHEN <- PHEN.PLOT.COL <- TREE <- OUT <- RES <-
FISHER.RESULTS <- PLINK.RESULTS <- PCA.RESULTS <- DAPC.RESULTS <- CMH.RESULTS <-
ARGS <- PERFORMANCE <- SCORE3 <- list()
## and make lists for saving filenames
filename.snps <- filename.phen <- filename.phen.plot.col <- filename.tree <-
filename.out <- filename.res <- filename.fisher.results <-
filename.plink.results <- filename.pca <- filename.dapc <- filename.cmh <-
filename.args <- filename.performance <- filename.score3 <-
filename.plots <- filename.tree.plot <- list()
####################################################################################################################################
################################### *** DEFINE ARGUMENTS | SET *** #################################################################
####################################################################################################################################
## coalescent.sim args:
if(missing(n.reps)) n.reps <- 1
if(missing(set.seed.as)) set.seed.as <- "file.number"
if(missing(n.ind)) n.ind <- 100
if(missing(n.snps)) n.snps <- 10000
if(missing(n.subs)) n.subs <- 1
if(missing(n.phen.subs)) n.phen.subs <- NULL
if(missing(n.snps.assoc)) n.snps.assoc <- 10
if(missing(assoc.prob)) assoc.prob <- 90
if(missing(grp.min)) grp.min <- 0.25
## treeWAS args:
# if(missing(p.value)) p.value <- 0.001
# if(missing(p.value.correct)) p.value.correct <- c("bonf", "fdr", FALSE)
# if(missing(p.value.by)) p.value.by <- c("count", "density")
if(missing(sim.n.snps)) sim.n.snps <- 100000
if(missing(treeWAS.test)) treeWAS.test <- c("terminal", "simultaneous", "subsequent")
if(missing(snps.reconstruction)) snps.reconstruction <- "parsimony"
if(missing(phen.reconstruction)) phen.reconstruction <- "parsimony"
## ensure phen is NULL (will be generated by sim)
phen <- NULL
###########
## SET 1 ##
###########
if(set.number == 1){
if(!is.null(phen)) phen <- NULL
### ensure n.phen.subs is NOT null
if(is.null(n.phen.subs)) n.phen.subs <- 15
if(is.null(assoc.prob)) assoc.prob <- 90
} # end set 1
###########
## SET 2 ##
###########
if(set.number == 2){
if(!is.null(phen)) phen <- NULL
## ensure n.phen.subs is NOT null
if(is.null(n.phen.subs)) n.phen.subs <- 15
assoc.prob <- 100
} # end set 2
###########
## SET 3 ##
###########
if(set.number == 3){
if(!is.null(phen)) phen <- NULL
## n.phen.subs & assoc.prob not actually being used in snp.sim.Q
if(is.null(n.phen.subs)) n.phen.subs <- 15
if(is.null(assoc.prob)) assoc.prob <- 90
## ensure 10 associated SNPs
if(is.null(n.snps.assoc)){
if(length(n.snps) == 1 & n.snps[1] == 2000){
n.snps.assoc <- 10
}else{
n.snps.assoc <- round((n.snps/1000), 0)
}
}
## set sim.n.snps:
if(is.null(sim.n.snps)){
sim.n.snps <- n.snps*10
}
} # end set 3
###########
## SET 4 ##
###########
if(set.number == 4){
if(!is.null(phen)) phen <- NULL
## ensure n.phen.subs is NOT null
if(is.null(n.phen.subs)) n.phen.subs <- 15
## ensure MULTIPLE associated SNPs
if(is.null(n.snps.assoc)) n.snps.assoc <- 10
if(is.null(assoc.prob)) assoc.prob <- 90
} # end set 4
## If cluster, no from.file!
if(cluster == TRUE){
from.file <- FALSE
working.dir <- "" # just in case?
}
args <- snps.assoc <- NULL
sim.n.snps.ori <- sim.n.snps
## change working dir if WINDOWS:
if(Windows == TRUE){
working.dir <- c("C:/Cait 2016/Work/Xavier/Sims")
}
##############################################################################################################################
########################################### *** DATA FROM FILE *** ###########################################################
##############################################################################################################################
if(from.file==TRUE){
###################################################
## READING IN DATA (OPTION for POST HOC TESTING) ##
###################################################
## Get from file: snps, phen, tree, performance, snps.assoc
if(is.null(file.n)) stop("Use argument file.n
to specify which files to read in.")
## get n.reps for main for loop
n.reps <- length(file.n)
## set working directory for the set specified
wd <- paste(working.dir, "/", "set", set.number, sep="")
setwd(wd)
## for loop:
for(i in 1:n.reps){
## get filename prefix for this round of this set:
filename.prefix <- paste("set", set.number, "_", file.n[i], "_", sep="")
## get snps
filename <- paste("./", filename.prefix, "snps.Rdata", sep="")
snps <- get(load(filename))
## get phen
filename <- paste("./", filename.prefix, "phen.Rdata", sep="")
phen <- get(load(filename))
## get tree
filename <- paste("./", filename.prefix, "tree.Rdata", sep="")
tree <- get(load(filename))
## get performance
filename <- paste("./", filename.prefix, "performance.Rdata", sep="")
performance <- get(load(filename))
## get snps.assoc (if any)
snps.assoc <- NULL
if(!is.null(performance$snps.assoc)) snps.assoc <- performance$snps.assoc
} # end for loop
} # end from.file == TRUE
##############################################################################################################################
##################################
## CONVERT VECTOR ARGS --> LIST ##
##################################
## For args of length n.reps, make lists:
if(length(n.ind) == 1){
N.IND <- as.list(rep(n.ind[1], n.reps))
cat("n.ind was of length 1. Using n.ind[1] for all reps.\n")
}else{
N.IND <- as.list(n.ind)
if(length(n.ind) != n.reps) warning("n.ind was not of length 1 or n.reps.\n")
}
if(length(n.snps) == 1){
N.SNPS <- as.list(rep(n.snps[1], n.reps))
cat("n.snps was of length 1. Using n.snps[1] for all reps.\n")
}else{
N.SNPS <- as.list(n.snps)
if(length(n.snps) != n.reps) warning("n.snps was not of length 1 or n.reps.\n")
}
if(length(n.snps.assoc) == 1){
N.SNPS.ASSOC <- as.list(rep(n.snps.assoc[1], n.reps))
cat("n.snps.assoc was of length 1. Using n.snps.assoc[1] for all reps.\n")
}else{
N.SNPS.ASSOC <- as.list(n.snps.assoc)
if(length(n.snps.assoc) != n.reps) warning("n.snps.assoc was not of length 1 or n.reps.\n")
}
if(length(sim.n.snps) == 1){
SIM.N.SNPS <- as.list(rep(sim.n.snps[1], n.reps))
cat("sim.n.snps was of length 1. Using sim.n.snps[1] for all reps.\n")
}else{
SIM.N.SNPS <- as.list(sim.n.snps)
if(length(sim.n.snps) != n.reps) warning("sim.n.snps was not of length 1 or n.reps.\n")
}
if(length(n.subs) == 1 | !is.list(n.subs)){
N.SUBS <- as.list(rep(list(n.subs), n.reps))
cat("n.subs was of length 1. Using same n.subs for all reps.\n")
}else{
N.SUBS <- as.list(n.subs)
if(length(N.SUBS) != n.reps){
N.SUBS <- as.list(rep(n.subs, n.reps))[1:n.reps]
warning("n.subs was not of length 1 or n.reps: repeating sequence until n.subs met.\n")
}
}
##############################################################################################################################
##############
## FOR LOOP ##
##############
for(i in 1:n.reps){
## get list args i:
n.ind <- N.IND[[i]]
n.snps <- N.SNPS[[i]]
n.snps.assoc <- N.SNPS.ASSOC[[i]]
sim.n.snps <- SIM.N.SNPS[[i]]
n.subs <- N.SUBS[[i]]
wd <- paste(working.dir, "/", "set", set.number, sep="")
if(cluster == FALSE) setwd(wd)
## get file number:
if(from.file==FALSE){
if(cluster == FALSE){
## get number for group | number of set3_snps already in file:
number <- (length(grep("_snps", dir("./")))+1)
}
## Unless file.n is provided as argument!
if(!is.null(file.n)){
if(!is.null(file.n[i])){
number <- file.n[i]
}
}
}else{
number <- file.n[i]
}
number <- 20
################
## dummy plot ##
################
## to give user indication of what round of simTest.set3 we are on:
par(mfrow=c(1,1))
round.marker <- paste("\n\n\n\n\n\n\n\n\n\n\n\nROUND", i,
"\nset", set.number,
"\n(file number", number, ")",
sep=" ")
plot.new()
title(round.marker, adj=0.5)
###############
## set seed? ##
###############
if(!is.null(set.seed.as)){
if(set.seed.as == "file.number"){
seed <- number
set.seed(seed)
}else{
if(length(set.seed.as) == n.reps){
seed <- set.seed.as[i]
set.seed(seed)
}else{
warning("seed is not of length n.reps; seed will not be set.")
seed <- NULL
}
}
}
# end set.seed.as
##############################################################################################################################
################################################ *** COALESCENT.SIM *** ######################################################
##############################################################################################################################
if(from.file==FALSE){
########################
## get PHEN for SET 1 ##
########################
# if(set.number == 1){
# ## simulate phen first, by random sampling ##
# ## enforce even split of cases and controls??
# #phen <- sample(c(rep("A", floor(n.ind/2)), rep("B", ceiling(n.ind/2)), replace=FALSE))
# ## or just draw phen by purely random sampling...?
# phen <- sample(c("A", "B"), n.ind, replace=TRUE)
# phen <- as.factor(phen)
# }else{
# phen <- NULL
# }
############################
## simulate data and tree ##
############################
## TESTING -- used to be phen = phen (but Hyp: multiple rounds causing problems--if so, could rename to phen.prior, eg.)
## CHECK:
# print("NUMBER"); print(number)
# print("PHEN BEFORE"); print(phen)
# gc()
filename.tree.plot[[i]] <- paste("./set", set.number, "_", number, "_tree_plot", ".pdf", sep="")
filename.panel.plot <- paste("./set", set.number, "_", number, "_panel_plot", ".pdf", sep="")
foo <- coalescent.sim(n.ind = n.ind,
n.snps = n.snps,
n.subs = n.subs,
n.snps.assoc = n.snps.assoc,
assoc.prob = assoc.prob,
n.phen.subs = n.phen.subs,
phen = NULL,
plot = TRUE,
heatmap = FALSE,
reconstruct = FALSE,
dist.dna.model = "JC69",
row.names = NULL,
grp.min = grp.min,
coaltree = coaltree,
set=set.number,
s = s,
af = af,
filename.plot=list(filename.tree.plot[[i]],
filename.panel.plot),
seed = seed)
print("coalescent done")
# gc()
####################################
## isolate common elements of foo ##
####################################
snps <- snps.ori <- snps.ori.ori <- foo$snps
if(!is.null(n.snps.assoc)) if(n.snps.assoc > 0){
snps.assoc <- snps.assoc.ori <- snps.assoc.loci <- foo$snps.assoc
}else{
snps.assoc <- NULL
}
phen <- phen.ori <- phen.ori.ori <- foo$phen
tree <- tree.ori <- foo$tree
phen.plot.col <- foo$phen.plot.col
## snps names:
if(is.null(colnames(snps))) colnames(snps) <- c(1:ncol(snps))
snps.names <- colnames(snps)
##########################################
## isolate set-specific elements of foo ##
##########################################
# if(is.null(phen)) phen <- foo$phen
## MAKE SURE PHEN IS IN CORRECT ORDER OF INDS NOT IN LEAF ORDER!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
######################
## save plot as pdf ##
######################
## CHECK-- Not sure, but may be safer to write this using pdf() instead of dev.copy ~ treeWAS plots...?
# filename.tree.plot[[i]] <- paste("./set", set.number, "_", number, "_tree_plot", ".pdf", sep="")
# dev.copy(pdf, file=filename.tree.plot[[i]], width=7, height=11) # , pointsize=1
# dev.off()
}# end from.file == FALSE
##############################################################################################################################
############################################### *** treeWAS *** ##############################################################
##############################################################################################################################
sim.n.snps.ori <- sim.n.snps
if(is.null(sim.n.snps)) sim.n.snps <- ncol(snps)*10
#######################
## save treeWAS plot ##
#######################
## NB: plot.png will not be viewable until fn has finished running...
# filename.plots[[i]] <- list()
# for(t in 1:length(treeWAS.test)){
#
# ## Save both Manhattan and Hist per test:
# filename.plots[[i]][[t]] <- c(## manhattan:
# paste("./set",
# set.number,
# "_", number,
# "_plot_manhattan_",
# treeWAS.test[t],
# ".pdf", sep=""),
#
# ## null.dist:
# paste("./set",
# set.number,
# "_", number,
# "_plot_",
# treeWAS.test[t],
# ".pdf", sep="")
# )
# }
## Save both Manhattan and Hist per test:
filename.plots[[i]] <-
## null.dist:
paste("./set",
set.number,
"_", number,
"_plot",
".pdf", sep="")
#################
## RUN treeWAS ##
#################
print("treeWAS started")
set.seed(seed)
syst.time <- system.time( # 341
out <- treeWAS(snps = snps,
phen = phen,
tree = tree,
n.subs = NULL,
n.snps.sim = sim.n.snps,
chunk.size = ncol(snps),
test = treeWAS.test, # c("terminal", "simultaneous", "subsequent")
snps.reconstruction = snps.reconstruction, # "parsimony",
snps.sim.reconstruction = "parsimony",
phen.reconstruction = phen.reconstruction,
phen.type = NULL,
na.rm = TRUE,
p.value = p.value, # 0.01
p.value.correct = p.value.correct, # "bonf
p.value.by = p.value.by, # "count"
dist.dna.model = "JC69",
plot.tree = FALSE,
plot.manhattan = TRUE,
plot.null.dist = TRUE,
plot.dist = FALSE,
snps.assoc = snps.assoc,
filename.plot = filename.plots[[i]],
seed = 1)
)
print("treeWAS done")
# gc()
# i <- 1 #####
#####
# dev.copy(pdf, file=filename.plots[[i]], width=7, height=11) # , pointsize=12
# dev.off()
##################################
## isolate df of Sig SNPs found ##
##################################
res.complete <- out
res <- out$res
treeWAS.all <- out$treeWAS.combined ## also stored in res.complete, which gets saved later.
##############
## get call ##
##############
## get arguments inputed to simTest for this round
#call <- match.call()
## TO DO: update arguments to contain actual values used
## (ie. return actual value of seed instead of set.seed.as)
## (eg. if n.phen.subs was NULL and got set to 25 due to set.number,
## report 25)
args <- NA
# args <- mget(names(formals()), sys.frame(sys.nframe()))
###########################################################################################################################
############################################# *** FISHER TEST *** #########################################################
###########################################################################################################################
#############################
## RUN FISHER'S EXACT TEST ##
#############################
## NOTE--we will run and save all 3 (uncorrected, Bonferroni-corrected, and FDR-corrected),
######### BUT we will only calculate performance metrics for the latter two (or only FDR??)...
## RUN TEST
pval.fisher <- sapply(c(1:ncol(snps)),
function(e) fisher.test(snps[,e], y=phen,
alternative="two.sided")$p.value)
## two.sided bc we want to know if
## inds w the phen have EITHER more 1s or 0s
p.thresh <- p.value
## WITHOUT CORRECTION, identify sig.snps
fisher.snps.uncorr <- colnames(snps)[which(pval.fisher < p.thresh)]
n.fisher.snps.uncorr <- length(fisher.snps.uncorr)
## w BONFERRONI CORRECTION, identify sig.snps
pval.bonf <- p.adjust(pval.fisher, method="bonferroni", n=length(pval.fisher))
fisher.snps.bonf <- colnames(snps)[which(pval.bonf < p.thresh)]
n.fisher.snps.bonf <- length(fisher.snps.bonf)
## w FDR CORRECTION, identify sig.snps
pval.fdr <- p.adjust(pval.fisher, method="fdr", n=length(pval.fisher))
fisher.snps.fdr <- colnames(snps)[which(pval.fdr < p.thresh)]
n.fisher.snps.fdr <- length(fisher.snps.fdr)
## CONVERT 0-LENGTH RESULTS TO NULL
if(length(fisher.snps.uncorr) == 0) fisher.snps.uncorr <- NULL
if(length(fisher.snps.bonf) == 0) fisher.snps.bonf <- NULL
if(length(fisher.snps.fdr) == 0) fisher.snps.fdr <- NULL
## STORE FISHER TEST RESULTS ##
fisher.results <- list(pval.fisher, fisher.snps.uncorr, fisher.snps.bonf, fisher.snps.fdr)
names(fisher.results) <- c("pval.fisher", "fisher.snps.uncorr", "fisher.snps.bonf", "fisher.snps.fdr")
## end fisher tests
###########################################################################################################################
################################################ *** PLINK *** ###########################################################
###########################################################################################################################
###################
## RUNNING PLINK ##
###################
# ## Set working directory to run plink program
# if(Windows == FALSE){
# setwd("/media/caitiecollins/88CC9BCECC9BB4C2/Program Files/plink-1.07-dos")
# }else{
# setwd("C:/Program Files/plink-1.07-dos")
# }
# plink.wd <- getwd()
# # setwd(plink.wd)
#
# #################
# ## Handle DATA ##
# #################
#
# ## STORE ORIGINAL DATA FOR LATER ##
# snps.ori <- snps.ori.ori
# phen.ori <- phen.ori.ori
#
# # #######################
# # ## from.file = FALSE ##
# # #######################
# # if(from.file == FALSE){
#
# ## If data was created during THIS round of simTest...
#
# ###########################
# ## CONVERT AND SAVE DATA ##
# ###########################
#
# ## set working directory for saving
# dat.wd <- paste(working.dir, "/set", set.number, "/", sep="")
# setwd(dat.wd)
#
# ##################################
# ## convert snps --> ped format: ##
# ##################################
#
# ## PED FORMAT: ##
# ## 6 "MANDATORY" COLUMNS first:
# ## FamilyID, IndividualID, PaternalID, MaternalID, Sex, Phenotype
# #### ... EXCLUDE OTHER "mandatory" columns w PLINK commands
# #### --> (IndividualID, Phenotype)
# ## 7-to-p: GENOTYPE COLUMNS
# ## NO HEADER ROW (belongs in .MAP file)
#
# ## SNPs can NOT be 0 --> convert from 0/1 to 1/2:
# snps <- snps+1
# row.names(snps) <- paste("ind", row.names(snps), sep=".")
# ## save row and column names for elsewhere:
# individualID <- row.names(snps)
# loci.names <- colnames(snps)
#
# ## convert to matrix:
# snps <- matrix(snps, byrow=FALSE, ncol=ncol(snps))
# ## Replace every other column with a copy of the column before it:
# ## NOTE--THIS MAY CHANGE (ie. IF WORKING INSTEAD WITH DATA AS MT DNA....)
# #snps[,seq(2, ncol(snps), 2)] <- snps[,seq(1, ncol(snps), 2)]
# ## DUPLICATE columns:
# snps.ori <- snps
# snps.new <- matrix(NA, nrow=nrow(snps.ori), ncol=2*ncol(snps.ori))
# snps.new[, seq(1, 2*ncol(snps.ori), 2)] <- snps.ori
# snps.new[, seq(2, 2*ncol(snps.ori), 2)] <- snps.ori
# snps <- snps.new
#
# ## recode phen: S/A as 0, R/B as 1:
# phen <- as.character(phen)
# # replace phen coding w 1/2:
# phen <- replace(phen, which(phen %in% c("R", "B", "1")), 2)
# phen <- replace(phen, which(phen %in% c("S", "A", "0")), 1)
# # phen <- as.numeric(phen) #?
# # names(phen) <- names(phen.ori) #?
#
# ## bind individualID, phen, snps
# dat <- cbind(individualID, phen, snps)
# colnames(dat) <- NULL
#
# ped <- dat
#
# ## get filename
# uniqueID <- paste("set", set.number, "_", number, sep="")
# filename <- paste("./", uniqueID, "_ped.Rdata", sep="")
#
# ## save dat.ped as Rdata
# save(ped, file=filename)
# #ped <- get(load("C:/Cait 2012/Work/Xavier/Sims/set3/set3_1_ped.Rdata"))
#
# ## save as text!
# #ped <- dat
# if(Windows == FALSE){
# filename <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".txt", sep="")
# }else{
# filename <- paste("C:/PLINK/", uniqueID, ".txt", sep="")
# }
# write.table(ped, file=filename, quote=FALSE, row.names=FALSE, col.names=FALSE)
#
# ## Do NOT save as .PED
# ## convert from text to PED!!
#
# ## UHOH -- STOPPED HERE -- SHELL COMMAND DOESNT SEEM TO BE WORKING FROM LINUX!!!!!!!!!!!!!!!!!!
# ## SYSTEM COMMAND NOT READING FILE NAMES CORRECTLY....????????????????///
#
# # filename.ori <- paste("\\media\\caitiecollins\\88CC9BCECC9BB4C2\\PLINK\\", uniqueID, ".txt", sep="")
# # filename.new <- paste("\\media\\caitiecollins\\88CC9BCECC9BB4C2\\PLINK\\", uniqueID, ".ped", sep="")
#
# if(Windows == FALSE){
# filename.ori <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".txt", sep="")
# filename.new <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".ped", sep="")
# command <- paste("mv", filename.ori, filename.new, sep=" ")
# }else{
# filename.ori <- paste("C:\\PLINK\\", uniqueID, ".txt", sep="")
# filename.new <- paste("C:\\PLINK\\", uniqueID, ".ped", sep="")
# command <- paste("move", filename.ori, filename.new, sep=" ")
# }
#
# ## run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
#
# ####################################
# ## convert snp meta-data --> .map ##
# ####################################
#
# ## MAP format: ##
# ## EACH LINE of a .map file describes a SINGLE marker.
# ## Contains 4 "MANDATORY" COLUMNS:
# ## Chromosome, rs#/SNP identifier, (Genetic distance), Base-pair position.
#
#
# ## loci.names:
# ## SHOULD BE ONE NAME PER SITE (ie. PER TWO LOCI): ie. L001, NOT L001.1, L001.2 !!!!
# ## remove last TWO characters (ie. decimal and trailing digit):
# # loci.names <- substr(loci.names, 1, nchar(loci.names)-2)
# # ## keep only every other:
# # loci.names <- loci.names[seq(1, length(loci.names), 2)]
# ## OR: # loci.names <- unique(loci.names)
#
# ## make dummy variables for irrelevant fields:
# chromosome <- rep(26, length(loci.names)) # 26 = human mitochondrial (haploid)
# gen.dist <- rep(0, length(loci.names))
# ## get base-pair posi (ie. loci name - L):
# bp <- loci.names
# bp <- as.numeric(gsub("L", "", bp))
# dat <- data.frame(chromosome, loci.names, gen.dist, bp)
#
# ## as matrix, no header:
# dat <- as.matrix(dat, byrow=FALSE, ncol=ncol(dat))
# colnames(dat) <- NULL
#
# map <- dat
#
# ## get filename
# uniqueID <- paste("set", set.number, "_", number, sep="")
# filename <- paste("./", uniqueID, "_map.Rdata", sep="")
#
# ## save dat.map as Rdata
# save(map, file=filename)
# #map <- get(load("C:/Cait 2012/Work/Xavier/Sims/set3/set3_1_map.Rdata"))
#
# ## save as text!
# if(Windows == FALSE){
# filename <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".txt", sep="")
# }else{
# filename <- paste("C:/PLINK/", uniqueID, ".txt", sep="")
# }
# write.table(map, file=filename, quote=FALSE, row.names=FALSE, col.names=FALSE)
#
# ## Do NOT save as .MAP
# ## convert from text to MAP!!
#
# # filename.ori <- paste("C:\\PLINK\\", uniqueID, ".txt", sep="")
# # filename.new <- paste("C:\\PLINK\\", uniqueID, ".map", sep="")
#
# if(Windows == FALSE){
# filename.ori <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".txt", sep="")
# filename.new <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".map", sep="")
# command <- paste("mv", filename.ori, filename.new, sep=" ")
# }else{
# filename.ori <- paste("C:\\PLINK\\", uniqueID, ".txt", sep="")
# filename.new <- paste("C:\\PLINK\\", uniqueID, ".map", sep="")
# command <- paste("move", filename.ori, filename.new, sep=" ")
# }
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
#
#
# ###### ###### ###### ######
#
# # }else{ # end from.file = FALSE
# #
# # ######################
# # ## from.file = TRUE ##
# # ######################
# #
# # } # end from.file = TRUE
#
#
# ########## #################### ########## #################### #################### ########## ####################
#
#
# #####################
# ## GWAS with PLINK ##
# #####################
#
# ## set wd for PLINK program
# setwd(plink.wd)
#
# ## get filename
# if(Windows == FALSE){
# filename <- paste("\\media\\caitiecollins\\88CC9BCECC9BB4C2\\PLINK\\", uniqueID, sep="")
# }else{
# filename <- paste("C:\\PLINK\\", uniqueID, sep="")
# }
#
#
# ## inspect file?
# command <- paste("plink --file ", filename, " --no-fid --no-parents --no-sex --allow-no-sex", sep="")
# # ## Run command
# # if(Windows == FALSE){
# # system(command)
# # }else{
# # shell(command)
# # }
#
# ## make a binary PED file ##
# ## (provide the full path, not just the file name)
# command <- paste("plink --file ", filename, " --make-bed --no-fid --no-parents --no-sex --allow-no-sex --out ", filename, sep="")
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
# ########################################################################################################################################
# ## SYSTEM(COMMAND) FAILS HERE WITH THE FOLLOWING ERROR ################################################# #### #### #### ####
#
# # sh: 1: plink: not found
#
# ## PROBABLY NEED TO FIND SOLN AND APPLY IT TO ALL PLINK-RELATED CODE, STARTING WELL ABOVE HERE....................
# #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### ####
# ########################################################################################################################################
#
# ## use --bfile to work with the BINARY file
# # (same as --file, but loads the binary one and prints summary stats)
# command <- paste("plink --bfile ", filename, " --no-fid --no-parents --no-sex --allow-no-sex", sep="")
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
# #######################
# ## basic association ##
# #######################
# ##--> 1df chi-square test
#
# ## check freq of SNPs...?
# command <- paste("plink --file ", filename, " --no-fid --no-parents --no-sex --allow-no-sex --freq --out ", filename, sep="")
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
# ## yay!
# if(Windows == FALSE){
# filename <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".frq", sep="")
# }else{
# filename <- paste("C:/PLINK/", uniqueID, ".frq", sep="")
# }
# freq <- read.table(filename, header=TRUE)
# #head(freq)
#
# ## perform a basic association analysis on the disease trait for all single SNPs
# if(Windows == FALSE){
# filename <- paste("\\media\\caitiecollins\\88CC9BCECC9BB4C2\\PLINK\\", uniqueID, sep="")
# }else{
# filename <- paste("C:\\PLINK\\", uniqueID, sep="")
# }
# command <- paste("plink --bfile ", filename, " --assoc --counts --allow-no-sex --out ", filename, sep="")
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
# ## to view the file you created, just read it in with R:
# if(Windows == FALSE){
# filename <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".assoc", sep="")
# }else{
# filename <- paste("C:/PLINK/", uniqueID, ".assoc", sep="")
# }
# plink.res <- read.table(filename, header=TRUE)
# # head(plink.res)
#
#
# ## get p.vals
# pval.plink.assoc <- plink.res$P
#
# ## get sig ##
#
# ## p.thresh:
# p.thresh <- p.value # 0.05 # 0.01 # 0.001 # ??
#
#
# ## Uncorrected ##
# plink.assoc.snps.uncorr <- snps.names[which(pval.plink.assoc < p.thresh)]
#
# ## Bonferonni ##
# p.vals.bonf <- p.adjust(pval.plink.assoc, "bonferroni")
# p.bonf <- which(p.vals.bonf < p.thresh)
# plink.assoc.snps.bonf <- snps.names[p.bonf]
#
# ## FDR ##
# p.vals.fdr <- p.adjust(pval.plink.assoc, "fdr")
# p.fdr <- which(p.vals.fdr < p.thresh)
# plink.assoc.snps.fdr <- snps.names[p.fdr]
#
# ## CONVERT 0-LENGTH RESULTS TO NULL
# if(length(plink.assoc.snps.uncorr) == 0) plink.assoc.snps.uncorr <- NULL
# if(length(plink.assoc.snps.bonf) == 0) plink.assoc.snps.bonf <- NULL
# if(length(plink.assoc.snps.fdr) == 0) plink.assoc.snps.fdr <- NULL
#
# ## STORE PLINK TEST RESULTS ##
# plink.assoc.results <- list(pval.plink.assoc, plink.assoc.snps.uncorr, plink.assoc.snps.bonf, plink.assoc.snps.fdr)
# names(plink.assoc.results) <- c("pval.plink.assoc", "plink.assoc.snps.uncorr", "plink.assoc.snps.bonf", "plink.assoc.snps.fdr")
#
# ############################################
#
# ########################################################
# ## association w control for genomic inflation factor ##
# ########################################################
# ##--> 1df chi-square test
#
# ## perform a basic association analysis on the disease trait for all single SNPs
# if(Windows == FALSE){
# filename <- paste("\\media\\caitiecollins\\88CC9BCECC9BB4C2\\PLINK\\", uniqueID, sep="")
# }else{
# filename <- paste("C:\\PLINK\\", uniqueID, sep="")
# }
# command <- paste("plink --bfile ", filename, " --assoc --adjust --gc --counts --allow-no-sex --out ", filename, sep="")
#
# ## Run command
# if(Windows == FALSE){
# system(command)
# }else{
# shell(command)
# }
#
#
# ## to view the file you created, just read it in with R:
# if(Windows == FALSE){
# filename <- paste("/media/caitiecollins/88CC9BCECC9BB4C2/PLINK/", uniqueID, ".assoc.adjusted", sep="")
# }else{
# filename <- paste("C:/PLINK/", uniqueID, ".assoc.adjusted", sep="")
# }
# plink.res <- read.table(filename, header=TRUE)
# # head(plink.res)
# ## NOT SURE WHY, BUT THE "UNADJ" p-values and the "GC" p-values are the same
# ## in this table (even though the "UNADJ" p-values are not actually the same
# ## as those in the plink.res from the basic association test above,
# ## AND, in this case, lambdaGC was 5.12 and the mean chi-squared was 4.63!!!!!)
#
#
# ## get p.vals
# pval.plink.assoc.gc <- plink.res$GC
#
# ## REORDER!!! ##
# ## NOTE: plink.gc returns results in order of SIGNIFICANCE!
# pval.plink.assoc.gc <- pval.plink.assoc.gc[order(plink.res$SNP)]
#
#
# ## get sig ##
#
# ## p.thresh:
# p.thresh <- p.value # 0.05 # 0.01 # 0.001 # ??
#
#
# ## Uncorrected ##
# plink.assoc.gc.snps.uncorr <- snps.names[which(pval.plink.assoc.gc < p.thresh)]
#
# ## Bonferonni ##
# p.vals.bonf <- p.adjust(pval.plink.assoc.gc, "bonferroni")
# p.bonf <- which(p.vals.bonf < p.thresh)
# plink.assoc.gc.snps.bonf <- snps.names[p.bonf]
#
# ## FDR ##
# p.vals.fdr <- p.adjust(pval.plink.assoc.gc, "fdr")
# p.fdr <- which(p.vals.fdr < p.thresh)
# plink.assoc.gc.snps.fdr <- snps.names[p.fdr]
#
# ## CONVERT 0-LENGTH RESULTS TO NULL
# if(length(plink.assoc.snps.uncorr) == 0) plink.assoc.gc.snps.uncorr <- NULL
# if(length(plink.assoc.snps.bonf) == 0) plink.assoc.gc.snps.bonf <- NULL
# if(length(plink.assoc.snps.fdr) == 0) plink.assoc.gc.snps.fdr <- NULL
#
#
# ## STORE PLINK TEST RESULTS ##
# plink.assoc.gc.results <- list(pval.plink.assoc.gc, plink.assoc.gc.snps.uncorr, plink.assoc.gc.snps.bonf, plink.assoc.gc.snps.fdr)
# names(plink.assoc.gc.results) <- c("pval.plink.assoc.gc", "plink.assoc.gc.snps.uncorr", "plink.assoc.gc.snps.bonf", "plink.assoc.gc.snps.fdr")
#
#
# ############################################
#
# ## STORE COMBINED PLINK RESULTS ##
# plink.results <- list(plink.assoc.results,
# plink.assoc.gc.results)
###########################################################################################################################
################################################# *** PCA *** #########################################################
###########################################################################################################################
#########
## PCA ##
#########
## STEPS: ##
## (1) Run PCA
## (2) Select sig. number of PCs (--> HOW??)
## (3) Regress snps along sig residuals
## (4) Run assoc test on adjusted snps dataset.
## (Below: taken from Glasgow/practical/practical-GWAS_before_cuts.Rnw ~ practical-GWAS_day4.pdf)
print("PCA started")
## get snps:
snps <- snps.ori.ori
phen <- phen.ori.ori
if(is.null(colnames(snps))) colnames(snps) <- c(1:ncol(snps))
## run PCA: ##
## Keep only n.PCs significant axes:
# n.PCs <- 5
# n.PCs <- 7
# n.PCs <- 9
n.PCs <- 15
# n.PCs <- 25
pca1 <- dudi.pca(snps, scale=FALSE, scannf=FALSE, nf=n.PCs)
## Identify main pop clusters: ##
set.seed(seed)
# grp <- find.clusters(snps, choose.n.clust=TRUE, max.n.clust=(n.PCs + 1))
grp <- find.clusters(snps, n.pca=n.PCs, choose.n.clust=FALSE, max.n.clust=(n.PCs + 1))
# # grp <- find.clusters(snps, choose.n.clust=FALSE, max.n.clust=(n.PCs + 1), pca.select="percVar", perc.pca=60)
pop <- grp$grp # gives same result as cutree(clust, k=6)
n.grp <- length(levels(pop))
# table(pop)
## CHECK-- ensure no populations contain only 1 individual:
## (UNLESS highest tree division leads to a single-node clade?)
# snps.mat <- matrix(as.character(snps), nrow=nrow(snps), ncol=ncol(snps), dimnames=list(rownames(snps), colnames(snps)))
# snps.mat <- replace(snps.mat, which(snps.mat == "0"), "a")
# snps.mat <- replace(snps.mat, which(snps.mat == "1"), "t")
# snps.dna <- as.DNAbin(snps.mat)
# clust <- hclust(dist.dna(snps.dna))
# tab <- table(cutree(clust, k=2))
# ## If 1 of 2 smallest pops contains only 1 ind, CMH will fail...
# if(any(tab == 1)) cmh.fails <- TRUE
cmh.fails <- FALSE
## (1) reduce max.n.clust by 1:
if(any(table(pop) < 2)){
max.k <- n.PCs
set.seed(seed)
grp <- find.clusters(snps, n.pca=n.PCs, choose.n.clust=FALSE, max.n.clust=max.k) # pca.select="percVar", perc.pca=60,
pop <- grp$grp # gives same result as cutree(clust, k=6)
n.grp <- length(levels(pop))
## (2) repeat by re-setting seed with same reduced max.n.clust
seed.new <- seed+1
counter <- 0
while(any(table(pop) < 2)){
if(counter > 5){
max.k <- max.k - 1
counter <- 0
}
set.seed(seed.new)
grp <- find.clusters(snps, n.pca=n.PCs, choose.n.clust=FALSE, max.n.clust=max.k) # pca.select="percVar", perc.pca=60,
pop <- grp$grp # gives same result as cutree(clust, k=6)
n.grp <- length(levels(pop))
seed.new <- seed.new+1
counter <- counter+1
} # end while
} # end pop check
if(n.grp == 1) cmh.fails <- TRUE
# table(pop)
########################################################
## Correct for pop strat by regressing along sig PCs: ##
########################################################
## NOTE-- if any MISSING DATA contained in SNPs dataset, must REPLACE it here (eg. with the mean for that snps column).
## get formula (get string up to n.PCs): lm(e ~ pca1$li[,1] + pca1$li[,2] + pca1$li[,3] + pca1$li[,4] + pca1$li[,5])
PC.string <- sapply(c(1:n.PCs), function(e) paste("pca1$li[, ", e, "]", sep=""))
PC.string <- paste0(PC.string, collapse=" + ")
var.string <- paste("e ~ ", PC.string)
## Correct SNPs w PCA!
## SLOW STEP..!
snps.corrected <- apply(snps, 2, function(e) residuals(do.call(lm, list(as.formula(var.string))))) # may take a minute
## First make sure PHEN is in BINARY form (0, 1) only!
levs <- levels(as.factor(phen))
phen.ini <- phen
if(any(!levs %in% c(0,1))){
phen <- as.character(phen)
phen <- replace(phen, which(phen == levs[1]), 0)
phen <- replace(phen, which(phen == levs[2]), 1)
phen <- as.numeric(phen)
names(phen) <- names(phen.ini)
} # end make phen binary..
if(!is.numeric(phen)) phen <- as.numeric(phen)
## Get UNIQUE snps.corrected ##
snps.corrected.ori <- snps.corrected
temp <- get.unique.matrix(snps.corrected, MARGIN=2)
temp.unique <- temp$unique.data
index <- temp$index
if(ncol(temp.unique) == ncol(snps.corrected.ori)){
all.unique <- TRUE
}else{
all.unique <- FALSE
}
## work w only unique snps:
snps.corrected <- temp.unique
## SLOW STEP..
pval2 <- numeric(0)
# system.time( # 120.78
for(n in 1:ncol(snps.corrected)){
foo <- suppressWarnings(glm(phen ~ snps.corrected[,n], family="binomial"))
ANOVA <- anova(foo, test="Chisq")
pval2[n] <- ANOVA$"Pr(>Chi)"[2]
} # end for loop
# )
## Get all non-unique values:
snps.corrected <- snps.corrected.ori
if(all.unique == FALSE) pval2 <- pval2[index]
## Store pvals and snps.corrected from pca-corrected ANOVA association test:
pval.pca <- pval2
snps.corrected.pca <- snps.corrected
## Get results:
p.thresh <- p.value # 0.01
p.vals.bonf <- p.adjust(pval.pca, "bonferroni")
p.bonf <- which(p.vals.bonf < p.thresh)
pca.snps.bonf <- snps.names[p.bonf]
## Store results:
pca.results <- list(snps.corrected.pca, pval.pca, pca.snps.bonf)
names(pca.results) <- c("snps.corrected.pca", "pval.pca", "pca.snps.bonf")
print("PCA done")
###########################################################################################################################
################################################ *** DAPC *** #########################################################
###########################################################################################################################
print("DAPC started")
## Using our pop clusters as the group factor in DAPC,
## we can generate a new DAPC object ...
## after performing cross-validation to optimise the discrimination between these subpopulations:
## NEW: make sure we get as many PCs as DAs?
n.pca.min <- (n.grp)
n.pca.max <- min(min(dim(snps))*0.5, pca1$rank)
runs <- 10
n.pca <- round(pretty(n.pca.min:n.pca.max, runs))
############
## set3_1 ##
# n.PCs <- 7
## k = 8 ## (?) k = 14 ##
# set.seed(1)
# xval.pop <- xvalDapc(snps, pop, n.pca=n.pca, training.set = 0.5)
# abline(v=20, col="red", lty=2, lwd=2)
# xval.pop9 <- xval.pop
# xval.pop9[2:6]
# # $`Number of PCs Achieving Lowest MSE`
# # [1] "25"
# ###########
# set.seed(1)
# xval.pop2 <- xvalDapc(snps, pop, n.pca=n.pca, training.set = 0.9)
# xval.pop2[2:6]
## NOTE -- If this is SLOW and always results in 5-10 PCs, we may want to skip it... ??
xval.pop <- xvalDapc(snps, pop, n.pca=n.pca)
# str(xval.pop)
# xval.pop[2:6]
## store DAPC object:
dapc.pop <- xval.pop$DAPC
###################################
## Correct for pop strat w DAPC: ##
###################################
## As we did when correcting with PCA, we regress along the axes of DAPC.
## When correcting with the DAPC approach, we do not need to determine
## how many axes are ``significant'': we will always correct with (k - 1) axes.
## NOTE that (k - 1) is not necessarily = n.PCs
## ... though we may want to limit the K we work with depending on how system.time scales with K... (?)
## get formula (get string up to n.PCs):
## lm(e ~ dapc.pop$ind.coord[,1] + dapc.pop$ind.coord[,2] + dapc.pop$ind.coord[,3] + dapc.pop$ind.coord[,4])
# DAPC.string <- sapply(c(1:n.PCs), function(e) paste("dapc.pop$ind.coord[, ", e, "]", sep=""))
DAPC.string <- sapply(c(1:(n.grp - 1)), function(e) paste("dapc.pop$ind.coord[, ", e, "]", sep=""))
DAPC.string <- paste0(DAPC.string, collapse=" + ")
var.string <- paste("e ~ ", DAPC.string)
## Correct SNPs w DAPC!
snps.corrected <- apply(snps, 2, function(e) residuals(do.call(lm, list(as.formula(var.string))))) # may take a minute
## Get UNIQUE snps.corrected ##
snps.corrected.ori <- snps.corrected
temp <- get.unique.matrix(snps.corrected, MARGIN=2)
temp.unique <- temp$unique.data
index <- temp$index
if(ncol(temp.unique) == ncol(snps.corrected.ori)){
all.unique <- TRUE
}else{
all.unique <- FALSE
}
## work w only unique snps:
snps.corrected <- temp.unique
## Run association test on DAPC-corrected snps:
pval3 <- numeric(0)
system.time(
for(n in 1:ncol(snps.corrected)){
foo <- suppressWarnings(glm(phen ~ snps.corrected[,n], family="binomial"))
ANOVA <- anova(foo, test="Chisq")
pval3[n] <- ANOVA$"Pr(>Chi)"[2]
} # end for loop
)
## Get all non-unique values:
snps.corrected <- snps.corrected.ori
if(all.unique == FALSE) pval3 <- pval3[index]
## store pvals and snps.corrected for DAPC-corrected assoc test:
pval.dapc <- pval3
snps.corrected.dapc <- snps.corrected
## Get results:
p.thresh <- p.value # 0.01
p.vals.bonf <- p.adjust(pval.dapc, "bonferroni")
p.bonf <- which(p.vals.bonf < p.thresh)
dapc.snps.bonf <- snps.names[p.bonf]
## Store results:
dapc.results <- list(snps.corrected.dapc, pval.dapc, dapc.snps.bonf)
names(dapc.results) <- c("snps.corrected.dapc", "pval.dapc", "dapc.snps.bonf")
print("DAPC done")
###########################################################################################################################
################################################ *** CMH *** ##########################################################
###########################################################################################################################
## NB: Only running CMH test if it is possible to get at least 2 pops with > 1 individual in them...
## If not --> set results = CMH find 0 sig snps...
## (If problem recurs too often, could make a genuine soln w drop.tip etc... )
if(cmh.fails == FALSE){
print("CMH started")
snps <- snps.ori.ori
phen <- phen.ori.ori
## Get colnames(snps)
if(is.null(colnames(snps))) colnames(snps) <- c(1:ncol(snps))
snps.names <- colnames(snps)
## First make sure PHEN is in BINARY form (0, 1) only!
levs <- levels(as.factor(phen))
phen.ini <- phen
if(any(!levs %in% c(0,1))){
phen <- as.character(phen)
phen <- replace(phen, which(phen == levs[1]), 0)
phen <- replace(phen, which(phen == levs[2]), 1)
} # end make phen binary..
phen <- as.numeric(phen)
names(phen) <- names(phen.ini)
##############################################
snps.12 <- snps+1
phen.34 <- phen+3
mat <- t(matrix(as.numeric(paste(snps.12, ".", phen.34, sep="")), nrow=ncol(snps), byrow=T))
## get only unique columns of pasted mat:
mat.u <- get.unique.matrix(mat)
mat.unique <- mat.u$unique.data
index <- mat.u$index
## get all 2x2 combos of snps.12 and phen.34:
noms <- c("1.3", "1.4", "2.3", "2.4")
## get array from table, by pop:
arr.l <- list()
for(n in 1:ncol(mat.unique)){
tab <- list()
for(e in 1:length(levels(pop))){
temp <- ftable(mat.unique[pop==e, n])
tab[[e]] <- replace(rep(0, 4), which(noms %in% attr(temp, "col.vars")[[1]]), temp)
} # end (e) loop
arr.l[[n]] <- do.call(cbind, tab)
} # end for (i) loop
arr <- do.call(rbind, arr.l)
arr.complete <- arr
##############
## FOR LOOP ##
##############
## TO GET P-VALUES FROM CMH TEST for EACH SNPs COLUMN:
p.vals <- list()
for(n in 1:ncol(mat.unique)){
## get indices for this snp for all pops and all 4 2x2 combos:
from <- seq(1, nrow(arr.complete), 4)[n]
to <- from+3
arr <- arr.complete[from:to,]
dat <- array(arr,
dim = c(2,2,ncol(arr)),
dimnames = list(
phen = c("0", "1"),
SNP = c("0", "1"),
pop = levels(pop)
))
## Run CMH test on this unique snps column:
CMH <- mantelhaen.test(dat)
p.vals[[n]] <- CMH$p.value
} # end for loop
p.vals <- as.vector(unlist(p.vals))
## get full set of p-vals for non-unique columns:
pval.cmh <- p.vals[index]
## Get results:
p.thresh <- p.value # 0.01
p.vals.bonf <- p.adjust(pval.cmh, "bonferroni")
p.bonf <- which(p.vals.bonf < p.thresh)
cmh.snps.bonf <- snps.names[p.bonf]
## Store results:
cmh.results <- list(pval.cmh, cmh.snps.bonf)
names(cmh.results) <- c("pval.cmh", "cmh.snps.bonf")
print("CMH done")
}else{
print("CMH SKIPPED (single-node major clade)")
## set results to null results?
# pval.cmh <- rep(1, ncol(snps))
# cmh.snps.bonf <- snps.names[which(1 < 0)] # character(0)
## set results to fisher results:
pval.cmh <- pval.fisher
cmh.snps.bonf <- fisher.snps.bonf
## Store results:
cmh.results <- list(pval.cmh, cmh.snps.bonf)
names(cmh.results) <- c("pval.cmh", "cmh.snps.bonf")
print("CMH (null) done")
}
###########################################################################################################################
############################################# *** PERFORMANCE *** #########################################################
###########################################################################################################################
##########################
## EVALUATE PERFORMANCE ##
##########################
performance <- list()
####################
## common metrics ##
####################
## get n.tests
snps <- snps.ori.ori
phen <- phen.ori.ori
n.tests <- dim(snps)[2]
###########################################
## performance[[1]] contains snps.assoc: ##
###########################################
performance[[1]] <- snps.assoc
##############################
## FOR LOOP FOR ALL 3 TESTS ##
##############################
for(j in 2:10){
if(j == 2) test <- "treeWAS.combined"
if(j == 3) test <- "treeWAS.terminal"
if(j == 4) test <- "treeWAS.simultaneous"
if(j == 5) test <- "treeWAS.subsequent"
if(j == 6) test <- "fisher.bonf"
if(j == 7) test <- "fisher.fdr"
if(j == 8) test <- "pca"
if(j == 9) test <- "dapc"
if(j == 10) test <- "cmh"
######################
## treeWAS.combined ##
######################
if(test == "treeWAS.combined"){
test.positive <- treeWAS.all$treeWAS.combined
}
######################
## treeWAS.terminal ##
######################
if(test == "treeWAS.terminal"){
test.positive <- treeWAS.all$treeWAS$terminal
}
##########################
## treeWAS.simultaneous ##
##########################
if(test == "treeWAS.simultaneous"){
test.positive <- treeWAS.all$treeWAS$simultaneous
}
########################
## treeWAS.subsequent ##
########################
if(test == "treeWAS.subsequent"){
test.positive <- treeWAS.all$treeWAS$subsequent
}
########### ########### ########### ########### ########### ########### ###########
#################
## fisher.bonf ##
#################
if(test == "fisher.bonf"){
## get test.positive
test.positive <- fisher.snps.bonf
} # end test = fisher.bonf
################
## fisher.fdr ##
################
if(test == "fisher.fdr"){
test.positive <- fisher.snps.fdr
} # end test = fisher.fdr
################
## PCA & DAPC ## ########### ########### ########### ########### ########### ###########
################
#########
## PCA ##
#########
if(test == "pca"){
test.positive <- pca.snps.bonf
}
##########
## DAPC ##
##########
if(test == "dapc"){
test.positive <- dapc.snps.bonf
}
#########
## CMH ##
#########
if(test == "cmh"){
test.positive <- cmh.snps.bonf
}
###########
## PLINK ## ########### ########### ########### ########### ########### ###########
###########
# ## Basic association ##
#
# ######################
# ## plink.assoc.bonf ##
# ######################
# if(test == "plink.assoc.bonf"){
# test.positive <- plink.assoc.snps.bonf
# } # end test = plink.assoc.bonf
#
# #####################
# ## plink.assoc.fdr ##
# #####################
# if(test == "plink.assoc.fdr"){
# test.positive <- plink.assoc.snps.fdr
# } # end test = plink.assoc.fdr
#
#
# ## Corrected w Genomic Control ##
#
# #########################
# ## plink.assoc.gc.bonf ##
# #########################
# if(test == "plink.assoc.gc.bonf"){
# test.positive <- plink.assoc.gc.snps.bonf
# } # end test = plink.assoc.gc.bonf
#
# ########################
# ## plink.assoc.gc.fdr ##
# ########################
# if(test == "plink.assoc.gc.fdr"){
# test.positive <- plink.assoc.gc.snps.fdr
# } # end test = plink.assoc.gc.fdr
########### ########### ########### ########### ########### ########### ###########
if(is.null(names(snps.assoc))) names(snps.assoc) <- as.character(snps.assoc)
if(is.null(snps.names)){
if(is.null(colnames(snps))) colnames(snps) <- as.character(1:ncol(snps))
snps.names <- colnames(snps)
}
#########################
## common calculations ##
#########################
## get test.negative
if(length(which(snps.names %in% test.positive)) != 0){
test.negative <- snps.names[-which(snps.names %in% test.positive)]
}else{
test.negative <- snps.names
}
## get n.test.positive
n.test.positive <- length(test.positive)
## get n.test.negative
n.test.negative <- length(test.negative) ## == n.tests - n.test.positive
n.tests <- ncol(snps)
########################
## GET TP, TN, FP, FN ##
########################
## get true positives
snps.associated <- names(snps.assoc)
true.positive <- test.positive[which(test.positive %in% snps.associated)]
TP <- length(true.positive)
## get true negatives
snps.not <- snps.names[-which(snps.names %in% snps.associated)]
true.negative <- test.negative[which(test.negative %in% snps.not)]
TN <- length(true.negative)
## get false positives
false.positive <- test.positive[which(test.positive %in% snps.not)]
FP <- length(false.positive)
## get false negatives
false.negative <- test.negative[which(test.negative %in% snps.associated)]
FN <- length(false.negative)
#####################################
## CALCULATE METRICS OF EVALUATION ##
#####################################
## Do NOT need anything to find (no associated SNPs required) ######################################
##############
## accuracy ##
##############
## ie. SUMMARY STATISTIC--Of all the CALLS/tests you made, how many of them were CORRECT
## ~ Pr(Correct Call | Call)
# accuracy <- ((TP + TN) / n.tests) ### WHY IS THIS GIVING ME 0.5 (when all other metrics seem to be working....) ??!?!
accuracy <- ((TP + TN) / (TP + TN + FP + FN))
# acc <- (sensitivity*length(snps.associated) + specificity*length(snps.not))/ncol(snps)
#################
## specificity ##
#################
## ie. Of all the truly NOT associated SNPs, how many did you manage to rule out?
## ~ Pr(Negative Test | SNP NOT associated)
specificity <- (TN / (TN + FP)) ## = (1 - FPR)
#########
## FPR ##
#########
## ie. How many truly NOT associated SNPs did you accidentally call significant
## ~ Pr(Positive Test | SNP NOT associated)
FPR <- (FP / (FP + TN)) ## = (1 - specificity)
## NEED something to FIND, else uninformative! (True ASSOCIATED SNPs ~ required) ###################
#########
## FNR ##
#########
## ie. How many truly ASSOCIATED SNPs did you accidentally miss
## ~ Pr(Negative Test | SNP ASSOCIATED)
## --> Set 1: will be 0/0 = NaN
FNR <- (FN / (FN + TP))
#################
## sensitivity ##
#################
## ie. How many truly ASSOCIATED SNPs did you manage to catch
## ~ Pr(Positive Test | SNP ASSOCIATED)
## --> Set 1: will be 0/0 = NaN
sensitivity <- (TP / (TP + FN))
#########
## PPV ##
#########
## ie. Of all the POSITIVE calls you made, how many were CORRECT/ identified truly ASSOCIATED SNPs
## ~ Pr(SNP ASSOCIATED | Positive Test)
## --> Set 1: will be 0 (UNLESS you made NO positive calls, then 0/0 = NaN)
PPV <- (TP / (TP + FP)) ## = (1 - FDR)
#########
## FDR ##
#########
## ie. Of all the POSITIVE calls you made, how many were WRONG/ identified truly NOT associated SNPs
## ~ Pr(SNP NOT associated | Positive Test)
## --> Set 1: will be 1 (UNLESS you made NO positive calls, then 0/0 = NaN)
FDR <- (FP / (FP + TP)) ## = (1 - PPV)
##############
## F1.score ##
##############
## Balanced accuracy-like score considering both sensitivity and PPV:
F1.score <- 2*((sensitivity*PPV) / (sensitivity+PPV))
##################################
## combine eval metrics into df ##
##################################
performance[[j]] <- data.frame(accuracy, specificity, FPR, FNR, sensitivity, PPV, FDR, F1.score)
} # end for loop
names(performance) <- c("snps.assoc",
"treeWAS.combined", "treeWAS.terminal", "treeWAS.simultaneous", "treeWAS.subsequent",
"fisher.bonf", "fisher.fdr",
# "plink.assoc.bonf", "plink.assoc.fdr",
# "plink.assoc.gc.bonf", "plink.assoc.gc.fdr",
"pca", "dapc", "cmh")
################################ ################################ ################################
###########################################################################################################################
######################################### *** SAVING & RETURNING *** ######################################################
###########################################################################################################################
########################
## SAVE DATA & OUTPUT ##
########################
## set wd
if(cluster == FALSE) setwd(wd)
## get uniqueID
uniqueID <- paste("set", set.number, "_", number, sep="")
## save snps, phen, tree, out, res, fisher.results, plink.assoc.results, performance
snps <- snps.ori.ori
phen <- phen.ori.ori
## save snps
filename.snps[[i]] <- paste("./", uniqueID, "_snps", ".Rdata", sep="")
save(snps, file=filename.snps[[i]])
## save phen
filename.phen[[i]] <- paste("./", uniqueID, "_phen", ".Rdata", sep="")
save(phen, file=filename.phen[[i]])
## save phen.plot.col
filename.phen.plot.col[[i]] <- paste("./", uniqueID, "_phen.plot.col", ".Rdata", sep="")
save(phen.plot.col, file=filename.phen.plot.col[[i]])
## save tree
filename.tree[[i]] <- paste("./", uniqueID, "_tree", ".Rdata", sep="")
save(tree, file=filename.tree[[i]])
## save out
filename.out[[i]] <- paste("./", uniqueID, "_out", ".Rdata", sep="")
save(out, file=filename.out[[i]])
## save res
res <- res.complete # includes data from reconstructions, values from treeWAS tests
filename.res[[i]] <- paste("./", uniqueID, "_res", ".Rdata", sep="")
save(res, file=filename.res[[i]])
## save fisher.results
filename.fisher.results[[i]] <- paste("./", uniqueID, "_fisher.results", ".Rdata", sep="")
save(fisher.results, file=filename.fisher.results[[i]])
## save plink.assoc.results
# filename.plink.results[[i]] <- paste("./", uniqueID, "_plink.results", ".Rdata", sep="")
# save(plink.results, file=filename.plink.results[[i]])
## save pca
filename.pca[[i]] <- paste("./", uniqueID, "_pca", ".Rdata", sep="")
save(pca.results, file=filename.pca[[i]])
## save dapc
filename.dapc[[i]] <- paste("./", uniqueID, "_dapc", ".Rdata", sep="")
save(dapc.results, file=filename.dapc[[i]])
## save cmh
filename.cmh[[i]] <- paste("./", uniqueID, "_cmh", ".Rdata", sep="")
save(cmh.results, file=filename.cmh[[i]])
## save performance
filename.args[[i]] <- paste("./", uniqueID, "_args", ".Rdata", sep="")
save(args, file=filename.args[[i]])
## save performance
filename.performance[[i]] <- paste("./", uniqueID, "_performance", ".Rdata", sep="")
save(performance, file=filename.performance[[i]])
#########################
## STORE DATA & OUTPUT ##
#########################
SNPS[[i]] <- snps
names(SNPS)[[i]] <- uniqueID
PHEN[[i]] <- phen
names(PHEN)[[i]] <- uniqueID
PHEN.PLOT.COL[[i]] <- phen.plot.col
names(PHEN.PLOT.COL[[i]]) <- uniqueID
TREE[[i]] <- tree
names(TREE)[[i]] <- uniqueID
OUT[[i]] <- out
names(OUT)[[i]] <- uniqueID
RES[[i]] <- res
names(RES)[[i]] <- uniqueID
FISHER.RESULTS[[i]] <- fisher.results
names(FISHER.RESULTS)[[i]] <- uniqueID
# PLINK.RESULTS[[i]] <- plink.results
# names(PLINK.RESULTS)[[i]] <- uniqueID
PCA.RESULTS[[i]] <- pca.results
names(PCA.RESULTS)[[i]] <- uniqueID
DAPC.RESULTS[[i]] <- dapc.results
names(DAPC.RESULTS)[[i]] <- uniqueID
CMH.RESULTS[[i]] <- cmh.results
names(CMH.RESULTS)[[i]] <- uniqueID
ARGS[[i]] <- args
names(ARGS)[[i]] <- uniqueID
PERFORMANCE[[i]] <- performance
names(PERFORMANCE)[[i]] <- uniqueID
} # end for loop
##########################
## RETURN DATA & OUTPUT ##
##########################
toReturn <- list(SNPS, PHEN, PHEN.PLOT.COL, TREE, RES,
FISHER.RESULTS, PCA.RESULTS, DAPC.RESULTS, CMH.RESULTS,
ARGS, PERFORMANCE)
names(toReturn) <- c("snps", "phen", "phen.plot.col", "tree", "res",
"fisher.results", "pca.results", "dapc.results", "cmh.results",
"arguments", "performance")
################################
## SAVE PERFORMANCE & OUTPUT: ##
################################
## get timestamp for filename:
from <- number-length(PERFORMANCE)+1
to <- number
time <- Sys.time()
t1 <- keepFirstN(time, 10)
t2 <- strsplit(keepLastN(keepFirstN(time, 19), 8), split =":")
names(t2) <- NULL
t2 <- t2[[1]]
t2 <- paste(t2, collapse="_", sep="")
timestamp <- paste(t1, t2, sep="_")
## save PERFORMANCE:
nom1 <- paste("./", "set", set.number, "_", from, "_", to, "_PERFORMANCE", "_", timestamp, ".Rdata", sep="")
save(PERFORMANCE, file=nom1)
## save all OUTPUT:
output <- toReturn
nom2 <- paste("./", "set", set.number, "_", from, "_", to, "_OUTPUT", "_", timestamp, ".Rdata", sep="")
save(output, file=nom2)
## return output:
return(toReturn)
} # end simTest (generic) function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.