library(mtmcskat)
library(foreach)
library(optparse)
option_list = list(
make_option(c("-p", "--phenodata"),
type="character",
default=NA,
help="Path to folder of phenotype data files in PLINK format",
metavar="character"),
make_option(c("-c", "--covariates"),
type="character",
default=NA,
help=paste("Path to a covariate file, comma-delimited with a",
"header, with genotypes in same order as phenotype",
"file"),
metavar="character"),
make_option(c("-g", "--genodata"),
type="character",
default=NA,
help=paste("Path to folder of SNP data, organized into",
"chromosomes or other scaffolds, in PLINK .traw",
"format"),
metavar="character"),
make_option(c("-o", "--output"),
type="character",
default=NA,
help=paste("Path to folder where outputs will be saved"),
metavar="character"),
make_option(c("-l", "--pre_allocated_dir"),
type="character",
default=NA,
help=paste("Path to folder pre-allocated SNP data is saved",
"or will be saved if it doesn't already exist"),
metavar="character"),
make_option(c("-j", "--job_id"),
type="character",
default=NA,
help=paste("Identifier for this group of jobs, to be submitted"),
metavar="character"),
make_option(c("-S", "--window_size"),
type="numeric",
default=3000,
help=paste("Size of each SNP window (in base pairs)"),
metavar="numeric"),
make_option(c("-s", "--window_shift"),
type="numeric",
default=1000,
help=paste("Shift between overlapping SNP windows",
"(in base pairs)"),
metavar="numeric"),
make_option(c("-a", "--min_accuracy"),
type="numeric",
default=2,
help=paste("numeric, threshold x to begin resampling for",
"SNP windows with p-values below 10^-x; default",
"value is 2"),
metavar="numeric"),
make_option(c("-A", "--max_accuracy"),
type="numeric",
default=5,
help=paste("numeric, limit x to end resampling for SNP windows",
"with p-values below 10^-x, usually due to",
"computational cost"),
metavar="numeric"),
make_option(c("-t", "--time"),
type="character",
default="1:00:00",
help=paste("If running on SLURMS, a time string formatted as",
"hh:mm:ss indicating the maximum time to allow jobs",
"to run prior to termination"),
metavar="character"),
make_option(c("-m", "--mode"),
type="character",
default="sequential",
help=paste('character string of "sequential" or "slurms"'),
metavar="character"),
make_option(c("-n", "--n_thread"),
type="numeric",
default=24,
help=paste('Number of threads for this job'),
metavar="numeric")
);
opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);
# setwd("/scratch2/NSF_GWAS/notebooks/MTMC-SKAT")
# opt = list(phenodata = "/scratch2/NSF_GWAS/notebooks/InPlantaGWAS/02_Parsing_phenodata/pheno_files/untransformed_allpheno_with_header",
# covariates = "/scratch2/NSF_GWAS/notebooks/InPlantaGWAS/02_Parsing_phenodata/pheno_files/Covariates_AllPhasesButLast_Stem_noheader_PLINKformat.txt",
# genodata = "/scratch2/NSF_GWAS/notebooks/InPlantaGWAS/00_SNP_format_conversions/wholeChr1323geno/",
# output = "/scratch2/NSF_GWAS/Results/SKAT/",
# pre_allocated_dir = "/scratch2/NSF_GWAS/SKAT_SLURMS/pre_allocated_dir",
# job_id = "test_allpheno_allgeno_steed_a3",
# window_size = 3000,
# window_shift = 1000,
# min_accuracy = 4,
# max_accuracy = 5,
# mode = "sequential",
# n_thread = 40)
opt = list(phenodata = "/oasis/projects/nsf/osu123/naglemi/test_two_phenotypes",
covariates = "/oasis/projects/nsf/osu123/naglemi/covariates/Covariates_AllPhasesButLast_Stem_noheader_PLINKformat.txt",
genodata = "/oasis/projects/nsf/osu123/naglemi/wholeChr1323geno",
output = "/oasis/projects/nsf/osu123/naglemi/mtmcskat_out",
pre_allocated_dir = "/oasis/projects/nsf/osu123/naglemi/pre_allocated",
job_id = "firstrun_twoscaffolds_two_phenos_on_debug_node_a",
window_size = 3000,
window_shift = 1000,
min_accuracy = 2,
max_accuracy = 5,
mode = "slurm",
n_thread = 24,
time = "6:00:00")
raw_file_path_list <- list.files(opt$genodata,
full.names = TRUE)
phenotype_files <- list.files(opt$phenodata,
full.names = TRUE)
pars <- foreach(phenotype = phenotype_files,
.combine = rbind) %:%
foreach(raw_file_path = raw_file_path_list,
.combine = rbind) %do% {
pars <- data.frame(job_id = paste0(opt$job_id,
"_",
basename(basename(raw_file_path)),
"_",
basename(basename(phenotype))),
phenodata = phenotype,
covariates = opt$covariates,
raw_file_path = raw_file_path,
window_size = opt$window_size,
window_shift = opt$window_shift,
output_dir = opt$output,
pre_allocated_dir = opt$pre_allocated_dir,
desired_sig_figs = 2,
min_accuracy = opt$min_accuracy,
max_accuracy = opt$max_accuracy,
plot = TRUE,
RAM = "AllRAM",
n_thread = opt$n_thread)
}
if(opt$mode == "slurm"){
library(rslurm)
sjob <- slurm_apply(MTMCSKAT_workflow,
pars,
jobname = opt$job_id,
nodes = nrow(pars),
cpus_per_node = opt$n_thread,
submit = FALSE,
slurm_options = list(time = opt$time),
preschedule_cores = FALSE,
multithread = TRUE)
}
if(opt$mode == "sequential"){
mapply(FUN = MTMCSKAT_workflow,
phenodata = as.character(pars$phenodata),
covariates = as.character(pars$covariates),
raw_file_path = as.character(pars$raw_file_path),
window_size = pars$window_size,
window_shift = pars$window_shift,
output_dir = as.character(pars$output_dir),
pre_allocated_dir = as.character(pars$pre_allocated_dir),
job_id = pars$job_id,
desired_sig_figs = pars$desired_sig_figs,
min_accuracy = pars$min_accuracy,
max_accuracy = pars$max_accuracy,
plot = pars$plot,
RAM = pars$RAM,
n_thread = opt$n_thread)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.