#' @export
#' @importFrom yaml read_yaml
#' @importFrom dplyr filter
#' @importFrom magrittr "%>%"
#' @importFrom clustermq Q
wrapper.script <- function(input){
## read yaml file
input.yaml <- check.yaml(input)
## create directories
system(paste0("mkdir -p ",input.yaml$output.directory,"/coverage"))
system(paste0("mkdir -p ",input.yaml$output.directory,"/results"))
system(paste0("mkdir -p ",input.yaml$output.directory,"/logs"))
system(paste0("mkdir -p ",input.yaml$output.directory,"/results/individual.edm.calls/"))
system(paste0("mkdir -p ",input.yaml$output.directory,"/results/individual.ed.objects/"))
##check and read manifest
if(is.null(input.yaml[["manifest"]])){
message('Cannot proceed without a manifest. Please look at the sample manifest provided in the documentation.\n')
break;
}else if(!is.null(input.yaml[["manifest"]]) & file.exists(input.yaml$manifest)){
message('Reading the manifest...')
manifest <- read.table(input.yaml$manifest, header = T, sep ="\t", stringsAsFactors = F)
manifest$sex <- gsub(F,"F",manifest$sex)
names(manifest) <- c("bam","sampleID","sex")
nSamples <- NROW(manifest)
nFemales <- NROW(manifest[manifest$sex == "M",])
nMales <- NROW(manifest[manifest$sex == "F",])
message(paste0('Found:\n * ',
nSamples,' individuals (',
nMales,' Males and ',nFemales,' females.)\n'))
}else if(sum(is.na(manifest)) > 0){
# check for empty entries in the manifest
message('Entries in manifest cannot be empty. Please check your manifest before running.\n');
break;
}
# Check for duplicate individual IDs
if(sum(duplicated(sort(manifest$sampleID))) > 0){
message('Found these duplicates in individual IDs\n', manifest$sampleID[duplicated(manifest$sampleID)] );
break;
}
## check bam files
message('Checking the bam files...\n');
if(sum(file.exists(as.character(manifest$bam))) < length(manifest$bam)){
message('List of missing bam files:\n\n');
print(paste(manifest$bam[!file.exists(as.character(manifest$bam))],"\n"))
stop('Looks like some of your bam files are missing. Check your bam paths.\n\n\n')
}
## check for index files
message('Checking the index files for bams...\n');
manifest$index <- NA;
manifest$index[which(file.exists(paste0(as.character(manifest$bam),".bai")))] <- paste0(as.character(manifest$bam[which(file.exists(paste0(as.character(manifest$bam),".bai")))]),".bai")
manifest$index[which(file.exists(paste0(gsub(pattern = "bam","bai",manifest$bam))))] <- paste0(gsub(pattern = "bam","bai",manifest$bam[which(file.exists(paste0(gsub(pattern = "bam","bai",manifest$bam))))]))
manifest$index[which(file.exists(paste0(as.character(manifest$bam),".crai")))] <- paste0(as.character(manifest$bam[which(file.exists(paste0(as.character(manifest$bam),".crai")))]),".crai")
manifest$index[which(file.exists(paste0(gsub(pattern = "cram","crai",manifest$bam))))] <- paste0(gsub(pattern = "cram","crai",manifest$bam[which(file.exists(paste0(gsub(pattern = "cram","crai",manifest$bam))))]))
message(paste0('Samples with missing index files are written to results/missing.index.samples.txt \n sample_names:'))
message(paste0(manifest$sampleID[is.na(manifest$index)],'\n'));
write.table( paste0('Index file not found for ',manifest[is.na(manifest$index),]),paste0(input.yaml$output.directory,"/results/missing.index.samples.txt"),row.names=F,quote=F, sep ="\t")
manifest <- manifest[!is.na(manifest$index),]
# write.table(manifest[,1:3],paste0(input.yaml$output.directory,"/results/",input.yaml$manifest),row.names =F,quote =F, sep ="\t")
write.table(manifest[,1:3],paste0(input.yaml$output.directory,"/results/",input.yaml$cohort.name,"_manifest.txt"),row.names =F,quote =F, sep ="\t")
# for (i in 1:length(manifest$bam)){
# if(file.exists(gsub(pattern = "bam","bai",manifest$bam[i]))){
# manifest$bai[i] <- gsub(pattern = "bam","bai",manifest$bam[i])
# } else if(file.exists(paste0(as.character(manifest$bam[i]),".bai"))){
# manifest$bai[i] <- paste0(as.character(manifest$bam[i]),".bai")
# } else{
# message(paste0('Samples with missing index files are written to temp/missing.index.samples.txt\n'));
# write.table( paste0('Index file not found for ',manifest$bam[i]),paste0(input.yaml$output.directory,"/temp/missing.index.samples.txt"),row.names=F,quote=F)
## Edit manifest based on index
# manifest <- manifest[
# }
#}
## edit this based on cluster template
## clustermq template
if(is.null(input.yaml[["env.name"]])){
message('########### Cannot proceed without a environment name. ###########\n')
break;
}
if(is.null(input.yaml[["memory"]])){
message('########### Cannot proceed without memory value. ###########\n')
break;
}
if(is.null(input.yaml[["hpc.scheduler"]])){
message('Cannot proceed without a Scheduler type\n')
break;
} else {
if(is.null(input.yaml[["clustermq.template"]]) == T | is.na(input.yaml[["clustermq.template"]]) ==T ){
options(
clustermq.scheduler = paste0(input.yaml$hpc.scheduler),
clustermq.template = paste0(find.package("EDM"),"/template/",input.yaml$hpc.scheduler,"_template"),
clustermq.defaults = list(conda=paste0(input.yaml$env.name))
)
} else {
options(
clustermq.scheduler = paste0(input.yaml$hpc.scheduler),
clustermq.template = paste0(input.yaml$clustermq.template),
clustermq.defaults = list(conda=paste0(input.yaml$env.name))
)
}
}
input <- paste0(input.yaml$output.directory,"/results/input.yaml")
EDM::check.exons.def(input= input)
if( is.null(input.yaml[["precomputed.coverage"]]) | is.na(input.yaml[["precomputed.coverage"]]) ==T ){
## get Counts step
# source("clustermq_getCounts.R")
nSamples = NROW(manifest)
message(' Count step... \n')
clustermq::Q(get.bam.counts.mosdepth,pkgs="EDM",task_id=1:nSamples,input = input,n_jobs=nSamples, max_calls_worker = 1, template=list(job_name="compute_coverage", env.name = input.yaml$env.name ,memory = (input.yaml$memory*1024), mem = paste0(input.yaml$memory,"G")),export = list(input = input),timeout = 72000)
## move log files
system(paste0("mv compute_coverage* ",input.yaml$output.directory,"/logs/."))
## gather counts
message(' Gathering Counts... \n')
clustermq::Q(gather.mosdepth.coverage,pkgs="EDM",task_id=1,input = input,n_jobs=1, max_calls_worker = 1,template=list(job_name="gather_coverage",env.name = input.yaml$env.name ,memory = (input.yaml$memory*1024), mem = paste0(input.yaml$memory,"G")), export = list(input = input))
# source("gather_counts.R")
## move log files
system(paste0("mv gather_coverage* ",input.yaml$output.directory,"/logs/."))
} else {
if(is.null(input.yaml[["precomputed.coverage.autosomes"]]) | input.yaml[["precomputed.coverage.autosomes"]] ==F | is.na(input.yaml[["precomputed.coverage.autosomes"]])){
message('########### Cannot proceed without a file name. ###########\n')
break;
} else {
if(file.exists(input.yaml$precomputed.coverage.autosomes)){
precomputed_auto <- readRDS(input.yaml$precomputed.coverage.autosomes)
if(length(colnames(precomputed_auto[["countmat"]]) %in% manifest$sampleID) != nrow(manifest)){
message(' Not all samples from Manifest are present in precomputed.coverage.autosomes')
message(paste0('These samples are missing: \n',manifest$sampleID[!manifest$sampleID %in% colnames(precomputed_auto[["countmat"]]) ]))
} else {
saveRDS(precomputed_auto, file = paste0(input.yaml$output.directory,
"/results/", input.yaml$cohort.name, ".exomedepth.cohort.auto.rds"))
}
}
}
if(is.null(input.yaml[["precomputed.coverage.men"]]) | input.yaml[["precomputed.coverage.men"]] ==F |is.na(input.yaml[["precomputed.coverage.men"]]) == T){
message('########### Cannot proceed without a file name. ###########\n')
break;
} else {
if(file.exists(input.yaml$precomputed.coverage.men)) {
precomputed_men <- readRDS(input.yaml$precomputed.coverage.men)
if(length(colnames(precomputed_men[["countmat"]]) %in% manifest$sampleID) != nrow( manifest[manifest$sex == "M",]) ) {
message(' Not all samples from Manifest are present in precomputed.coverage.autosomes')
message(paste0('These samples are missing: \n',manifest[manifest$sex == "M",]$sampleID[! manifest[manifest$sex == "M",]$sampleID %in% colnames(precomputed_men[["countmat"]]) ]))
} else {
saveRDS(precomputed_men, file = paste0(input.yaml$output.directory,
"/results/", input.yaml$cohort.name, ".exomedepth.cohort.men.rds"))
}
}
}
if(is.null(input.yaml[["precomputed.coverage.women"]]) | input.yaml[["precomputed.coverage.autosomes"]] ==F |is.na(input.yaml[["precomputed.coverage.women"]]) ==T ){
message('########### Cannot proceed without a file name. ###########\n')
break;
} else {
if(file.exists(input.yaml$precomputed.coverage.women)) {
precomputed_women <- readRDS(input.yaml$precomputed.coverage.women)
if(length(colnames(precomputed_women[["countmat"]]) %in% manifest$sampleID) != nrow( manifest[manifest$sex == "F",]) ) {
message(' Not all samples from Manifest are present in precomputed.coverage.autosomes')
message(paste0('These samples are missing: \n',manifest[manifest$sex == "F",]$sampleID[! manifest[manifest$sex == "F",]$sampleID %in% colnames(precomputed_women[["countmat"]]) ]))
} else {
saveRDS(precomputed_women, file = paste0(input.yaml$output.directory,
"/results/", input.yaml$cohort.name, ".exomedepth.cohort.women.rds"))
}
}
}
write.table(manifest, paste0(input.yaml$output.directory,
"/results/", input.yaml$cohort.name, "_manifest.txt"),
row.names = F, quote = F, sep = "\t")
}
## call variants
message('Calling variants... \n')
manifest <- read.table(paste0(input.yaml$output.directory,"/results/",input.yaml$cohort.name,"_manifest.txt"),header=T, stringsAsFactors=F,sep ="\t")
nSamples = NROW(manifest)
call.variants <- suppressWarnings(call.variants)
clustermq::Q(call.variants,pkgs=list("EDM","ExomeDepth"),columnIndex=1:nSamples,n_jobs=nSamples,input=input, max_calls_worker = 1,template=list(job_name="call_variants",env.name = input.yaml$env.name ,memory = (input.yaml$memory*1024), mem = paste0(input.yaml$memory,"G")), export = list(input = input),timeout = 1440000)
## move log files
system(paste0("mv call_variants* ",input.yaml$output.directory,"/logs/."))
## gather variants
message(' Gathering Calls... \n')
clustermq::Q(gather.variant.calls,pkgs=list("EDM"),task_id=1,input = input,n_jobs=1, max_calls_worker = 1,template=list(job_name="gather_calls",env.name = input.yaml$env.name ,memory = (input.yaml$memory*1024), mem = paste0(input.yaml$memory,"G")), export = list(input = input))
system(paste0("mv gather_calls* ",input.yaml$output.directory,"/logs/."))
message(' Plotting correlation... \n')
clustermq::Q(plot.sample.max.correlation,pkgs=list("EDM"),input = input,n_jobs=1, max_calls_worker = 1,template=list(job_name="plotting_correlation",env.name = input.yaml$env.name ,memory = (input.yaml$memory*1024), mem = paste0(input.yaml$memory,"G")), export = list(input = input))
system(paste0("mv plotting_correlation* ",input.yaml$output.directory,"/logs/."))
# system(paste0("rm ",input.yaml$output.directory,"/results/individual.edm.calls/*.edm.stat.txt"))
## summary data
cohort.summary <- read.table(paste0(input.yaml$output.directory,"/results/",input.yaml$cohort.name,".edm.summary.cohort.txt"),header=T,stringsAsFactors=F)
## summary
message('Summary of number of calls in cohort \n')
message('Mean of cohort: ',round(mean(cohort.summary$Freq)),'\n')
message('Median of cohort: ',median(cohort.summary$Freq),'\n')
message('Min calls: ',min(cohort.summary$Freq),' Max calls:',max(cohort.summary$Freq),'\n')
message('Standard deviation: ',paste0(sd(cohort.summary$Freq),'\n'))
return(message('Pipeline ran to complete'))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.