Nothing
###############################################
## Class and Method Definitions for SYSargs2 ##
###############################################
## Define SYSargs2 class
setClass("SYSargs2", representation(
targets="list",
targetsheader="list",
modules="list",
wf="list",
clt="list",
yamlinput="list",
cmdlist="list",
input="list",
output="list",
cwlfiles="list",
inputvars="list")
)
## Methods to return SYSargs2 components
setGeneric(name="targets", def=function(x) standardGeneric("targets"))
setMethod(f="targets", signature="SYSargs2", definition=function(x) {return(x@targets)})
setMethod(f="targetsheader", signature="SYSargs2", definition=function(x) {return(x@targetsheader)})
setMethod(f="modules", signature="SYSargs2", definition=function(x) {return(setNames(as.character(x@modules), names(x@modules)))})
setGeneric(name="wf", def=function(x) standardGeneric("wf"))
setMethod(f="wf", signature="SYSargs2", definition=function(x) {return(x@wf)})
setGeneric(name="clt", def=function(x) standardGeneric("clt"))
setMethod(f="clt", signature="SYSargs2", definition=function(x) {return(x@clt)})
setGeneric(name="yamlinput", def=function(x) standardGeneric("yamlinput"))
setMethod(f="yamlinput", signature="SYSargs2", definition=function(x) {return(x@yamlinput)})
setGeneric(name="cmdlist", def=function(x) standardGeneric("cmdlist"))
setMethod(f="cmdlist", signature="SYSargs2", definition=function(x) {return(x@cmdlist)})
setGeneric(name="input", def=function(x) standardGeneric("input"))
setMethod(f="input", signature="SYSargs2", definition=function(x) {return(x@input)})
setGeneric(name="output", def=function(x) standardGeneric("output"))
setMethod(f="output", signature="SYSargs2", definition=function(x) {return(x@output)})
setGeneric(name="cwlfiles", def=function(x) standardGeneric("cwlfiles"))
setMethod(f="cwlfiles", signature="SYSargs2", definition=function(x) {return(x@cwlfiles)})
setGeneric(name="inputvars", def=function(x) standardGeneric("inputvars"))
setMethod(f="inputvars", signature="SYSargs2", definition=function(x) {return(x@inputvars)})
## Constructor methods
## List to SYSargs2 with: as(mylist, "SYSargs2")
setAs(from="list", to="SYSargs2",
def=function(from) {
new("SYSargs2", targets=from$targets,
targetsheader=from$targetsheader,
modules=from$modules,
wf=from$wf,
clt=from$clt,
yamlinput=from$yamlinput,
cmdlist=from$cmdlist,
input=from$input,
output=from$output,
cwlfiles=from$cwlfiles,
inputvars=from$inputvars)
})
## Coerce back to list: as(SYSargs2, "list")
setGeneric(name="sysargs2", def=function(x) standardGeneric("sysargs2"))
setMethod(f="sysargs2", signature="SYSargs2", definition=function(x) {
sysargslist <- list(targets=x@targets, targetsheader=x@targetsheader, modules=x@modules, wf=x@wf, clt=x@clt, yamlinput=x@yamlinput, cmdlist=x@cmdlist, input=x@input, output=x@output, cwlfiles=x@cwlfiles, inputvars=x@inputvars)
return(sysargslist)
})
## SYSargs2 to list with: as("SYSargs2", list)
setAs(from="SYSargs2", to="list",
def=function(from) {
sysargs2(from)
})
## Define print behavior for SYSargs2
setMethod(f="show", signature="SYSargs2",
definition=function(object) {
cat(paste0("Instance of '", class(object), "':"),
paste0(" Slot names/accessors: "),
paste0(" targets: ", length(object@targets),
" (", head(names(object@targets), 1), "...",
tail(names(object@targets), 1), ")",
", targetsheader: " , length(unlist(object@targetsheader)), " (lines)"),
paste0(" modules: " , length(object@modules)),
paste0(" wf: " , length(object@wf),
", clt: ", length(object@clt),
", yamlinput: ", length(object@yamlinput), " (components)"),
paste0(" input: ", length(object@input),
", output: ", length(object@output)),
paste0(" cmdlist: ", length(object@cmdlist)),
" WF Steps:",
paste0(" ", seq_along(object@clt), ". ", object@cwlfiles$steps,
" (rendered: ", length(object@cmdlist[[1]])!=0, ")"),
"\n", sep="\n")
})
## Extend names() method
setMethod(f="names", signature="SYSargs2",
definition=function(x) {
return(slotNames(x))
})
## Extend infile1() method
setMethod(f="infile1", signature="SYSargs2", definition=function(x){
subset_input <- input(x)
subset_sample <- sapply(names(subset_input), function(x) list(NULL))
for(i in seq_along(names(subset_input))){
if("FileName" %in% names(subset_input[[i]])){
subset_sample[[i]] <- normalizePath(subset_input[[i]][['FileName']])
} else {
subset_sample[[i]] <- normalizePath(subset_input[[i]][['FileName1']]) }
subset_sample <- as.character(subset_sample)
names(subset_sample) <- names(subset_input)
}
return(subset_sample)
})
## Extend infile2() method
setMethod(f="infile2", signature="SYSargs2", definition=function(x){
subset_input <- input(x)
subset_sample <- sapply(names(subset_input), function(x) list(NULL))
for(i in seq_along(names(subset_input))){
if("FileName2" %in% names(subset_input[[i]])){
subset_sample[[i]] <- normalizePath(subset_input[[i]][['FileName2']]) }
subset_sample <- as.character(subset_sample)
names(subset_sample) <- names(subset_input)
}
return(subset_sample)
})
## Extend length() method
setMethod(f="length", signature="SYSargs2",
definition=function(x) {
return(length(x@targets))
})
## Convert targets data.frame to list
targets.as.list <- function(x) {
targetslist <- yaml::yaml.load(yaml::as.yaml(x, column.major = FALSE))
names(targetslist) <- x$SampleName
return(targetslist)
}
## Usage:
# targets <- read.delim("targets.txt", comment.char = "#")
# targetslist <- targets.as.list(x=targets)
## Convert targets list to data.frame
targets.as.df <- function(x) {
targetstmp <- sapply(x, as.character, simplify=FALSE)
targetsDF <- as.data.frame(do.call("rbind", targetstmp))
rownames(targetsDF) <- NULL
colnames(targetsDF) <- names(x[[1]])
return(targetsDF)
}
## Usage:
# targets.as.df(x=targetslist)
# Behavior of "[" operator for SYSargs2
setMethod(f="[", signature="SYSargs2", definition=function(x, i, ..., drop) {
if(is.logical(i)) {
i <- which(i)
}
x@targets <- x@targets[i]
x@input <- x@input[i]
x@output <- x@output[i]
x@cmdlist <- x@cmdlist[i]
return(x)
})
## Behavior of "[[" operator for SYSargs2
setMethod(f="[[", signature=c("SYSargs2", "ANY", "missing"),
definition=function(x, i, ..., drop) {
return(as(x, "list")[[i]])
})
## Behavior of "$" operator for SYSargs2
setMethod("$", signature="SYSargs2",
definition=function(x, name) {
slot(x, name)
})
## Replacement method for SYSargs2 using "[" operator
setReplaceMethod(f="[[", signature="SYSargs2", definition=function(x, i, j, value) {
if(i==1) x@targets <- value
if(i==2) x@targetsheader <- value
if(i==3) x@modules <- value
if(i==4) x@wf <- value
if(i==5) x@clt <- value
if(i==6) x@yamlinput <- value
if(i==7) x@cmdlist <- value
if(i==8) x@input <- value
if(i==9) x@output <- value
if(i==10) x@cwlfiles <- value
if(i=="targets") x@targets <- value
if(i=="targetsheader") x@targetsheader <- value
if(i=="modules") x@modules <- value
if(i=="wf") x@wf <- value
if(i=="clt") x@clt <- value
if(i=="yamlinput") x@yamlinput <- value
if(i=="cmdlist") x@cmdlist <- value
if(i=="input") x@input <- value
if(i=="output") x@output <- value
if(i=="cwlfiles") x@cwlfiles <- value
return(x)
})
###################
## Load Workflow ##
###################
loadWorkflow <- function(targets=NULL, wf_file, input_file, dir_path=".") {
if(!file.exists(file.path(dir_path, wf_file))==TRUE) stop("Provide valid '.cwl' file. Check the file PATH.")
if(!file.exists(file.path(dir_path, input_file))==TRUE) stop("Provide valid 'files.'.yml' file. Check the file PATH.")
if(all(!is.null(targets) && !file.exists(targets)==TRUE)) stop("Provide valid 'targets' file. Check the file PATH.")
wf <- yaml::read_yaml(file.path(dir_path, wf_file))
input <- yaml::read_yaml(file.path(dir_path, input_file))
modules <- input$ModulesToLoad
if(is.null(modules)) modules <- list()
cwlfiles <- list(cwl=normalizePath(file.path(dir_path, wf_file)), yml=normalizePath(file.path(dir_path, input_file)))
inputvars <- list()
if(tolower(wf$class) == "workflow") {
steps <- names(wf$steps)
cwlfiles$steps <- steps
cltpaths <- sapply(seq_along(steps), function(x) normalizePath(wf$steps[[steps[x]]]$run))
cltlist <- sapply(cltpaths, function(x) yaml::read_yaml(file.path(x)), simplify = F)
names(cltlist) <- sapply(seq_along(steps), function(x) wf$steps[[steps[x]]]$run)
cmdlist <- sapply(names(cltlist), function(x) list(NULL))
myinput <- sapply(names(cltlist), function(x) list(NULL))
myoutput <- sapply(names(cltlist), function(x) list(NULL))
WF <- list(modules=modules, wf=wf, clt=cltlist, yamlinput=input, cmdlist=cmdlist, input=myinput, output=myoutput, cwlfiles=cwlfiles, inputvars=inputvars)
} else if(tolower(wf$class) == "commandlinetool") {
cltlist <- list(wf)
names(cltlist) <- basename(wf_file)
cmdlist <- sapply(names(cltlist), function(x) list(NULL))
myinput <- sapply(names(cltlist), function(x) list(NULL))
myoutput <- sapply(names(cltlist), function(x) list(NULL))
cwlfiles$steps <- strsplit(basename(wf_file), ".cwl")[[1]]
WF <- list(modules=modules, wf=list(), clt=cltlist, yamlinput=input, cmdlist=cmdlist, input=myinput, output=myoutput, cwlfiles=cwlfiles, inputvars=inputvars)
} else {
stop("Class slot in '<wf_file>.cwl' needs to be 'Workflow' or 'CommandLineTool'.")
}
if(!is.null(targets)) {
if(class(targets)=="SYSargs2"){
mytargets <- targets(targets)
targetsheader <- targetsheader(targets)[[1]]
} else {
if(!file.exists(file.path(targets))==TRUE) stop("Provide valid 'targets' file. Check the file PATH.")
ext <- strsplit(basename(targets), split="\\.")[[1]]
ext <- ext[[-1]]
if("txt" %in% ext){
mytargets <- read.delim(normalizePath(file.path(targets)), comment.char = "#")
mytargets <- targets.as.list(mytargets)
} else if( any(c("yml", "yaml") %in% ext)){
mytargets <- yaml::read_yaml(targets)
}
targetsheader <- readLines(normalizePath(file.path(targets)))
targetsheader <- targetsheader[grepl("^#", targetsheader)]
}
WF <- c(list(targets=mytargets, targetsheader=list(targetsheader=targetsheader)), WF)
} else {
WF <- c(list(targets=data.frame(), targetsheader=list()), WF)
}
return(as(WF, "SYSargs2"))
}
## Wrapper for loadWorkflow: Short and consistent name for the function
loadWF <- loadWorkflow
## Usage:
# targets <- system.file("extdata", "targets.txt", package="systemPipeR")
# dir_path <- system.file("extdata/cwl/hisat2", package="systemPipeR")
# WF <- loadWF(targets=targets, wf_file="hisat2-se/hisat2-mapping-se.cwl", input_file="hisat2-se/hisat2-mapping-se.yml", dir_path=dir_path)
###################################################
## Create CommandLineTools from Command-line ##
###################################################
createWF <- function(targets=NULL, commandLine, results_path="./results", module_load="baseCommand", file = "default",
overwrite = FALSE, cwlVersion = "v1.0", class = "CommandLineTool"){
## TODO if is not default, module load and file
if(!class(commandLine)=="list") stop("'commandLine' needs to be object of class 'list'.")
if(any(!c("baseCommand", "inputs", "outputs") %in% names(commandLine))) stop("Argument 'commandLine' needs to be assigned at least to: 'baseCommand', 'input' or 'output'.")
if(all(!c("Workflow", "CommandLineTool") %in% class)) stop("Class slot in '<wf_file>.cwl' needs to be 'Workflow' or 'CommandLineTool'.")
if(dir.exists(results_path)==FALSE) dir.create(path=results_path)
## module_load
## 1. module_load="baseCommand" will use the name of the software
## 2. module_load=c("ncbi-blast/2.2.30+", "hisat2/2.1.0") will use the specific version and names
if(module_load == "baseCommand"){
module_load <- commandLine$baseCommand[[1]]
} else {
module_load <- module_load
}
## File Path
## 1. file="default"
## 2. file = c("test.cwl", "test.yml")
if("default" %in% file){
if(dir.exists(paste("param/cwl/", commandLine$baseCommand, sep=""))==FALSE) dir.create(path=paste("param/cwl/", commandLine$baseCommand, sep=""))
file.cwl <- paste("param/cwl/", commandLine$baseCommand, "/", commandLine$baseCommand, ".cwl", sep="")
file.yml <- paste("param/cwl/", commandLine$baseCommand, "/", commandLine$baseCommand, ".yml", sep="")
} else {
for(i in seq_along(file)){
extension <- sub('.*\\.', '', file[[i]])
if(!c("cwl") %in% extension & !c("yml") %in% extension) stop ("Argument 'file' needs to be assigned as a character vector with the names of the two param file. For example, 'test.cwl' and 'test.yml'.")
if(c("yml") %in% extension){
file.yml <- file[[i]]
} else if(c("cwl") %in% extension){
file.cwl <- file[[i]]
}
}
}
if(file.exists(file.cwl) & overwrite == FALSE)
stop(paste("I am not allowed to overwrite files; please delete existing file:",
file, "or set 'overwrite=TRUE', or provide a different name in the 'file' argument"))
if(file.exists(file.yml) & overwrite == FALSE)
stop(paste("I am not allowed to overwrite files; please delete existing file:",
file, "or set 'overwrite=TRUE', or provide a different name in the 'file' argument"))
## class("CommandLineTool", "Workflow")
WF.temp <- SYScreate("SYSargs2")
WF.temp <- as(WF.temp, "list")
## TODO: Expand to write.WF()
WF.temp$wf <- list()
WF.temp$clt <- write.clt(commandLine, cwlVersion, class, file.cwl)
WF.temp$yamlinput <- write.yml(commandLine, file.yml, results_path, module_load)
WF.temp$modules <- WF.temp$yamlinput$ModulesToLoad
WF.temp$cmdlist <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$input <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$output <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$cwlfiles <- list(cwl=normalizePath(file.path(file.cwl)), yml=normalizePath(file.path(file.yml)), steps=names(WF.temp$clt))
## targets
if(!is.null(targets)) {
mytargets <- read.delim(normalizePath(file.path(targets)), comment.char = "#")
mytargets <- targets.as.list(mytargets)
targetsheader <- readLines(normalizePath(file.path(targets)))
targetsheader <- targetsheader[grepl("^#", targetsheader)]
WF.temp <- c(list(targets=mytargets, targetsheader=list(targetsheader=targetsheader)), WF.temp)
} else {
WF.temp <- c(list(targets=data.frame(), targetsheader=list()), WF.temp)
}
return(as(WF.temp, "SYSargs2"))
}
## Usage:
## Example commandLine
# "hisat2 -S ./results/_SampleName_.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 --threads 4 -U _FASTQ_PATH1_"
## Provide a list
# baseCommand <- list(baseCommand="hisat2")
# inputs <- list(
# "S"=list(type="File", preF="-S", yml="./results/_SampleName_.sam"),
# "x"=list(type="File", preF="-x", yml="./data/tair10.fasta"),
# "k"= list(type="int", preF="-k", yml= 1L),
# "threads"= list(type="int", preF="-threads", yml=4L),
# "min-intronlen"= list(type="int", preF="--min-intronlen", yml= 30L),
# "max-intronlen"= list(type="int", preF="--max-intronlen", yml=3000L),
# "U"=list(type="File", preF="-U", yml="./data/_FASTQ_PATH1_") )
# outputs <- list("hisat2_sam"=list(type="File", "./results/_SampleName_.sam"))
# commandLine <- list(baseCommand=baseCommand, inputs=inputs, outputs=outputs)
#
## Creates a SYSargs2 object, populate all the command-line, and creates *.cwl and *.yml files
# targets <- system.file("extdata", "targets.txt", package="systemPipeR")
# WF <- createWF(targets=targets, commandLine, results_path="./results", module_load="baseCommand",
# file = "default", overwrite = FALSE, cwlVersion = "v1.0",
# class = "CommandLineTool")
# WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
###############################################
## Render WF for all samples in targets slot ##
###############################################
renderWF <- function(WF, inputvars=c(FileName="_FASTQ_PATH_")) {
if(any(length(cmdlist(WF)[[1]])!=0)) stop("Argument 'WF' needs to be assigned an object of class 'SYSargs2' and an object created by the 'loadWorkflow' function")
ids <- names(targets(WF))
if(length(ids)==0) ids <- "defaultid"
bucket <- sapply(ids, function(x) "", simplify=FALSE)
bucketlist <- list(cmd=bucket, input=bucket, output=bucket)
.renderWFsingle <- function(WF, id) {
inputvarslist <- sapply(names(inputvars), function(x) "", simplify=FALSE)
if(!length(names(targets(WF)))==0) (if(any(!names(inputvars) %in% colnames(targets.as.df(WF$targets)))) stop("Please note that the 'inputvars' variables need to be defined in the 'input_file'; as well it needs to match the column names defined in the 'targets' file."))
input <- yamlinput(WF)
for(i in seq_along(inputvars)) {
subvalue <- targets(WF)[[id]][[names(inputvars)[i]]]
if(length(subvalue)!=0) {
input <- rapply(input, function(x) gsub(inputvars[[i]], subvalue, x), how = "replace")
inputvarslist[[i]] <- subvalue
}
}
WF <- as(WF, "list")
WF$yamlinput <- input
WF <- as(WF, "SYSargs2")
WF <- injectCommandlinelist(WF)
## Fix for cases like IDX/tophat/STAR, more than one output file per cmdlist
outfilelist <- sapply(names(cmdlist(WF)), function(x) list(NULL))
for(i in seq_along(outfilelist)) {
for (j in seq_along(cmdlist(WF)[[names(outfilelist[i])]]$output))
outfilelist[[i]][j] <- cmdlist(WF)[[names(outfilelist[i])]]$output[[j]]$glob
}
cmdlist <- renderCommandline(WF, redirect=">")
inputvars <- as.list(inputvars)
return(list(cmd=cmdlist, input=inputvarslist, output=outfilelist, inputvars=inputvars))
}
for(i in ids) {
tmplist <- .renderWFsingle(WF=WF, id=i)
bucketlist[["cmd"]][[i]] <- tmplist[["cmd"]]
bucketlist[["input"]][[i]] <- tmplist[["input"]]
bucketlist[["output"]][[i]] <- tmplist[["output"]]
}
WF <- as(WF, "list")
WF$cmdlist <- bucketlist$cmd
WF$input <- bucketlist$input
WF$output <- bucketlist$output
WF$inputvars <- tmplist$inputvars
return(as(WF, "SYSargs2"))
}
## Usage:
# WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
###############################################################
## Subsetting the input and output slots by name or position ##
###############################################################
subsetWF <- function(args, slot, subset=NULL, index=NULL, delete=FALSE){
## Check the class and slot
if(!class(args)=="SYSargs2") stop("args needs to be object of class 'SYSargs2'.")
if(all(!c("input", "output", "step") %in% slot)) stop("Argument slot can only be assigned one of: 'input' or 'output' or 'step'.")
## slot input
if(slot %in% "input"){
## Check the subset
if(all(!is.null(subset) & is.character(subset) & !any(names(inputvars(args)) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following values in the subset argument:", paste(names(inputvars(args)), collapse=", "), "OR the corresponding position OR NULL"))
if(all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(inputvars(args))) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following position in the subset argument:", paste(seq_along(names(inputvars(args))), collapse=", "), "OR the names OR NULL"))
subset_input <- input(args)
subset_sample <- sapply(names(subset_input), function(x) list(NULL))
if(!is.null(subset)) {
for(i in seq_along(names(subset_input)))
subset_sample[[i]] <- subset_input[[i]][[subset]]
} else {
subset_sample <- subset_input
}
}
## slot output
if(slot %in% "output"){
## Check the subset
if(all(!is.null(subset) & is.character(subset) & !any(names(args$clt) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following values in the subset argument:", paste(names(args$clt), collapse=", "), "OR the corresponding position OR NULL"))
if(all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(args$clt)) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following position in the subset argument:", paste(seq_along(names(args$clt)), collapse=", "), "OR the names OR NULL"))
if(!is.null(subset)){
if(!any(seq_along(output(args)[[1]][[subset]]) %in% index)) stop(paste("For the 'index' argument, can only be assigned one of the following position:", paste(seq_along(output(args)[[1]][[subset]]), collapse=", ")))
}
subset_output <- output(args)
subset_sample <- as.character()
if(all(!is.null(subset) & !is.null(index))) {
for(i in seq_along(names(subset_output)))
# subset_sample[[i]] <- subset_output[[i]][[subset]]
subset_sample <- c(subset_sample, subset_output[[i]][[subset]][index])
} else {
subset_sample <- subset_output
}
names(subset_sample) <- rep(names(subset_output), each=length(subset_sample[1]))
}
## slot step
if(slot %in% "step"){
## Check the subset
if(all(!is.null(subset) & is.character(subset) & !any(names(args$clt) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following values in the subset argument:", paste(names(args$clt), collapse=", "), "OR the corresponding position OR NULL"))
if(all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(args$clt)) %in% subset))) stop(paste("For the", slot, "slot, can only be assigned one of the following position in the subset argument:", paste(seq_along(names(args$clt)), collapse=", "), "OR the names OR NULL"))
subset_step <- cmdlist(args)
subset_sample <- sapply(names(subset_step), function(x) list(NULL))
if(!is.null(subset)) {
for(i in seq_along(names(subset_sample)))
subset_sample[[i]] <- subset_step[[i]][[subset]]
} else {
subset_sample <- subset_step
}
}
## IF subset=NULL returns a list
if(!is.null(subset)){
names <- names(subset_sample)
subset_sample <- as.character(subset_sample)
names(subset_sample) <- names
}
## delete == TRUE
if(delete==TRUE){
## delete option only works if the subset is define
if(!is.character(subset_sample)) {
stop(paste("Please define the 'subset' to be deleted in the subset argument"))
}
if(all(file.exists(subset_sample))){
del <- file.remove(subset_sample)
cat("The following files has been deleted:", paste(subset_sample, collapse = ", "), "\n")
} else if(all(!file.exists(subset_sample))) {
cat("The subset cannot be deleted: no such file ", "\n")
}
}
return(subset_sample)
}
## Usage:
# subsetWF(WF, slot="input", subset='FileName')
# subsetWF(WF, slot="step", subset=1)
# subsetWF(WF, slot="output", subset=1, index=1)
# subsetWF(WF, slot="output", subset=1, index=1, delete=TRUE) ## in order to delete the subset files list
#########################################################
## Update the output location after run runCommandline ##
#########################################################
output_update <- function(args, dir=FALSE, dir.name=NULL, replace=FALSE, extension=NULL, make_bam=FALSE){
## Folder name provide in the yml file
## this file will exists, because its create on the definition of the project or when runCommandline is used.
# if(is.null(args$yamlinput$results_path$path)) {
# if(is.null(dir.name)) {
# stop("argument 'dir.name' missing. The argument can only be assigned 'NULL' when directory name is provided in the yml template. The argument should be assigned as a character vector of length 1")
# }}
## Validation for 'args'
if(any(class(args)!="SYSargs" & class(args)!="SYSargs2")) stop("Argument 'args' needs to be assigned an object of class 'SYSargs' OR 'SYSargs2'")
## If the argument 'replace' is TRUE, it is required to specify the 'extension' argument
if(replace!=FALSE){
if(is.null(extension)) {
stop("argument 'extension' missing. The argument can only be assigned 'NULL' when no replacement is required. The argument should be assigned as a character vector with the current extension and the new one.")
}}
if(replace==TRUE){
args <- as(args, "list")
for(i in seq_along(args$output)){
for(j in seq_along(args$output[[i]])){
for(k in seq_along(args$output[[i]][[j]])){
name <- basename(args$output[[i]][[j]][k])
dirRep <- sub("/([^/]*)$", "", args$output[[i]][[j]][k])
if(grepl(extension[1], name)){
sam <- .getExt(name)
args$output[[i]][[j]][k] <- suppressWarnings(normalizePath(paste0(dirRep, "/", .getFileName(name), extension[2])))
} else {
# message if the extension are not matching, return the same object
args$output[[i]][[j]][k] <- args$output[[i]][[j]][k]
}
if(make_bam==TRUE){
if(grepl("bam", args$output[[i]][[j]][k])){
args$output[[i]][[j]][length(args$output[[i]][[j]])+1] <- paste0(gsub("\\.bam$", "", args$output[[i]][[j]][k]), ".bam.bai")
}
}
}
}
}
args <- as(args, "SYSargs2")
}
if(dir==TRUE){
args <- as(args, "list")
## Results path
logdir <- normalizePath(args$yamlinput$results_path$path)
## Workflow Name: Detail: if the folder was not created during 'runCommandline,' it will return a warning message pointing 'no such directory'
if(is.null(dir.name)) {
cwl.wf <- normalizePath(paste0(logdir, "/", .getFileName(args$cwlfiles$cwl)))
} else if(!is.null(dir.name)){
cwl.wf <- normalizePath(paste0(logdir, "/", dir.name))
}
## New path
for(i in seq_along(args$output)){
for(j in seq_along(args$output[[i]])){
for(k in seq_along(args$output[[i]][[j]])){
name <- basename(args$output[[i]][[j]][k])
args$output[[i]][[j]][k] <- paste0(cwl.wf, "/", names(args$output[i]), "/", name)
}
}
}
args <- as(args, "SYSargs2")
}
return(args)
}
## Usage:
# WF <- output_update(WF, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam"))
# WF <- output_update(WF, dir=TRUE, replace=TRUE, extension=c(".sam", ".bam"))
##################################
## Unexported helper functions ##
##################################
##############################################
## Resolve CWL variable-based path instance ##
##############################################
pathInstance <- function(pathvar, input, altinput) {
pathvar <- unlist(strsplit(pathvar[[1]], "/"))
extension <- gsub("^.*\\)", "", pathvar)
pathvar <- gsub("(^.*\\)).*", "\\1", pathvar)
pathvar <- gsub("\\$|\\(|\\)", "", pathvar)
pathvarlist <- strsplit(pathvar, "\\.")
filenametype <- unlist(lapply(seq_along(pathvarlist), function(x) pathvarlist[[x]][pathvarlist[[x]] %in% c("basename", "nameroot")]))
filenametype <- sapply(seq_along(pathvarlist), function(x) filenametype[x]) # In case of empty filenamelist NA are returned instead
filenametype <- ifelse(is.na(filenametype), "NA", filenametype)
myvalue_list <- sapply((pathvarlist), function(x) list(NULL), simplify=FALSE)
for(i in seq_along(pathvarlist)){
myvalue <- NULL
for(j in seq_along(pathvarlist[[i]])){
myvalue_input <- input[[pathvarlist[[i]][[j]]]]
if("class" %in% names(myvalue_input)) myvalue_input$class <- NULL
if (!is.null(myvalue_input)) {
myvalue <- c(myvalue, list(myvalue_input))
}
}
myvalue_list[i] <- myvalue
}
## Use altinput if no values were assigned by input
if(all(sapply(myvalue_list, length)==0)) {
myvalue_list <- sapply(seq_along(pathvarlist), function(x) altinput[[pathvarlist[[x]][[2]]]]$default, simplify=FALSE)
}
enforce_list <- length(myvalue_list) == 1
if(any(enforce_list)) for(i in which(enforce_list)) myvalue_list[[i]] <- list(class=NULL, path=myvalue_list[[i]])
mypathvec <- list()
for(i in seq_along(myvalue_list)){
for(j in seq_along(myvalue_list[[i]])){
vec_input <- myvalue_list[[i]][[j]]
if (any(names(myvalue_list[[i]][[j]]) %in% c("path"))){
vec_input <- myvalue_list[[i]][[j]]$path
}
if (!is.null(vec_input )) {
vec <- sapply(seq_along(filenametype), function(x) pathUtils(vec_input [x], type=filenametype[x]))
mypathvec <- c(mypathvec, list(vec))}
}
mypathvec <- lapply(mypathvec, function(x) x[!is.na(x)])
}
## Assign '.' to 'runtime.outdir'
pwdindex <- any(pathvar %in% c("runtime.outdir"))
nullindex <- any(sapply(mypathvec, length) == 0)
if(any(pwdindex & nullindex)) {
mypathvec[pwdindex & nullindex] <- "."
}
mypathvec <- sapply(seq_along(mypathvec), function(x) if(is.null(mypathvec[[x]])) {mypathvec[[x]] <- "" } else {mypathvec[[x]] <- mypathvec[[x]]})
## Generate output
if(length(mypathvec)==1){
extension <- extension[!is.na(extension)]
if(all("" %in% extension & length(extension)==1)){
mypath <- paste(mypathvec)
} else {
extension <- extension[extension != ""]
mypath <- paste(mypathvec, extension, sep="/") }
} else if(any(is.na(extension))){
mypath <- sapply(seq_along(mypathvec), function(x) paste0(mypathvec[x]))
} else {
mypath <- sapply(seq_along(mypathvec), function(x) paste0(mypathvec[x], extension[x]))
if(length(mypathvec) < length(extension)){
extension <- extension[extension != ""]
mypath <- c(mypath, extension)
}
}
returnpath <- file.path(paste(mypath, collapse="/"))
return(returnpath)
}
#################################################################
## Helper function to construct/manipulate path and file names ##
#################################################################
pathUtils <- function(x, type, dropdir=TRUE) {
if(type=="dirname") {
mypath <- dirname(x)
} else if(type=="basename") {
mypath <- basename(x)
} else if(type=="nameroot") {
mypath <- gsub("(^.*)\\..*$", "\\1", basename(x))
} else {
mypath <- x # Return unchanged input if 'type' is not one the above three values
# warning("Argument 'type' needs to be assigned one of: 'dirname', 'basename', 'nameroot'")
}
## Construct output
if(dropdir==TRUE) {
return(mypath)
} else if(dropdir==FALSE & type!="dirname") {
return(file.path(dirname(x), mypath))
} else {
stop("Invalid path request, e.g. drop and appenrequest, e.g. drop and append dir.")
}
}
#############################################
## Assemble Commandline Components in List ##
#############################################
assembleCommandlineList <- function(clt=WF$clt[[1]]) {
## Base command and arguments
basecommand <- clt$baseCommand
arguments <- clt$arguments
if(!is.null(arguments)){
for(i in seq_along(arguments)) arguments[[i]][["position"]] <- ""
}
# ## Handling of special cases (here mkdir)
# if(basecommand[1]=="mkdir") {
# clt$inputs <- ""
# clt$outputs[[1]]$type <- NULL
# }
## Inputs
inputargs <- renderInputs(x=clt$inputs, returntags=list(inputBinding=c("prefix"), type="any"))
## Outputs
outputargs <- renderOutputs(x=clt$outputs, stdout=clt$stdout, returntags=list(outputBinding=c("prefix", "glob"), type="any"))
## Assemble command-line
arglist <- list(baseCommand=basecommand, arguments=arguments, inputs=inputargs, outputs=outputargs)
return(arglist)
}
########################################
## Populate Variables in Command-line ##
########################################
populateCommandline <- function(WF, cltid, exclude=exclude, mmp) {
## Populate inputs
cmdlist <- assembleCommandlineList(nameUnnamed(WF$clt[[cltid]]))
# yamlinput <- list(inputs=WF$yamlinput) # fix 29Dec18
yamlinput <- list(inputs=nameUnnamed(WF$yamlinput))
cmdnames <- nestedNames(cmdlist)
yamlnames <- nestedNames(yamlinput)
## Create yamlnames list without 'unnamed' tags if present
yamlnamestmp <- sapply(names(yamlnames), function(x) yamlnames[[x]][!grepl("_\\d{1,}_", yamlnames[[x]])], simplify=FALSE)
matchmap <- findBestMatch(x=cmdnames, y=yamlnamestmp, mmp, minmatch=2)
excludepos <- sapply(names(cmdnames), function(x) tail(cmdnames[[x]], 1) %in% exclude)
removetype <- sapply(cmdnames, function(x) tail(x %in% "type", 1))
for(i in seq_along(cmdnames)) {
inputslice <- yamlnames[[matchmap[[i]]]]
## Inject yamlinput in corresponding cmdlist components
if((length(inputslice) > 0) & (excludepos[[i]])!=TRUE) {
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value=subsetListbyName(yamlinput, inputslice))
}
## Eliminate type values (e.g. boolean/string) that are not associated with yamlinput
if((length(inputslice) == 0) & (excludepos[[i]]!=TRUE & removetype[[i]]==TRUE)) {
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value="")
}
}
## Populate variable path instances
pos <- sapply(names(cmdnames), function(x) grepl("^\\$\\(", subsetListbyName(cmdlist, cmdnames[[x]])))
if(any(pos)) {
for(i in which(pos)) {
mypath <- pathInstance(pathvar=subsetListbyName(cmdlist, cmdnames[[i]]), input=WF$yamlinput, altinput=nameUnnamed(WF$clt[[cltid]])$inputs)
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value=mypath)
}
}
return(cmdlist)
}
####################################################################################
## Inject command-line lists from populateCommandline into cmdlist slot WF object ##
####################################################################################
injectCommandlinelist <- function(WF) {
if(class(WF)=="SYSargs2") {
WF <- as(WF, "list")
} else {
stop("WF needs to be object of class 'SYSargs2'.")
}
cmdsteps <- names(WF$cmdlist)
mmp <- list(class=c("class", "type"), path=c("path", "type"))
exclude <- c("baseCommand", "prefix")
for(i in seq_along(cmdsteps)) {
WF$cmdlist[[i]] <- populateCommandline(WF, cltid=cmdsteps[i], exclude=exclude, mmp)
}
## Update inputs that depend on output of one of the previous steps
.connectInout <- function(WF) {
steps <- WF$wf$steps
stepnames <- sapply(names(steps), function(x) steps[[x]]$run)
connectedlist <- sapply(seq_along(steps), function(x) which(grepl("/", steps[[x]]$`in`)))
names(connectedlist) <- names(steps)
connectedlist <- connectedlist[sapply(connectedlist, length) > 0]
if(length(connectedlist) > 0) {
for(j in seq_along(connectedlist)) {
connected <- connectedlist[j]
## Inner loop to support multiple connections
for(i in seq_along(connected[[1]])) {
## Generate output string from corresponding upstream step
connectednames <- strsplit(unlist(steps[[names(connected)]])[connected[[1]][i]], "/")
connectedstep <- stepnames[[connectednames[[1]][1]]]
connectedoutput <- WF$cmdlist[[connectedstep]]$outputs
connectedoutput <- unlist(connectedoutput[connectednames[[1]][[2]]])
## Inject output string into input of corresponding downstream step
termname <- as.character(sapply(nestedNames(steps[[names(connected)]]), tail, 1)[connected[[1]][i]])
namepath <- c("cmdlist", stepnames[[names(connected)]], "inputs", termname)
#fix 02Jan19 # namepath <- c("cmdlist", stepnames[[names(connected[i])]], "inputs",
# names(steps[[names(connected[i])]][connected[[i]]][["in"]]))
WF <- injectListbyName(l=WF, namepath, value=as.character(connectedoutput), type="value")
}
}
}
names(WF$cmdlist) <- WF$cwlfiles$steps
names(WF$clt) <- WF$cwlfiles$steps
return(WF)
}
WF <- .connectInout(WF)
return(as(WF, "SYSargs2"))
}
################################
## Render Commandline Strings ##
################################
renderCommandline <- function(x, dropoutput=TRUE, redirect=">") {
if(class(x)=="SYSargs2") {
x <- as(x, "list")
} else {
stop("x needs to be object of class 'SYSargs2'.")
}
## Fct to render single command-line string
.renderCommandline <- function(x=x, redirect=redirect) {
cmdnames <- nestedNames(x)
## Check for stdout instance, if present prepend value of redirect
stdoutpos <- sapply(cmdnames, function(y) {
tmp <- y %in% c("outputs", "stdout")
tmp[1] == TRUE & tail(tmp, 1) == TRUE
})
if(any(stdoutpos)) {
cmd <- x
cmdnamessub <- cmdnames[stdoutpos]
for(i in seq_along(cmdnamessub)) {
stdoutstr <- unlist(subsetListbyName(l=cmd, cmdnamessub[[i]]))
stdoutstr <- paste(redirect, stdoutstr, sep=" ")
cmd <- injectListbyName(l=cmd, cmdnamessub[[i]], value=stdoutstr, type="value")
}
} else {
#if(dropoutput==TRUE & x$baseCommand[1]!="mkdir") x <- x[names(x) != "outputs"]
if(dropoutput==TRUE) x <- x[names(x) != "outputs"]
cmd <- x
}
## Collapse list to single string
cmd <- paste(unlist(cmd), collapse=" ")
return(cmd)
}
## Check here for WF class instead of names once implemented
if(all(names(x) == c("targets", "targetsheader", "modules", "wf", "clt", "yamlinput", "cmdlist", "input", "output", "cwlfiles", "inputvars"))) {
cmdstring <- sapply(names(x$cmdlist),
function(i) .renderCommandline(x=x$cmdlist[[i]], redirect=redirect),
simplify=FALSE)
} else {
cmdstring <- .renderCommandline(x=x, redirect=redirect)
}
return(cmdstring)
}
###########################################################
## Helper Function to render inputs of single clt object ##
###########################################################
renderInputs <- function(x=WF$clt[[1]]$inputs, returntags=list(inputBinding=c("prefix"), type="any")) {
inputnames <- names(x)
## Remove entries where inputBinding is missing since those parameters do not appear on the command-line
inputnames <- inputnames[!sapply(inputnames, function(i) is.null(x[[i]]$inputBinding))]
x <- x[inputnames]
inputlist <- sapply(inputnames, function(i) {
tmp <- c(x[[i]][[names(returntags["inputBinding"])]][returntags$inputBinding],
x[[i]][names(returntags["type"])])
# input=x[[i]][[names(returntags["type"])]])
tmp <- tmp[!sapply(tmp, length) < 1]
tmp}, simplify=FALSE)
inputposition <- sapply(inputnames, function(i) x[[i]]$inputBinding$position)
if(class(inputposition)=="list") inputposition <- seq_along(inputposition)
inputlist <- inputlist[order(inputposition)]
return(inputlist)
}
############################################################
## Helper Function to render outputs of single clt object ##
############################################################
renderOutputs <- function(x=WF$clt[[1]]$outputs, stdout=WF$clt[[1]]$stdout, returntags=list(outputBinding=c("prefix", "glob"), type="any")) {
outputnames <- names(x)
outputlist <- sapply(outputnames, function(i) {
tmp <- c(x[[i]][[names(returntags["outputBinding"])]][returntags$outputBinding[1]],
x[[i]][[names(returntags["outputBinding"])]][returntags$outputBinding[2]],
type=x[[i]][[names(returntags["type"])]])
# x[[i]][[names(returntags["type"])]])
tmp <- tmp[!sapply(tmp, length) < 1]
tmp}, simplify=FALSE)
## Remove entries 'type: Directory' since they are only relevant for CWL internals
types <- sapply(seq_along(outputlist), function(i) outputlist[[i]][["type"]])
outputlist <- outputlist[tolower(types) != "directory"]
for(i in seq_along(outputlist)) {
check <- outputlist[[i]][["type"]]
if(is.null(check)) check <- "absent"
if(check == "stdout") {
# outputlist[[i]] <- c(outputlist[[i]]["type"], stdout=stdout)
outputlist[[i]] <- list(stdout=stdout)
}
}
return(outputlist)
}
##################
## Name unnamed ##
##################
## Yaml format may have unnamed components; not having names creates
## problems in some cases with name-based subsetting of lists; thus the
## following function assigns names to unnamed components. A tagging syntax
## can be used to ignore them in subsetting routines if necessary.
nameUnnamed <- function(l) {
for(a in seq_along(l)) {
for(b in seq_along(l[[a]])) {
for(c in seq_along(l[[a]][[b]])) {
for(d in seq_along(l[[a]][[b]][[c]])) {
if(is.null(names(l[[a]][[b]][[c]][[d]])) & class(l[[a]][[b]][[c]][[d]])=="list") names(l[[a]][[b]][[c]][[d]]) <- paste0("_", seq_along(l[[a]][[b]][[c]][[d]]), "_")
if(is.null(names(l[[a]][[b]][[c]])) & class(l[[a]][[b]][[c]])=="list") names(l[[a]][[b]][[c]]) <- paste0("_", seq_along(l[[a]][[b]][[c]]), "_")
if(is.null(names(l[[a]][[b]])) & class(l[[a]][[b]])=="list") names(l[[a]][[b]]) <- paste0("_", seq_along(l[[a]][[b]]), "_")
if(is.null(names(l[[a]])) & class(l[[a]])=="list") names(l[[a]]) <- paste0("_", seq_along(l[[a]]), "_")
}
}
}
}
return(l)
}
###########################################################################
## Return names of nested lists (with up to 5 nesting levels) as vectors ##
###########################################################################
nestedNames <- function(l, sep="_") {
nestednames <- lapply(seq_along(l), function(a)
lapply(seq_along(l[[a]]), function(b)
lapply(seq_along(l[[a]][[b]]), function(c)
lapply(seq_along(l[[a]][[b]][[c]]), function(d)
lapply(seq_along(l[[a]][[b]][[c]][[d]]), function(e)
c(names(l[a]),
names(l[[a]][b]),
names(l[[a]][[b]][c]),
names(l[[a]][[b]][[c]][d]),
names(l[[a]][[b]][[c]][[d]][e])))))))
while(any(sapply(nestednames, is.list))) nestednames <- unlist(nestednames, recursive=FALSE)
names(nestednames) <- sapply(nestednames, paste, collapse=sep)
return(nestednames)
}
########################################################
## Find best matches among name vectors for two lists ##
########################################################
findBestMatch <- function(x, y, mmp, minmatch=2) {
matchindex <- setNames(rep(NA, length(x)), names(x))
## Perfect matches
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
ident <- all(y[[j]] %in% x[[i]])
lengthcheck <- length(y[[j]]) == length(x[[i]])
ident & lengthcheck
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i,]),1), simplify=FALSE)
for(i in seq_along(matchindex)) {
if(is.na(matchindex[i])==TRUE) matchindex[i] <- matchpos[[i]][1]
}
## Matches with tag specific terminal mismatches
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
all(termMMatch(x=x[[i]], y=y[[j]], mmp=mmp, minmatch=minmatch, returntype="logical"))
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i,]),1), simplify=FALSE)
for(i in seq_along(matchpos)) {
if(is.na(matchindex[i])==TRUE) matchindex[i] <- matchpos[[i]][1]
}
## Matches with terminal (max 1) length differences
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
ident <- all(y[[j]] %in% x[[i]][1:length(y[[j]])])
lengthcheck <- length(y[[j]]) >= minmatch
ident & lengthcheck
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i,]),1), simplify=FALSE)
for(i in seq_along(matchpos)) {
if(is.na(matchindex[i])==TRUE) matchindex[i] <- matchpos[[i]][1]
}
return(matchindex)
}
###############################################
## Inject into List Subsetted by Name Vector ##
###############################################
injectListbyName <- function(l, name_index, value, type="value") {
## Input validity check for l and name_index
if(all(!c("name", "value") %in% type[1])) stop("Argument type can only be assigned one of: 'name' or 'value'.")
checknames <- nestedNames(l)
checknames <- checknames[!sapply(checknames, length) < length(name_index)]
checknames <- sapply(checknames, function(x) which(name_index == x[seq_along(name_index)]), simplify=FALSE)
checknames <- checknames[sapply(checknames, length) == length(name_index)]
checkindex <- sapply(seq_along(checknames), function(x) all(checknames[[x]] == seq_along(checknames[[x]])))
if(length(checkindex)==0) checkindex <- FALSE
if(any(checkindex==FALSE)) stop("Invalid 'name_index' lacking consecutive name matches in any list component.")
if(length(name_index)==1) {
if(type=="name") names(l)[which(names(l) %in% name_index[1])] <- value
if(type=="value") l[[name_index[1]]] <- value
} else if(length(name_index)==2) {
if(type=="name") names(l[[name_index[1]]])[1] <- value
if(type=="value") l[[name_index[1]]][[name_index[2]]] <- value
} else if(length(name_index)==3) {
if(type=="name") names(l[[name_index[1]]][[name_index[2]]]) <- value
if(type=="value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]] <- value
} else if(length(name_index)==4) {
if(type=="name") names(l[[name_index[1]]][[name_index[2]]][[name_index[3]]]) <- value
if(type=="value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]] <- value
} else if(length(name_index)==5) {
if(type=="name") names(l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]]) <- value
if(type=="value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]][[name_index[5]]] <- value
} else {
stop("Nesting level (length of name_index) cannot exceed 5.")
}
return(l)
}
################################
## Subset List by Name vector ##
################################
subsetListbyName <- function(l, name_index) {
## Input validity check for l and name_index
checknames <- nestedNames(l)
checknames <- checknames[!sapply(checknames, length) < length(name_index)]
checknames <- sapply(checknames, function(x) which(name_index == x[seq_along(name_index)]), simplify=FALSE)
checknames <- checknames[sapply(checknames, length) == length(name_index)]
checkindex <- sapply(seq_along(checknames), function(x) all(checknames[[x]] == seq_along(checknames[[x]])))
if(length(checkindex)==0) checkindex <- FALSE
if(any(checkindex==FALSE)) stop("Invalid 'name_index' lacking consecutive name matches in any list component.")
if(length(name_index)==1) {
lsub <- l[name_index[1]]
} else if(length(name_index)==2) {
lsub <- l[[name_index[1]]][name_index[2]]
} else if(length(name_index)==3) {
lsub <- l[[name_index[1]]][[name_index[2]]][name_index[3]]
} else if(length(name_index)==4) {
lsub <- l[[name_index[1]]][[name_index[2]]][[name_index[3]]][name_index[4]]
} else if(length(name_index)==5) {
lsub <- l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]][name_index[5]]
} else {
stop("Nesting level (length of name_index) cannot exceed 5.")
}
return(lsub)
}
############################################################
## Matching vectors with tag specific terminal mismatches ##
############################################################
termMMatch <- function(x, y, mmp, minmatch=2, returntype="values") {
## Input validity checks
if(!all(sapply(mmp, length) == 2)) stop("List components of 'mmp' need to be vectors of length = 2.")
if(!all(sapply(names(mmp), function(x) x %in% mmp[[x]]))) stop("Names of 'mmp' list components need to match one of the entries in the corresponding list.")
## Check for candidate matches
lengthcheck <- length(x) == length(y)
term <- unique(c(tail(x, 1), tail(y, 1)))
l <- length(term)==2
minmatchcheck <- sum(x[-length(x)] == y[-length(y)]) >= minmatch
index <- which(sapply(names(mmp), function(x) all(term %in% mmp[[x]])))
p <- length(index) > 0
if(lengthcheck & l & p & minmatchcheck) {
for(i in index) {
x[length(x)] <- names(mmp)[i]
y[length(y)] <- names(mmp)[i]
}
if(returntype=="values") {
return(list(x=x, y=y))
} else if(returntype=="logical") {
return(rep(TRUE, length(x)))
} else {
stop("Argument 'returntype' can only be assigned one of: 'values' or 'logical'.")
}
## Return input if there are not matches
} else {
if(returntype=="values") {
return(list(x=x, y=y))
} else if(returntype=="logical") {
x <- x == y[1:length(x)]
x[is.na(x)] <- FALSE
return(x)
} else {
stop("Argument 'returntype' can only be assigned one of: 'values' or 'logical'.")
}
}
}
###################
## write.cwl ##
###################
write.clt <- function(commandLine, cwlVersion, class, file.cwl) {
cwlVersion <- cwlVersion
class <- class
baseCommand <- commandLine$baseCommand[[1]]
##requeriments
if(is.null(commandLine$requeriments)){
dump <- "do nothing"
} else {
requeriments <- list() ##TODO
}
##ARGUMENTS
if(is.null(commandLine$arguments)){
dump <- "do nothing"
} else {
arguments <- sapply(seq_along(commandLine$arguments), function(x) list())
for(i in seq_along(commandLine$arguments)){
arguments[[i]]["prefix"] <- commandLine$arguments[[i]]$preF
arguments[[i]]["valueFrom"] <- commandLine$arguments[[i]]$valueFrom
}
}
##INPUTS
if(any(names(commandLine$inputs)=="")) stop("Each element of the list 'commandLine' needs to be assigned a name")
if(is.null(names(commandLine$inputs))) stop("Each element of the list 'commandLine' needs to be assigned a name")
inputs <- sapply(names(commandLine$inputs), function(x) list())
for(i in seq_along(commandLine$inputs)){
if("type" %in% names(commandLine$inputs[[i]])){
if(!c("type") %in% names(commandLine$inputs[[i]])) stop("Each element of the sublist 'inputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File' or type='int' or type='string'")
inputs[[i]]["type"] <-commandLine$inputs[[i]]$type
}
if("preF" %in% names(commandLine$inputs[[i]])){
if(commandLine$inputs[[i]]$preF==""){
inputs[[i]]["inputBinding"] <- list(list(prefix=NULL))
} else{
inputs[[i]]["inputBinding"] <- list(list(prefix=commandLine$inputs[[i]]$preF))
}
}
if(any(c("label", "secondaryFiles", "doc", "default", "format", "streamable") %in% names(commandLine$inputs[[i]]))){
for(j in which(c("label", "secondaryFiles", "doc", "default", "format", "streamable") %in% names(commandLine$inputs[[i]]))){
inputs[[i]][names(commandLine$inputs[[i]][j])] <- commandLine$inputs[[i]][names(commandLine$inputs[[i]])][[j]]
}
}
}
##OUTPUTS
outputs <- sapply(names(commandLine$outputs), function(x) list())
if(!c("type") %in% names(commandLine$inputs[[i]])) stop("Each element of the sublist 'outputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File'.")
if(all(!c("File", "Directory") %in% commandLine$outputs[[1]]$type)) stop("Each element of the sublist 'outputs' in 'commandLine' needs to be defined the type = 'Directory', 'File'.")
for(i in seq_along(commandLine$outputs)){
outputs[[i]]["type"] <- commandLine$outputs[[i]]$type
outputs[[i]]["outputBinding"] <- list(list(glob=commandLine$outputs[[i]][[2]]))
}
clt <- list(cwlVersion=cwlVersion, class=class)
if(exists("requeriments")){
clt <- c(clt, list(requeriments=requeriments))
}
if(exists("arguments")){
clt <- c(clt, list(arguments=arguments))
}
clt <- c(clt, list(baseCommand=baseCommand, inputs=inputs, outputs=outputs))
yaml::write_yaml(x=clt, file = file.cwl)
clt <- list(clt)
names(clt) <- baseCommand
return(clt)
}
## Usage:
# clt_cwl <- write.clt(commandLine, cwlVersion, class, file.cwl)
###################
## write.yml ##
###################
## Write the yaml file
write.yml <- function(commandLine, file.yml, results_path, module_load){
inputs <- commandLine$inputs
if(any(names(inputs)=="")) stop("Each element of the list 'commandLine' needs to be assigned a name")
if(is.null(names(inputs))) stop("Each element of the list 'commandLine' needs to be assigned a name")
##yamlinput_yml
yamlinput_yml <- sapply(names(inputs), function(x) list())
for(i in seq_along(inputs)){
if(!c("type") %in% names(inputs[[i]])) stop("Each element of the sublist 'inputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File' or type='int' or type='string'")
if("type" %in% names(inputs[[i]])){
if(any(c("File", "Directory") %in% inputs[[i]])){
yamlinput_yml[[i]]["class"] <- inputs[[i]]$type
yamlinput_yml[[i]]["path"] <- inputs[[i]]$yml
} else if (any(c("int", "string") %in% inputs[[i]])){
yamlinput_yml[[i]] <- inputs[[i]]$yml
}
} else {
print("do something")
}
}
## results_path
yamlinput_yml[["results_path"]]["class"] <- list("Directory")
yamlinput_yml[["results_path"]]["path"] <- list(results_path)
## moduleload
for(i in seq_along(module_load)){
yamlinput_yml[["ModulesToLoad"]][paste0("module", i)] <- list(module_load[[i]])
}
## write out the '.yml' file
yaml::write_yaml(x=yamlinput_yml, file = file.yml)
return(yamlinput_yml)
}
## Usage:
# yamlinput_yml <- write.yml(commandLine, file.yml, results_path, module_load)
##################
## SYSargs2Pipe ##
##################
####################################################
## Class and Method Definitions for SYSargs2Pipe ##
###################################################
## Define SYSargs2Pipe class
setClass("SYSargs2Pipe", representation(
WF_steps="list",
track="list",
summaryWF="list")
)
## Methods to return SYSargs2Pipe components
setGeneric(name="WF_steps", def=function(x) standardGeneric("WF_steps"))
setMethod(f="WF_steps", signature="SYSargs2Pipe", definition=function(x) {return(x@WF_steps)})
setGeneric(name="track", def=function(x) standardGeneric("track"))
setMethod(f="track", signature="SYSargs2Pipe", definition=function(x) {return(x@track)})
setGeneric(name="summaryWF", def=function(x) standardGeneric("summaryWF"))
setMethod(f="summaryWF", signature="SYSargs2Pipe", definition=function(x) {return(x@summaryWF)})
## Constructor methods
## List to SYSargs2Pipe
setAs(from="list", to="SYSargs2Pipe",
def=function(from) {
new("SYSargs2Pipe", WF_steps=from$WF_steps,
track=from$track, summaryWF=from$summaryWF)
})
## Coerce back to list: as(SYSargs2Pipe, "list")
setGeneric(name="SYSargs2Pipe_ls", def=function(x) standardGeneric("SYSargs2Pipe_ls"))
setMethod(f="SYSargs2Pipe_ls", signature="SYSargs2Pipe", definition=function(x) {
sysargsset <- list(WF_steps=x@WF_steps, track=x@track, summaryWF=x@summaryWF)
return(sysargsset)
})
## SYSargs2Pipe to list with: as("SYSargs2Pipe", list)
setAs(from="SYSargs2Pipe", to="list",
def=function(from) {
SYSargs2Pipe_ls(from)
})
## Define print behavior for SYSargs2Pipe
setMethod(f="show", signature="SYSargs2Pipe",
definition=function(object) {
cat(paste0("Instance of '", class(object), "':"),
" WF Instances:",
paste0(" ", seq_along(object@WF_steps), ". ", names(object@WF_steps)),
"\n", sep="\n")
warning("This class is deprecated. Use 'SYSargsList' instead. See help('SYSargs2-class) and help('Deprecated'')")
})
## Extend names() method
setMethod(f="names", signature="SYSargs2Pipe",
definition=function(x) {
return(slotNames(x))
})
## Extend length() method
setMethod(f="length", signature="SYSargs2Pipe",
definition=function(x) {
return(length(x@WF_steps))
})
# Behavior of "[" operator for SYSargs2Pipe
setMethod(f="[", signature="SYSargs2Pipe", definition=function(x, i, ..., drop) {
if(is.logical(i)) {
i <- which(i)
}
x@WF_steps <- x@WF_steps[i]
return(x)
})
## Behavior of "[[" operator for SYSargs2Pipe
setMethod(f="[[", signature="SYSargs2Pipe",
definition=function(x, i, ..., drop) {
return(as(x, "list")[[i]])
})
## Behavior of "$" operator for SYSargs2Pipe
setMethod("$", signature="SYSargs2Pipe",
definition=function(x, name) {
.Deprecated(new="SYSargsList")
slot(x, name)
})
## Replacement method for SYSargs2 using "[" operator
setReplaceMethod(f="[[", signature="SYSargs2Pipe", definition=function(x, i, j, value) {
if(i==1) x@WF_steps <- value
if(i=="WF_steps") x@WF_steps <- value
return(x)
})
########################
## Run_track function ##
########################
run_track <- function(WF_ls){
.Deprecated(new="initWF")
if(any(class(WF_ls)!="list" & class(WF_ls[[1]])!="SYSargs2")) stop("Argument 'WF_steps' needs to be assigned a list of object of class 'SYSargs2'")
## Define workflows names in superlist
namesWF <- as.character()
for(i in seq_along(WF_ls)){
cwl.wf <- strsplit(basename(cwlfiles(WF_ls[[i]])$cwl), split="\\.")[[1]]
cwl.wf <- cwl.wf[[-2]]
namesWF[[i]] <- cwl.wf
}
names(WF_ls) <- namesWF
## Check and print if the expected files exists
track_results <- sapply(names(WF_ls), function(x) list(NULL))
summary_results <- sapply(names(WF_ls), function(x) list(NULL))
for(i in seq_along(WF_ls)){
track_results[[i]] <- trackWF(WF_ls[[i]])
summary_results[[i]] <- summary.WF(WF_ls[[i]])
}
## Returns list
WFS <- list(WF_steps=WF_ls, track=track_results, summaryWF=summary_results)
return(as(WFS, "SYSargs2Pipe"))
}
## Usage:
# pipeline <- run_track(WF_ls = c(WF1, WF))
# WF_steps(pipeline)
# track(pipeline)
##################################
## Unexported helper functions ##
##################################
########################
## track_WF function ##
########################
trackWF <- function(WF){
completed <- output(WF)
for(i in seq_along(completed)){
for(j in seq_along(completed[[i]])){
completed[[i]][[j]][[2]] <- file.exists(completed[[i]][[j]])
}
}
status.df <- t(data.frame(completed))
status.df <- cbind(Steps=rep(names(completed[[1]])), SamplesNames=rep(names(completed), each=length(names(completed[[1]]))), status.df)
row.names(status.df) <- NULL
colnames(status.df) <- c("Steps", "SamplesNames", "Location", "Check")
return(status.df)
}
########################
## Summary function ##
########################
summary.WF <- function(WF, show=TRUE){
outputList <- as.character()
for(i in seq_along(output(WF))){
for(j in seq_along(output(WF)[[i]])){
outputList <- c(outputList, output(WF)[[i]][[j]])
}
}
output_completed <- as.character()
for(i in seq_along(outputList)){
output_completed[i] <- file.exists(outputList[i])
}
names(output_completed) <- outputList
output_summary <- list(Done=paste0("Existing expected outputs files: ", sum(as.logical(output_completed))),
NotRun=paste0("Missing expected outputs files: ", sum(!as.logical(output_completed))))
if(show==TRUE){
notrun <- as.character()
if(any(output_completed==FALSE)){
for(i in which(output_completed==FALSE)){
notrun <- c(notrun, names(output_completed[i]))
}
}
output_summary$NotRun[2] <- list(notrun)
names(output_summary$NotRun) <- c("Summary", "ListFiles")
return(output_summary)
}
return(output_summary)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.