#################################################################
## Functions to construct SYSargs2 objects and other utilities ##
################################################################
###################
## Load Workflow ##
###################
loadWorkflow <- function(targets = NULL, wf_file, input_file, dir_path = "param/cwl", id = "SampleName") {
if (is.null(dir_path)) {
dir_path <- ""
cwlfiles <- list(
cwl = wf_file,
yml = input_file,
dir_path = NA
)
} else if (!is.null(dir_path)) {
cwlfiles <- list(
cwl = wf_file,
yml = input_file,
dir_path = normalizePath(dir_path)
)
} else {
stopifnot(is.character(dir_path))
}
if (any(inherits(wf_file, "list"))) {
wf <- wf_file
} else {
if (!file.exists(file.path(dir_path, wf_file)) == TRUE) stop("Provide valid '.cwl' file. Check the file PATH.")
wf <- yaml::read_yaml(file.path(dir_path, wf_file))
}
if (any(inherits(input_file, "list"))) {
input <- input_file
} else {
if (!file.exists(file.path(dir_path, input_file)) == TRUE) stop("Provide valid 'files.'.yml' file. Check the file PATH.")
input <- yaml::read_yaml(file.path(dir_path, input_file))
}
modules <- input$ModulesToLoad
if (is.null(modules)) modules <- list()
inputvars <- list()
if (tolower(wf$class) == "workflow") {
steps <- names(wf$steps)
cwlfiles$steps <- steps
cltpaths <- sapply(seq_along(steps), function(x) file.path(dir_path, wf$steps[[steps[x]]]$run))
names(cltpaths) <- strsplit(basename(cltpaths), ".cwl")
cwlfiles$cltpaths <- cltpaths
cltlist <- sapply(cltpaths, function(x) yaml::read_yaml(file.path(x)), simplify = FALSE)
names(cltlist) <- sapply(seq_along(steps), function(x) wf$steps[[steps[x]]]$run)
cmdlist <- sapply(names(cltlist), function(x) list(NULL))
myinput <- sapply(names(cltlist), function(x) list(NULL))
myoutput <- sapply(names(cltlist), function(x) list(NULL))
cwlfiles$output_names <- sapply(names(cltlist), function(x) names(cltlist[[x]]$outputs))
WF <- list(
modules = modules, wf = wf, clt = cltlist, yamlinput = input, cmdlist = cmdlist,
input = myinput, output = myoutput, files = cwlfiles, inputvars = inputvars,
cmdToCwl = list(), status = list(), internal_outfiles = list()
)
} else if (tolower(wf$class) == "commandlinetool") {
cltlist <- list(wf)
names(cltlist) <- basename(wf_file)
cmdlist <- sapply(names(cltlist), function(x) list(NULL))
myinput <- sapply(names(cltlist), function(x) list(NULL))
myoutput <- sapply(names(cltlist), function(x) list(NULL))
cwlfiles$steps <- strsplit(basename(wf_file), ".cwl")[[1]]
cwlfiles$output_names <- names(cltlist[[1]]$outputs)
WF <- list(
modules = modules, wf = list(), clt = cltlist, yamlinput = input, cmdlist = cmdlist,
input = myinput, output = myoutput, files = cwlfiles, inputvars = inputvars,
cmdToCwl = list(), status = list(), internal_outfiles = list()
)
} else {
stop("Class slot in '<wf_file>.cwl' needs to be 'Workflow' or 'CommandLineTool'.")
}
if (!is.null(targets)) {
if (inherits(targets, "SummarizedExperiment")) {
mytargets <- as.data.frame(SummarizedExperiment::colData(targets))
mytargets <- targets.as.list(mytargets, id)
targetsheader <- S4Vectors::metadata(targets)
WF$files["id"] <- id
WF <- c(list(targets = mytargets, targetsheader = targetsheader), WF)
} else {
if (!file.exists(file.path(targets)) == TRUE) stop("Provide valid 'targets' file. Check the file PATH.")
ext <- strsplit(basename(targets), split = "\\.")[[1]]
ext <- ext[[-1]]
if ("txt" %in% ext) {
mytargets <- read.delim(normalizePath(file.path(targets)), comment.char = "#")
mytargets <- targets.as.list(mytargets, id)
} else if (any(c("yml", "yaml") %in% ext)) {
mytargets <- yaml::read_yaml(targets)
}
targetsheader <- readLines(normalizePath(file.path(targets)))
targetsheader <- targetsheader[grepl("^#", targetsheader)]
WF$files["targets"] <- file.path(targets)
WF$files["id"] <- id
WF <- c(list(targets = mytargets, targetsheader = list(targetsheader = targetsheader)), WF)
}
} else {
WF$files["targets"] <- NA
WF$files["id"] <- id
WF <- c(list(targets = data.frame(), targetsheader = list()), WF)
}
return(as(WF, "SYSargs2"))
}
## Wrapper for loadWorkflow: Short and consistent name for the function
loadWF <- loadWorkflow
## Usage:
# targetspath <- system.file("extdata", "targets.txt", package="systemPipeR")
# dir_path <- system.file("extdata/cwl/hisat2", package="systemPipeR")
# WF <- loadWF(targets=targetspath, wf_file="hisat2-se/hisat2-mapping-se.cwl",
# input_file="hisat2-se/hisat2-mapping-se.yml", dir_path=dir_path)
###############################################
## Render WF for all samples in targets slot ##
###############################################
renderWF <- function(WF, inputvars = NULL) {
if (!is.null(inputvars)) {
.checkInputVars(WF, inputvars)
}
if (any(length(cmdlist(WF)[[1]]) != 0)) stop("Argument 'WF' needs to be assigned an object of class 'SYSargs2' and an object created by the 'loadWorkflow' function")
ids <- names(targets(WF))
if (length(ids) == 0) ids <- "defaultid"
bucket <- sapply(ids, function(x) "", simplify = FALSE)
bucketlist <- list(cmd = bucket, input = bucket, output = bucket)
for (i in ids) {
tmplist <- .renderWFsingle(WF = WF, id = i, inputvars = inputvars)
bucketlist[["cmd"]][[i]] <- tmplist[["cmd"]]
bucketlist[["input"]][[i]] <- tmplist[["input"]]
bucketlist[["output"]][[i]] <- tmplist[["output"]]
}
WF <- as(WF, "list")
WF$cmdlist <- bucketlist$cmd
WF$input <- bucketlist$input
WF$output <- bucketlist$output
WF$internal_outfiles <- bucketlist$output
WF$inputvars <- tmplist$inputvars
WF <- as(WF, "SYSargs2")
WF[["status"]] <- .statusPending(WF)
return(WF)
}
## Usage:
# WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
###############################################################
## Update WF container for all samples in targets slot ##
###############################################################
updateWF <- function(WF, write.yaml = FALSE, name.yaml = "default", new_targets = NULL,
new_targetsheader = NULL, inputvars = NULL, silent = FALSE) {
if (!inherits(WF, "SYSargs2")) stop("WF needs to be object of class 'SYSargs2'.")
WF <- sysargs2(WF)
if (is.null(inputvars)) {
WF$inputvars <- inputvars <- WF$inputvars
} else {
WF$inputvars <- inputvars
}
if (length(WF$cmdToCwl) > 1) {
cwlVersion <- WF$clt[[1]]$cwlVersion
class <- WF$clt[[1]]$class
module_load <- WF$yamlinput$ModulesToLoad[[1]]
results_path <- WF$yamlinput$results_path$path
WF$clt <- write.clt(WF$cmdToCwl, cwlVersion = cwlVersion, class = class, writeout = FALSE, silent = silent)
WF$yamlinput <- write.yml(WF$cmdToCwl, results_path = results_path, module_load = module_load, writeout = FALSE, silent = silent)
} else if (length(WF$cmdToCwl) == 0) {
## targets
if (!is.null(new_targets)) {
WF$targets <- new_targets
} else {
WF$targets <- WF$targets
}
## targetsheader
if (!is.null(new_targetsheader)) {
WF$targetsheader <- new_targetsheader
} else {
WF$targetsheader <- WF$targetsheader
}
WF$yamlinput <- WF$yamlinput
WF$clt <- WF$clt
results_path <- WF$yamlinput$results_path$path
}
WF$input <- WF$output <- WF$cmdlist <- sapply(names(WF$clt), function(x) list(NULL))
## write the new yaml
if (write.yaml == TRUE) {
if (name.yaml == "default") {
path <- .getPath(WF$files$yml)
name <- paste0(
.getFileName(WF$files$yml),
format(Sys.time(), "%b%d%Y_%H%M"), ".yml"
)
name.yaml <- file.path(path, name)
} else {
name.yaml <- name.yaml
}
yaml::write_yaml(WF$yamlinput, name.yaml)
if (silent != TRUE) {
cat(
"\t", "Written content of 'yamlinput(x)' to file:", "\n",
name.yaml, "\n"
)
}
WF$files$yml <- name.yaml
}
WF <- as(WF, "SYSargs2")
WF <- renderWF(WF, inputvars = inputvars)
## Update status slot
WF[["status"]] <- .statusPending(WF)
return(WF)
}
## Usage:
# yamlinput(WF, "thread") <- 6L
# WF <- updateWF(WF, write.yaml = TRUE)
# cmdlist(WF)[1]
# yamlinput(WF)$thread
##################################################
## check outfiles: if the expected files exist ##
##################################################
check.output <- function(sysargs, type = "data.frame") {
## Check the class and slot
if (inherits(sysargs, c("SYSargs2"))) {
return(.check.output.sysargs2(sysargs, type = type))
} else if (inherits(sysargs, c("SYSargsList"))) {
steps <- sapply(names(stepsWF(sysargs)), function(x) list(NULL))
sysargs_names <- unlist(lapply(stepsWF(sysargs), function(x) which(inherits(x, c("SYSargs2")))))
sysargs <- sysargs[names(sysargs_names)]
for (i in names(sysargs_names)) {
step.dir <- sysargs$runInfo$runOption[[i]]$directory
steps[[i]] <- .check.output.sysargs2(stepsWF(sysargs)[[i]],
type = type,
step.name = i, step.dir = step.dir
)
}
return(steps)
} else {
stop("sysargs needs to be object of class 'SYSargs2' or 'SYSargsList'.")
}
}
## Usage:
# check.output(WF)
## check.outfiles alias
check.outfiles <- check.output
##################################
## Unexported helper functions ##
##################################
######################
## .checkInputVars ##
######################
.checkInputVars <- function(WF, inputvars) {
inputvars <- unlist(inputvars)
if (length(WF$targets) != 0) {
targets <- colnames(targets.as.df(WF$targets))
if (!all(names(inputvars) %in% targets)) {
stop("names of the inputvars are not matching with targets colnames.", "\n",
"Names available are:", "\n",
paste(targets, collapse = " | "),
call. = FALSE
)
}
}
if (any(duplicated(names(inputvars)))) warning("names of the inputvars are duplicated. Please continue with attention! This object can not be resumed in a later occasion.")
input <- unlist(WF$yamlinput)
if (!all(inputvars %in% input)) {
stop(
"inputvars elements are not matching with input variables. ", "\n",
"To check variable names, please see:", "\n",
file.path(WF$files$dir_path, WF$files$yml),
call. = FALSE
)
}
}
## Usage:
# targetspath <- system.file("extdata", "targets.txt", package="systemPipeR")
# dir_path <- system.file("extdata/cwl/hisat2", package="systemPipeR")
# WF <- loadWF(targets=targetspath, wf_file="hisat2-se/hisat2-mapping-se.cwl",
# input_file="hisat2-se/hisat2-mapping-se.yml", dir_path=dir_path)
# WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName2_")) # will return error
##############################################
## Resolve CWL variable-based path instance ##
##############################################
pathInstance <- function(pathvar, input, altinput) {
pathvar <- unlist(strsplit(pathvar[[1]], "/"))
extension <- gsub("^.*\\)", "", pathvar)
if (grepl(".\\$\\(", pathvar[length(pathvar)])) {
ext <- sub("\\)", "", sub(".*[$\\(]", "", pathvar[length(pathvar)]))
ext <- strsplit(ext, "\\.")[[1]]
ext <- ext[length(ext)]
ext <- input[[ext]]
extension <- c("", paste0(".", ext))
pathvar <- sub(".\\$\\(.*", "", pathvar)
}
pathvar <- gsub("(^.*\\)).*", "\\1", pathvar)
pathvar <- gsub("\\$|\\(|\\)", "", pathvar)
pathvarlist <- strsplit(pathvar, "\\.")
filenametype <- lapply(seq_along(pathvarlist), function(x) pathvarlist[[x]][pathvarlist[[x]] %in% c("basename", "nameroot", "path")])
filenametype <- unlist(lapply(filenametype, function(x) ifelse(length(x) == 0, NA, x)))
filenametype <- ifelse(is.na(filenametype), "NA", filenametype)
myvalue_list <- sapply((pathvarlist), function(x) list(NULL), simplify = FALSE)
for (i in seq_along(pathvarlist)) {
myvalue <- NULL
for (j in seq_along(pathvarlist[[i]])) {
myvalue_input <- input[[pathvarlist[[i]][[j]]]]
if ("class" %in% names(myvalue_input)) myvalue_input$class <- NULL
if (!is.null(myvalue_input)) {
myvalue <- c(myvalue, list(myvalue_input))
}
}
myvalue_list[i] <- myvalue
}
## Use altinput if no values were assigned by input
if (all(sapply(myvalue_list, length) == 0)) {
myvalue_list <- sapply(seq_along(pathvarlist), function(x) altinput[[pathvarlist[[x]][[2]]]]$default, simplify = FALSE)
}
enforce_list <- length(myvalue_list) == 1
if (any(enforce_list)) for (i in which(enforce_list)) myvalue_list[[i]] <- list(class = NULL, path = myvalue_list[[i]])
mypathvec <- list()
for (i in seq_along(myvalue_list)) {
for (j in seq_along(myvalue_list[[i]])) {
vec_input <- myvalue_list[[i]][[j]]
if (any(names(myvalue_list[[i]][[j]]) %in% c("path"))) {
vec_input <- myvalue_list[[i]][[j]]$path
}
if (!is.null(vec_input)) {
# # vec <- sapply(seq_along(filenametype), function(x) pathUtils(vec_input[x], type = filenametype[x]))
vec <- sapply(seq_along(vec_input), function(x) pathUtils(vec_input[x], type = filenametype[i]))
mypathvec <- c(mypathvec, list(vec))
}
}
mypathvec <- lapply(mypathvec, function(x) x[!is.na(x)])
}
## Assign '.' to 'runtime.outdir'
pwdindex <- any(pathvar %in% c("runtime.outdir"))
nullindex <- any(sapply(mypathvec, length) == 0)
if (any(pwdindex & nullindex)) {
mypathvec[pwdindex & nullindex] <- "."
}
mypathvec <- sapply(seq_along(mypathvec), function(x) {
if (is.null(mypathvec[[x]])) {
mypathvec[[x]] <- ""
} else {
mypathvec[[x]] <- mypathvec[[x]]
}
})
## Generate output
if (length(mypathvec) == 1) {
extension <- extension[!is.na(extension)]
if (all("" %in% extension & length(extension) == 1)) {
mypath <- paste(mypathvec)
} else {
extension <- extension[extension != ""]
mypath <- paste(mypathvec, extension, sep = "/")
}
} else if (any(is.na(extension))) {
mypath <- sapply(seq_along(mypathvec), function(x) paste0(mypathvec[x]))
} else {
mypath <- sapply(seq_along(mypathvec), function(x) paste0(mypathvec[x], extension[x]))
if (length(mypathvec) < length(extension)) {
extension <- extension[extension != ""]
mypath <- c(mypath, extension)
}
}
returnpath <- file.path(paste(mypath, collapse = "/"))
return(returnpath)
}
# pathvar <- c("$(inputs.results_path.basename)","$(inputs.SampleName).$(inputs.ext)")
# pathvar <- c("$(inputs.results_path.basename)","$(inputs.SampleName).sam")
#################################################################
## Helper function to construct/manipulate path and file names ##
#################################################################
pathUtils <- function(x, type, dropdir = TRUE) {
if (type == "dirname") {
mypath <- dirname(x)
} else if (type == "basename") {
mypath <- basename(x)
} else if (type == "nameroot") {
mypath <- gsub("(^.*)\\..*$", "\\1", basename(x))
} else if (type == "path") {
mypath <- x
} else if (type == "NA") {
mypath <- x
} else {
mypath <- x # Return unchanged input if 'type' is not one the above three values
}
## Construct output
if (dropdir == TRUE) {
return(mypath)
} else if (dropdir == FALSE & type != "dirname") {
return(file.path(dirname(x), mypath))
} else {
stop("Invalid path request, e.g. drop and appenrequest, e.g. drop and append dir.")
}
}
#############################################
## Assemble Commandline Components in List ##
#############################################
assembleCommandlineList <- function(clt = WF$clt[[1]]) {
## global functions or variables
WF <- NULL
## Base command and arguments
basecommand <- clt$baseCommand
arguments <- clt$arguments
## Special case for Rscript -- get the absolute path to the Rscript command
if ("Rscript" %in% basecommand) {
basecommand <- file.path(R.home("bin"), "Rscript")
}
if (!is.null(arguments)) {
for (i in seq_along(arguments)) arguments[[i]][["position"]] <- ""
}
## Inputs
inputargs <- renderInputs(x = clt$inputs, returntags = list(inputBinding = c("prefix"), type = "any"))
## Outputs
outputargs <- renderOutputs(x = clt$outputs, stdout = clt$stdout, returntags = list(outputBinding = c("prefix", "glob"), type = "any"))
## Assemble command-line
arglist <- list(baseCommand = basecommand, arguments = arguments, inputs = inputargs, outputs = outputargs)
return(arglist)
}
########################################
## Populate Variables in Command-line ##
########################################
populateCommandline <- function(WF, cltid, exclude = exclude, mmp) {
## Populate inputs
cmdlist <- assembleCommandlineList(nameUnnamed(WF$clt[[cltid]]))
yamlinput <- list(inputs = nameUnnamed(WF$yamlinput))
cmdnames <- nestedNames(cmdlist)
yamlnames <- nestedNames(yamlinput)
## Create yamlnames list without 'unnamed' tags if present
yamlnamestmp <- sapply(names(yamlnames), function(x) yamlnames[[x]][!grepl("_\\d{1,}_", yamlnames[[x]])], simplify = FALSE)
matchmap <- findBestMatch(x = cmdnames, y = yamlnamestmp, mmp, minmatch = 2)
excludepos <- sapply(names(cmdnames), function(x) tail(cmdnames[[x]], 1) %in% exclude)
removetype <- sapply(cmdnames, function(x) tail(x %in% "type", 1))
for (i in seq_along(cmdnames)) {
inputslice <- yamlnames[[matchmap[[i]]]]
## Inject yamlinput in corresponding cmdlist components
if ((length(inputslice) > 0) & (excludepos[[i]]) != TRUE) {
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value = subsetListbyName(yamlinput, inputslice))
}
## Eliminate type values (e.g. boolean/string) that are not associated with yamlinput
if ((length(inputslice) == 0) & (excludepos[[i]] != TRUE & removetype[[i]] == TRUE)) {
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value = "")
}
}
## Populate variable path instances
pos <- sapply(names(cmdnames), function(x) grepl("^\\$\\(", subsetListbyName(cmdlist, cmdnames[[x]])))
if (any(pos)) {
for (i in which(pos)) {
mypath <- pathInstance(pathvar = subsetListbyName(cmdlist, cmdnames[[i]]), input = WF$yamlinput, altinput = nameUnnamed(WF$clt[[cltid]])$inputs)
cmdlist <- injectListbyName(cmdlist, cmdnames[[i]], value = mypath)
}
}
return(cmdlist)
}
####################################################################################
## Inject command-line lists from populateCommandline into cmdlist slot WF object ##
####################################################################################
injectCommandlinelist <- function(WF) {
if (class(WF) == "SYSargs2") {
WF <- as(WF, "list")
} else {
stop("WF needs to be object of class 'SYSargs2'.")
}
cmdsteps <- names(WF$cmdlist)
mmp <- list(class = c("class", "type"), path = c("path", "type"))
exclude <- c("baseCommand", "prefix")
for (i in seq_along(cmdsteps)) {
WF$cmdlist[[i]] <- populateCommandline(WF, cltid = cmdsteps[i], exclude = exclude, mmp)
}
## Update inputs that depend on output of one of the previous steps
.connectInout <- function(WF) {
steps <- WF$wf$steps
stepnames <- sapply(names(steps), function(x) steps[[x]]$run)
connectedlist <- sapply(seq_along(steps), function(x) which(grepl("/", steps[[x]]$`in`)))
names(connectedlist) <- names(steps)
connectedlist <- connectedlist[sapply(connectedlist, length) > 0]
if (length(connectedlist) > 0) {
for (j in seq_along(connectedlist)) {
connected <- connectedlist[j]
## Inner loop to support multiple connections
for (i in seq_along(connected[[1]])) {
## Generate output string from corresponding upstream step
connectednames <- strsplit(unlist(steps[[names(connected)]])[connected[[1]][i]], "/")
connectedstep <- stepnames[[connectednames[[1]][1]]]
connectedoutput <- WF$cmdlist[[connectedstep]]$outputs
connectedoutput <- unlist(connectedoutput[connectednames[[1]][[2]]])
## Inject output string into input of corresponding downstream step
termname <- as.character(sapply(nestedNames(steps[[names(connected)]]), tail, 1)[connected[[1]][i]])
namepath <- c("cmdlist", stepnames[[names(connected)]], "inputs", termname)
WF <- injectListbyName(l = WF, namepath, value = as.character(connectedoutput), type = "value")
}
}
}
names(WF$cmdlist) <- WF$files$steps
names(WF$clt) <- WF$files$steps
return(WF)
}
WF <- .connectInout(WF)
return(as(WF, "SYSargs2"))
}
################################
## Render Commandline Strings ##
################################
renderCommandline <- function(x, dropoutput = TRUE, redirect = ">") {
if (class(x) == "SYSargs2") {
x <- as(x, "list")
} else {
stop("x needs to be object of class 'SYSargs2'.")
}
## Fct to render single command-line string
.renderCommandline <- function(x = x, redirect = redirect) {
cmdnames <- nestedNames(x)
## Check for stdout instance, if present prepend value of redirect
stdoutpos <- sapply(cmdnames, function(y) {
tmp <- y %in% c("outputs", "stdout")
tmp[1] == TRUE & tail(tmp, 1) == TRUE
})
if (any(stdoutpos)) {
cmd <- x
cmdnamessub <- cmdnames[stdoutpos]
for (i in seq_along(cmdnamessub)) {
stdoutstr <- unlist(subsetListbyName(l = cmd, cmdnamessub[[i]]))
stdoutstr <- paste(redirect, stdoutstr, sep = " ")
cmd <- injectListbyName(l = cmd, cmdnamessub[[i]], value = stdoutstr, type = "value")
}
} else {
if (dropoutput == TRUE) x <- x[names(x) != "outputs"]
cmd <- x
}
## Collapse list to single string
cmd <- paste(unlist(cmd), collapse = " ")
return(cmd)
}
## Check here for WF class instead of names once implemented
if (all(names(x) == c("targets", "targetsheader", "modules", "wf", "clt", "yamlinput", "cmdlist", "input", "output", "files", "inputvars", "cmdToCwl", "status", "internal_outfiles"))) {
cmdstring <- sapply(names(x$cmdlist),
function(i) .renderCommandline(x = x$cmdlist[[i]], redirect = redirect),
simplify = FALSE
)
} else {
cmdstring <- .renderCommandline(x = x, redirect = redirect)
}
return(cmdstring)
}
###########################################################
## Helper Function to render inputs of single clt object ##
###########################################################
renderInputs <- function(x = WF$clt[[1]]$inputs, returntags = list(inputBinding = c("prefix"), type = "any")) {
## global functions or variables
WF <- NULL
inputnames <- names(x)
## Remove entries where inputBinding is missing since those parameters do not appear on the command-line
inputnames <- inputnames[!sapply(inputnames, function(i) is.null(x[[i]]$inputBinding))]
x <- x[inputnames]
inputlist <- sapply(inputnames, function(i) {
tmp <- c(
x[[i]][[names(returntags["inputBinding"])]][returntags$inputBinding],
x[[i]][names(returntags["type"])]
)
tmp <- tmp[!sapply(tmp, length) < 1]
tmp
}, simplify = FALSE)
inputposition <- sapply(inputnames, function(i) x[[i]]$inputBinding$position)
if (class(inputposition) == "list") inputposition <- seq_along(inputposition)
inputlist <- inputlist[order(inputposition)]
return(inputlist)
}
############################################################
## Helper Function to render outputs of single clt object ##
############################################################
renderOutputs <- function(x = WF$clt[[1]]$outputs, stdout = WF$clt[[1]]$stdout, returntags = list(outputBinding = c("prefix", "glob"), type = "any")) {
## global functions or variables
WF <- NULL
outputnames <- names(x)
outputlist <- sapply(outputnames, function(i) {
tmp <- c(x[[i]][[names(returntags["outputBinding"])]][returntags$outputBinding[1]],
x[[i]][[names(returntags["outputBinding"])]][returntags$outputBinding[2]],
type = x[[i]][[names(returntags["type"])]]
)
tmp <- tmp[!sapply(tmp, length) < 1]
tmp
}, simplify = FALSE)
## Remove entries 'type: Directory' since they are only relevant for CWL internals
types <- sapply(seq_along(outputlist), function(i) outputlist[[i]][["type"]])
outputlist <- outputlist[tolower(types) != "directory"]
for (i in seq_along(outputlist)) {
check <- outputlist[[i]][["type"]]
if (is.null(check)) check <- "absent"
if (check == "stdout") {
outputlist[[i]] <- list(stdout = stdout)
}
}
return(outputlist)
}
##################
## Name unnamed ##
##################
## Yaml format may have unnamed components; not having names creates
## problems in some cases with name-based subsetting of lists; thus the
## following function assigns names to unnamed components. A tagging syntax
## can be used to ignore them in subsetting routines if necessary.
nameUnnamed <- function(l) {
for (a in seq_along(l)) {
for (b in seq_along(l[[a]])) {
for (c in seq_along(l[[a]][[b]])) {
for (d in seq_along(l[[a]][[b]][[c]])) {
if (is.null(names(l[[a]][[b]][[c]][[d]])) & class(l[[a]][[b]][[c]][[d]]) == "list") names(l[[a]][[b]][[c]][[d]]) <- paste0("_", seq_along(l[[a]][[b]][[c]][[d]]), "_")
if (is.null(names(l[[a]][[b]][[c]])) & class(l[[a]][[b]][[c]]) == "list") names(l[[a]][[b]][[c]]) <- paste0("_", seq_along(l[[a]][[b]][[c]]), "_")
if (is.null(names(l[[a]][[b]])) & class(l[[a]][[b]]) == "list") names(l[[a]][[b]]) <- paste0("_", seq_along(l[[a]][[b]]), "_")
if (is.null(names(l[[a]])) & class(l[[a]]) == "list") names(l[[a]]) <- paste0("_", seq_along(l[[a]]), "_")
}
}
}
}
return(l)
}
###########################################################################
## Return names of nested lists (with up to 5 nesting levels) as vectors ##
###########################################################################
nestedNames <- function(l, sep = "_") {
nestednames <- lapply(seq_along(l), function(a) {
lapply(seq_along(l[[a]]), function(b) {
lapply(seq_along(l[[a]][[b]]), function(c) {
lapply(seq_along(l[[a]][[b]][[c]]), function(d) {
lapply(seq_along(l[[a]][[b]][[c]][[d]]), function(e) {
c(
names(l[a]),
names(l[[a]][b]),
names(l[[a]][[b]][c]),
names(l[[a]][[b]][[c]][d]),
names(l[[a]][[b]][[c]][[d]][e])
)
})
})
})
})
})
while (any(sapply(nestednames, is.list))) nestednames <- unlist(nestednames, recursive = FALSE)
names(nestednames) <- sapply(nestednames, paste, collapse = sep)
return(nestednames)
}
########################################################
## Find best matches among name vectors for two lists ##
########################################################
findBestMatch <- function(x, y, mmp, minmatch = 2) {
matchindex <- setNames(rep(NA, length(x)), names(x))
## Perfect matches
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
ident <- all(y[[j]] %in% x[[i]])
lengthcheck <- length(y[[j]]) == length(x[[i]])
ident & lengthcheck
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i, ]), 1), simplify = FALSE)
for (i in seq_along(matchindex)) {
if (is.na(matchindex[i]) == TRUE) matchindex[i] <- matchpos[[i]][1]
}
## Matches with tag specific terminal mismatches
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
all(termMMatch(x = x[[i]], y = y[[j]], mmp = mmp, minmatch = minmatch, returntype = "logical"))
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i, ]), 1), simplify = FALSE)
for (i in seq_along(matchpos)) {
if (is.na(matchindex[i]) == TRUE) matchindex[i] <- matchpos[[i]][1]
}
## Matches with terminal (max 1) length differences
matchpos <- sapply(names(y), function(j) {
sapply(names(x), function(i) {
ident <- all(y[[j]] %in% x[[i]][1:length(y[[j]])])
lengthcheck <- length(y[[j]]) >= minmatch
ident & lengthcheck
})
})
## Note, if there are several input matches to one command-line slot then only the last one is used (see tail)
matchpos <- sapply(rownames(matchpos), function(i) tail(which(matchpos[i, ]), 1), simplify = FALSE)
for (i in seq_along(matchpos)) {
if (is.na(matchindex[i]) == TRUE) matchindex[i] <- matchpos[[i]][1]
}
return(matchindex)
}
###############################################
## Inject into List Subsetted by Name Vector ##
###############################################
injectListbyName <- function(l, name_index, value, type = "value") {
## Input validity check for l and name_index
if (all(!c("name", "value") %in% type[1])) stop("Argument type can only be assigned one of: 'name' or 'value'.")
checknames <- nestedNames(l)
checknames <- checknames[!sapply(checknames, length) < length(name_index)]
checknames <- sapply(checknames, function(x) which(name_index == x[seq_along(name_index)]), simplify = FALSE)
checknames <- checknames[sapply(checknames, length) == length(name_index)]
checkindex <- sapply(seq_along(checknames), function(x) all(checknames[[x]] == seq_along(checknames[[x]])))
if (length(checkindex) == 0) checkindex <- FALSE
if (any(checkindex == FALSE)) stop("Invalid 'name_index' lacking consecutive name matches in any list component.")
if (length(name_index) == 1) {
if (type == "name") names(l)[which(names(l) %in% name_index[1])] <- value
if (type == "value") l[[name_index[1]]] <- value
} else if (length(name_index) == 2) {
if (type == "name") names(l[[name_index[1]]])[1] <- value
if (type == "value") l[[name_index[1]]][[name_index[2]]] <- value
} else if (length(name_index) == 3) {
if (type == "name") names(l[[name_index[1]]][[name_index[2]]]) <- value
if (type == "value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]] <- value
} else if (length(name_index) == 4) {
if (type == "name") names(l[[name_index[1]]][[name_index[2]]][[name_index[3]]]) <- value
if (type == "value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]] <- value
} else if (length(name_index) == 5) {
if (type == "name") names(l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]]) <- value
if (type == "value") l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]][[name_index[5]]] <- value
} else {
stop("Nesting level (length of name_index) cannot exceed 5.")
}
return(l)
}
################################
## Subset List by Name vector ##
################################
subsetListbyName <- function(l, name_index) {
## Input validity check for l and name_index
checknames <- nestedNames(l)
checknames <- checknames[!sapply(checknames, length) < length(name_index)]
checknames <- sapply(checknames, function(x) which(name_index == x[seq_along(name_index)]), simplify = FALSE)
checknames <- checknames[sapply(checknames, length) == length(name_index)]
checkindex <- sapply(seq_along(checknames), function(x) all(checknames[[x]] == seq_along(checknames[[x]])))
if (length(checkindex) == 0) checkindex <- FALSE
if (any(checkindex == FALSE)) stop("Invalid 'name_index' lacking consecutive name matches in any list component.")
if (length(name_index) == 1) {
lsub <- l[name_index[1]]
} else if (length(name_index) == 2) {
lsub <- l[[name_index[1]]][name_index[2]]
} else if (length(name_index) == 3) {
lsub <- l[[name_index[1]]][[name_index[2]]][name_index[3]]
} else if (length(name_index) == 4) {
lsub <- l[[name_index[1]]][[name_index[2]]][[name_index[3]]][name_index[4]]
} else if (length(name_index) == 5) {
lsub <- l[[name_index[1]]][[name_index[2]]][[name_index[3]]][[name_index[4]]][name_index[5]]
} else {
stop("Nesting level (length of name_index) cannot exceed 5.")
}
return(lsub)
}
############################################################
## Matching vectors with tag specific terminal mismatches ##
############################################################
termMMatch <- function(x, y, mmp, minmatch = 2, returntype = "values") {
## Input validity checks
if (!all(sapply(mmp, length) == 2)) stop("List components of 'mmp' need to be vectors of length = 2.")
if (!all(sapply(names(mmp), function(x) x %in% mmp[[x]]))) stop("Names of 'mmp' list components need to match one of the entries in the corresponding list.")
## Check for candidate matches
lengthcheck <- length(x) == length(y)
term <- unique(c(tail(x, 1), tail(y, 1)))
l <- length(term) == 2
minmatchcheck <- sum(x[-length(x)] == y[-length(y)]) >= minmatch
index <- which(sapply(names(mmp), function(x) all(term %in% mmp[[x]])))
p <- length(index) > 0
if (lengthcheck & l & p & minmatchcheck) {
for (i in index) {
x[length(x)] <- names(mmp)[i]
y[length(y)] <- names(mmp)[i]
}
if (returntype == "values") {
return(list(x = x, y = y))
} else if (returntype == "logical") {
return(rep(TRUE, length(x)))
} else {
stop("Argument 'returntype' can only be assigned one of: 'values' or 'logical'.")
}
## Return input if there are not matches
} else {
if (returntype == "values") {
return(list(x = x, y = y))
} else if (returntype == "logical") {
x <- x == y[1:length(x)]
x[is.na(x)] <- FALSE
return(x)
} else {
stop("Argument 'returntype' can only be assigned one of: 'values' or 'logical'.")
}
}
}
#########################
## .renderWFsingle ##
#########################
.renderWFsingle <- function(WF, id, inputvars) {
inputvarslist <- sapply(names(inputvars), function(x) "", simplify = FALSE)
if (!length(names(targets(WF))) == 0) (if (any(!names(inputvars) %in% colnames(targets.as.df(WF$targets)))) stop("Please note that the 'inputvars' variables need to be defined in the 'input_file'; as well it needs to match the column names defined in the 'targets' file."))
input <- yamlinput(WF)
for (i in seq_along(inputvars)) {
subvalue <- targets(WF)[[id]][[names(inputvars)[i]]]
if (length(subvalue) != 0) {
input <- rapply(input, function(x) gsub(inputvars[[i]], subvalue, x), how = "replace")
inputvarslist[[i]] <- subvalue
}
}
WF <- as(WF, "list")
WF$yamlinput <- input
WF <- as(WF, "SYSargs2")
WF <- injectCommandlinelist(WF)
## Fix for cases like IDX/tophat/STAR, more than one output file per cmdlist
outfilelist <- sapply(names(cmdlist(WF)), function(x) list(NULL))
for (i in seq_along(outfilelist)) {
for (j in seq_along(cmdlist(WF)[[names(outfilelist[i])]]$output)) {
if ("stdout" %in% names(cmdlist(WF)[[names(outfilelist[i])]]$output[[j]])) {
outfilelist[[i]][j] <- cmdlist(WF)[[names(outfilelist[i])]]$output[[j]]$stdout
} else {
outfilelist[[i]][j] <- cmdlist(WF)[[names(outfilelist[i])]]$output[[j]]$glob
}
}
}
cmdlist <- renderCommandline(WF, redirect = ">")
inputvars <- as.list(inputvars)
return(list(cmd = cmdlist, input = inputvarslist, output = outfilelist, inputvars = inputvars))
}
############################
## .check.output.sysargs2 ##
############################
.check.output.sysargs2 <- function(sysargs, type, step.name, step.dir = FALSE) {
if (type == "data.frame") {
checkfile <- sapply(names(output(sysargs)), function(x) list(NULL))
for (i in seq_along(output(sysargs))) {
checkfile[[i]][["Total"]] <- length(unlist(output(sysargs)[[i]]))
if (step.dir == TRUE) {
newfile <- sum(file.exists(file.path(.getPath(unlist(output(sysargs)[[i]]), full_path = FALSE, warning = FALSE), step.name, basename(unlist(output(sysargs)[[i]])))))
checkfile[[i]][["Existing"]] <- sum(file.exists(unlist(output(sysargs)[[i]])), newfile)
} else if (step.dir == FALSE) {
checkfile[[i]][["Existing"]] <- sum(file.exists(unlist(output(sysargs)[[i]])))
}
checkfile[[i]][["Missing"]] <- length(unlist(output(sysargs)[[i]])) - checkfile[[i]][["Existing"]]
}
checkfile <- data.frame(matrix(unlist(checkfile), nrow = length(checkfile), byrow = TRUE))
checkfile <- data.frame(cbind(
Targets = names(output(sysargs)), Total_Files = as.numeric(checkfile$X1),
Existing_Files = checkfile$X2, Missing_Files = checkfile$X3
), stringsAsFactors = FALSE)
checkfile[, 2:4] <- sapply(checkfile[, 2:4], as.numeric)
return(checkfile)
} else if (type == "list") {
checkfile <- sapply(names(output(sysargs)), function(x) list(NULL))
for (i in seq_along(output(sysargs))) {
checkfile[[i]] <- all(file.exists(unlist(output(sysargs)[[i]])))
}
return(unlist(checkfile))
}
}
###############################################################
## Subsetting the input and output slots by name or position ##
###############################################################
subsetWF <- function(args, slot, subset = NULL, index = NULL, delete = FALSE) {
## Check the class and slot
if (!class(args) == "SYSargs2") stop("args needs to be object of class 'SYSargs2'.")
if (all(!c("input", "output", "step") %in% slot)) stop("Argument slot can only be assigned one of: 'input' or 'output' or 'step'.")
## slot input
if (slot %in% "input") {
## Check the subset
if (all(!is.null(subset) & is.character(subset) & !any(names(inputvars(args)) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following values in the subset argument: ",
paste(names(inputvars(args)), collapse = ", "),
" OR the corresponding position OR NULL"
)
}
if (all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(inputvars(args))) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following position in the subset argument: ",
paste(seq_along(names(inputvars(args))), collapse = ", "), " OR the names OR NULL"
)
}
subset_input <- input(args)
subset_sample <- sapply(names(subset_input), function(x) list(NULL))
if (!is.null(subset)) {
for (i in seq_along(names(subset_input))) {
subset_sample[[i]] <- subset_input[[i]][[subset]]
}
} else {
subset_sample <- subset_input
}
}
## slot output
if (slot %in% "output") {
## Check the subset
if (all(!is.null(subset) & is.character(subset) & !any(names(args$clt) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following values in the subset argument:",
paste(names(args$clt), collapse = ", "),
" OR the corresponding position OR NULL"
)
}
if (all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(args$clt)) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following position in the subset argument: ",
paste(seq_along(names(args$clt)), collapse = ", "),
" OR the names OR NULL"
)
}
if (!is.null(subset)) {
if (!any(seq_along(output(args)[[1]][[subset]]) %in% index)) {
stop(
"For the 'index' argument, can only be assigned one of the following position: ",
paste(seq_along(output(args)[[1]][[subset]]), collapse = ", ")
)
}
}
subset_output <- output(args)
subset_sample <- as.character()
if (all(!is.null(subset) & !is.null(index))) {
for (i in seq_along(names(subset_output))) {
# subset_sample[[i]] <- subset_output[[i]][[subset]]
subset_sample <- c(subset_sample, subset_output[[i]][[subset]][index])
}
} else {
subset_sample <- subset_output
}
names(subset_sample) <- rep(names(subset_output), each = length(subset_sample[1]))
}
## slot step
if (slot %in% "step") {
## Check the subset
if (all(!is.null(subset) & is.character(subset) & !any(names(args$clt) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following values in the subset argument: ",
paste(names(args$clt), collapse = ", "),
" OR the corresponding position OR NULL"
)
}
if (all(!is.null(subset) & is.numeric(subset) & !any(seq_along(names(args$clt)) %in% subset))) {
stop(
"For the ", slot, " slot, can only be assigned one of the following position in the subset argument: ",
paste(seq_along(names(args$clt)), collapse = ", "),
" OR the names OR NULL"
)
}
subset_step <- cmdlist(args)
subset_sample <- sapply(names(subset_step), function(x) list(NULL))
if (!is.null(subset)) {
for (i in seq_along(names(subset_sample))) {
subset_sample[[i]] <- subset_step[[i]][[subset]]
}
} else {
subset_sample <- subset_step
}
}
## IF subset=NULL returns a list
if (!is.null(subset)) {
names <- names(subset_sample)
subset_sample <- as.character(subset_sample)
names(subset_sample) <- names
}
## delete == TRUE
if (delete == TRUE) {
## delete option only works if the subset is define
if (!is.character(subset_sample)) {
stop("Please define the 'subset' to be deleted in the subset argument")
}
if (all(file.exists(subset_sample))) {
del <- file.remove(subset_sample)
cat("The following files has been deleted:", paste(subset_sample, collapse = ", "), "\n")
} else if (all(!file.exists(subset_sample))) {
cat("The subset cannot be deleted: no such file ", "\n")
}
}
return(subset_sample)
}
## Usage:
# subsetWF(WF, slot="input", subset='FileName')
# subsetWF(WF, slot="step", subset=1)
# subsetWF(WF, slot="output", subset=1, index=1)
# subsetWF(WF, slot="output", subset=1, index=1, delete=TRUE) ## in order to delete the subset files list
#########################################################
## Update the output location after run runCommandline ##
#########################################################
output_update <- function(args, dir = FALSE, dir.name = NULL, replace = FALSE, extension = NULL, make_bam = FALSE, del_sam = TRUE) {
## Validation for 'args'
if (any(!inherits(args, "SYSargs") & !inherits(args, "SYSargs2"))) stop("Argument 'args' needs to be assigned an object of class 'SYSargs' OR 'SYSargs2'")
## If the argument 'replace' is TRUE, it is required to specify the 'extension' argument
if (replace != FALSE) {
if (is.null(extension)) {
stop("argument 'extension' missing. The argument can only be assigned 'NULL' when no replacement is required. The argument should be assigned as a character vector with the current extension and the new one.")
}
}
if (replace == TRUE) {
args <- as(args, "list")
for (i in seq_along(args$output)) {
for (j in seq_along(args$output[[i]])) {
for (k in seq_along(args$output[[i]][[j]])) {
name <- basename(args$output[[i]][[j]][k])
dirRep <- sub("/([^/]*)$", "", args$output[[i]][[j]][k])
if (grepl(extension[1], name)) {
sam <- .getExt(name)
args$output[[i]][[j]][k] <- file.path(dirRep, paste0(.getFileName(name), extension[2]))
} else {
# message if the extension are not matching, return the same object
args$output[[i]][[j]][k] <- args$output[[i]][[j]][k]
}
if (make_bam == TRUE) {
if (grepl("bam", args$output[[i]][[j]][k])) {
args$output[[i]][[j]][length(args$output[[i]][[j]]) + 1] <- paste0(gsub("\\.bam$", "", args$output[[i]][[j]][k]), ".bam.bai")
}
if (del_sam == FALSE) {
args$output[[i]][[j]] <- append(args$output[[i]][[j]], args$internal_outfiles[[i]][[j]][k], after = 0)
}
}
}
}
}
# }
args <- as(args, "SYSargs2")
}
if (dir == TRUE) {
args <- as(args, "list")
## Results path
logdir <- file.path(args$yamlinput$results_path$path)
## Workflow Name: Detail: if the folder was not created during 'runCommandline,' it will return a warning message pointing 'no such directory'
if (is.null(dir.name)) {
cwl.wf <- file.path(logdir, .getFileName(args$files$cwl))
} else if (!is.null(dir.name)) {
cwl.wf <- file.path(logdir, dir.name)
}
## New path
for (i in seq_along(args$output)) {
for (j in seq_along(args$output[[i]])) {
for (k in seq_along(args$output[[i]][[j]])) {
name <- basename(args$output[[i]][[j]][k])
args$output[[i]][[j]][k] <- file.path(cwl.wf, name)
}
}
}
args <- as(args, "SYSargs2")
}
return(args)
}
## Usage:
# WF <- output_update(WF, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam"))
# WF <- output_update(WF, dir=TRUE, replace=TRUE, extension=c(".sam", ".bam"))
###################################################
## Create CommandLineTools from Command-line ##
###################################################
createWF <- function(targets = NULL, commandLine, results_path = "./results", module_load = "baseCommand", file = "default",
overwrite = FALSE, cwlVersion = "v1.0", class = "CommandLineTool", writeout = FALSE, silent = FALSE) {
if (all(!inherits(commandLine, "list"))) stop("'commandLine' needs to be object of class 'list'.")
if (any(!c("baseCommand", "inputs", "outputs") %in% names(commandLine))) stop("Argument 'commandLine' needs to be assigned at least to: 'baseCommand', 'input' or 'output'.")
if (all(!c("Workflow", "CommandLineTool") %in% class)) stop("Class slot in '<wf_file>.cwl' needs to be 'Workflow' or 'CommandLineTool'.")
if (dir.exists(results_path) == FALSE) dir.create(path = results_path)
if (module_load == "baseCommand") {
module_load <- commandLine$baseCommand[[1]]
} else {
module_load <- module_load
}
if ("default" %in% file) {
if (dir.exists(paste("param/cwl/", commandLine$baseCommand, sep = "")) == FALSE) dir.create(path = paste("param/cwl/", commandLine$baseCommand, sep = ""), recursive = TRUE)
file.cwl <- paste("param/cwl/", commandLine$baseCommand, "/", commandLine$baseCommand, ".cwl", sep = "")
file.yml <- paste("param/cwl/", commandLine$baseCommand, "/", commandLine$baseCommand, ".yml", sep = "")
} else {
for (i in seq_along(file)) {
extension <- sub(".*\\.", "", file[[i]])
if (!c("cwl") %in% extension & !c("yml") %in% extension) {
stop("Argument 'file' needs to be assigned as a character vector with the names of the two param file. For example, 'test.cwl' and 'test.yml'.")
}
if (c("yml") %in% extension) {
file.yml <- file[[i]]
} else if (c("cwl") %in% extension) {
file.cwl <- file[[i]]
}
}
}
if (file.exists(file.cwl) & overwrite == FALSE) {
stop(
"I am not allowed to overwrite files; please delete existing file: ",
file, " or set 'overwrite=TRUE', or provide a different name in the 'file' argument"
)
}
if (file.exists(file.yml) & overwrite == FALSE) {
stop(
"I am not allowed to overwrite files; please delete existing file: ",
file, "or set 'overwrite=TRUE', or provide a different name in the 'file' argument"
)
}
## class("CommandLineTool", "Workflow")
# WF.temp <- SYScreate("SYSargs2")
syntaxVersion <- if(inherits(commandLine, "v2")) "v2" else "v1"
WF.temp <- as(SYScreate("SYSargs2"), "list")
WF.temp$wf <- list()
WF.temp$clt <- write.clt(commandLine, cwlVersion, class, file.cwl, writeout = writeout, silent = silent, syntaxVersion = syntaxVersion)
WF.temp$yamlinput <- write.yml(commandLine, file.yml, results_path, module_load, writeout = writeout, silent = silent, syntaxVersion = syntaxVersion)
WF.temp$modules <- WF.temp$yamlinput$ModulesToLoad
WF.temp$cmdlist <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$input <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$output <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$internal_outfiles <- sapply(names(WF.temp$clt), function(x) list(NULL))
WF.temp$files <- list(
cwl = file.path(file.cwl),
yml = file.path(file.yml),
steps = names(WF.temp$clt)
)
WF.temp$cmdToCwl <- commandLine
## targets
if (!is.null(targets)) {
mytargets <- read.delim(normalizePath(file.path(targets)), comment.char = "#")
mytargets <- targets.as.list(mytargets)
targetsheader <- readLines(normalizePath(file.path(targets)))
targetsheader <- targetsheader[grepl("^#", targetsheader)]
WF.temp$files["targets"] <- normalizePath(file.path(targets))
WF.temp <- c(list(targets = mytargets, targetsheader = list(targetsheader = targetsheader)), WF.temp)
} else {
WF.temp$files["targets"] <- NA
WF.temp <- c(list(targets = data.frame(), targetsheader = list()), WF.temp)
}
WF.temp <- as(WF.temp, "SYSargs2")
# WF.temp[["status"]] <- .statusPending(WF.temp)
return(WF.temp)
}
## Usage:
## Example commandLine
# "hisat2 -S ./results/_SampleName_.sam -x ./data/tair10.fasta -k 1 --min-intronlen 30 --max-intronlen 3000 --threads 4 -U _FASTQ_PATH1_"
## Provide a list
# baseCommand <- list(baseCommand="hisat2")
# inputs <- list(
# "S"=list(type="File", preF="-S", yml="./results/_SampleName_.sam"),
# "x"=list(type="File", preF="-x", yml="./data/tair10.fasta"),
# "k"= list(type="int", preF="-k", yml= 1L),
# "threads"= list(type="int", preF="-threads", yml=4L),
# "min-intronlen"= list(type="int", preF="--min-intronlen", yml= 30L),
# "max-intronlen"= list(type="int", preF="--max-intronlen", yml=3000L),
# "U"=list(type="File", preF="-U", yml="./data/_FASTQ_PATH1_") )
# outputs <- list("hisat2_sam"=list(type="File", "./results/_SampleName_.sam"))
# commandLine <- list(baseCommand=baseCommand, inputs=inputs, outputs=outputs)
#
## Creates a SYSargs2 object, populate all the command-line, and creates *.cwl and *.yml files
# targets <- system.file("extdata", "targets.txt", package="systemPipeR")
# WF <- createWF(targets=targets, commandLine, results_path="./results", module_load="baseCommand",
# file = "default", overwrite = FALSE, cwlVersion = "v1.0",
# class = "CommandLineTool")
# WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
###################
## write.cwl ##
###################
write.clt <- function(commandLine, cwlVersion, class, file.cwl, writeout = TRUE, silent = FALSE, syntaxVersion = "v1") {
cwlVersion <- cwlVersion
class <- class
baseCommand <- commandLine$baseCommand[[1]]
## File
clt <- list(cwlVersion = cwlVersion, class = class)
## requirements
if (is.null(commandLine$requeriments)) {
dump <- "do nothing"
} else if (!is.null(commandLine$requeriments)) {
requeriments <- list()
clt <- c(clt, list(requeriments = requeriments))
}
## ARGUMENTS
arguments <- sapply(names(commandLine$args), function(x) list())
if (!is.null(commandLine$args)) {
for (i in seq_along(commandLine$args)) {
arguments[[i]]["prefix"] <- commandLine$args[[i]]$preF
arguments[[i]]["valueFrom"] <- ""
arguments[[i]]["position"] <- as.integer(commandLine$args[[i]]$index)
}
}
## INPUTS
if (any(names(commandLine$inputs) == "")) stop("Each element of the list 'commandLine' needs to be assigned a name")
if (is.null(names(commandLine$inputs))) stop("Each element of the list 'commandLine' needs to be assigned a name")
inputs <- sapply(names(commandLine$inputs), function(x) list())
for (i in seq_along(commandLine$inputs)) {
if ("type" %in% names(commandLine$inputs[[i]])) {
if (!c("type") %in% names(commandLine$inputs[[i]])) stop("Each element of the sublist 'inputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File' or type='int' or type='string'")
inputs[[i]]["type"] <- commandLine$inputs[[i]]$type
}
if ("preF" %in% names(commandLine$inputs[[i]])) {
if (commandLine$inputs[[i]]$preF == "") {
inputs[[i]]["inputBinding"] <- list(list(prefix = ""))
} else {
inputs[[i]]["inputBinding"] <- list(list(prefix = commandLine$inputs[[i]]$preF))
}
}
if(syntaxVersion == "v2") inputs[[i]]["inputBinding"]$inputBinding$position <- as.integer(commandLine$inputs[[i]]$index)
if (any(c("label", "secondaryFiles", "doc", "default", "format", "streamable") %in% names(commandLine$inputs[[i]]))) {
for (j in which(c("label", "secondaryFiles", "doc", "default", "format", "streamable") %in% names(commandLine$inputs[[i]]))) {
inputs[[i]][names(commandLine$inputs[[i]][j])] <- commandLine$inputs[[i]][names(commandLine$inputs[[i]])][[j]]
}
}
}
## OUTPUTS
outputs <- sapply(names(commandLine$outputs), function(x) list())
if (!c("type") %in% names(commandLine$inputs[[i]])) stop("Each element of the sublist 'outputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File'.")
if (all(!c("File", "Directory") %in% commandLine$outputs[[1]]$type)) stop("Each element of the sublist 'outputs' in 'commandLine' needs to be defined the type = 'Directory', 'File'.")
for (i in seq_along(commandLine$outputs)) {
outputs[[i]]["type"] <- commandLine$outputs[[i]]$type
outputs[[i]]["outputBinding"] <- list(list(glob = commandLine$outputs[[i]][[2]]))
}
## standard out
if(length(commandLine$stdout) > 0) {
stdout_name <- names(commandLine$stdout)
if(is.null(stdout_name) || stdout_name == "") stdout_name <- "new_stdout"
outputs[[stdout_name]]$type <- "stdout"
}
## FILE
positions <- list(baseCommand = baseCommand, arguments = arguments, inputs = inputs, outputs = outputs)
if(length(commandLine$stdout) > 0) positions$stdout <- commandLine$stdout[[1]]$value
clt <- c(clt, positions)
## writing file
if (writeout == TRUE) {
yaml::write_yaml(x = clt, file = file.cwl)
## print message
if (silent != TRUE) cat("\t", "Written content of 'commandLine' to file:", "\n", file.cwl, "\n")
}
clt <- list(clt)
names(clt) <- baseCommand
return(clt)
}
## Usage:
# clt_cwl <- write.clt(commandLine, cwlVersion, class, file.cwl)
###################
## write.yml ##
###################
## Write the yaml file
write.yml <- function(commandLine, file.yml, results_path, module_load, writeout = TRUE, silent = FALSE, syntaxVersion = "v1") {
inputs <- commandLine$inputs
if(length(inputs) == 0) {
return(
message(crayon::yellow$bold("No inputs given, no yml file is written"),
"\nAre you sure this is correct? Double-check the command string")
)
}
if (any(names(inputs) == "")) stop("Each element of the list 'commandLine' needs to be assigned a name")
if (is.null(names(inputs))) stop("Each element of the list 'commandLine' needs to be assigned a name")
## yamlinput_yml
yamlinput_yml <- sapply(names(inputs), function(x) list())
for (i in seq_along(inputs)) {
if (!c("type") %in% names(inputs[[i]])) stop("Each element of the sublist 'inputs' in 'commandLine' needs to be defined the type of the argument, for example: type='Directory' or type='File' or type='int' or type='string'")
if(syntaxVersion == "v1") {
if (any(c("File", "Directory") %in% inputs[[i]])) {
yamlinput_yml[[i]]["class"] <- inputs[[i]]$type
yamlinput_yml[[i]]["path"] <- inputs[[i]]$yml
} else if (any(c("int", "string") %in% inputs[[i]])) {
yamlinput_yml[[i]] <- inputs[[i]]$yml
}
} else {
# enforce types
if (any(c("File", "Directory") %in% inputs[[i]])) {
yamlinput_yml[[i]]["class"] <- inputs[[i]]$type
yamlinput_yml[[i]]["path"] <- inputs[[i]]$value
} else if ("string" %in% inputs[[i]]) {
yamlinput_yml[[i]] <- as.character(inputs[[i]]$value)
} else if (any(c("double", "float") %in% inputs[[i]])) {
yamlinput_yml[[i]] <- as.numeric(inputs[[i]]$value)
} else if (any(c("int", "long") %in% inputs[[i]])) {
yamlinput_yml[[i]] <- as.integer(inputs[[i]]$value)
} else if ("boolean" %in% inputs[[i]]) {
yamlinput_yml[[i]] <- as.logical(inputs[[i]]$value)
} else {
stop("Argument ", names(inputs[i])[1], " has a currently unsupported type ", inputs[[i]]$type)
}
}
}
## results_path
yamlinput_yml[["results_path"]]["class"] <- list("Directory")
yamlinput_yml[["results_path"]]["path"] <- list(results_path)
## moduleload
for (i in seq_along(module_load)) {
yamlinput_yml[["ModulesToLoad"]][paste0("module", i)] <- list(module_load[[i]])
}
## write out the '.yml' file
if (writeout == TRUE) {
yaml::write_yaml(x = yamlinput_yml, file = file.yml)
## print message
if (silent != TRUE) cat("\t", "Written content of 'commandLine' to file:", "\n", file.yml, "\n")
}
return(yamlinput_yml)
}
## Usage:
# yamlinput_yml <- write.yml(commandLine, file.yml, results_path, module_load)
###################
## cmdTool2wf ##
###################
## CommandlineTool --> Workflow class
cmdTool2wf <- function(cmdTool_path, file.cwl, writeout = TRUE, silent = FALSE) {
cmdTools <- yaml::read_yaml(file.path(cmdTool_path))
cwlVersion <- cmdTools$cwlVersion
class <- "Workflow"
## Input
inputs_names <- names(cmdTools$inputs)
inputs <- sapply(inputs_names, function(x) list(cmdTools$inputs[[x]]$type))
## output
outputs <- sapply(names(cmdTools$outputs), function(x) list(NULL))
for (i in seq_along(outputs)) {
outputs[[i]][["outputSource"]] <- paste0(cmdTools$baseCommand, "/", names(outputs)[i])
outputs[[i]][["type"]] <- cmdTools$outputs[[i]]$type
}
## Steps
step.in <- sapply(names(inputs), function(x) list(x))
step.out <- paste0("[", paste0(names(outputs), collapse = ", "), "]")
steps <- list(list(`in` = step.in, `out` = step.out, run = cmdTool_path))
if (length(cmdTools$baseCommand) > 1) {
cmdTools$baseCommand <- paste0(cmdTools$baseCommand, collapse = "_")
} else if (is.null(cmdTools$baseCommand)) {
cmdTools$baseCommand <- .getFileName(cmdTool_path)
}
names(steps) <- cmdTools$baseCommand
## Combine
wf2 <- list(
class = class, cwlVersion = cwlVersion, inputs = inputs,
outputs = outputs, steps = steps
)
## write out the '.cwl' file
if (writeout == TRUE) {
yaml::write_yaml(x = wf2, file = file.cwl)
## print message
if (silent != TRUE) cat("\t", "Written content of 'Workflow' to file:", "\n", file.cwl, "\n")
}
## Return
return(wf2)
}
# cmdTools_path <- "param/cwl/hisat2/hisat2-pe/hisat2-mapping-pe.cwl"
# cmdTools_path <- "param/cwl/hisat2/hisat2-idx/hisat2-index.cwl"
#
# ## Usage:
# wf <- cmdTool2wf(cmdTools_path, file.cwl = "test.cwl")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.