inst/doc/systemPipeR.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
options(width=80, max.print=1000)
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), 
    tidy.opts=list(width.cutoff=80), tidy=TRUE)

## ----setup, echo=FALSE, message=FALSE, warning=FALSE--------------------------
suppressPackageStartupMessages({
    library(systemPipeR)
    library(BiocParallel)
    library(Biostrings)
    library(Rsamtools)
    library(GenomicRanges)
    library(ggplot2)
    library(GenomicAlignments)
    library(ShortRead)
    library(ape)
    library(batchtools)
    library(magrittr)
})

## ----install, eval=FALSE------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager")
#  BiocManager::install("systemPipeR")
#  BiocManager::install("systemPipeRdata")

## ----documentation, eval=FALSE------------------------------------------------
#  library("systemPipeR") # Loads the package
#  library(help="systemPipeR") # Lists package info
#  vignette("systemPipeR") # Opens vignette

## ----genRna_workflow, eval=FALSE----------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="rnaseq")
#  setwd("rnaseq")

## ----genRna_new, eval=FALSE---------------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="new", mydirname = "FEB_project")

## ----genRna_npkg, eval=FALSE--------------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow=NULL, package_repo = "systemPipeR/systemPipeRIBOseq", ref = "master", subdir = NULL)

## ----genRna_pkg_branch, eval=FALSE--------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow=NULL, package_repo = "systemPipeR/systemPipeRNAseq", ref = "singleMachine", subdir = NULL)

## ----targetsSE, eval=TRUE-----------------------------------------------------
library(systemPipeR)
targetspath <- system.file("extdata", "targets.txt", package="systemPipeR") 
read.delim(targetspath, comment.char = "#")[1:4,]

## ----targetsPE, eval=TRUE-----------------------------------------------------
targetspath <- system.file("extdata", "targetsPE.txt", package="systemPipeR")
read.delim(targetspath, comment.char = "#")[1:2,1:6]

## ----comment_lines, echo=TRUE-------------------------------------------------
readLines(targetspath)[1:4]

## ----targetscomp, eval=TRUE---------------------------------------------------
readComp(file=targetspath, format="vector", delim="-")

## ----CWL_structure, echo = FALSE, eval=FALSE----------------------------------
#  hisat2.cwl <- system.file("extdata", "cwl/hisat2/hisat2-se/hisat2-mapping-se.cwl", package="systemPipeR")
#  yaml::read_yaml(hisat2.cwl)

## ----yaml_structure, echo = FALSE, eval=FALSE---------------------------------
#  hisat2.yml <- system.file("extdata", "cwl/hisat2/hisat2-se/hisat2-mapping-se.yml", package="systemPipeR")
#  yaml::read_yaml(hisat2.yml)

## ----SYSargs2_structure, eval=TRUE--------------------------------------------
library(systemPipeR)
targets <- system.file("extdata", "targets.txt", package="systemPipeR")
dir_path <- system.file("extdata/cwl/hisat2/hisat2-se", package="systemPipeR")
WF <- loadWF(targets=targets, wf_file="hisat2-mapping-se.cwl",
                   input_file="hisat2-mapping-se.yml",
                   dir_path=dir_path)

WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))

## ----names_WF, eval=TRUE------------------------------------------------------
names(WF)

## ----cmdlist, eval=TRUE-------------------------------------------------------
cmdlist(WF)[1]

## ----output_WF, eval=TRUE-----------------------------------------------------
output(WF)[1]
modules(WF)
targets(WF)[1]
targets.as.df(targets(WF))[1:4,1:4]
output(WF)[1]
cwlfiles(WF)
inputvars(WF)

## ----table_tools, echo=FALSE, message=FALSE-----------------------------------
library(dplyr)
library(kableExtra)
SYS_software <- system.file("extdata", "SYS_software.csv", package="systemPipeR")
#SYS_software <- "SYS_software.csv"
software <- read.delim(SYS_software, sep=",", comment.char = "#")
colors <- colorRampPalette((c("darkseagreen", "indianred1")))(length(unique(software$Category)))
id <- as.numeric(c((unique(software$Category))))
software %>%
  mutate(Step = cell_spec(Step, color = "white", bold = T,
    background = factor(Category, id, colors)
  )) %>%
   select(Tool, Description, Step) %>%
  arrange(Tool) %>% 
  kable(escape = F, align = "c", col.names = c("Tool Name",	"Description", "Step")) %>%
  kable_styling(c("striped", "hover", "condensed"), full_width = T) %>%
  scroll_box(width = "80%", height = "500px")

## ----test_tool_path, eval=FALSE-----------------------------------------------
#  tryCL(command="grep")

## ----param_structure, eval=TRUE-----------------------------------------------
parampath <- system.file("extdata", "tophat.param", package="systemPipeR")
read.delim(parampath, comment.char = "#")

## ----param_import, eval=TRUE--------------------------------------------------
targetspath <- system.file("extdata", "targets.txt", package="systemPipeR") 
args <- suppressWarnings(systemArgs(sysma=parampath, mytargets=targetspath))
args

## ----sysarg_access, eval=TRUE-------------------------------------------------
names(args)

## ----sysarg_access2, eval=TRUE------------------------------------------------
sysargs(args)[1]
modules(args)
cores(args)
outpaths(args)[1]

## ----sysarg_json, eval=TRUE---------------------------------------------------
systemArgs(sysma=parampath, mytargets=targetspath, type="json")

## ----workflow, eval=FALSE-----------------------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="rnaseq")
#  setwd("rnaseq")

## ----test_software, eval=FALSE------------------------------------------------
#  tryCL(command="hisat2") ## "All set up, proceed!"

## ----initwf, eval=FALSE-------------------------------------------------------
#  script <- "systemPipeRNAseq.Rmd"
#  targetspath <- "targets.txt"
#  sysargslist <- initWF(script = script, targets = targetspath)

## ----initwf_tempdir, eval=FALSE-----------------------------------------------
#  library(systemPipeRdata)
#  script <- system.file("extdata/workflows/rnaseq", "systemPipeRNAseq.Rmd", package="systemPipeRdata")
#  targets <- system.file("extdata", "targets.txt", package="systemPipeR")
#  dir_path <- tempdir()
#  SYSconfig <- initProject(projPath=dir_path, targets=targets, script=script, overwrite = TRUE)
#  sysargslist_temp <- initWF(sysconfig ="SYSconfig.yml")

## ----config, eval=FALSE-------------------------------------------------------
#  sysargslist <- configWF(x=sysargslist, input_steps = "1:3")
#  sysargslist <- runWF(sysargslist = sysargslist, steps = "ALL")
#  sysargslist <- runWF(sysargslist = sysargslist, steps = "1:2")

## ----pipes, eval=FALSE--------------------------------------------------------
#  library(systemPipeR)
#  sysargslist <- initWF(script ="systemPipeRNAseq.Rmd", overwrite = T) %>%
#      configWF(input_steps = "1:3") %>%
#      runWF(steps = "1:2")

## ----closeR, eval=FALSE-------------------------------------------------------
#  q("no") # closes R session on head node

## ----r_environment, eval=FALSE------------------------------------------------
#  system("hostname") # should return name of a compute node starting with i or c
#  getwd() # checks current working directory of R session
#  dir() # returns content of current working directory

## ----clusterRun, eval=FALSE---------------------------------------------------
#  library(batchtools)
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  reg <- clusterRun(args, FUN = runCommandline, more.args = list(args=args, make_bam=TRUE, dir=FALSE),
#                    conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#                    Njobs=18, runid="01", resourceList=resources)
#  getStatus(reg=reg)
#  waitForJobs(reg=reg)

## ----load_package, eval=FALSE-------------------------------------------------
#  library(systemPipeR)
#  library(systemPipeRdata)
#  genWorkenvir(workflow="rnaseq", mydirname=NULL)
#  setwd("rnaseq")

## ----construct_SYSargs2_trim-se, echo = FALSE, eval=FALSE---------------------
#  targets <- system.file("extdata", "targets.txt", package="systemPipeR")
#  dir_path <- system.file("extdata/cwl/preprocessReads/trim-se", package="systemPipeR")
#  trim <- loadWorkflow(targets=targets, wf_file="trim-se.cwl", input_file="trim-se.yml", dir_path=dir_path)
#  trim <- renderWF(trim, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
#  trim

## ----construct_SYSargs2_trim-pe, eval=FALSE-----------------------------------
#  targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR")
#  dir_path <- system.file("extdata/cwl/preprocessReads/trim-pe", package="systemPipeR")
#  trim <- loadWorkflow(targets=targetsPE, wf_file="trim-pe.cwl", input_file="trim-pe.yml", dir_path=dir_path)
#  trim <- renderWF(trim, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_"))
#  trim
#  output(trim)[1:2]

## ----preprocessing, eval=FALSE------------------------------------------------
#  preprocessReads(args=trim, Fct="trimLRPatterns(Rpattern='GCCCGGGTAA',
#                  subject=fq)", batchsize=100000, overwrite=TRUE, compress=TRUE)

## ----custom_preprocessing, eval=FALSE-----------------------------------------
#  filterFct <- function(fq, cutoff=20, Nexceptions=0) {
#      qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm=TRUE)
#      # Retains reads where Phred scores are >= cutoff with N exceptions
#      fq[qcount <= Nexceptions]
#  }
#  preprocessReads(args=trim, Fct="filterFct(fq, cutoff=20, Nexceptions=0)",
#                  batchsize=100000)

## ----trimGalore, eval=FALSE---------------------------------------------------
#  targets <- system.file("extdata", "targets.txt", package="systemPipeR")
#  dir_path <- system.file("extdata/cwl/trim_galore/trim_galore-se", package="systemPipeR")
#  trimG <- loadWorkflow(targets=targets, wf_file="trim_galore-se.cwl", input_file="trim_galore-se.yml", dir_path=dir_path)
#  trimG <- renderWF(trimG, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
#  trimG
#  cmdlist(trimG)[1:2]
#  output(trimG)[1:2]
#  ## Run Single Machine Option
#  trimG <- runCommandline(trimG[1], make_bam = FALSE)

## ----trimmomatic, eval=FALSE--------------------------------------------------
#  targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR")
#  dir_path <- system.file("extdata/cwl/trimmomatic/trimmomatic-pe", package="systemPipeR")
#  trimM <- loadWorkflow(targets=targetsPE, wf_file="trimmomatic-pe.cwl", input_file="trimmomatic-pe.yml", dir_path=dir_path)
#  trimM <- renderWF(trimM, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_"))
#  trimM
#  cmdlist(trimM)[1:2]
#  output(trimM)[1:2]
#  ## Run Single Machine Option
#  trimM <- runCommandline(trimM[1], make_bam = FALSE)

## ----fastq_quality, eval=FALSE------------------------------------------------
#  fqlist <- seeFastq(fastq=infile1(trim), batchsize=10000, klength=8)
#  pdf("./results/fastqReport.pdf", height=18, width=4*length(fqlist))
#  seeFastqPlot(fqlist)
#  dev.off()

## ----fastq_quality_parallel_single, eval=FALSE--------------------------------
#  f <- function(x) seeFastq(fastq=infile1(trim)[x], batchsize=100000, klength=8)
#  fqlist <- bplapply(seq(along=trim), f, BPPARAM = MulticoreParam(workers=4))
#  seeFastqPlot(unlist(fqlist, recursive=FALSE))

## ----fastq_quality_parallel_cluster, eval=FALSE-------------------------------
#  library(BiocParallel); library(batchtools)
#  f <- function(x) {
#    library(systemPipeR)
#    targetsPE <- system.file("extdata", "targetsPE.txt", package="systemPipeR")
#    dir_path <- system.file("extdata/cwl/preprocessReads/trim-pe", package="systemPipeR")
#    trim <- loadWorkflow(targets=targetsPE, wf_file="trim-pe.cwl", input_file="trim-pe.yml", dir_path=dir_path)
#    trim <- renderWF(trim, inputvars=c(FileName1="_FASTQ_PATH1_", FileName2="_FASTQ_PATH2_", SampleName="_SampleName_"))
#    seeFastq(fastq=infile1(trim)[x], batchsize=100000, klength=8)
#  }
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources)
#  fqlist <- bplapply(seq(along=trim), f, BPPARAM = param)
#  seeFastqPlot(unlist(fqlist, recursive=FALSE))

## ----hisat_SYSargs2_object, eval=TRUE-----------------------------------------
targets <- system.file("extdata", "targets.txt", package="systemPipeR")
dir_path <- system.file("extdata/cwl/hisat2/hisat2-se", package="systemPipeR")
args <- loadWorkflow(targets=targets, wf_file="hisat2-mapping-se.cwl", input_file="hisat2-mapping-se.yml", dir_path=dir_path)
args <- renderWF(args, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
args
cmdlist(args)[1:2]
output(args)[1:2]

## ----subset, eval=TRUE--------------------------------------------------------
subsetWF(args, slot="input", subset='FileName')[1:2] ## Subsetting the input files for this particular workflow 
subsetWF(args, slot="output", subset=1, index=1)[1:2]  ## Subsetting the output files for one particular step in the workflow 
subsetWF(args, slot="step", subset=1)[1] ## Subsetting the command-lines for one particular step in the workflow 
subsetWF(args, slot="output", subset=1, index=1, delete=TRUE)[1] ## DELETING specific output files

## ----hisat_index, eval=FALSE--------------------------------------------------
#  dir_path <- system.file("extdata/cwl/hisat2/hisat2-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets=NULL, wf_file="hisat2-index.cwl", input_file="hisat2-index.yml", dir_path=dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx)
#  
#  ## Run
#  runCommandline(idx, make_bam = FALSE)

## ----runCommandline_align, eval=FALSE-----------------------------------------
#  runCommandline(args, make_bam = FALSE) ## generates alignments and writes *.sam files to ./results folder
#  args <- runCommandline(args, make_bam = TRUE) ## same as above but writes files and converts *.sam files to sorted and indexed BAM files. Assigning the new extention of the output files to the object args.

## ----clusterRun_args, eval=FALSE----------------------------------------------
#  library(batchtools)
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  reg <- clusterRun(args, FUN = runCommandline, more.args = list(args=args, make_bam=TRUE, dir=FALSE),
#                    conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#                    Njobs=18, runid="01", resourceList=resources)
#  getStatus(reg=reg)
#  waitForJobs(reg=reg)

## ----output, eval=FALSE-------------------------------------------------------
#  args <- output_update(args, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam")) ## Updates the output(args) to the right location in the subfolders
#  output(args)

## ----writeTargetsout, eval=FALSE----------------------------------------------
#  names(clt(args))
#  writeTargetsout(x=args, file="default", step = 1,
#                  new_col = "FileName", new_col_output_index = 1, overwrite = TRUE)

## ----hisat_alignment, eval=FALSE----------------------------------------------
#  targets <- system.file("extdata", "targets.txt", package="systemPipeR")
#  dir_path <- system.file("extdata/cwl/workflow-hisat2/workflow-hisat2-se", package="systemPipeR")
#  WF <- loadWorkflow(targets=targets, wf_file="workflow_hisat2-se.cwl", input_file="workflow_hisat2-se.yml", dir_path=dir_path)
#  WF <- renderWF(WF, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
#  WF
#  cmdlist(WF)[1:2]
#  output(WF)[1:2]

## ----bowtie_index, eval=FALSE-------------------------------------------------
#  dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets=NULL, wf_file="bowtie2-index.cwl", input_file="bowtie2-index.yml", dir_path=dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx)
#  
#  ## Run in single machine
#  runCommandline(idx, make_bam = FALSE)

## ----tophat2-pe, eval=FALSE---------------------------------------------------
#  targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
#  dir_path <- system.file("extdata/cwl/tophat2/tophat2-pe", package="systemPipeR")
#  tophat2PE <- loadWorkflow(targets = targetsPE, wf_file = "tophat2-mapping-pe.cwl",
#      input_file = "tophat2-mapping-pe.yml", dir_path = dir_path)
#  tophat2PE <- renderWF(tophat2PE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_",
#      SampleName = "_SampleName_"))
#  tophat2PE
#  cmdlist(tophat2PE)[1:2]
#  output(tophat2PE)[1:2]
#  
#  ## Run in single machine
#  tophat2PE <- runCommandline(tophat2PE[1], make_bam = TRUE)

## ----tophat2-pe_parallel, eval=FALSE------------------------------------------
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  reg <- clusterRun(tophat2PE, FUN = runCommandline, more.args = list(args=tophat2PE, make_bam=TRUE, dir=FALSE),
#                    conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#                    Njobs=18, runid="01", resourceList=resources)
#  waitForJobs(reg=reg)

## ----writeTargetsout_tophat2PE, eval=FALSE------------------------------------
#  names(clt(tophat2PE))
#  writeTargetsout(x=tophat2PE, file="default", step = 1,
#                  new_col = "tophat2PE", new_col_output_index = 1, overwrite = TRUE)

## ----bowtie2_index, eval=FALSE------------------------------------------------
#  dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets=NULL, wf_file="bowtie2-index.cwl", input_file="bowtie2-index.yml", dir_path=dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx)
#  
#  ## Run in single machine
#  runCommandline(idx, make_bam = FALSE)

## ----bowtie2_SYSargs2, eval=FALSE---------------------------------------------
#  targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
#  dir_path <- system.file("extdata/cwl/bowtie2/bowtie2-pe", package="systemPipeR")
#  bowtiePE <- loadWorkflow(targets = targetsPE, wf_file = "bowtie2-mapping-pe.cwl",
#      input_file = "bowtie2-mapping-pe.yml", dir_path = dir_path)
#  bowtiePE <- renderWF(bowtiePE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_",
#      SampleName = "_SampleName_"))
#  bowtiePE
#  cmdlist(bowtiePE)[1:2]
#  output(bowtiePE)[1:2]

## ----bowtie2_cluster, eval=FALSE----------------------------------------------
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  reg <- clusterRun(bowtiePE, FUN = runCommandline, more.args = list(args=bowtiePE, dir = FALSE),
#      conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#      Njobs = 18, runid = "01", resourceList = resources)
#  getStatus(reg = reg)

## ----bowtie2_sm, eval=FALSE---------------------------------------------------
#  bowtiePE <- runCommandline(bowtiePE)

## ----writeTargetsout_bowtiePE, eval=FALSE-------------------------------------
#  names(clt(bowtiePE))
#  writeTargetsout(x=bowtiePE, file="default", step = 1,
#                  new_col = "bowtiePE", new_col_output_index = 1, overwrite = TRUE)

## ----bwa_index, eval=FALSE----------------------------------------------------
#  dir_path <- system.file("extdata/cwl/bwa/bwa-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets=NULL, wf_file="bwa-index.cwl", input_file="bwa-index.yml", dir_path=dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx) # Indexes reference genome
#  
#  ## Run
#  runCommandline(idx, make_bam = FALSE)

## ----bwa-pe_alignment, eval=FALSE---------------------------------------------
#  targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
#  dir_path <- system.file("extdata/cwl/bwa/bwa-pe", package="systemPipeR")
#  bwaPE <- loadWorkflow(targets = targetsPE, wf_file = "bwa-pe.cwl",
#      input_file = "bwa-pe.yml", dir_path = dir_path)
#  bwaPE <- renderWF(bwaPE, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_",
#      SampleName = "_SampleName_"))
#  bwaPE
#  cmdlist(bwaPE)[1:2]
#  output(bwaPE)[1:2]
#  ## Single Machine
#  bwaPE <- runCommandline(args= bwaPE, make_bam=FALSE)
#  
#  ## Cluster
#  library(batchtools)
#  resources <- list(walltime = 120, ntasks = 1, ncpus = 4, memory = 1024)
#  reg <- clusterRun(bwaPE, FUN = runCommandline, more.args = list(args=bwaPE, dir = FALSE),
#      conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#      Njobs = 18, runid = "01", resourceList = resources)
#  getStatus(reg = reg)

## ----writeTargetsout_bwaPE, eval=FALSE----------------------------------------
#  names(clt(bwaPE))
#  writeTargetsout(x=bwaPE, file="default", step = 1,
#                  new_col = "bwaPE", new_col_output_index = 1, overwrite = TRUE)

## ----rsubread, eval=FALSE-----------------------------------------------------
#  ## Build the index:
#  dir_path <- system.file("extdata/cwl/rsubread/rsubread-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets = NULL, wf_file = "rsubread-index.cwl",
#                       input_file = "rsubread-index.yml", dir_path = dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx)
#  runCommandline(args= idx, make_bam = FALSE)
#  
#  ## Running the alignment:
#  targets <- system.file("extdata", "targets.txt", package = "systemPipeR")
#  dir_path <- system.file("extdata/cwl/rsubread/rsubread-se", package="systemPipeR")
#  rsubread <- loadWorkflow(targets = targets, wf_file = "rsubread-mapping-se.cwl",
#                       input_file = "rsubread-mapping-se.yml", dir_path = dir_path)
#  rsubread <- renderWF(rsubread, inputvars = c(FileName = "_FASTQ_PATH1_", SampleName = "_SampleName_"))
#  rsubread
#  cmdlist(rsubread)[1]
#  
#  ## Single Machine
#  rsubread <- runCommandline(args=rsubread[1])

## ----writeTargetsout_rsubread, eval=FALSE-------------------------------------
#  names(clt(rsubread))
#  writeTargetsout(x=rsubread, file="default", step = 1,
#                  new_col = "rsubread", new_col_output_index = 1, overwrite = TRUE)

## ----gsnap, eval=FALSE--------------------------------------------------------
#  ## Build the index:
#  dir_path <- system.file("extdata/cwl/gsnap/gsnap-idx", package="systemPipeR")
#  idx <- loadWorkflow(targets = NULL, wf_file = "gsnap-index.cwl",
#                       input_file = "gsnap-index.yml", dir_path = dir_path)
#  idx <- renderWF(idx)
#  idx
#  cmdlist(idx)
#  runCommandline(args= idx, make_bam = FALSE)
#  
#  ## Running the alignment:
#  targetsPE <- system.file("extdata", "targetsPE.txt", package = "systemPipeR")
#  dir_path <- system.file("extdata/cwl/gsnap/gsnap-pe", package="systemPipeR")
#  gsnap <- loadWorkflow(targets = targetsPE, wf_file = "gsnap-mapping-pe.cwl",
#                       input_file = "gsnap-mapping-pe.yml", dir_path = dir_path)
#  gsnap <- renderWF(gsnap, inputvars = c(FileName1 = "_FASTQ_PATH1_", FileName2 = "_FASTQ_PATH2_", SampleName = "_SampleName_"))
#  gsnap
#  cmdlist(gsnap)[1]
#  output(gsnap)[1]
#  
#  ## Cluster
#  library(batchtools)
#  resources <- list(walltime = 120, ntasks = 1, ncpus = 4, memory = 1024)
#  reg <- clusterRun(gsnap, FUN = runCommandline, more.args = list(args=gsnap, make_bam=FALSE),
#      conffile = ".batchtools.conf.R", template = "batchtools.slurm.tmpl",
#      Njobs = 18, runid = "01", resourceList = resources)
#  getStatus(reg = reg)
#  gsnap <- output_update(gsnap, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam"))

## ----writeTargetsout_gsnap, eval=FALSE----------------------------------------
#  names(clt(gsnap))
#  writeTargetsout(x=gsnap, file="default", step = 1,
#                  new_col = "gsnap", new_col_output_index = 1, overwrite = TRUE)

## ----igv, eval=FALSE----------------------------------------------------------
#  symLink2bam(sysargs=args, htmldir=c("~/.html/", "somedir/"),
#              urlbase="http://myserver.edu/~username/",
#          urlfile="IGVurl.txt")

## ----create_txdb, eval=FALSE--------------------------------------------------
#  library(GenomicFeatures)
#  txdb <- makeTxDbFromGFF(file="data/tair10.gff", format="gff", dataSource="TAIR", organism="Arabidopsis thaliana")
#  saveDb(txdb, file="./data/tair10.sqlite")

## ----read_counting_multicore, eval=FALSE--------------------------------------
#  library(BiocParallel)
#  txdb <- loadDb("./data/tair10.sqlite")
#  eByg <- exonsBy(txdb, by="gene")
#  outpaths <- subsetWF(args, slot="output", subset=1, index=1)
#  bfl <- BamFileList(outpaths, yieldSize=50000, index=character())
#  multicoreParam <- MulticoreParam(workers=4); register(multicoreParam); registered()
#  counteByg <- bplapply(bfl, function(x) summarizeOverlaps(eByg, x, mode="Union", ignore.strand=TRUE, inter.feature=TRUE, singleEnd=TRUE))
#  
#  # Note: for strand-specific RNA-Seq set 'ignore.strand=FALSE' and for PE data set 'singleEnd=FALSE'
#  countDFeByg <- sapply(seq(along=counteByg),
#                        function(x) assays(counteByg[[x]])$counts)
#  rownames(countDFeByg) <- names(rowRanges(counteByg[[1]])); colnames(countDFeByg) <- names(bfl)
#  rpkmDFeByg <- apply(countDFeByg, 2, function(x) returnRPKM(counts=x, ranges=eByg))
#  write.table(countDFeByg, "results/countDFeByg.xls", col.names=NA, quote=FALSE, sep="\t")
#  write.table(rpkmDFeByg, "results/rpkmDFeByg.xls", col.names=NA, quote=FALSE, sep="\t")

## ----read_counting_multinode, eval=FALSE--------------------------------------
#  library(BiocParallel)
#  f <- function(x) {
#      library(systemPipeR); library(BiocParallel); library(GenomicFeatures)
#      txdb <- loadDb("./data/tair10.sqlite")
#      eByg <- exonsBy(txdb, by="gene")
#      args <- systemArgs(sysma="param/tophat.param", mytargets="targets.txt")
#      outpaths <- subsetWF(args, slot="output", subset=1, index=1)
#      bfl <- BamFileList(outpaths, yieldSize=50000, index=character())
#      summarizeOverlaps(eByg, bfl[x], mode="Union", ignore.strand=TRUE, inter.feature=TRUE, singleEnd=TRUE)
#  }
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources)
#  counteByg <- bplapply(seq(along=args), f, BPPARAM = param)
#  countDFeByg <- sapply(seq(along=counteByg),
#                        function(x) assays(counteByg[[x]])$counts)
#  rownames(countDFeByg) <- names(rowRanges(counteByg[[1]])); colnames(countDFeByg) <- names(outpaths)

## ----process_monitoring, eval=FALSE-------------------------------------------
#  getStatus(reg=reg)
#  outpaths <- subsetWF(args, slot="output", subset=1, index=1)
#  file.exists(outpaths)
#  sapply(1:length(outpaths), function(x) loadResult(reg, id=x)) # Works after job completion

## ----align_stats1, eval=FALSE-------------------------------------------------
#  read_statsDF <- alignStats(args)
#  write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")

## ----align_stats2, eval=TRUE--------------------------------------------------
read.table(system.file("extdata", "alignStats.xls", package="systemPipeR"), header=TRUE)[1:4,]

## ----align_stats_parallel, eval=FALSE-----------------------------------------
#  f <- function(x) alignStats(args[x])
#  read_statsList <- bplapply(seq(along=args), f,
#                             BPPARAM = MulticoreParam(workers=8))
#  read_statsDF <- do.call("rbind", read_statsList)

## ----align_stats_parallel_cluster, eval=FALSE---------------------------------
#  library(BiocParallel); library(batchtools)
#  f <- function(x) {
#      library(systemPipeR)
#      targets <- system.file("extdata", "targets.txt", package="systemPipeR")
#      dir_path <- "param/cwl/hisat2/hisat2-se" ## TODO: replace path to system.file
#      args <- loadWorkflow(targets=targets, wf_file="hisat2-mapping-se.cwl", input_file="hisat2-mapping-se.yml", dir_path=dir_path)
#      args <- renderWF(args, inputvars=c(FileName="_FASTQ_PATH1_", SampleName="_SampleName_"))
#      args <- output_update(args, dir=FALSE, replace=TRUE, extension=c(".sam", ".bam"))
#      alignStats(args[x])
#  }
#  resources <- list(walltime=120, ntasks=1, ncpus=4, memory=1024)
#  param <- BatchtoolsParam(workers = 4, cluster = "slurm", template = "batchtools.slurm.tmpl", resources = resources)
#  read_statsList <- bplapply(seq(along=args), f, BPPARAM = param)
#  read_statsDF <- do.call("rbind", read_statsList)

## ----read_counting_mirna, eval=FALSE------------------------------------------
#  system("wget ftp://mirbase.org/pub/mirbase/19/genomes/My_species.gff3 -P ./data/")
#  gff <- import.gff("./data/My_species.gff3")
#  gff <- split(gff, elementMetadata(gff)$ID)
#  bams <- names(bampaths); names(bams) <- targets$SampleName
#  bfl <- BamFileList(bams, yieldSize=50000, index=character())
#  countDFmiR <- summarizeOverlaps(gff, bfl, mode="Union", ignore.strand=FALSE, inter.feature=FALSE) # Note: inter.feature=FALSE important since pre and mature miRNA ranges overlap
#  rpkmDFmiR <- apply(countDFmiR, 2,
#                     function(x) returnRPKM(counts=x, gffsub=gff))
#  write.table(assays(countDFmiR)$counts, "results/countDFmiR.xls", col.names=NA, quote=FALSE, sep="\t")
#  write.table(rpkmDFmiR, "results/rpkmDFmiR.xls", col.names=NA, quote=FALSE, sep="\t")

## ----sample_tree_rlog, eval=TRUE----------------------------------------------
library(DESeq2, warn.conflicts=FALSE, quietly=TRUE); library(ape, warn.conflicts=FALSE)
countDFpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR")
countDF <- as.matrix(read.table(countDFpath))
colData <- data.frame(row.names=targets.as.df(targets(args))$SampleName, condition=targets.as.df(targets(args))$Factor)
dds <- DESeqDataSetFromMatrix(countData = countDF, colData = colData, design = ~ condition)
d <- cor(assay(rlog(dds)), method="spearman")
hc <- hclust(dist(1-d))
plot.phylo(as.phylo(hc), type="p", edge.col=4, edge.width=3, show.node.label=TRUE, no.margin=TRUE)

## ----sample_tree_rpkm, eval=FALSE---------------------------------------------
#  rpkmDFeBygpath <- system.file("extdata", "rpkmDFeByg.xls", package="systemPipeR")
#  rpkmDFeByg <- read.table(rpkmDFeBygpath, check.names=FALSE)
#  rpkmDFeByg <- rpkmDFeByg[rowMeans(rpkmDFeByg) > 50,]
#  d <- cor(rpkmDFeByg, method="spearman")
#  hc <- hclust(as.dist(1-d))
#  plot.phylo(as.phylo(hc), type="p", edge.col="blue", edge.width=2, show.node.label=TRUE, no.margin=TRUE)

## ----edger_wrapper, eval=TRUE-------------------------------------------------
targets <- read.delim(targetspath, comment="#")
cmp <- readComp(file=targetspath, format="matrix", delim="-")
cmp[[1]]
countDFeBygpath <- system.file("extdata", "countDFeByg.xls", package="systemPipeR")
countDFeByg <- read.delim(countDFeBygpath, row.names=1)
edgeDF <- run_edgeR(countDF=countDFeByg, targets=targets, cmp=cmp[[1]], independent=FALSE, mdsplot="")

## ----edger_deg_counts, eval=TRUE----------------------------------------------
DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=10))

## ----edger_deg_stats, eval=TRUE-----------------------------------------------
names(DEG_list)
DEG_list$Summary[1:4,]

## ----deseq2_wrapper, eval=TRUE------------------------------------------------
degseqDF <- run_DESeq2(countDF=countDFeByg, targets=targets, cmp=cmp[[1]], independent=FALSE)

## ----deseq2_deg_counts, eval=TRUE---------------------------------------------
DEG_list2 <- filterDEGs(degDF=degseqDF, filter=c(Fold=2, FDR=10))

## ----vennplot, eval=TRUE------------------------------------------------------
vennsetup <- overLapper(DEG_list$Up[6:9], type="vennsets")
vennsetdown <- overLapper(DEG_list$Down[6:9], type="vennsets")
vennPlot(list(vennsetup, vennsetdown), mymain="", mysub="", colmode=2, ccol=c("blue", "red"))

## ----get_go_biomart, eval=FALSE-----------------------------------------------
#  library("biomaRt")
#  listMarts() # To choose BioMart database
#  listMarts(host="plants.ensembl.org")
#  m <- useMart("plants_mart", host="plants.ensembl.org")
#  listDatasets(m)
#  m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="plants.ensembl.org")
#  listAttributes(m) # Choose data types you want to download
#  go <- getBM(attributes=c("go_id", "tair_locus", "namespace_1003"), mart=m)
#  go <- go[go[,3]!="",]; go[,3] <- as.character(go[,3])
#  go[go[,3]=="molecular_function", 3] <- "F"; go[go[,3]=="biological_process", 3] <- "P"; go[go[,3]=="cellular_component", 3] <- "C"
#  go[1:4,]
#  dir.create("./data/GO")
#  write.table(go, "data/GO/GOannotationsBiomart_mod.txt", quote=FALSE, row.names=FALSE, col.names=FALSE, sep="\t")
#  catdb <- makeCATdb(myfile="data/GO/GOannotationsBiomart_mod.txt", lib=NULL, org="", colno=c(1,2,3), idconv=NULL)
#  save(catdb, file="data/GO/catdb.RData")

## ----go_enrichment, eval=FALSE------------------------------------------------
#  load("data/GO/catdb.RData")
#  DEG_list <- filterDEGs(degDF=edgeDF, filter=c(Fold=2, FDR=50), plot=FALSE)
#  up_down <- DEG_list$UporDown; names(up_down) <- paste(names(up_down), "_up_down", sep="")
#  up <- DEG_list$Up; names(up) <- paste(names(up), "_up", sep="")
#  down <- DEG_list$Down; names(down) <- paste(names(down), "_down", sep="")
#  DEGlist <- c(up_down, up, down)
#  DEGlist <- DEGlist[sapply(DEGlist, length) > 0]
#  BatchResult <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="all", id_type="gene", CLSZ=2, cutoff=0.9, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL)
#  library("biomaRt")
#  m <- useMart("plants_mart", dataset="athaliana_eg_gene", host="plants.ensembl.org")
#  goslimvec <- as.character(getBM(attributes=c("goslim_goa_accession"), mart=m)[,1])
#  BatchResultslim <- GOCluster_Report(catdb=catdb, setlist=DEGlist, method="slim", id_type="gene", myslimv=goslimvec, CLSZ=10, cutoff=0.01, gocats=c("MF", "BP", "CC"), recordSpecGO=NULL)

## ----plot_go_enrichment, eval=FALSE-------------------------------------------
#  gos <- BatchResultslim[grep("M6-V6_up_down", BatchResultslim$CLID), ]
#  gos <- BatchResultslim
#  pdf("GOslimbarplotMF.pdf", height=8, width=10); goBarplot(gos, gocat="MF"); dev.off()
#  goBarplot(gos, gocat="BP")
#  goBarplot(gos, gocat="CC")

## ----hierarchical_clustering, eval=FALSE--------------------------------------
#  library(pheatmap)
#  geneids <- unique(as.character(unlist(DEG_list[[1]])))
#  y <- assay(rlog(dds))[geneids, ]
#  pdf("heatmap1.pdf")
#  pheatmap(y, scale="row", clustering_distance_rows="correlation", clustering_distance_cols="correlation")
#  dev.off()

## ----genRna_workflow_single, eval=FALSE---------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="rnaseq")
#  setwd("rnaseq")

## ----genChip_workflow_single, eval=FALSE--------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="chipseq")
#  setwd("chipseq")

## ----genVar_workflow_single, eval=FALSE---------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="varseq")
#  setwd("varseq")

## ----genRibo_workflow_single, eval=FALSE--------------------------------------
#  library(systemPipeRdata)
#  genWorkenvir(workflow="riboseq")
#  setwd("riboseq")

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the systemPipeR package in your browser

Any scripts or data that you put into this service are public.

systemPipeR documentation built on Jan. 26, 2021, 2 a.m.