check_params <- function(x){
needtoexit = FALSE
for(i in 1:length(x)){
if(is.null(x[[i]])){
message("Seems this argument is null, please check: ", names(x[i]))
needtoexit = TRUE
}
}
if(needtoexit) stop("Some params are not defined properly, please check and resubmit.")
}
#' picard_rg
#' @export
#' @importFrom tools file_path_sans_ext
picard_rg <- function(x,
samplename = opts_flow$get("samplename"),
## convert these into get option also, only for this flow
rg_lane = opts_flow$get("rg_lane"),
rg_platform = opts_flow$get("rg_platform"),
rg_center = opts_flow$get("rg_center"),
java_exe = opts_flow$get("java_exe"),
java_mem = opts_flow$get("java_mem"),
java_tmp = opts_flow$get("java_tmp"),
picard_jar = opts_flow$get("picard_jar")){
check_args()
## make this editable later ....
rgid = rglb = rgsm = samplename
rgpu = rg_lane
## add RG to the orignal bam name
bamrg_files = sprintf("%s_rg.bam", tools::file_path_sans_ext(x))
cmds = list(fixrg = sprintf("%s %s -Djava.io.tmpdir=%s -jar %s AddOrReplaceReadGroups INPUT=%s OUTPUT=%s SORT_ORDER=coordinate RGID='%s' RGLB='%s' RGPL='%s' RGPU='%s' RGSM='%s' RGCN='%s' VALIDATION_STRINGENCY=LENIENT",
java_exe, java_mem, java_tmp, picard_jar,
x, bamrg_files, rgid, rglb,
rg_platform, rgpu, rgsm, rg_center))
flowmat = to_flowmat(cmds, samplename)
ret = list(outfiles = bamrg_files, flowmat = flowmat)
return(ret)
}
#' Use picard's MergeSamFiles tool to merge bam/sam files
#'
#' @description
#' The resulting file is sorted and index is created for it.
#' Validation stringency of inputs is kept as lenient.
#' Multi-threading is turned on by default, though in our experience this does
#' not seem to use a lot of threads.
#'
#' @param x a vectors of files to merge
#' @param mergedbam
#' @param samplename
#' @param java_exe
#' @param java_mem
#' @param java_tmp
#' @param picard_dir
#'
#' @export
picard_merge <- function(x,
mergedbam,
samplename = opts_flow$get("samplename"),
java_exe = opts_flow$get("java_exe"),
java_mem = opts_flow$get("java_mem"),
java_tmp = opts_flow$get("java_tmp"),
picard_dir = opts_flow$get("picard_dir")){
check_args()
bam_list = paste("INPUT=", x, sep = "", collapse = " ")
cmds = list(merge = sprintf("%s %s -Djava.io.tmpdir=%s -jar %s/picard.jar MergeSamFiles %s OUTPUT=%s ASSUME_SORTED=TRUE VALIDATION_STRINGENCY=LENIENT CREATE_INDEX=true USE_THREADING=true",
java_exe, java_mem, java_tmp, picard_dir, bam_list, mergedbam))
## --- INPUT is a NAMED list
flowmat = to_flowmat(cmds, samplename)
return(list(outfiles = mergedbam, flowmat = flowmat))
}
#' Title
#'
#' @param x
#' @param mergedbam
#' @param samplename
#' @param java_exe
#' @param java_mem
#' @param java_tmp
#' @param picard_dir
#'
#' @export
picard_bam_fastq <- function(bam,
samplename = opts_flow$get("samplename"),
paired = TRUE,
split = FALSE,
num_reads = 8000000,
split_fq_exe = system.file("scripts/split_fq", package = "ngsflows"),
java_exe = opts_flow$get("java_exe"),
java_mem = opts_flow$get("java_mem"),
java_tmp = opts_flow$get("java_tmp"),
picard_dir = opts_flow$get("picard_dir"),
bam_fastq_opts = "INCLUDE_NON_PF_READS=true INCLUDE_NON_PRIMARY_ALIGNMENTS=false RE_REVERSE=true VALIDATION_STRINGENCY=LENIENT"){
check_args()
if(paired){
fq1 = gsub(".bam$", "_1.fastq", bam)
fq2 = gsub(".bam$", "_2.fastq", bam)
fq3 = gsub(".bam$", "_unpaired.fastq", bam)
fqs = list(fq1 = fq1, fq2 = fq2, fq3 = fq3)
fqout = sprintf("FASTQ=%s SECOND_END_FASTQ=%s UNPAIRED_FASTQ=%s",
fq1, fq2, fq3)
# we would stream picard into creating splitted fq files. A second flow would read these and start the next step.
}else{
fq1 = gsub(".bam$", ".fastq", bam)
fqout = paste0("FASTQ=", fq1)
fqs = list(fq1 = fq1)
}
if(split){
fq = gsub(".bam$", "", bam)
fqout = sprintf("FASTQ=/dev/stdout INTERLEAVE=true | bash %s -n %s -f /dev/stdin -o %s",
split_fq_exe, as.integer(num_reads), fq)
}
bam_fastq = sprintf("%s %s -Djava.io.tmpdir=%s -jar %s/picard.jar SamToFastq INPUT=%s %s %s",
java_exe, java_mem, java_tmp, picard_dir, bam, bam_fastq_opts, fqout)
cmd = list(bam_fastq = bam_fastq)
#system(unlist(cmd))
# INPUT is a NAMED list ------
flowmat = to_flowmat(cmd, samplename)
return(list(flowmat = flowmat, outfiles = fqs))
}
#' Use picard's MergeSamFiles tool to merge bam/sam files
#'
#' @description
#' The resulting file is sorted and index is created for it.
#' Validation stringency of inputs is kept as lenient.
#' Multi-threading is turned on by default, though in our experience this does
#' not seem to use a lot of threads.
#'
#' @param x a vectors of files to merge
#' @param mergedbam
#' @param samplename
#' @param java_exe
#' @param java_mem
#' @param java_tmp
#' @param picard_dir
#'
#' @export
picard_reorder <- function(x, outfile,
samplename = opts_flow$get("samplename"),
java_exe = opts_flow$get("java_exe"),
java_mem = opts_flow$get("java_mem"),
java_tmp = opts_flow$get("java_tmp"),
picard_dir = opts_flow$get("picard_dir"),
picard_reorder_opts = opts_flow$get("picard_reorder_opts"),
ref_fasta = opts_flow$get('ref_fasta'),
samtools_exe = opts_flow$get("samtools_exe")){
if(missing(outfile))
outfile = gsub(".bam$", "_reorder.bam", x)
check_args()
reorder = sprintf("%s %s -Djava.io.tmpdir=%s -jar %s/picard.jar ReorderSam INPUT=%s OUTPUT=%s REFERENCE=%s;%s index %s",
java_exe, java_mem, java_tmp, picard_dir, x, outfile, ref_fasta, samtools_exe, outfile)
## --- INPUT is a NAMED list
flowmat = to_flowmat(list(reorder = reorder), samplename)
return(list(flowmat = flowmat, outfiles = outfile))
}
if(FALSE){
## test example
fqs1 = rep("read1.fq", 20)
fqs2 = rep("read2.fq", 20)
out_bwa = bwa(fqs1 = rep("read1.fq", 20),fqs2 = rep("read2.fq", 20))
undebug(to_flowmat)
#debug(picard_rg);
out_pic = picard_rg(x = out_bwa$outfiles, samplename = "smp")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.