R/tools.R

Defines functions unicycler test STAR rapsearch2 megahit kraken2 diamond_blastx diamond_blastp demucs clustalo bowtie2 bowtie2 alphafold

Documented in alphafold bowtie2 clustalo demucs diamond_blastp diamond_blastx kraken2 megahit rapsearch2 STAR test unicycler

#' AlphaFold Client
#'
#' Runs AlphaFold via Toolchest.
#'
#' @param inputs Path or list of paths (client-side) to be passed in as input.
#' @param output_path (optional) Path to the directory where the output file(s) will be downloaded.
#' @param model_preset (optional) AlphaFold model to use; one of monomer, monomer_casp14, monomer_ptm, or multimer. Monomer is used if not provided.
#' @param max_template_date (optional) Date in YYYY-MM-DD format. Setting a date before a protein was added to the database allows predicting the structure of proteins already in the database. Today's date is used if not provided.
#' @param use_reduced_dbs (optional) Uses a smaller version of the BFD database, which reduces run time at the cost of result quality.
#' @param is_prokaryote_list (optional) Booleans that determine whether all input sequences in the given FASTA file are prokaryotic. Expects the string that would normally be input into AlphaFold (e.g. "true,true" if there are two prokaryote inputs).
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#'
#' @return Reference to an object with output location data.
#'
#' @export
alphafold <- function(inputs, output_path = NULL, model_preset = NULL, max_template_date = NULL,
                      use_reduced_dbs = FALSE, is_prokaryote_list = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    model_preset = model_preset,
    max_template_date = max_template_date,
    use_reduced_dbs = use_reduced_dbs,
    is_prokaryote_list = is_prokaryote_list
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$alphafold, call_args)
  result
}

#' BLASTN Client
#'
#' Runs BLASTN via Toolchest.
#'
#' If `output_path` is left unspecified, downloading will be skipped.
#'
#' @param tool_args (optional) Additional arguments to be passed to BLASTN.
#' @param inputs Path to a file that will be passed in as input. Only FASTA formats are supported.
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param output_primary_name (optional) Base name of output file. Defaults to `blastn_results_out`.
#' @param database_name (optional) Name of database to use for BLASTN. Defaults to `blastn_nt`.
#' @param database_version (optional) Version of database to use for BLASTN. Defaults to `1`.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
# Named `blastn` rather than `bowtie2`: the Bowtie 2 wrapper defined later in
# this file uses the name `bowtie2` and would overwrite this function, leaving
# BLASTN unreachable.
blastn <- function(tool_args = "", inputs, output_path = NULL, output_primary_name = NULL,
                   database_name = "blastn_nt", database_version = "1", ...) {
  # NOTE(review): the `blastn_nt` default follows this function's documentation;
  # confirm it matches the database name expected by the Toolchest service.
  toolchest_args <- c(
    list(
      inputs = inputs,
      output_path = output_path,
      output_primary_name = output_primary_name,
      database_name = database_name,
      database_version = database_version,
      tool_args = tool_args
    ),
    # Extra arguments supplied via `...` are appended unchanged.
    list(...)
  )
  output <- .do.toolchest.call(toolchest_client$blastn, toolchest_args)
  output
}

#' Bowtie 2 Client
#'
#' Starts a Bowtie 2 alignment query using Toolchest.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args (optional) Additional arguments to be passed to Bowtie 2.
#' @param inputs Path or list of paths to be passed in as input.
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param database_name (optional) Name of database to use for Bowtie 2 alignment. Uses the GRCh38 no-alt analysis set ("GRCh38_noalt_as") by default. Index files generated by the Langmead lab.
#' @param database_version (optional) Version of database to use for Bowtie 2 alignment. Defaults to "1".
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
bowtie2 <- function(tool_args = "", inputs, output_path = NULL, database_name = "GRCh38_noalt_as",
                    database_version = "1", ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    database_name = database_name,
    database_version = database_version,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$bowtie2, call_args)
  result
}

#' Clustal Omega Client
#'
#' Runs Clustal Omega via Toolchest.
#'
#' @param tool_args Additional arguments to be passed to Clustal Omega.
#' @param inputs Path (client-side) to a FASTA file that will be passed in as input.
#' @param output_path (optional) Path to the directory where the output file(s) will be downloaded.
#' @param output_primary_name (optional) Base name of output file.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
clustalo <- function(tool_args = "", inputs, output_path = NULL,
                     output_primary_name = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    output_primary_name = output_primary_name,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$clustalo, call_args)
  result
}

#' Demucs Client
#'
#' Runs Demucs via Toolchest.
#'
#' @param tool_args Additional arguments to be passed to Demucs.
#' @param inputs Path to a file that will be passed in as input. All formats supported by ffmpeg are allowed.
#' @param output_path (optional) Path where the output will be downloaded.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
demucs <- function(tool_args = "", inputs, output_path = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$demucs, call_args)
  result
}

#' Diamond BLASTp Client
#'
#' Runs Diamond in BLASTp mode via Toolchest.
#'
#' @param tool_args Additional arguments to be passed to Diamond BLASTp.
#' @param inputs Path to a file that will be passed in as input. FASTA or FASTQ formats are supported (it may be gzip compressed).
#' @param output_path (optional) Path to the directory where the output file(s) will be downloaded.
#'   The log file (diamond.log) will be downloaded into the same directory as the output file(s).
#' @param output_primary_name (optional) Base name of output file.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
diamond_blastp <- function(tool_args = "", inputs, output_path = NULL,
                           output_primary_name = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    output_primary_name = output_primary_name,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$diamond_blastp, call_args)
  result
}

#' Diamond BLASTx Client
#'
#' Runs Diamond in BLASTx mode via Toolchest.
#'
#' @param tool_args Additional arguments to be passed to Diamond BLASTx.
#' @param inputs Path to a file that will be passed in as input. FASTA or FASTQ formats are supported (it may be gzip compressed).
#' @param output_path (optional) Path to the directory where the output file(s) will be downloaded.
#'   The log file (diamond.log) will be downloaded into the same directory as the output file(s).
#' @param output_primary_name (optional) Base name of output file.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
diamond_blastx <- function(tool_args = "", inputs, output_path = NULL,
                           output_primary_name = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    output_primary_name = output_primary_name,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$diamond_blastx, call_args)
  result
}

#' Kraken 2 Client
#'
#' Starts a Kraken 2 query using Toolchest.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args (optional) Additional arguments to be passed to Kraken 2.
#' @param read_one Path of input file to be passed in as read one.
#' @param read_two (optional) Path of input file to be passed in as read two.
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param database_name (optional) Name (string) of database to use for Kraken 2 alignment.
#' @param database_version (optional) Version (string) of database to use for Kraken 2 alignment.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @note If the database is unspecified, Kraken 2 will use the standard
#' database, using index files generated by Ben Langmead's lab at
#' \url{https://benlangmead.github.io/aws-indexes/k2}.
#'
#' @export
kraken2 <- function(tool_args = "", read_one, read_two = NULL, output_path = NULL,
                    database_name = "standard", database_version = "1", ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    tool_args = tool_args,
    read_one = read_one,
    read_two = read_two,
    output_path = output_path,
    database_name = database_name,
    database_version = database_version
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$kraken2, call_args)
  result
}

#' MEGAHIT Client
#'
#' Starts a MEGAHIT query using Toolchest.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args (optional) Additional arguments to be passed to MEGAHIT.
#' @param read_one (optional) `-1` inputs. Path (string) or list of paths for read 1 of paired-read input files.
#' @param read_two (optional) `-2` inputs. Path (string) or list of paths for read 2 of paired-read input files.
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param interleaved (optional) Path (string) or list of paths for interleaved paired-end files.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
megahit <- function(tool_args = "", read_one = NULL, read_two = NULL, output_path = NULL,
                    interleaved = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    tool_args = tool_args,
    read_one = read_one,
    read_two = read_two,
    output_path = output_path,
    interleaved = interleaved
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$megahit, call_args)
  result
}

#' RAPSearch2 Client
#'
#' Runs RAPSearch2 via Toolchest.
#'
#' @param tool_args (optional) Additional arguments to be passed to RAPSearch2.
#' @param inputs Path to a FASTA/FASTQ file that will be passed in as input.
#' @param output_path (optional) Path (client-side) to a directory where the output files will be downloaded.
#' @param output_primary_name (optional) Base name of output file(s).
#'   (Functions the same way as the "-o" tag for RAPSearch2, in combination with `output_path`.)
#' @param database_name (optional) Name of database to use for RAPSearch2 alignment. Defaults to SeqScreen DB.
#' @param database_version (optional) Version of database to use for RAPSearch2 alignment. Defaults to 1.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
rapsearch2 <- function(tool_args = "", inputs, output_path = NULL, output_primary_name = NULL,
                       database_name = "rapsearch2_seqscreen", database_version = "1", ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    tool_args = tool_args,
    inputs = inputs,
    output_path = output_path,
    output_primary_name = output_primary_name,
    database_name = database_name,
    database_version = database_version
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$rapsearch2, call_args)
  result
}

#' STAR Client
#'
#' Starts a STAR mapping query using Toolchest.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args (optional) Additional arguments to be passed to STAR.
#' @param read_one Path of input file to be passed in as Read 1 (-1).
#' @param read_two (optional) Path of input file to be passed in as Read 2 (-2).
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param database_name (optional) Name (string) of database to use for STAR mapping. Defaults to "GRCh38".
#' @param database_version (optional) Version (string) of database to use for STAR mapping.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @note `read_one` is required. If paired-end reads are not used, the input
#' file should be passed in as `read_one`.
#'
#' `database_name` should be specified as well.
#'
#' @export
STAR <- function(tool_args = "", read_one, read_two = NULL, output_path = NULL,
                 database_name = "GRCh38", database_version = "1", ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    tool_args = tool_args,
    read_one = read_one,
    read_two = read_two,
    output_path = output_path,
    database_name = database_name,
    database_version = database_version
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$STAR, call_args)
  result
}

#' Test Pipeline Segment
#'
#' Runs a test pipeline segment via Toolchest. A plain text file containing 'success' is returned.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args Additional arguments, present to maintain a consistent interface. This is disregarded.
#' @param inputs Path or list of paths (client-side) to be passed in as input.
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
test <- function(tool_args = "", inputs, output_path = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    inputs = inputs,
    output_path = output_path,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$test, call_args)
  result
}

#' Unicycler Client
#'
#' Runs Unicycler via Toolchest.
#'
#' @note If some but not all of `read_one`, `read_two`, or `long_reads` are
#' unspecified, Toolchest will assume that they are intended to be blank.
#' If all are left blank, the user will be prompted to provide input file(s)
#' and option(s).
#'
#' At least one input filepath is needed for Toolchest to run Unicycler.
#'
#' Downloading is skipped when `output_path` is left unspecified.
#'
#' @param tool_args (optional) Additional arguments to be passed to Unicycler.
#' @param read_one Path of input file (FASTQ) to be passed in as Read 1 (-1).
#' @param read_two Path of input file (FASTQ) to be passed in as Read 2 (-2).
#' @param long_reads Path of input file (FASTA) to be passed in as long reads (-l).
#' @param output_path (optional) Path to a directory where the output file(s) will be downloaded.
#' @param is_async (optional) Whether to run a job asynchronously. Defaults to false. Supplied through `...`.
#' @param ... Further named arguments (such as `is_async`), appended after the arguments above in the list handed to the Toolchest call.
#' @return Reference to an object with output location data.
#'
#' @export
unicycler <- function(tool_args = "", read_one = NULL, read_two = NULL,
                      long_reads = NULL, output_path = NULL, ...) {
  # Explicitly named arguments go first; NULL entries are kept in the list.
  named_args <- list(
    output_path = output_path,
    read_one = read_one,
    read_two = read_two,
    long_reads = long_reads,
    tool_args = tool_args
  )
  # Anything passed through `...` is appended unchanged.
  call_args <- c(named_args, list(...))
  result <- .do.toolchest.call(toolchest_client$unicycler, call_args)
  result
}
gotoolchest/toolchest-client-r documentation built on July 14, 2022, 7 a.m.